Annotation of embedaddon/pcre/pcre_exec.c, revision 1.1.1.3

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.1.1.2   misho       9:            Copyright (c) 1997-2012 University of Cambridge
1.1       misho      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: /* This module contains pcre_exec(), the externally visible function that does
                     41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     42: possible. There are also some static supporting functions. */
                     43: 
                     44: #ifdef HAVE_CONFIG_H
                     45: #include "config.h"
                     46: #endif
                     47: 
                     48: #define NLBLOCK md             /* Block containing newline information */
                     49: #define PSSTART start_subject  /* Field containing processed string start */
                     50: #define PSEND   end_subject    /* Field containing processed string end */
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: /* Undefine some potentially clashing cpp symbols */
                     55: 
                     56: #undef min
                     57: #undef max
                     58: 
                     59: /* Values for setting in md->match_function_type to indicate two special types
                     60: of call to match(). We do it this way to save on using another stack variable,
                     61: as stack usage is to be discouraged. */
                     62: 
                     63: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
                     64: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
                     65: 
                     66: /* Non-error returns from the match() function. Error returns are externally
                     67: defined PCRE_ERROR_xxx codes, which are all negative. */
                     68: 
                     69: #define MATCH_MATCH        1  /* match() matched */
                     70: #define MATCH_NOMATCH      0  /* match() failed to match */
                     71: 
                     72: /* Special internal returns from the match() function, used for PRUNE, SKIP, etc.
                     73: Make them sufficiently negative to avoid the external error codes. */
                     74: 
                     75: #define MATCH_ACCEPT       (-999)
                     76: #define MATCH_COMMIT       (-998)
                     77: #define MATCH_KETRPOS      (-997)
                     78: #define MATCH_ONCE         (-996)
                     79: #define MATCH_PRUNE        (-995)
                     80: #define MATCH_SKIP         (-994)
                     81: #define MATCH_SKIP_ARG     (-993)
                     82: #define MATCH_THEN         (-992)
                     83: 
                     84: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     85: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     86: because the offset vector is always a multiple of 3 long. */
                     87: 
                     88: #define REC_STACK_SAVE_MAX 30  /* 10 * 3 */
                     89: 
                     90: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     91: 
                     92: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* minimum count for each of the six repeat kinds */
                     93: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* maximum count; 0 means no upper limit */
                     94: 
                     95: 
                     96: 
                     97: #ifdef PCRE_DEBUG
                     98: /*************************************************
                     99: *        Debugging function to print chars       *
                    100: *************************************************/
                    101: 
                    102: /* Print a sequence of chars in printable format, stopping at the end of the
                    103: subject if so requested. Values that are not printable are shown as \x plus hex digits.
                    104: 
                    105: Arguments:
                    106:   p           points to characters
                    107:   length      number to print
                    108:   is_subject  TRUE if printing from within md->start_subject
                    109:   md          pointer to matching data block, if is_subject is TRUE
                    110: 
                    111: Returns:     nothing
                    112: */
                    113: 
                    114: static void
1.1.1.2   misho     115: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1       misho     116: {
                    117: unsigned int c;
                    118: if (is_subject && length > md->end_subject - p) length = (int)(md->end_subject - p);  /* clamp to the end of the subject */
                    119: while (length-- > 0)  /* isprint() is only defined for unsigned char values, so the range is tested first */
                    120:   if ((c = *(p++)) <= 0xff && isprint(c)) printf("%c", c); else printf("\\x%02x", c);
                    121: }
                    122: #endif
                    123: 
                    124: 
                    125: 
                    126: /*************************************************
                    127: *          Match a back-reference                *
                    128: *************************************************/
                    129: 
                    130: /* Normally, if a back reference hasn't been set, the length that is passed is
                    131: negative, so the match always fails. However, in JavaScript compatibility mode,
                    132: the length passed is zero. Note that in caseless UTF-8 mode, the number of
                    133: subject bytes matched may be different to the number of reference bytes.
                    134: 
                    135: Arguments:
                    136:   offset      index into the offset vector of the start offset of the reference
                    137:   eptr        pointer into the subject
                    138:   length      length of reference to be matched (number of bytes)
                    139:   md          points to match data block
                    140:   caseless    TRUE if caseless
                    141: 
1.1.1.3 ! misho     142: Returns:      >= 0 the number of subject bytes matched
        !           143:               -1 no match (also returned when length is negative)
        !           144:               -2 partial match: the subject ended before the whole reference was compared
1.1       misho     145: */
                    146: 
                    147: static int
1.1.1.2   misho     148: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
1.1       misho     149:   BOOL caseless)
                    150: {
1.1.1.2   misho     151: PCRE_PUCHAR eptr_start = eptr;  /* kept so that the amount of subject consumed can be returned */
                    152: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  /* start of the reference text */
1.1       misho     153: 
                    154: #ifdef PCRE_DEBUG
                    155: if (eptr >= md->end_subject)
                    156:   printf("matching subject <null>");
                    157: else
                    158:   {
                    159:   printf("matching subject ");
                    160:   pchars(eptr, length, TRUE, md);
                    161:   }
                    162: printf(" against backref ");
                    163: pchars(p, length, FALSE, md);
                    164: printf("\n");
                    165: #endif
                    166: 
1.1.1.3 ! misho     167: /* Always fail if reference not set (and not JavaScript compatible - in that
        !           168: case the length is passed as zero). */
1.1       misho     169: 
                    170: if (length < 0) return -1;
                    171: 
                    172: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    173: properly if Unicode properties are supported. Otherwise, we can check only
                    174: ASCII characters. */
                    175: 
                    176: if (caseless)
                    177:   {
1.1.1.2   misho     178: #ifdef SUPPORT_UTF
1.1       misho     179: #ifdef SUPPORT_UCP
1.1.1.2   misho     180:   if (md->utf)
1.1       misho     181:     {
                    182:     /* Match characters up to the end of the reference. NOTE: the number of
                    183:     bytes matched may differ, because there are some characters whose upper and
                    184:     lower case versions code as different numbers of bytes. For example, U+023A
                    185:     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
                    186:     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
                    187:     the latter. It is important, therefore, to check the length along the
                    188:     reference, not along the subject (earlier code did this wrong). */
                    189: 
1.1.1.2   misho     190:     PCRE_PUCHAR endptr = p + length;  /* end of the reference text */
1.1       misho     191:     while (p < endptr)
                    192:       {
                    193:       int c, d;
1.1.1.3 ! misho     194:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1       misho     195:       GETCHARINC(c, eptr);
                    196:       GETCHARINC(d, p);
                    197:       if (c != d && c != UCD_OTHERCASE(d)) return -1;  /* c is neither d nor UCD_OTHERCASE(d) */
                    198:       }
                    199:     }
                    200:   else
                    201: #endif
                    202: #endif
                    203: 
                    204:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    205:   is no UCP support. */
                    206:     {
                    207:     while (length-- > 0)
1.1.1.2   misho     208:       {
1.1.1.3 ! misho     209:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1.1.2   misho     210:       if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;  /* compare after mapping both sides through md->lcc */
                    211:       p++;
                    212:       eptr++;
                    213:       }
1.1       misho     214:     }
                    215:   }
                    216: 
                    217: /* In the caseful case, we can just compare the bytes, whether or not we
                    218: are in UTF-8 mode. */
                    219: 
                    220: else
                    221:   {
1.1.1.3 ! misho     222:   while (length-- > 0)
        !           223:     {
        !           224:     if (eptr >= md->end_subject) return -2;   /* Partial match */
        !           225:     if (*p++ != *eptr++) return -1;
        !           226:     }
1.1       misho     227:   }
                    228: 
                    229: return (int)(eptr - eptr_start);  /* amount of subject consumed by the match */
                    230: }
                    231: 
                    232: 
                    233: 
                    234: /***************************************************************************
                    235: ****************************************************************************
                    236:                    RECURSION IN THE match() FUNCTION
                    237: 
                    238: The match() function is highly recursive, though not every recursive call
                    239: increases the recursive depth. Nevertheless, some regular expressions can cause
                    240: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    241: itself recursively. This uses the stack for saving everything that has to be
                    242: saved for a recursive call. On Unix, the stack can be large, and this works
                    243: fine.
                    244: 
                    245: It turns out that on some non-Unix-like systems there are problems with
                    246: programs that use a lot of stack. (This despite the fact that every last chip
                    247: has oodles of memory these days, and techniques for extending the stack have
                    248: been known for decades.) So....
                    249: 
                    250: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    251: calls by keeping local variables that need to be preserved in blocks of memory
                    252: obtained from malloc() instead of on the stack. Macros are used to
                    253: achieve this so that the actual code doesn't look very different to what it
                    254: always used to.
                    255: 
                    256: The original heap-recursive code used longjmp(). However, it seems that this
                    257: can be very slow on some operating systems. Following a suggestion from Stan
                    258: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    259: provide a unique number for each call to RMATCH. There is no way of generating
                    260: a sequence of numbers at compile time in C. I have given them names, to make
                    261: them stand out more clearly.
                    262: 
                    263: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    264: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    265: tests. Furthermore, not using longjmp() means that local dynamic variables
                    266: don't have indeterminate values; this has meant that the frame size can be
                    267: reduced because the result can be "passed back" by straight setting of the
                    268: variable instead of being passed in the frame.
                    269: ****************************************************************************
                    270: ***************************************************************************/
                    271: 
                    272: /* Numbers for RMATCH calls (its "rw" argument; the heap version saves it in Xwhere).
                    273: When this list is changed, the code at HEAP_RETURN below must be updated in sync. */
                    274: 
                    275: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    276:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    277:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    278:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    279:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    280:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
                    281:        RM61,  RM62, RM63, RM64, RM65, RM66 };
                    282: 
                    283: /* These versions of the macros use the stack, as normal: RMATCH calls match()
                    284: with rdepth+1 and puts the result in rrc, and RRETURN is a return statement.
                    285: The debugging versions also print line numbers. "rw" isn't used by RMATCH here. */
                    286: 
                    287: #ifndef NO_RECURSE
                    288: #define REGISTER register
                    289: 
                    290: #ifdef PCRE_DEBUG
                    291: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    292:   { \
                    293:   printf("match() called in line %d\n", __LINE__); \
                    294:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
                    295:   printf("to line %d\n", __LINE__); \
                    296:   }
                    297: #define RRETURN(ra) \
                    298:   { \
                    299:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    300:   return ra; \
                    301:   }
                    302: #else
                    303: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    304:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
                    305: #define RRETURN(ra) return ra
                    306: #endif
                    307: 
                    308: #else
                    309: 
                    310: 
                    311: /* These versions of the macros manage a private stack on the heap. Note that
                    312: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    313: argument of match(), which never changes. */
                    314: 
                    315: #define REGISTER
                    316: 
                    317: #define RMATCH(ra,rb,rc,rd,re,rw)\
                    318:   {\
1.1.1.3 ! misho     319:   heapframe *newframe = frame->Xnextframe; /* reuse an already linked frame if there is one */\
        !           320:   if (newframe == NULL)\
        !           321:     {\
        !           322:     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe)); /* otherwise get a new one */\
        !           323:     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
        !           324:     newframe->Xnextframe = NULL;\
        !           325:     frame->Xnextframe = newframe; /* link it in so that it is found next time */\
        !           326:     }\
        !           327:   frame->Xwhere = rw; /* remember which RMn number this call site has */\
1.1       misho     328:   newframe->Xeptr = ra;\
                    329:   newframe->Xecode = rb;\
                    330:   newframe->Xmstart = mstart;\
                    331:   newframe->Xoffset_top = rc;\
                    332:   newframe->Xeptrb = re;\
                    333:   newframe->Xrdepth = frame->Xrdepth + 1; /* one level deeper */\
                    334:   newframe->Xprevframe = frame;\
                    335:   frame = newframe; /* the new frame becomes the current one */\
                    336:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    337:   goto HEAP_RECURSE;\
                    338:   L_##rw:\
                    339:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    340:   }
                    341: 
                    342: #define RRETURN(ra)\
                    343:   {\
                    344:   heapframe *oldframe = frame;\
                    345:   frame = oldframe->Xprevframe; /* step back to the calling frame */\
                    346:   if (frame != NULL)\
                    347:     {\
                    348:     rrc = ra; /* the result is handed back through rrc */\
                    349:     goto HEAP_RETURN;\
                    350:     }\
                    351:   return ra; /* no previous frame, so return from the function */\
                    352:   }
                    353: 
                    354: 
                    355: /* Structure for remembering the local variables in a private frame. In match(), macros map most names (e.g. eptr) onto frame->Xname. */
                    356: 
                    357: typedef struct heapframe {
                    358:   struct heapframe *Xprevframe;  /* frame that RRETURN goes back to */
1.1.1.3 ! misho     359:   struct heapframe *Xnextframe;  /* frame that the next RMATCH reuses, or NULL */
1.1       misho     360: 
                    361:   /* Function arguments that may change */
                    362: 
1.1.1.2   misho     363:   PCRE_PUCHAR Xeptr;
                    364:   const pcre_uchar *Xecode;
                    365:   PCRE_PUCHAR Xmstart;
1.1       misho     366:   int Xoffset_top;
                    367:   eptrblock *Xeptrb;
                    368:   unsigned int Xrdepth;
                    369: 
                    370:   /* Function local variables */
                    371: 
1.1.1.2   misho     372:   PCRE_PUCHAR Xcallpat;
                    373: #ifdef SUPPORT_UTF
                    374:   PCRE_PUCHAR Xcharptr;
                    375: #endif
                    376:   PCRE_PUCHAR Xdata;
                    377:   PCRE_PUCHAR Xnext;
                    378:   PCRE_PUCHAR Xpp;
                    379:   PCRE_PUCHAR Xprev;
                    380:   PCRE_PUCHAR Xsaved_eptr;
1.1       misho     381: 
                    382:   recursion_info Xnew_recursive;
                    383: 
                    384:   BOOL Xcur_is_word;
                    385:   BOOL Xcondition;
                    386:   BOOL Xprev_is_word;
                    387: 
                    388: #ifdef SUPPORT_UCP
                    389:   int Xprop_type;
                    390:   int Xprop_value;
                    391:   int Xprop_fail_result;
                    392:   int Xoclength;
1.1.1.2   misho     393:   pcre_uchar Xocchars[6];
1.1       misho     394: #endif
                    395: 
                    396:   int Xcodelink;
                    397:   int Xctype;
                    398:   unsigned int Xfc;
                    399:   int Xfi;
                    400:   int Xlength;
                    401:   int Xmax;
                    402:   int Xmin;
                    403:   int Xnumber;
                    404:   int Xoffset;
                    405:   int Xop;
                    406:   int Xsave_capture_last;
                    407:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    408:   int Xstacksave[REC_STACK_SAVE_MAX];
                    409: 
                    410:   eptrblock Xnewptrb;
                    411: 
                    412:   /* Where to jump back to: the RMn number that RMATCH stored in this frame */
                    413: 
                    414:   int Xwhere;
                    415: 
                    416: } heapframe;
                    417: 
                    418: #endif
                    419: 
                    420: 
                    421: /***************************************************************************
                    422: ***************************************************************************/
                    423: 
                    424: 
                    425: 
                    426: /*************************************************
                    427: *         Match from current position            *
                    428: *************************************************/
                    429: 
                    430: /* This function is called recursively in many circumstances. Whenever it
                    431: returns a negative (error) response, the outer incarnation must also return the
                    432: same response. */
                    433: 
                    434: /* These macros pack up tests that are used for partial matching, and which
                    435: appear several times in the code. If partial matching is on (md->partial != 0),
                    436: we set md->hitend when the pointer is at or beyond the end of the subject and
                    437: also beyond md->start_used_ptr (i.e. something has been matched). For hard
                    438: partial matching (md->partial > 1) we then return PCRE_ERROR_PARTIAL at once.
                    439: The second one omits the end-of-subject test, for use when that is already known. */
                    440: 
                    441: #define CHECK_PARTIAL()\
                    442:   if (md->partial != 0 && eptr >= md->end_subject && \
                    443:       eptr > md->start_used_ptr) \
                    444:     { \
                    445:     md->hitend = TRUE; \
                    446:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
                    447:     }
                    448: 
                    449: #define SCHECK_PARTIAL()\
                    450:   if (md->partial != 0 && eptr > md->start_used_ptr) \
                    451:     { \
                    452:     md->hitend = TRUE; \
                    453:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
                    454:     }
                    455: 
                    456: 
                    457: /* Performance note: It might be tempting to extract commonly used fields from
1.1.1.2   misho     458: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1       misho     459: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    460: made performance worse.
                    461: 
                    462: Arguments:
                    463:    eptr        pointer to current character in subject
                    464:    ecode       pointer to current position in compiled code
                    465:    mstart      pointer to the current match start position (can be modified
                    466:                  by encountering \K)
                    467:    offset_top  current top pointer
                    468:    md          pointer to "static" info for the match
                    469:    eptrb       pointer to chain of blocks containing eptr at start of
                    470:                  brackets - for testing for empty matches
                    471:    rdepth      the recursion depth
                    472: 
                    473: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    474:                MATCH_NOMATCH if failed to match  )
                    475:                a negative MATCH_xxx value for PRUNE, SKIP, etc
                    476:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    477:                  (e.g. stopped by repeated call or recursion limit)
                    478: */
                    479: 
                    480: static int
1.1.1.2   misho     481: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
                    482:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
                    483:   unsigned int rdepth)
1.1       misho     484: {
                    485: /* These variables do not need to be preserved over recursion in this function,
                    486: so they can be ordinary variables in all cases. Mark some of them with
                    487: "register" because they are used a lot in loops. */
                    488: 
                    489: register int  rrc;         /* Returns from recursive calls */
                    490: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    491: register unsigned int c;   /* Character values not kept over RMATCH() calls */
1.1.1.2   misho     492: register BOOL utf;         /* Local copy of UTF flag for speed */
1.1       misho     493: 
                    494: BOOL minimize, possessive; /* Quantifier options */
                    495: BOOL caseless;
                    496: int condcode;
                    497: 
                    498: /* When recursion is not being used, all "local" variables that have to be
1.1.1.2   misho     499: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
                    500: frame on the stack here; subsequent instantiations are obtained from the heap
                    501: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
                    502: the top-level on the stack rather than malloc-ing them all gives a performance
                    503: boost in many cases where there is not much "recursion". */
1.1       misho     504: 
                    505: #ifdef NO_RECURSE
1.1.1.3 ! misho     506: heapframe *frame = (heapframe *)md->match_frames_base;
1.1       misho     507: 
                    508: /* Copy in the original argument variables */
                    509: 
                    510: frame->Xeptr = eptr;
                    511: frame->Xecode = ecode;
                    512: frame->Xmstart = mstart;
                    513: frame->Xoffset_top = offset_top;
                    514: frame->Xeptrb = eptrb;
                    515: frame->Xrdepth = rdepth;
                    516: 
                    517: /* This is the point to which control jumps back in order to effect "recursion" */
                    518: 
                    519: HEAP_RECURSE:
                    520: 
                    521: /* Macros make the argument variables come from the current frame */
                    522: 
                    523: #define eptr               frame->Xeptr
                    524: #define ecode              frame->Xecode
                    525: #define mstart             frame->Xmstart
                    526: #define offset_top         frame->Xoffset_top
                    527: #define eptrb              frame->Xeptrb
                    528: #define rdepth             frame->Xrdepth
                    529: 
                    530: /* Ditto for the local variables */
                    531: 
1.1.1.2   misho     532: #ifdef SUPPORT_UTF
1.1       misho     533: #define charptr            frame->Xcharptr
                    534: #endif
                    535: #define callpat            frame->Xcallpat
                    536: #define codelink           frame->Xcodelink
                    537: #define data               frame->Xdata
                    538: #define next               frame->Xnext
                    539: #define pp                 frame->Xpp
                    540: #define prev               frame->Xprev
                    541: #define saved_eptr         frame->Xsaved_eptr
                    542: 
                    543: #define new_recursive      frame->Xnew_recursive
                    544: 
                    545: #define cur_is_word        frame->Xcur_is_word
                    546: #define condition          frame->Xcondition
                    547: #define prev_is_word       frame->Xprev_is_word
                    548: 
                    549: #ifdef SUPPORT_UCP
                    550: #define prop_type          frame->Xprop_type
                    551: #define prop_value         frame->Xprop_value
                    552: #define prop_fail_result   frame->Xprop_fail_result
                    553: #define oclength           frame->Xoclength
                    554: #define occhars            frame->Xocchars
                    555: #endif
                    556: 
                    557: #define ctype              frame->Xctype
                    558: #define fc                 frame->Xfc
                    559: #define fi                 frame->Xfi
                    560: #define length             frame->Xlength
                    561: #define max                frame->Xmax
                    562: #define min                frame->Xmin
                    563: #define number             frame->Xnumber
                    564: #define offset             frame->Xoffset
                    565: #define op                 frame->Xop
                    566: #define save_capture_last  frame->Xsave_capture_last
                    567: #define save_offset1       frame->Xsave_offset1
                    568: #define save_offset2       frame->Xsave_offset2
                    569: #define save_offset3       frame->Xsave_offset3
                    570: #define stacksave          frame->Xstacksave
                    571: 
                    572: #define newptrb            frame->Xnewptrb
                    573: 
                    574: /* When recursion is being used, local variables are allocated on the stack and
                    575: get preserved during recursion in the normal way. In this environment, fi and
                    576: i, and fc and c, can be the same variables. */
                    577: 
                    578: #else         /* NO_RECURSE not defined */
                    579: #define fi i
                    580: #define fc c
                    581: 
                    582: /* Many of the following variables are used only in small blocks of the code.
                    583: My normal style of coding would have declared them within each of those blocks.
                    584: However, in order to accommodate the version of this code that uses an external
                    585: "stack" implemented on the heap, it is easier to declare them all here, so the
                    586: declarations can be cut out in a block. The only declarations within blocks
                    587: below are for variables that do not have to be preserved over a recursive call
                    588: to RMATCH(). */
                    589: 
1.1.1.2   misho     590: #ifdef SUPPORT_UTF
                    591: const pcre_uchar *charptr;
1.1       misho     592: #endif
1.1.1.2   misho     593: const pcre_uchar *callpat;
                    594: const pcre_uchar *data;
                    595: const pcre_uchar *next;
                    596: PCRE_PUCHAR       pp;
                    597: const pcre_uchar *prev;
                    598: PCRE_PUCHAR       saved_eptr;
1.1       misho     599: 
                    600: recursion_info new_recursive;
                    601: 
                    602: BOOL cur_is_word;
                    603: BOOL condition;
                    604: BOOL prev_is_word;
                    605: 
                    606: #ifdef SUPPORT_UCP
                    607: int prop_type;
                    608: int prop_value;
                    609: int prop_fail_result;
                    610: int oclength;
1.1.1.2   misho     611: pcre_uchar occhars[6];
1.1       misho     612: #endif
                    613: 
                    614: int codelink;
                    615: int ctype;
                    616: int length;
                    617: int max;
                    618: int min;
                    619: int number;
                    620: int offset;
                    621: int op;
                    622: int save_capture_last;
                    623: int save_offset1, save_offset2, save_offset3;
                    624: int stacksave[REC_STACK_SAVE_MAX];
                    625: 
                    626: eptrblock newptrb;
1.1.1.2   misho     627: 
                    628: /* There is a special fudge for calling match() in a way that causes it to
                    629: measure the size of its basic stack frame when the stack is being used for
                    630: recursion. The second argument (ecode) being NULL triggers this behaviour. It
                    631: cannot normally ever be NULL. The return is the negated value of the frame
                    632: size. */
                    633: 
                    634: if (ecode == NULL)
                    635:   {
                    636:   if (rdepth == 0)
                    637:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
                    638:   else
                    639:     {
                    640:     int len = (char *)&rdepth - (char *)eptr;
                    641:     return (len > 0)? -len : len;
                    642:     }
                    643:   }
1.1       misho     644: #endif     /* NO_RECURSE */
                    645: 
                    646: /* To save space on the stack and in the heap frame, I have doubled up on some
                    647: of the local variables that are used only in localised parts of the code, but
                    648: still need to be preserved over recursive calls of match(). These macros define
                    649: the alternative names that are used. */
                    650: 
                    651: #define allow_zero    cur_is_word
                    652: #define cbegroup      condition
                    653: #define code_offset   codelink
                    654: #define condassert    condition
                    655: #define matched_once  prev_is_word
1.1.1.2   misho     656: #define foc           number
                    657: #define save_mark     data
1.1       misho     658: 
                    659: /* These statements are here to stop the compiler complaining about uninitialized
                    660: variables. */
                    661: 
                    662: #ifdef SUPPORT_UCP
                    663: prop_value = 0;
                    664: prop_fail_result = 0;
                    665: #endif
                    666: 
                    667: 
                    668: /* This label is used for tail recursion, which is used in a few cases even
                    669: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    670: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    671: original patch. */
                    672: 
                    673: TAIL_RECURSE:
                    674: 
                    675: /* OK, now we can get on with the real code of the function. Recursive calls
                    676: are specified by the macro RMATCH and RRETURN is used to return. When
                    677: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    678: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
                    679: defined). However, RMATCH isn't like a function call because it's quite a
                    680: complicated macro. It has to be used in one particular way. This shouldn't,
                    681: however, impact performance when true recursion is being used. */
                    682: 
1.1.1.2   misho     683: #ifdef SUPPORT_UTF
                    684: utf = md->utf;       /* Local copy of the flag */
1.1       misho     685: #else
1.1.1.2   misho     686: utf = FALSE;
1.1       misho     687: #endif
                    688: 
                    689: /* First check that we haven't called match() too many times, or that we
                    690: haven't exceeded the recursive call limit. */
                    691: 
                    692: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    693: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    694: 
                    695: /* At the start of a group with an unlimited repeat that may match an empty
                    696: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
                    697: done this way to save having to use another function argument, which would take
                    698: up space on the stack. See also MATCH_CONDASSERT below.
                    699: 
                    700: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
                    701: such remembered pointers, to be checked when we hit the closing ket, in order
                    702: to break infinite loops that match no characters. When match() is called in
                    703: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
                    704: NOT be used with tail recursion, because the memory block that is used is on
                    705: the stack, so a new one may be required for each match(). */
                    706: 
                    707: if (md->match_function_type == MATCH_CBEGROUP)
                    708:   {
                    709:   newptrb.epb_saved_eptr = eptr;
                    710:   newptrb.epb_prev = eptrb;
                    711:   eptrb = &newptrb;
                    712:   md->match_function_type = 0;
                    713:   }
                    714: 
                    715: /* Now start processing the opcodes. */
                    716: 
                    717: for (;;)
                    718:   {
                    719:   minimize = possessive = FALSE;
                    720:   op = *ecode;
                    721: 
                    722:   switch(op)
                    723:     {
                    724:     case OP_MARK:
                    725:     md->nomatch_mark = ecode + 2;
                    726:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     727:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     728:       eptrb, RM55);
                    729:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    730:          md->mark == NULL) md->mark = ecode + 2;
                    731: 
                    732:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    733:     argument, and we must check whether that argument matches this MARK's
                    734:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    735:     variable). If it does match, we reset that variable to the current subject
                    736:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    737:     unaltered. */
                    738: 
                    739:     else if (rrc == MATCH_SKIP_ARG &&
1.1.1.2   misho     740:         STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
1.1       misho     741:       {
                    742:       md->start_match_ptr = eptr;
                    743:       RRETURN(MATCH_SKIP);
                    744:       }
                    745:     RRETURN(rrc);
                    746: 
                    747:     case OP_FAIL:
                    748:     RRETURN(MATCH_NOMATCH);
                    749: 
                    750:     /* COMMIT overrides PRUNE, SKIP, and THEN */
                    751: 
                    752:     case OP_COMMIT:
1.1.1.2   misho     753:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     754:       eptrb, RM52);
                    755:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
                    756:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
                    757:         rrc != MATCH_THEN)
                    758:       RRETURN(rrc);
                    759:     RRETURN(MATCH_COMMIT);
                    760: 
                    761:     /* PRUNE overrides THEN */
                    762: 
                    763:     case OP_PRUNE:
1.1.1.2   misho     764:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     765:       eptrb, RM51);
                    766:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    767:     RRETURN(MATCH_PRUNE);
                    768: 
                    769:     case OP_PRUNE_ARG:
                    770:     md->nomatch_mark = ecode + 2;
                    771:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     772:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     773:       eptrb, RM56);
                    774:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    775:          md->mark == NULL) md->mark = ecode + 2;
                    776:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    777:     RRETURN(MATCH_PRUNE);
                    778: 
                    779:     /* SKIP overrides PRUNE and THEN */
                    780: 
                    781:     case OP_SKIP:
1.1.1.2   misho     782:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     783:       eptrb, RM53);
                    784:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    785:       RRETURN(rrc);
                    786:     md->start_match_ptr = eptr;   /* Pass back current position */
                    787:     RRETURN(MATCH_SKIP);
                    788: 
                    789:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
                    790:     nomatch_mark. There is a flag that disables this opcode when re-matching a
                    791:     pattern that ended with a SKIP for which there was not a matching MARK. */
                    792: 
                    793:     case OP_SKIP_ARG:
                    794:     if (md->ignore_skip_arg)
                    795:       {
1.1.1.2   misho     796:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
1.1       misho     797:       break;
                    798:       }
1.1.1.2   misho     799:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     800:       eptrb, RM57);
                    801:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    802:       RRETURN(rrc);
                    803: 
                    804:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    805:     returning the special MATCH_SKIP_ARG return code. This will either be
                    806:     caught by a matching MARK, or get to the top, where it causes a rematch
                    807:     with the md->ignore_skip_arg flag set. */
                    808: 
                    809:     md->start_match_ptr = ecode + 2;
                    810:     RRETURN(MATCH_SKIP_ARG);
                    811: 
                    812:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
                    813:     the branch in which it occurs can be determined. Overload the start of
                    814:     match pointer to do this. */
                    815: 
                    816:     case OP_THEN:
1.1.1.2   misho     817:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     818:       eptrb, RM54);
                    819:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    820:     md->start_match_ptr = ecode;
                    821:     RRETURN(MATCH_THEN);
                    822: 
                    823:     case OP_THEN_ARG:
                    824:     md->nomatch_mark = ecode + 2;
                    825:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     826:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
1.1       misho     827:       md, eptrb, RM58);
                    828:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    829:          md->mark == NULL) md->mark = ecode + 2;
                    830:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    831:     md->start_match_ptr = ecode;
                    832:     RRETURN(MATCH_THEN);
                    833: 
                    834:     /* Handle an atomic group that does not contain any capturing parentheses.
                    835:     This can be handled like an assertion. Prior to 8.13, all atomic groups
                    836:     were handled this way. In 8.13, the code was changed as below for ONCE, so
                    837:     that backups pass through the group and thereby reset captured values.
                    838:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
                    839:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
                    840:     less stack intensive way.
                    841: 
                    842:     Check the alternative branches in turn - the matching won't pass the KET
                    843:     for this kind of subpattern. If any one branch matches, we carry on as at
                    844:     the end of a normal bracket, leaving the subject pointer, but resetting
                    845:     the start-of-match value in case it was changed by \K. */
                    846: 
                    847:     case OP_ONCE_NC:
                    848:     prev = ecode;
                    849:     saved_eptr = eptr;
1.1.1.2   misho     850:     save_mark = md->mark;
1.1       misho     851:     do
                    852:       {
                    853:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
                    854:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                    855:         {
                    856:         mstart = md->start_match_ptr;
                    857:         break;
                    858:         }
                    859:       if (rrc == MATCH_THEN)
                    860:         {
                    861:         next = ecode + GET(ecode,1);
                    862:         if (md->start_match_ptr < next &&
                    863:             (*ecode == OP_ALT || *next == OP_ALT))
                    864:           rrc = MATCH_NOMATCH;
                    865:         }
                    866: 
                    867:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    868:       ecode += GET(ecode,1);
1.1.1.2   misho     869:       md->mark = save_mark;
1.1       misho     870:       }
                    871:     while (*ecode == OP_ALT);
                    872: 
                    873:     /* If hit the end of the group (which could be repeated), fail */
                    874: 
                    875:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                    876: 
                    877:     /* Continue as from after the group, updating the offsets high water
                    878:     mark, since extracts may have been taken. */
                    879: 
                    880:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                    881: 
                    882:     offset_top = md->end_offset_top;
                    883:     eptr = md->end_match_ptr;
                    884: 
                    885:     /* For a non-repeating ket, just continue at this level. This also
                    886:     happens for a repeating ket if no characters were matched in the group.
                    887:     This is the forcible breaking of infinite loops as implemented in Perl
                    888:     5.005. */
                    889: 
                    890:     if (*ecode == OP_KET || eptr == saved_eptr)
                    891:       {
                    892:       ecode += 1+LINK_SIZE;
                    893:       break;
                    894:       }
                    895: 
                    896:     /* The repeating kets try the rest of the pattern or restart from the
                    897:     preceding bracket, in the appropriate order. The second "call" of match()
                    898:     uses tail recursion, to avoid using another stack frame. */
                    899: 
                    900:     if (*ecode == OP_KETRMIN)
                    901:       {
                    902:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
                    903:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    904:       ecode = prev;
                    905:       goto TAIL_RECURSE;
                    906:       }
                    907:     else  /* OP_KETRMAX */
                    908:       {
                    909:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
                    910:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    911:       ecode += 1 + LINK_SIZE;
                    912:       goto TAIL_RECURSE;
                    913:       }
                    914:     /* Control never gets here */
                    915: 
                    916:     /* Handle a capturing bracket, other than those that are possessive with an
                    917:     unlimited repeat. If there is space in the offset vector, save the current
                    918:     subject position in the working slot at the top of the vector. We mustn't
                    919:     change the current values of the data slot, because they may be set from a
                    920:     previous iteration of this group, and be referred to by a reference inside
                    921:     the group. A failure to match might occur after the group has succeeded,
                    922:     if something later on doesn't match. For this reason, we need to restore
                    923:     the working value and also the values of the final offsets, in case they
                    924:     were set by a previous iteration of the same bracket.
                    925: 
                    926:     If there isn't enough space in the offset vector, treat this as if it were
                    927:     a non-capturing bracket. Don't worry about setting the flag for the error
                    928:     case here; that is handled in the code for KET. */
                    929: 
                    930:     case OP_CBRA:
                    931:     case OP_SCBRA:
                    932:     number = GET2(ecode, 1+LINK_SIZE);
                    933:     offset = number << 1;
                    934: 
                    935: #ifdef PCRE_DEBUG
                    936:     printf("start bracket %d\n", number);
                    937:     printf("subject=");
                    938:     pchars(eptr, 16, TRUE, md);
                    939:     printf("\n");
                    940: #endif
                    941: 
                    942:     if (offset < md->offset_max)
                    943:       {
                    944:       save_offset1 = md->offset_vector[offset];
                    945:       save_offset2 = md->offset_vector[offset+1];
                    946:       save_offset3 = md->offset_vector[md->offset_end - number];
                    947:       save_capture_last = md->capture_last;
1.1.1.2   misho     948:       save_mark = md->mark;
1.1       misho     949: 
                    950:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    951:       md->offset_vector[md->offset_end - number] =
                    952:         (int)(eptr - md->start_subject);
                    953: 
                    954:       for (;;)
                    955:         {
                    956:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho     957:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     958:           eptrb, RM1);
                    959:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
                    960: 
                    961:         /* If we backed up to a THEN, check whether it is within the current
                    962:         branch by comparing the address of the THEN that is passed back with
                    963:         the end of the branch. If it is within the current branch, and the
                    964:         branch is one of two or more alternatives (it either starts or ends
                    965:         with OP_ALT), we have reached the limit of THEN's action, so convert
                    966:         the return code to NOMATCH, which will cause normal backtracking to
                    967:         happen from now on. Otherwise, THEN is passed back to an outer
                    968:         alternative. This implements Perl's treatment of parenthesized groups,
                    969:         where a group not containing | does not affect the current alternative,
                    970:         that is, (X) is NOT the same as (X|(*F)). */
                    971: 
                    972:         if (rrc == MATCH_THEN)
                    973:           {
                    974:           next = ecode + GET(ecode,1);
                    975:           if (md->start_match_ptr < next &&
                    976:               (*ecode == OP_ALT || *next == OP_ALT))
                    977:             rrc = MATCH_NOMATCH;
                    978:           }
                    979: 
                    980:         /* Anything other than NOMATCH is passed back. */
                    981: 
                    982:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    983:         md->capture_last = save_capture_last;
                    984:         ecode += GET(ecode, 1);
1.1.1.2   misho     985:         md->mark = save_mark;
1.1       misho     986:         if (*ecode != OP_ALT) break;
                    987:         }
                    988: 
                    989:       DPRINTF(("bracket %d failed\n", number));
                    990:       md->offset_vector[offset] = save_offset1;
                    991:       md->offset_vector[offset+1] = save_offset2;
                    992:       md->offset_vector[md->offset_end - number] = save_offset3;
                    993: 
                    994:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
                    995: 
                    996:       RRETURN(rrc);
                    997:       }
                    998: 
                    999:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1000:     as a non-capturing bracket. */
                   1001: 
                   1002:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1003:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1004: 
                   1005:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1006: 
                   1007:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1008:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1009: 
                   1010:     /* Non-capturing or atomic group, except for possessive with unlimited
                   1011:     repeat and ONCE group with no captures. Loop for all the alternatives.
                   1012: 
                   1013:     When we get to the final alternative within the brackets, we used to return
                   1014:     the result of a recursive call to match() whatever happened so it was
                   1015:     possible to reduce stack usage by turning this into a tail recursion,
                   1016:     except in the case of a possibly empty group. However, now that there is
                   1017:     the possibility of (*THEN) occurring in the final alternative, this
                   1018:     optimization is no longer always possible.
                   1019: 
                   1020:     We can optimize if we know there are no (*THEN)s in the pattern; at present
                   1021:     this is the best that can be done.
                   1022: 
                   1023:     MATCH_ONCE is returned when the end of an atomic group is successfully
                   1024:     reached, but subsequent matching fails. It passes back up the tree (causing
                   1025:     captured values to be reset) until the original atomic group level is
                   1026:     reached. This is tested by comparing md->once_target with the start of the
                   1027:     group. At this point, the return is converted into MATCH_NOMATCH so that
                   1028:     previous backup points can be taken. */
                   1029: 
                   1030:     case OP_ONCE:
                   1031:     case OP_BRA:
                   1032:     case OP_SBRA:
                   1033:     DPRINTF(("start non-capturing bracket\n"));
                   1034: 
                   1035:     for (;;)
                   1036:       {
1.1.1.3 ! misho    1037:       if (op >= OP_SBRA || op == OP_ONCE)
        !          1038:         md->match_function_type = MATCH_CBEGROUP;
1.1       misho    1039: 
                   1040:       /* If this is not a possibly empty group, and there are no (*THEN)s in
                   1041:       the pattern, and this is the final alternative, optimize as described
                   1042:       above. */
                   1043: 
                   1044:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
                   1045:         {
1.1.1.2   misho    1046:         ecode += PRIV(OP_lengths)[*ecode];
1.1       misho    1047:         goto TAIL_RECURSE;
                   1048:         }
                   1049: 
                   1050:       /* In all other cases, we have to make another call to match(). */
                   1051: 
1.1.1.2   misho    1052:       save_mark = md->mark;
                   1053:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1.1       misho    1054:         RM2);
                   1055: 
                   1056:       /* See comment in the code for capturing groups above about handling
                   1057:       THEN. */
                   1058: 
                   1059:       if (rrc == MATCH_THEN)
                   1060:         {
                   1061:         next = ecode + GET(ecode,1);
                   1062:         if (md->start_match_ptr < next &&
                   1063:             (*ecode == OP_ALT || *next == OP_ALT))
                   1064:           rrc = MATCH_NOMATCH;
                   1065:         }
                   1066: 
                   1067:       if (rrc != MATCH_NOMATCH)
                   1068:         {
                   1069:         if (rrc == MATCH_ONCE)
                   1070:           {
1.1.1.2   misho    1071:           const pcre_uchar *scode = ecode;
1.1       misho    1072:           if (*scode != OP_ONCE)           /* If not at start, find it */
                   1073:             {
                   1074:             while (*scode == OP_ALT) scode += GET(scode, 1);
                   1075:             scode -= GET(scode, 1);
                   1076:             }
                   1077:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
                   1078:           }
                   1079:         RRETURN(rrc);
                   1080:         }
                   1081:       ecode += GET(ecode, 1);
1.1.1.2   misho    1082:       md->mark = save_mark;
1.1       misho    1083:       if (*ecode != OP_ALT) break;
                   1084:       }
                   1085: 
                   1086:     RRETURN(MATCH_NOMATCH);
                   1087: 
                   1088:     /* Handle possessive capturing brackets with an unlimited repeat. We come
                   1089:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
                   1090:     handled similarly to the normal case above. However, the matching is
                   1091:     different. The end of these brackets will always be OP_KETRPOS, which
                   1092:     returns MATCH_KETRPOS without going further in the pattern. By this means
                   1093:     we can handle the group by iteration rather than recursion, thereby
                   1094:     reducing the amount of stack needed. */
                   1095: 
                   1096:     case OP_CBRAPOS:
                   1097:     case OP_SCBRAPOS:
                   1098:     allow_zero = FALSE;
                   1099: 
                   1100:     POSSESSIVE_CAPTURE:
                   1101:     number = GET2(ecode, 1+LINK_SIZE);
                   1102:     offset = number << 1;
                   1103: 
                   1104: #ifdef PCRE_DEBUG
                   1105:     printf("start possessive bracket %d\n", number);
                   1106:     printf("subject=");
                   1107:     pchars(eptr, 16, TRUE, md);
                   1108:     printf("\n");
                   1109: #endif
                   1110: 
                   1111:     if (offset < md->offset_max)
                   1112:       {
                   1113:       matched_once = FALSE;
                   1114:       code_offset = (int)(ecode - md->start_code);
                   1115: 
                   1116:       save_offset1 = md->offset_vector[offset];
                   1117:       save_offset2 = md->offset_vector[offset+1];
                   1118:       save_offset3 = md->offset_vector[md->offset_end - number];
                   1119:       save_capture_last = md->capture_last;
                   1120: 
                   1121:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                   1122: 
                   1123:       /* Each time round the loop, save the current subject position for use
                   1124:       when the group matches. For MATCH_MATCH, the group has matched, so we
                   1125:       restart it with a new subject starting position, remembering that we had
                   1126:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
                   1127:       usual. If we haven't matched any alternatives in any iteration, check to
                   1128:       see if a previous iteration matched. If so, the group has matched;
                   1129:       continue from afterwards. Otherwise it has failed; restore the previous
                   1130:       capture values before returning NOMATCH. */
                   1131: 
                   1132:       for (;;)
                   1133:         {
                   1134:         md->offset_vector[md->offset_end - number] =
                   1135:           (int)(eptr - md->start_subject);
                   1136:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1137:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho    1138:           eptrb, RM63);
                   1139:         if (rrc == MATCH_KETRPOS)
                   1140:           {
                   1141:           offset_top = md->end_offset_top;
                   1142:           eptr = md->end_match_ptr;
                   1143:           ecode = md->start_code + code_offset;
                   1144:           save_capture_last = md->capture_last;
                   1145:           matched_once = TRUE;
                   1146:           continue;
                   1147:           }
                   1148: 
                   1149:         /* See comment in the code for capturing groups above about handling
                   1150:         THEN. */
                   1151: 
                   1152:         if (rrc == MATCH_THEN)
                   1153:           {
                   1154:           next = ecode + GET(ecode,1);
                   1155:           if (md->start_match_ptr < next &&
                   1156:               (*ecode == OP_ALT || *next == OP_ALT))
                   1157:             rrc = MATCH_NOMATCH;
                   1158:           }
                   1159: 
                   1160:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1161:         md->capture_last = save_capture_last;
                   1162:         ecode += GET(ecode, 1);
                   1163:         if (*ecode != OP_ALT) break;
                   1164:         }
                   1165: 
                   1166:       if (!matched_once)
                   1167:         {
                   1168:         md->offset_vector[offset] = save_offset1;
                   1169:         md->offset_vector[offset+1] = save_offset2;
                   1170:         md->offset_vector[md->offset_end - number] = save_offset3;
                   1171:         }
                   1172: 
                   1173:       if (allow_zero || matched_once)
                   1174:         {
                   1175:         ecode += 1 + LINK_SIZE;
                   1176:         break;
                   1177:         }
                   1178: 
                   1179:       RRETURN(MATCH_NOMATCH);
                   1180:       }
                   1181: 
                   1182:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1183:     as a non-capturing bracket. */
                   1184: 
                   1185:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1186:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1187: 
                   1188:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1189: 
                   1190:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1191:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1192: 
                   1193:     /* Non-capturing possessive bracket with unlimited repeat. We come here
                   1194:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
                   1195:     without the capturing complication. It is written out separately for speed
                   1196:     and cleanliness. */
                   1197: 
                   1198:     case OP_BRAPOS:
                   1199:     case OP_SBRAPOS:
                   1200:     allow_zero = FALSE;
                   1201: 
                   1202:     POSSESSIVE_NON_CAPTURE:
                   1203:     matched_once = FALSE;
                   1204:     code_offset = (int)(ecode - md->start_code);
                   1205: 
                   1206:     for (;;)
                   1207:       {
                   1208:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1209:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho    1210:         eptrb, RM48);
                   1211:       if (rrc == MATCH_KETRPOS)
                   1212:         {
                   1213:         offset_top = md->end_offset_top;
                   1214:         eptr = md->end_match_ptr;
                   1215:         ecode = md->start_code + code_offset;
                   1216:         matched_once = TRUE;
                   1217:         continue;
                   1218:         }
                   1219: 
                   1220:       /* See comment in the code for capturing groups above about handling
                   1221:       THEN. */
                   1222: 
                   1223:       if (rrc == MATCH_THEN)
                   1224:         {
                   1225:         next = ecode + GET(ecode,1);
                   1226:         if (md->start_match_ptr < next &&
                   1227:             (*ecode == OP_ALT || *next == OP_ALT))
                   1228:           rrc = MATCH_NOMATCH;
                   1229:         }
                   1230: 
                   1231:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1232:       ecode += GET(ecode, 1);
                   1233:       if (*ecode != OP_ALT) break;
                   1234:       }
                   1235: 
                   1236:     if (matched_once || allow_zero)
                   1237:       {
                   1238:       ecode += 1 + LINK_SIZE;
                   1239:       break;
                   1240:       }
                   1241:     RRETURN(MATCH_NOMATCH);
                   1242: 
                   1243:     /* Control never reaches here. */
                   1244: 
                   1245:     /* Conditional group: compilation checked that there are no more than
                   1246:     two branches. If the condition is false, skipping the first branch takes us
                   1247:     past the end if there is only one branch, but that's OK because that is
                   1248:     exactly what going to the ket would do. */
                   1249: 
                   1250:     case OP_COND:
                   1251:     case OP_SCOND:
                   1252:     codelink = GET(ecode, 1);
                   1253: 
                   1254:     /* Because of the way auto-callout works during compile, a callout item is
                   1255:     inserted between OP_COND and an assertion condition. */
                   1256: 
                   1257:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                   1258:       {
1.1.1.2   misho    1259:       if (PUBL(callout) != NULL)
1.1       misho    1260:         {
1.1.1.2   misho    1261:         PUBL(callout_block) cb;
1.1       misho    1262:         cb.version          = 2;   /* Version 2 of the callout block */
                   1263:         cb.callout_number   = ecode[LINK_SIZE+2];
                   1264:         cb.offset_vector    = md->offset_vector;
1.1.1.2   misho    1265: #ifdef COMPILE_PCRE8
1.1       misho    1266:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.2   misho    1267: #else
                   1268:         cb.subject          = (PCRE_SPTR16)md->start_subject;
                   1269: #endif
1.1       misho    1270:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1271:         cb.start_match      = (int)(mstart - md->start_subject);
                   1272:         cb.current_position = (int)(eptr - md->start_subject);
                   1273:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                   1274:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                   1275:         cb.capture_top      = offset_top/2;
                   1276:         cb.capture_last     = md->capture_last;
                   1277:         cb.callout_data     = md->callout_data;
                   1278:         cb.mark             = md->nomatch_mark;
1.1.1.2   misho    1279:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1280:         if (rrc < 0) RRETURN(rrc);
                   1281:         }
1.1.1.2   misho    1282:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.1       misho    1283:       }
                   1284: 
                   1285:     condcode = ecode[LINK_SIZE+1];
                   1286: 
                   1287:     /* Now see what the actual condition is */
                   1288: 
                   1289:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
                   1290:       {
                   1291:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                   1292:         {
                   1293:         condition = FALSE;
                   1294:         ecode += GET(ecode, 1);
                   1295:         }
                   1296:       else
                   1297:         {
                   1298:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
                   1299:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
                   1300: 
                   1301:         /* If the test is for recursion into a specific subpattern, and it is
                   1302:         false, but the test was set up by name, scan the table to see if the
                   1303:         name refers to any other numbers, and test them. The condition is true
                   1304:         if any one is set. */
                   1305: 
                   1306:         if (!condition && condcode == OP_NRREF)
                   1307:           {
1.1.1.2   misho    1308:           pcre_uchar *slotA = md->name_table;
1.1       misho    1309:           for (i = 0; i < md->name_count; i++)
                   1310:             {
                   1311:             if (GET2(slotA, 0) == recno) break;
                   1312:             slotA += md->name_entry_size;
                   1313:             }
                   1314: 
                   1315:           /* Found a name for the number - there can be only one; duplicate
                   1316:           names for different numbers are allowed, but not vice versa. First
                   1317:           scan down for duplicates. */
                   1318: 
                   1319:           if (i < md->name_count)
                   1320:             {
1.1.1.2   misho    1321:             pcre_uchar *slotB = slotA;
1.1       misho    1322:             while (slotB > md->name_table)
                   1323:               {
                   1324:               slotB -= md->name_entry_size;
1.1.1.2   misho    1325:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1326:                 {
                   1327:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                   1328:                 if (condition) break;
                   1329:                 }
                   1330:               else break;
                   1331:               }
                   1332: 
                   1333:             /* Scan up for duplicates */
                   1334: 
                   1335:             if (!condition)
                   1336:               {
                   1337:               slotB = slotA;
                   1338:               for (i++; i < md->name_count; i++)
                   1339:                 {
                   1340:                 slotB += md->name_entry_size;
1.1.1.2   misho    1341:                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1342:                   {
                   1343:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   1344:                   if (condition) break;
                   1345:                   }
                   1346:                 else break;
                   1347:                 }
                   1348:               }
                   1349:             }
                   1350:           }
                   1351: 
                   1352:         /* Choose branch according to the condition */
                   1353: 
1.1.1.2   misho    1354:         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1355:         }
                   1356:       }
                   1357: 
                   1358:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
                   1359:       {
                   1360:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1361:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1362: 
                   1363:       /* If the numbered capture is unset, but the reference was by name,
                   1364:       scan the table to see if the name refers to any other numbers, and test
                   1365:       them. The condition is true if any one is set. This is tediously similar
                   1366:       to the code above, but not close enough to try to amalgamate. */
                   1367: 
                   1368:       if (!condition && condcode == OP_NCREF)
                   1369:         {
                   1370:         int refno = offset >> 1;
1.1.1.2   misho    1371:         pcre_uchar *slotA = md->name_table;
1.1       misho    1372: 
                   1373:         for (i = 0; i < md->name_count; i++)
                   1374:           {
                   1375:           if (GET2(slotA, 0) == refno) break;
                   1376:           slotA += md->name_entry_size;
                   1377:           }
                   1378: 
                   1379:         /* Found a name for the number - there can be only one; duplicate names
                   1380:         for different numbers are allowed, but not vice versa. First scan down
                   1381:         for duplicates. */
                   1382: 
                   1383:         if (i < md->name_count)
                   1384:           {
1.1.1.2   misho    1385:           pcre_uchar *slotB = slotA;
1.1       misho    1386:           while (slotB > md->name_table)
                   1387:             {
                   1388:             slotB -= md->name_entry_size;
1.1.1.2   misho    1389:             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1390:               {
                   1391:               offset = GET2(slotB, 0) << 1;
                   1392:               condition = offset < offset_top &&
                   1393:                 md->offset_vector[offset] >= 0;
                   1394:               if (condition) break;
                   1395:               }
                   1396:             else break;
                   1397:             }
                   1398: 
                   1399:           /* Scan up for duplicates */
                   1400: 
                   1401:           if (!condition)
                   1402:             {
                   1403:             slotB = slotA;
                   1404:             for (i++; i < md->name_count; i++)
                   1405:               {
                   1406:               slotB += md->name_entry_size;
1.1.1.2   misho    1407:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1408:                 {
                   1409:                 offset = GET2(slotB, 0) << 1;
                   1410:                 condition = offset < offset_top &&
                   1411:                   md->offset_vector[offset] >= 0;
                   1412:                 if (condition) break;
                   1413:                 }
                   1414:               else break;
                   1415:               }
                   1416:             }
                   1417:           }
                   1418:         }
                   1419: 
                   1420:       /* Choose branch according to the condition */
                   1421: 
1.1.1.2   misho    1422:       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1423:       }
                   1424: 
                   1425:     else if (condcode == OP_DEF)     /* DEFINE - always false */
                   1426:       {
                   1427:       condition = FALSE;
                   1428:       ecode += GET(ecode, 1);
                   1429:       }
                   1430: 
                   1431:     /* The condition is an assertion. Call match() to evaluate it - setting
                   1432:     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
                   1433:     an assertion. */
                   1434: 
                   1435:     else
                   1436:       {
                   1437:       md->match_function_type = MATCH_CONDASSERT;
                   1438:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
                   1439:       if (rrc == MATCH_MATCH)
                   1440:         {
                   1441:         if (md->end_offset_top > offset_top)
                   1442:           offset_top = md->end_offset_top;  /* Captures may have happened */
                   1443:         condition = TRUE;
                   1444:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1445:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1446:         }
                   1447: 
                   1448:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
                   1449:       assertion; it is therefore treated as NOMATCH. */
                   1450: 
                   1451:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1452:         {
                   1453:         RRETURN(rrc);         /* Need braces because of following else */
                   1454:         }
                   1455:       else
                   1456:         {
                   1457:         condition = FALSE;
                   1458:         ecode += codelink;
                   1459:         }
                   1460:       }
                   1461: 
                   1462:     /* We are now at the branch that is to be obeyed. As there is only one, can
                   1463:     use tail recursion to avoid using another stack frame, except when there is
                   1464:     unlimited repeat of a possibly empty group. In the latter case, a recursive
                   1465:     call to match() is always required, unless the second alternative doesn't
                   1466:     exist, in which case we can just plough on. Note that, for compatibility
                   1467:     with Perl, the | in a conditional group is NOT treated as creating two
                   1468:     alternatives. If a THEN is encountered in the branch, it propagates out to
                   1469:     the enclosing alternative (unless nested in a deeper set of alternatives,
                   1470:     of course). */
                   1471: 
                   1472:     if (condition || *ecode == OP_ALT)
                   1473:       {
                   1474:       if (op != OP_SCOND)
                   1475:         {
                   1476:         ecode += 1 + LINK_SIZE;
                   1477:         goto TAIL_RECURSE;
                   1478:         }
                   1479: 
                   1480:       md->match_function_type = MATCH_CBEGROUP;
                   1481:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
                   1482:       RRETURN(rrc);
                   1483:       }
                   1484: 
                   1485:      /* Condition false & no alternative; continue after the group. */
                   1486: 
                   1487:     else
                   1488:       {
                   1489:       ecode += 1 + LINK_SIZE;
                   1490:       }
                   1491:     break;
                   1492: 
                   1493: 
                   1494:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1495:     to close any currently open capturing brackets. */
                   1496: 
                   1497:     case OP_CLOSE:
                   1498:     number = GET2(ecode, 1);
                   1499:     offset = number << 1;
                   1500: 
                   1501: #ifdef PCRE_DEBUG
                   1502:       printf("end bracket %d at *ACCEPT", number);
                   1503:       printf("\n");
                   1504: #endif
                   1505: 
                   1506:     md->capture_last = number;
                   1507:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1508:       {
                   1509:       md->offset_vector[offset] =
                   1510:         md->offset_vector[md->offset_end - number];
                   1511:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1512:       if (offset_top <= offset) offset_top = offset + 2;
                   1513:       }
1.1.1.2   misho    1514:     ecode += 1 + IMM2_SIZE;
1.1       misho    1515:     break;
                   1516: 
                   1517: 
                   1518:     /* End of the pattern, either real or forced. */
                   1519: 
                   1520:     case OP_END:
                   1521:     case OP_ACCEPT:
                   1522:     case OP_ASSERT_ACCEPT:
                   1523: 
                   1524:     /* If we have matched an empty string, fail if not in an assertion and not
                   1525:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
                   1526:     is set and we have matched at the start of the subject. In both cases,
                   1527:     backtracking will then try other alternatives, if any. */
                   1528: 
                   1529:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
                   1530:          md->recursive == NULL &&
                   1531:          (md->notempty ||
                   1532:            (md->notempty_atstart &&
                   1533:              mstart == md->start_subject + md->start_offset)))
                   1534:       RRETURN(MATCH_NOMATCH);
                   1535: 
                   1536:     /* Otherwise, we have a match. */
                   1537: 
                   1538:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1539:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1540:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                   1541: 
                   1542:     /* For some reason, the macros don't work properly if an expression is
                   1543:     given as the argument to RRETURN when the heap is in use. */
                   1544: 
                   1545:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
                   1546:     RRETURN(rrc);
                   1547: 
                   1548:     /* Assertion brackets. Check the alternative branches in turn - the
                   1549:     matching won't pass the KET for an assertion. If any one branch matches,
                   1550:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1551:     start of each branch to move the current point backwards, so the code at
                   1552:     this level is identical to the lookahead case. When the assertion is part
                   1553:     of a condition, we want to return immediately afterwards. The caller of
                   1554:     this incarnation of the match() function will have set MATCH_CONDASSERT in
                   1555:     md->match_function_type, and one of these opcodes will be the first opcode
                   1556:     that is processed. We use a local variable that is preserved over calls to
                   1557:     match() to remember this case. */
                   1558: 
                   1559:     case OP_ASSERT:
                   1560:     case OP_ASSERTBACK:
1.1.1.2   misho    1561:     save_mark = md->mark;
1.1       misho    1562:     if (md->match_function_type == MATCH_CONDASSERT)
                   1563:       {
                   1564:       condassert = TRUE;
                   1565:       md->match_function_type = 0;
                   1566:       }
                   1567:     else condassert = FALSE;
                   1568: 
                   1569:     do
                   1570:       {
                   1571:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
                   1572:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1573:         {
                   1574:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1575:         break;
                   1576:         }
1.1.1.3 ! misho    1577:       md->mark = save_mark;
1.1       misho    1578: 
1.1.1.3 ! misho    1579:       /* A COMMIT failure must fail the entire assertion, without trying any
        !          1580:       subsequent branches. */
        !          1581: 
        !          1582:       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
        !          1583: 
        !          1584:       /* PCRE does not allow THEN to escape beyond an assertion; it
        !          1585:       is treated as NOMATCH. */
1.1       misho    1586: 
                   1587:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1588:       ecode += GET(ecode, 1);
                   1589:       }
                   1590:     while (*ecode == OP_ALT);
                   1591: 
                   1592:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
                   1593: 
                   1594:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1595: 
                   1596:     if (condassert) RRETURN(MATCH_MATCH);
                   1597: 
                   1598:     /* Continue from after the assertion, updating the offsets high water
                   1599:     mark, since extracts may have been taken during the assertion. */
                   1600: 
                   1601:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1602:     ecode += 1 + LINK_SIZE;
                   1603:     offset_top = md->end_offset_top;
                   1604:     continue;
                   1605: 
                   1606:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1607:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1608:     branches. */
                   1609: 
                   1610:     case OP_ASSERT_NOT:
                   1611:     case OP_ASSERTBACK_NOT:
1.1.1.2   misho    1612:     save_mark = md->mark;
1.1       misho    1613:     if (md->match_function_type == MATCH_CONDASSERT)
                   1614:       {
                   1615:       condassert = TRUE;
                   1616:       md->match_function_type = 0;
                   1617:       }
                   1618:     else condassert = FALSE;
                   1619: 
                   1620:     do
                   1621:       {
                   1622:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1.1.1.2   misho    1623:       md->mark = save_mark;
1.1       misho    1624:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
                   1625:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1626:         {
                   1627:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1628:         break;
                   1629:         }
                   1630: 
                   1631:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
                   1632:       as NOMATCH. */
                   1633: 
                   1634:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1635:       ecode += GET(ecode,1);
                   1636:       }
                   1637:     while (*ecode == OP_ALT);
                   1638: 
                   1639:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
                   1640: 
                   1641:     ecode += 1 + LINK_SIZE;
                   1642:     continue;
                   1643: 
                   1644:     /* Move the subject pointer back. This occurs only at the start of
                   1645:     each branch of a lookbehind assertion. If we are too close to the start to
                   1646:     move back, this match function fails. When working with UTF-8 we move
                   1647:     back a number of characters, not bytes. */
                   1648: 
                   1649:     case OP_REVERSE:
1.1.1.2   misho    1650: #ifdef SUPPORT_UTF
                   1651:     if (utf)
1.1       misho    1652:       {
                   1653:       i = GET(ecode, 1);
                   1654:       while (i-- > 0)
                   1655:         {
                   1656:         eptr--;
                   1657:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1658:         BACKCHAR(eptr);
                   1659:         }
                   1660:       }
                   1661:     else
                   1662: #endif
                   1663: 
                   1664:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1665: 
                   1666:       {
                   1667:       eptr -= GET(ecode, 1);
                   1668:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1669:       }
                   1670: 
                   1671:     /* Save the earliest consulted character, then skip to next op code */
                   1672: 
                   1673:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
                   1674:     ecode += 1 + LINK_SIZE;
                   1675:     break;
                   1676: 
                   1677:     /* The callout item calls an external function, if one is provided, passing
                   1678:     details of the match so far. This is mainly for debugging, though the
                   1679:     function is able to force a failure. */
                   1680: 
                   1681:     case OP_CALLOUT:
1.1.1.2   misho    1682:     if (PUBL(callout) != NULL)
1.1       misho    1683:       {
1.1.1.2   misho    1684:       PUBL(callout_block) cb;
1.1       misho    1685:       cb.version          = 2;   /* Version 2 of the callout block */
                   1686:       cb.callout_number   = ecode[1];
                   1687:       cb.offset_vector    = md->offset_vector;
1.1.1.2   misho    1688: #ifdef COMPILE_PCRE8
1.1       misho    1689:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.2   misho    1690: #else
                   1691:       cb.subject          = (PCRE_SPTR16)md->start_subject;
                   1692: #endif
1.1       misho    1693:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1694:       cb.start_match      = (int)(mstart - md->start_subject);
                   1695:       cb.current_position = (int)(eptr - md->start_subject);
                   1696:       cb.pattern_position = GET(ecode, 2);
                   1697:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1698:       cb.capture_top      = offset_top/2;
                   1699:       cb.capture_last     = md->capture_last;
                   1700:       cb.callout_data     = md->callout_data;
                   1701:       cb.mark             = md->nomatch_mark;
1.1.1.2   misho    1702:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1703:       if (rrc < 0) RRETURN(rrc);
                   1704:       }
                   1705:     ecode += 2 + 2*LINK_SIZE;
                   1706:     break;
                   1707: 
                   1708:     /* Recursion either matches the current regex, or some subexpression. The
                   1709:     offset data is the offset to the starting bracket from the start of the
                   1710:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1711: 
                   1712:     The state of the capturing groups is preserved over recursion, and
                   1713:     re-instated afterwards. We don't know how many are started and not yet
                   1714:     finished (offset_top records the completed total) so we just have to save
                   1715:     all the potential data. There may be up to 65535 such values, which is too
                   1716:     large to put on the stack, but using malloc for small numbers seems
                   1717:     expensive. As a compromise, the stack is used when there are no more than
                   1718:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
                   1719: 
                   1720:     There are also other values that have to be saved. We use a chained
                   1721:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1722:     for the original version of this logic. It has, however, been hacked around
                   1723:     a lot, so he is not to blame for the current way it works. */
                   1724: 
                   1725:     case OP_RECURSE:
                   1726:       {
                   1727:       recursion_info *ri;
                   1728:       int recno;
                   1729: 
                   1730:       callpat = md->start_code + GET(ecode, 1);
                   1731:       recno = (callpat == md->start_code)? 0 :
                   1732:         GET2(callpat, 1 + LINK_SIZE);
                   1733: 
                   1734:       /* Check for repeating a recursion without advancing the subject pointer.
                   1735:       This should catch convoluted mutual recursions. (Some simple cases are
                   1736:       caught at compile time.) */
                   1737: 
                   1738:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   1739:         if (recno == ri->group_num && eptr == ri->subject_position)
                   1740:           RRETURN(PCRE_ERROR_RECURSELOOP);
                   1741: 
                   1742:       /* Add to "recursing stack" */
                   1743: 
                   1744:       new_recursive.group_num = recno;
                   1745:       new_recursive.subject_position = eptr;
                   1746:       new_recursive.prevrec = md->recursive;
                   1747:       md->recursive = &new_recursive;
                   1748: 
                   1749:       /* Where to continue from afterwards */
                   1750: 
                   1751:       ecode += 1 + LINK_SIZE;
                   1752: 
                   1753:       /* Now save the offset data */
                   1754: 
                   1755:       new_recursive.saved_max = md->offset_end;
                   1756:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1757:         new_recursive.offset_save = stacksave;
                   1758:       else
                   1759:         {
                   1760:         new_recursive.offset_save =
1.1.1.2   misho    1761:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1       misho    1762:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1763:         }
                   1764:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1765:             new_recursive.saved_max * sizeof(int));
                   1766: 
                   1767:       /* OK, now we can do the recursion. After processing each alternative,
                   1768:       restore the offset data. If there were nested recursions, md->recursive
                   1769:       might be changed, so reset it before looping. */
                   1770: 
                   1771:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1772:       cbegroup = (*callpat >= OP_SBRA);
                   1773:       do
                   1774:         {
                   1775:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1776:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1.1       misho    1777:           md, eptrb, RM6);
                   1778:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1779:             new_recursive.saved_max * sizeof(int));
                   1780:         md->recursive = new_recursive.prevrec;
                   1781:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1782:           {
                   1783:           DPRINTF(("Recursion matched\n"));
                   1784:           if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1785:             (PUBL(free))(new_recursive.offset_save);
1.1       misho    1786: 
                   1787:           /* Set where we got to in the subject, and reset the start in case
                   1788:           it was changed by \K. This *is* propagated back out of a recursion,
                   1789:           for Perl compatibility. */
                   1790: 
                   1791:           eptr = md->end_match_ptr;
                   1792:           mstart = md->start_match_ptr;
                   1793:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
                   1794:           }
                   1795: 
1.1.1.3 ! misho    1796:         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
        !          1797:         is treated as NOMATCH. */
1.1       misho    1798: 
1.1.1.3 ! misho    1799:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
        !          1800:                  rrc != MATCH_COMMIT)
1.1       misho    1801:           {
                   1802:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1803:           if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1804:             (PUBL(free))(new_recursive.offset_save);
1.1       misho    1805:           RRETURN(rrc);
                   1806:           }
                   1807: 
                   1808:         md->recursive = &new_recursive;
                   1809:         callpat += GET(callpat, 1);
                   1810:         }
                   1811:       while (*callpat == OP_ALT);
                   1812: 
                   1813:       DPRINTF(("Recursion didn't match\n"));
                   1814:       md->recursive = new_recursive.prevrec;
                   1815:       if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1816:         (PUBL(free))(new_recursive.offset_save);
1.1       misho    1817:       RRETURN(MATCH_NOMATCH);
                   1818:       }
                   1819: 
                   1820:     RECURSION_MATCHED:
                   1821:     break;
                   1822: 
                   1823:     /* An alternation is the end of a branch; scan along to find the end of the
                   1824:     bracketed group and go to there. */
                   1825: 
                   1826:     case OP_ALT:
                   1827:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1828:     break;
                   1829: 
                   1830:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1831:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1832:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1833:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1834:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1835: 
                   1836:     case OP_BRAZERO:
                   1837:     next = ecode + 1;
                   1838:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
                   1839:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1840:     do next += GET(next, 1); while (*next == OP_ALT);
                   1841:     ecode = next + 1 + LINK_SIZE;
                   1842:     break;
                   1843: 
                   1844:     case OP_BRAMINZERO:
                   1845:     next = ecode + 1;
                   1846:     do next += GET(next, 1); while (*next == OP_ALT);
                   1847:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
                   1848:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1849:     ecode++;
                   1850:     break;
                   1851: 
                   1852:     case OP_SKIPZERO:
                   1853:     next = ecode+1;
                   1854:     do next += GET(next,1); while (*next == OP_ALT);
                   1855:     ecode = next + 1 + LINK_SIZE;
                   1856:     break;
                   1857: 
                   1858:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
                   1859:     here; just jump to the group, with allow_zero set TRUE. */
                   1860: 
                   1861:     case OP_BRAPOSZERO:
                   1862:     op = *(++ecode);
                   1863:     allow_zero = TRUE;
                   1864:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
                   1865:       goto POSSESSIVE_NON_CAPTURE;
                   1866: 
                   1867:     /* End of a group, repeated or non-repeating. */
                   1868: 
                   1869:     case OP_KET:
                   1870:     case OP_KETRMIN:
                   1871:     case OP_KETRMAX:
                   1872:     case OP_KETRPOS:
                   1873:     prev = ecode - GET(ecode, 1);
                   1874: 
                   1875:     /* If this was a group that remembered the subject start, in order to break
                   1876:     infinite repeats of empty string matches, retrieve the subject start from
                   1877:     the chain. Otherwise, set it NULL. */
                   1878: 
                   1879:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
                   1880:       {
                   1881:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1882:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1883:       }
                   1884:     else saved_eptr = NULL;
                   1885: 
                   1886:     /* If we are at the end of an assertion group or a non-capturing atomic
                   1887:     group, stop matching and return MATCH_MATCH, but record the current high
                   1888:     water mark for use by positive assertions. We also need to record the match
                   1889:     start in case it was changed by \K. */
                   1890: 
                   1891:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
                   1892:          *prev == OP_ONCE_NC)
                   1893:       {
                   1894:       md->end_match_ptr = eptr;      /* For ONCE_NC */
                   1895:       md->end_offset_top = offset_top;
                   1896:       md->start_match_ptr = mstart;
                   1897:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
                   1898:       }
                   1899: 
                   1900:     /* For capturing groups we have to check the group number back at the start
                   1901:     and if necessary complete handling an extraction by setting the offsets and
                   1902:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
                   1903:     into group 0, so it won't be picked up here. Instead, we catch it when the
                   1904:     OP_END is reached. Other recursion is handled here. We just have to record
                   1905:     the current subject position and start match pointer and give a MATCH
                   1906:     return. */
                   1907: 
                   1908:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
                   1909:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
                   1910:       {
                   1911:       number = GET2(prev, 1+LINK_SIZE);
                   1912:       offset = number << 1;
                   1913: 
                   1914: #ifdef PCRE_DEBUG
                   1915:       printf("end bracket %d", number);
                   1916:       printf("\n");
                   1917: #endif
                   1918: 
                   1919:       /* Handle a recursively called group. */
                   1920: 
                   1921:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1922:         {
                   1923:         md->end_match_ptr = eptr;
                   1924:         md->start_match_ptr = mstart;
                   1925:         RRETURN(MATCH_MATCH);
                   1926:         }
                   1927: 
                   1928:       /* Deal with capturing */
                   1929: 
                   1930:       md->capture_last = number;
                   1931:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1932:         {
                   1933:         /* If offset is greater than offset_top, it means that we are
                   1934:         "skipping" a capturing group, and that group's offsets must be marked
                   1935:         unset. In earlier versions of PCRE, all the offsets were unset at the
                   1936:         start of matching, but this doesn't work because atomic groups and
                   1937:         assertions can cause a value to be set that should later be unset.
                   1938:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
                   1939:         part of the atomic group, but this is not on the final matching path,
                   1940:         so must be unset when 2 is set. (If there is no group 2, there is no
                   1941:         problem, because offset_top will then be 2, indicating no capture.) */
                   1942: 
                   1943:         if (offset > offset_top)
                   1944:           {
                   1945:           register int *iptr = md->offset_vector + offset_top;
                   1946:           register int *iend = md->offset_vector + offset;
                   1947:           while (iptr < iend) *iptr++ = -1;
                   1948:           }
                   1949: 
                   1950:         /* Now make the extraction */
                   1951: 
                   1952:         md->offset_vector[offset] =
                   1953:           md->offset_vector[md->offset_end - number];
                   1954:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1955:         if (offset_top <= offset) offset_top = offset + 2;
                   1956:         }
                   1957:       }
                   1958: 
                   1959:     /* For an ordinary non-repeating ket, just continue at this level. This
                   1960:     also happens for a repeating ket if no characters were matched in the
                   1961:     group. This is the forcible breaking of infinite loops as implemented in
                   1962:     Perl 5.005. For a non-repeating atomic group that includes captures,
                   1963:     establish a backup point by processing the rest of the pattern at a lower
                   1964:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
                   1965:     original OP_ONCE level, thereby bypassing intermediate backup points, but
                   1966:     resetting any captures that happened along the way. */
                   1967: 
                   1968:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1969:       {
                   1970:       if (*prev == OP_ONCE)
                   1971:         {
                   1972:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
                   1973:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1974:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   1975:         RRETURN(MATCH_ONCE);
                   1976:         }
                   1977:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
                   1978:       break;
                   1979:       }
                   1980: 
                   1981:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
                   1982:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
                   1983:     at a time from the outer level, thus saving stack. */
                   1984: 
                   1985:     if (*ecode == OP_KETRPOS)
                   1986:       {
                   1987:       md->end_match_ptr = eptr;
                   1988:       md->end_offset_top = offset_top;
                   1989:       RRETURN(MATCH_KETRPOS);
                   1990:       }
                   1991: 
                   1992:     /* The normal repeating kets try the rest of the pattern or restart from
                   1993:     the preceding bracket, in the appropriate order. In the second case, we can
                   1994:     use tail recursion to avoid using another stack frame, unless we have an
                   1995:     atomic group or an unlimited repeat of a group that can match an empty
                   1996:     string. */
                   1997: 
                   1998:     if (*ecode == OP_KETRMIN)
                   1999:       {
                   2000:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
                   2001:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2002:       if (*prev == OP_ONCE)
                   2003:         {
                   2004:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
                   2005:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2006:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   2007:         RRETURN(MATCH_ONCE);
                   2008:         }
                   2009:       if (*prev >= OP_SBRA)    /* Could match an empty string */
                   2010:         {
                   2011:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
                   2012:         RRETURN(rrc);
                   2013:         }
                   2014:       ecode = prev;
                   2015:       goto TAIL_RECURSE;
                   2016:       }
                   2017:     else  /* OP_KETRMAX */
                   2018:       {
                   2019:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
                   2020:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
                   2021:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2022:       if (*prev == OP_ONCE)
                   2023:         {
                   2024:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
                   2025:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2026:         md->once_target = prev;
                   2027:         RRETURN(MATCH_ONCE);
                   2028:         }
                   2029:       ecode += 1 + LINK_SIZE;
                   2030:       goto TAIL_RECURSE;
                   2031:       }
                   2032:     /* Control never gets here */
                   2033: 
                   2034:     /* Not multiline mode: start of subject assertion, unless notbol. */
                   2035: 
                   2036:     case OP_CIRC:
                   2037:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   2038: 
                   2039:     /* Start of subject assertion */
                   2040: 
                   2041:     case OP_SOD:
                   2042:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   2043:     ecode++;
                   2044:     break;
                   2045: 
                   2046:     /* Multiline mode: start of subject unless notbol, or after any newline. */
                   2047: 
                   2048:     case OP_CIRCM:
                   2049:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   2050:     if (eptr != md->start_subject &&
                   2051:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   2052:       RRETURN(MATCH_NOMATCH);
                   2053:     ecode++;
                   2054:     break;
                   2055: 
                   2056:     /* Start of match assertion */
                   2057: 
                   2058:     case OP_SOM:
                   2059:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
                   2060:     ecode++;
                   2061:     break;
                   2062: 
                   2063:     /* Reset the start of match point */
                   2064: 
                   2065:     case OP_SET_SOM:
                   2066:     mstart = eptr;
                   2067:     ecode++;
                   2068:     break;
                   2069: 
                   2070:     /* Multiline mode: assert before any newline, or before end of subject
                   2071:     unless noteol is set. */
                   2072: 
                   2073:     case OP_DOLLM:
                   2074:     if (eptr < md->end_subject)
1.1.1.3 ! misho    2075:       {
        !          2076:       if (!IS_NEWLINE(eptr))
        !          2077:         {
        !          2078:         if (md->partial != 0 &&
        !          2079:             eptr + 1 >= md->end_subject &&
        !          2080:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2081:             NLBLOCK->nllen == 2 &&
        !          2082:             *eptr == NLBLOCK->nl[0])
        !          2083:           {
        !          2084:           md->hitend = TRUE;
        !          2085:           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2086:           }
        !          2087:         RRETURN(MATCH_NOMATCH);
        !          2088:         }
        !          2089:       }
1.1       misho    2090:     else
                   2091:       {
                   2092:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2093:       SCHECK_PARTIAL();
                   2094:       }
                   2095:     ecode++;
                   2096:     break;
                   2097: 
                   2098:     /* Not multiline mode: assert before a terminating newline or before end of
                   2099:     subject unless noteol is set. */
                   2100: 
                   2101:     case OP_DOLL:
                   2102:     if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2103:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
                   2104: 
                   2105:     /* ... else fall through for endonly */
                   2106: 
                   2107:     /* End of subject assertion (\z) */
                   2108: 
                   2109:     case OP_EOD:
                   2110:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
                   2111:     SCHECK_PARTIAL();
                   2112:     ecode++;
                   2113:     break;
                   2114: 
                   2115:     /* End of subject or ending \n assertion (\Z) */
                   2116: 
                   2117:     case OP_EODN:
                   2118:     ASSERT_NL_OR_EOS:
                   2119:     if (eptr < md->end_subject &&
                   2120:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.1.1.3 ! misho    2121:       {
        !          2122:       if (md->partial != 0 &&
        !          2123:           eptr + 1 >= md->end_subject &&
        !          2124:           NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2125:           NLBLOCK->nllen == 2 &&
        !          2126:           *eptr == NLBLOCK->nl[0])
        !          2127:         {
        !          2128:         md->hitend = TRUE;
        !          2129:         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2130:         }
1.1       misho    2131:       RRETURN(MATCH_NOMATCH);
1.1.1.3 ! misho    2132:       }
1.1       misho    2133: 
                   2134:     /* Either at end of string or \n before end. */
                   2135: 
                   2136:     SCHECK_PARTIAL();
                   2137:     ecode++;
                   2138:     break;
                   2139: 
                   2140:     /* Word boundary assertions */
                   2141: 
                   2142:     case OP_NOT_WORD_BOUNDARY:
                   2143:     case OP_WORD_BOUNDARY:
                   2144:       {
                   2145: 
                   2146:       /* Find out if the previous and current characters are "word" characters.
                   2147:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   2148:       be "non-word" characters. Remember the earliest consulted character for
                   2149:       partial matching. */
                   2150: 
1.1.1.2   misho    2151: #ifdef SUPPORT_UTF
                   2152:       if (utf)
1.1       misho    2153:         {
                   2154:         /* Get status of previous character */
                   2155: 
                   2156:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2157:           {
1.1.1.2   misho    2158:           PCRE_PUCHAR lastptr = eptr - 1;
                   2159:           BACKCHAR(lastptr);
1.1       misho    2160:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
                   2161:           GETCHAR(c, lastptr);
                   2162: #ifdef SUPPORT_UCP
                   2163:           if (md->use_ucp)
                   2164:             {
                   2165:             if (c == '_') prev_is_word = TRUE; else
                   2166:               {
                   2167:               int cat = UCD_CATEGORY(c);
                   2168:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2169:               }
                   2170:             }
                   2171:           else
                   2172: #endif
                   2173:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2174:           }
                   2175: 
                   2176:         /* Get status of next character */
                   2177: 
                   2178:         if (eptr >= md->end_subject)
                   2179:           {
                   2180:           SCHECK_PARTIAL();
                   2181:           cur_is_word = FALSE;
                   2182:           }
                   2183:         else
                   2184:           {
                   2185:           GETCHAR(c, eptr);
                   2186: #ifdef SUPPORT_UCP
                   2187:           if (md->use_ucp)
                   2188:             {
                   2189:             if (c == '_') cur_is_word = TRUE; else
                   2190:               {
                   2191:               int cat = UCD_CATEGORY(c);
                   2192:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2193:               }
                   2194:             }
                   2195:           else
                   2196: #endif
                   2197:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2198:           }
                   2199:         }
                   2200:       else
                   2201: #endif
                   2202: 
                   2203:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2204:       consistency with the behaviour of \w we do use it in this case. */
                   2205: 
                   2206:         {
                   2207:         /* Get status of previous character */
                   2208: 
                   2209:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2210:           {
                   2211:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2212: #ifdef SUPPORT_UCP
                   2213:           if (md->use_ucp)
                   2214:             {
                   2215:             c = eptr[-1];
                   2216:             if (c == '_') prev_is_word = TRUE; else
                   2217:               {
                   2218:               int cat = UCD_CATEGORY(c);
                   2219:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2220:               }
                   2221:             }
                   2222:           else
                   2223: #endif
1.1.1.2   misho    2224:           prev_is_word = MAX_255(eptr[-1])
                   2225:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.1       misho    2226:           }
                   2227: 
                   2228:         /* Get status of next character */
                   2229: 
                   2230:         if (eptr >= md->end_subject)
                   2231:           {
                   2232:           SCHECK_PARTIAL();
                   2233:           cur_is_word = FALSE;
                   2234:           }
                   2235:         else
                   2236: #ifdef SUPPORT_UCP
                   2237:         if (md->use_ucp)
                   2238:           {
                   2239:           c = *eptr;
                   2240:           if (c == '_') cur_is_word = TRUE; else
                   2241:             {
                   2242:             int cat = UCD_CATEGORY(c);
                   2243:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2244:             }
                   2245:           }
                   2246:         else
                   2247: #endif
1.1.1.2   misho    2248:         cur_is_word = MAX_255(*eptr)
                   2249:           && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misho    2250:         }
                   2251: 
                   2252:       /* Now see if the situation is what we want */
                   2253: 
                   2254:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2255:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   2256:         RRETURN(MATCH_NOMATCH);
                   2257:       }
                   2258:     break;
                   2259: 
1.1.1.3 ! misho    2260:     /* Match any single character type except newline; have to take care with
        !          2261:     CRLF newlines and partial matching. */
1.1       misho    2262: 
                   2263:     case OP_ANY:
                   2264:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3 ! misho    2265:     if (md->partial != 0 &&
        !          2266:         eptr + 1 >= md->end_subject &&
        !          2267:         NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2268:         NLBLOCK->nllen == 2 &&
        !          2269:         *eptr == NLBLOCK->nl[0])
        !          2270:       {
        !          2271:       md->hitend = TRUE;
        !          2272:       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2273:       }
        !          2274: 
1.1       misho    2275:     /* Fall through */
                   2276: 
1.1.1.3 ! misho    2277:     /* Match any single character whatsoever. */
        !          2278: 
1.1       misho    2279:     case OP_ALLANY:
                   2280:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2281:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2282:       SCHECK_PARTIAL();
                   2283:       RRETURN(MATCH_NOMATCH);
                   2284:       }
                   2285:     eptr++;
1.1.1.2   misho    2286: #ifdef SUPPORT_UTF
                   2287:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
                   2288: #endif
1.1       misho    2289:     ecode++;
                   2290:     break;
                   2291: 
                   2292:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2293:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2294: 
                   2295:     case OP_ANYBYTE:
                   2296:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2297:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2298:       SCHECK_PARTIAL();
                   2299:       RRETURN(MATCH_NOMATCH);
                   2300:       }
                   2301:     eptr++;
                   2302:     ecode++;
                   2303:     break;
                   2304: 
                   2305:     case OP_NOT_DIGIT:
                   2306:     if (eptr >= md->end_subject)
                   2307:       {
                   2308:       SCHECK_PARTIAL();
                   2309:       RRETURN(MATCH_NOMATCH);
                   2310:       }
                   2311:     GETCHARINCTEST(c, eptr);
                   2312:     if (
1.1.1.2   misho    2313: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2314:        c < 256 &&
                   2315: #endif
                   2316:        (md->ctypes[c] & ctype_digit) != 0
                   2317:        )
                   2318:       RRETURN(MATCH_NOMATCH);
                   2319:     ecode++;
                   2320:     break;
                   2321: 
                   2322:     case OP_DIGIT:
                   2323:     if (eptr >= md->end_subject)
                   2324:       {
                   2325:       SCHECK_PARTIAL();
                   2326:       RRETURN(MATCH_NOMATCH);
                   2327:       }
                   2328:     GETCHARINCTEST(c, eptr);
                   2329:     if (
1.1.1.2   misho    2330: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2331:        c > 255 ||
1.1       misho    2332: #endif
                   2333:        (md->ctypes[c] & ctype_digit) == 0
                   2334:        )
                   2335:       RRETURN(MATCH_NOMATCH);
                   2336:     ecode++;
                   2337:     break;
                   2338: 
                   2339:     case OP_NOT_WHITESPACE:
                   2340:     if (eptr >= md->end_subject)
                   2341:       {
                   2342:       SCHECK_PARTIAL();
                   2343:       RRETURN(MATCH_NOMATCH);
                   2344:       }
                   2345:     GETCHARINCTEST(c, eptr);
                   2346:     if (
1.1.1.2   misho    2347: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2348:        c < 256 &&
                   2349: #endif
                   2350:        (md->ctypes[c] & ctype_space) != 0
                   2351:        )
                   2352:       RRETURN(MATCH_NOMATCH);
                   2353:     ecode++;
                   2354:     break;
                   2355: 
                   2356:     case OP_WHITESPACE:
                   2357:     if (eptr >= md->end_subject)
                   2358:       {
                   2359:       SCHECK_PARTIAL();
                   2360:       RRETURN(MATCH_NOMATCH);
                   2361:       }
                   2362:     GETCHARINCTEST(c, eptr);
                   2363:     if (
1.1.1.2   misho    2364: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2365:        c > 255 ||
1.1       misho    2366: #endif
                   2367:        (md->ctypes[c] & ctype_space) == 0
                   2368:        )
                   2369:       RRETURN(MATCH_NOMATCH);
                   2370:     ecode++;
                   2371:     break;
                   2372: 
                   2373:     case OP_NOT_WORDCHAR:
                   2374:     if (eptr >= md->end_subject)
                   2375:       {
                   2376:       SCHECK_PARTIAL();
                   2377:       RRETURN(MATCH_NOMATCH);
                   2378:       }
                   2379:     GETCHARINCTEST(c, eptr);
                   2380:     if (
1.1.1.2   misho    2381: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2382:        c < 256 &&
                   2383: #endif
                   2384:        (md->ctypes[c] & ctype_word) != 0
                   2385:        )
                   2386:       RRETURN(MATCH_NOMATCH);
                   2387:     ecode++;
                   2388:     break;
                   2389: 
                   2390:     case OP_WORDCHAR:
                   2391:     if (eptr >= md->end_subject)
                   2392:       {
                   2393:       SCHECK_PARTIAL();
                   2394:       RRETURN(MATCH_NOMATCH);
                   2395:       }
                   2396:     GETCHARINCTEST(c, eptr);
                   2397:     if (
1.1.1.2   misho    2398: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2399:        c > 255 ||
1.1       misho    2400: #endif
                   2401:        (md->ctypes[c] & ctype_word) == 0
                   2402:        )
                   2403:       RRETURN(MATCH_NOMATCH);
                   2404:     ecode++;
                   2405:     break;
                   2406: 
                   2407:     case OP_ANYNL:
                   2408:     if (eptr >= md->end_subject)
                   2409:       {
                   2410:       SCHECK_PARTIAL();
                   2411:       RRETURN(MATCH_NOMATCH);
                   2412:       }
                   2413:     GETCHARINCTEST(c, eptr);
                   2414:     switch(c)
                   2415:       {
                   2416:       default: RRETURN(MATCH_NOMATCH);
                   2417: 
                   2418:       case 0x000d:
1.1.1.3 ! misho    2419:       if (eptr >= md->end_subject)
        !          2420:         {
        !          2421:         SCHECK_PARTIAL();
        !          2422:         }
        !          2423:       else if (*eptr == 0x0a) eptr++;
1.1       misho    2424:       break;
                   2425: 
                   2426:       case 0x000a:
                   2427:       break;
                   2428: 
                   2429:       case 0x000b:
                   2430:       case 0x000c:
                   2431:       case 0x0085:
                   2432:       case 0x2028:
                   2433:       case 0x2029:
                   2434:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   2435:       break;
                   2436:       }
                   2437:     ecode++;
                   2438:     break;
                   2439: 
                   2440:     case OP_NOT_HSPACE:
                   2441:     if (eptr >= md->end_subject)
                   2442:       {
                   2443:       SCHECK_PARTIAL();
                   2444:       RRETURN(MATCH_NOMATCH);
                   2445:       }
                   2446:     GETCHARINCTEST(c, eptr);
                   2447:     switch(c)
                   2448:       {
                   2449:       default: break;
                   2450:       case 0x09:      /* HT */
                   2451:       case 0x20:      /* SPACE */
                   2452:       case 0xa0:      /* NBSP */
                   2453:       case 0x1680:    /* OGHAM SPACE MARK */
                   2454:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2455:       case 0x2000:    /* EN QUAD */
                   2456:       case 0x2001:    /* EM QUAD */
                   2457:       case 0x2002:    /* EN SPACE */
                   2458:       case 0x2003:    /* EM SPACE */
                   2459:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2460:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2461:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2462:       case 0x2007:    /* FIGURE SPACE */
                   2463:       case 0x2008:    /* PUNCTUATION SPACE */
                   2464:       case 0x2009:    /* THIN SPACE */
                   2465:       case 0x200A:    /* HAIR SPACE */
                   2466:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2467:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2468:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2469:       RRETURN(MATCH_NOMATCH);
                   2470:       }
                   2471:     ecode++;
                   2472:     break;
                   2473: 
                   2474:     case OP_HSPACE:
                   2475:     if (eptr >= md->end_subject)
                   2476:       {
                   2477:       SCHECK_PARTIAL();
                   2478:       RRETURN(MATCH_NOMATCH);
                   2479:       }
                   2480:     GETCHARINCTEST(c, eptr);
                   2481:     switch(c)
                   2482:       {
                   2483:       default: RRETURN(MATCH_NOMATCH);
                   2484:       case 0x09:      /* HT */
                   2485:       case 0x20:      /* SPACE */
                   2486:       case 0xa0:      /* NBSP */
                   2487:       case 0x1680:    /* OGHAM SPACE MARK */
                   2488:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2489:       case 0x2000:    /* EN QUAD */
                   2490:       case 0x2001:    /* EM QUAD */
                   2491:       case 0x2002:    /* EN SPACE */
                   2492:       case 0x2003:    /* EM SPACE */
                   2493:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2494:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2495:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2496:       case 0x2007:    /* FIGURE SPACE */
                   2497:       case 0x2008:    /* PUNCTUATION SPACE */
                   2498:       case 0x2009:    /* THIN SPACE */
                   2499:       case 0x200A:    /* HAIR SPACE */
                   2500:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2501:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2502:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2503:       break;
                   2504:       }
                   2505:     ecode++;
                   2506:     break;
                   2507: 
                   2508:     case OP_NOT_VSPACE:
                   2509:     if (eptr >= md->end_subject)
                   2510:       {
                   2511:       SCHECK_PARTIAL();
                   2512:       RRETURN(MATCH_NOMATCH);
                   2513:       }
                   2514:     GETCHARINCTEST(c, eptr);
                   2515:     switch(c)
                   2516:       {
                   2517:       default: break;
                   2518:       case 0x0a:      /* LF */
                   2519:       case 0x0b:      /* VT */
                   2520:       case 0x0c:      /* FF */
                   2521:       case 0x0d:      /* CR */
                   2522:       case 0x85:      /* NEL */
                   2523:       case 0x2028:    /* LINE SEPARATOR */
                   2524:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2525:       RRETURN(MATCH_NOMATCH);
                   2526:       }
                   2527:     ecode++;
                   2528:     break;
                   2529: 
                   2530:     case OP_VSPACE:
                   2531:     if (eptr >= md->end_subject)
                   2532:       {
                   2533:       SCHECK_PARTIAL();
                   2534:       RRETURN(MATCH_NOMATCH);
                   2535:       }
                   2536:     GETCHARINCTEST(c, eptr);
                   2537:     switch(c)
                   2538:       {
                   2539:       default: RRETURN(MATCH_NOMATCH);
                   2540:       case 0x0a:      /* LF */
                   2541:       case 0x0b:      /* VT */
                   2542:       case 0x0c:      /* FF */
                   2543:       case 0x0d:      /* CR */
                   2544:       case 0x85:      /* NEL */
                   2545:       case 0x2028:    /* LINE SEPARATOR */
                   2546:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2547:       break;
                   2548:       }
                   2549:     ecode++;
                   2550:     break;
                   2551: 
                   2552: #ifdef SUPPORT_UCP
                   2553:     /* Check the next character by Unicode property. We will get here only
                   2554:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2555: 
                   2556:     case OP_PROP:
                   2557:     case OP_NOTPROP:
                   2558:     if (eptr >= md->end_subject)
                   2559:       {
                   2560:       SCHECK_PARTIAL();
                   2561:       RRETURN(MATCH_NOMATCH);
                   2562:       }
                   2563:     GETCHARINCTEST(c, eptr);
                   2564:       {
                   2565:       const ucd_record *prop = GET_UCD(c);
                   2566: 
                   2567:       switch(ecode[1])
                   2568:         {
                   2569:         case PT_ANY:
                   2570:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   2571:         break;
                   2572: 
                   2573:         case PT_LAMP:
                   2574:         if ((prop->chartype == ucp_Lu ||
                   2575:              prop->chartype == ucp_Ll ||
                   2576:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
                   2577:           RRETURN(MATCH_NOMATCH);
                   2578:         break;
                   2579: 
                   2580:         case PT_GC:
1.1.1.2   misho    2581:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
1.1       misho    2582:           RRETURN(MATCH_NOMATCH);
                   2583:         break;
                   2584: 
                   2585:         case PT_PC:
                   2586:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
                   2587:           RRETURN(MATCH_NOMATCH);
                   2588:         break;
                   2589: 
                   2590:         case PT_SC:
                   2591:         if ((ecode[2] != prop->script) == (op == OP_PROP))
                   2592:           RRETURN(MATCH_NOMATCH);
                   2593:         break;
                   2594: 
                   2595:         /* These are specials */
                   2596: 
                   2597:         case PT_ALNUM:
1.1.1.2   misho    2598:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2599:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
1.1       misho    2600:           RRETURN(MATCH_NOMATCH);
                   2601:         break;
                   2602: 
                   2603:         case PT_SPACE:    /* Perl space */
1.1.1.2   misho    2604:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2605:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2606:                == (op == OP_NOTPROP))
                   2607:           RRETURN(MATCH_NOMATCH);
                   2608:         break;
                   2609: 
                   2610:         case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    2611:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2612:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2613:              c == CHAR_FF || c == CHAR_CR)
                   2614:                == (op == OP_NOTPROP))
                   2615:           RRETURN(MATCH_NOMATCH);
                   2616:         break;
                   2617: 
                   2618:         case PT_WORD:
1.1.1.2   misho    2619:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2620:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    2621:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
                   2622:           RRETURN(MATCH_NOMATCH);
                   2623:         break;
                   2624: 
                   2625:         /* This should never occur */
                   2626: 
                   2627:         default:
                   2628:         RRETURN(PCRE_ERROR_INTERNAL);
                   2629:         }
                   2630: 
                   2631:       ecode += 3;
                   2632:       }
                   2633:     break;
                   2634: 
                   2635:     /* Match an extended Unicode sequence. We will get here only if the support
                   2636:     is in the binary; otherwise a compile-time error occurs. */
                   2637: 
                   2638:     case OP_EXTUNI:
                   2639:     if (eptr >= md->end_subject)
                   2640:       {
                   2641:       SCHECK_PARTIAL();
                   2642:       RRETURN(MATCH_NOMATCH);
                   2643:       }
                   2644:     GETCHARINCTEST(c, eptr);
                   2645:     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   2646:     while (eptr < md->end_subject)
                   2647:       {
                   2648:       int len = 1;
1.1.1.2   misho    2649:       if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1       misho    2650:       if (UCD_CATEGORY(c) != ucp_M) break;
                   2651:       eptr += len;
                   2652:       }
1.1.1.3 ! misho    2653:     CHECK_PARTIAL();
1.1       misho    2654:     ecode++;
                   2655:     break;
                   2656: #endif
                   2657: 
                   2658: 
                   2659:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2660:     item to see if there is repeat information following. The code is similar
                   2661:     to that for character classes, but repeated for efficiency. Then obey
                   2662:     similar code to character type repeats - written out again for speed.
                   2663:     However, if the referenced string is the empty string, always treat
                   2664:     it as matched, any number of times (otherwise there could be infinite
                   2665:     loops). */
                   2666: 
                   2667:     case OP_REF:
                   2668:     case OP_REFI:
                   2669:     caseless = op == OP_REFI;
                   2670:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1.1.1.2   misho    2671:     ecode += 1 + IMM2_SIZE;
1.1       misho    2672: 
                   2673:     /* If the reference is unset, there are two possibilities:
                   2674: 
                   2675:     (a) In the default, Perl-compatible state, set the length negative;
                   2676:     this ensures that every attempt at a match fails. We can't just fail
                   2677:     here, because of the possibility of quantifiers with zero minima.
                   2678: 
                   2679:     (b) If the JavaScript compatibility flag is set, set the length to zero
                   2680:     so that the back reference matches an empty string.
                   2681: 
                   2682:     Otherwise, set the length to the length of what was matched by the
                   2683:     referenced subpattern. */
                   2684: 
                   2685:     if (offset >= offset_top || md->offset_vector[offset] < 0)
                   2686:       length = (md->jscript_compat)? 0 : -1;
                   2687:     else
                   2688:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2689: 
                   2690:     /* Set up for repetition, or handle the non-repeated case */
                   2691: 
                   2692:     switch (*ecode)
                   2693:       {
                   2694:       case OP_CRSTAR:
                   2695:       case OP_CRMINSTAR:
                   2696:       case OP_CRPLUS:
                   2697:       case OP_CRMINPLUS:
                   2698:       case OP_CRQUERY:
                   2699:       case OP_CRMINQUERY:
                   2700:       c = *ecode++ - OP_CRSTAR;
                   2701:       minimize = (c & 1) != 0;
                   2702:       min = rep_min[c];                 /* Pick up values from tables; */
                   2703:       max = rep_max[c];                 /* zero for max => infinity */
                   2704:       if (max == 0) max = INT_MAX;
                   2705:       break;
                   2706: 
                   2707:       case OP_CRRANGE:
                   2708:       case OP_CRMINRANGE:
                   2709:       minimize = (*ecode == OP_CRMINRANGE);
                   2710:       min = GET2(ecode, 1);
1.1.1.2   misho    2711:       max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2712:       if (max == 0) max = INT_MAX;
1.1.1.2   misho    2713:       ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    2714:       break;
                   2715: 
                   2716:       default:               /* No repeat follows */
                   2717:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2718:         {
1.1.1.3 ! misho    2719:         if (length == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2720:         CHECK_PARTIAL();
                   2721:         RRETURN(MATCH_NOMATCH);
                   2722:         }
                   2723:       eptr += length;
                   2724:       continue;              /* With the main loop */
                   2725:       }
                   2726: 
                   2727:     /* Handle repeated back references. If the length of the reference is
1.1.1.2   misho    2728:     zero, just continue with the main loop. If the length is negative, it
                   2729:     means the reference is unset in non-JavaScript-compatible mode. If the
                   2730:     minimum is zero, we can continue at the same level without recursion. For
                   2731:     any other minimum, carrying on will result in NOMATCH. */
1.1       misho    2732: 
                   2733:     if (length == 0) continue;
1.1.1.2   misho    2734:     if (length < 0 && min == 0) continue;
1.1       misho    2735: 
                   2736:     /* First, ensure the minimum number of matches are present. We get back
                   2737:     the length of the reference string explicitly rather than passing the
                   2738:     address of eptr, so that eptr can be a register variable. */
                   2739: 
                   2740:     for (i = 1; i <= min; i++)
                   2741:       {
                   2742:       int slength;
                   2743:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2744:         {
1.1.1.3 ! misho    2745:         if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2746:         CHECK_PARTIAL();
                   2747:         RRETURN(MATCH_NOMATCH);
                   2748:         }
                   2749:       eptr += slength;
                   2750:       }
                   2751: 
                   2752:     /* If min = max, continue at the same level without recursion.
                   2753:     They are not both allowed to be zero. */
                   2754: 
                   2755:     if (min == max) continue;
                   2756: 
                   2757:     /* If minimizing, keep trying and advancing the pointer */
                   2758: 
                   2759:     if (minimize)
                   2760:       {
                   2761:       for (fi = min;; fi++)
                   2762:         {
                   2763:         int slength;
                   2764:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
                   2765:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2766:         if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2767:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2768:           {
1.1.1.3 ! misho    2769:           if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2770:           CHECK_PARTIAL();
                   2771:           RRETURN(MATCH_NOMATCH);
                   2772:           }
                   2773:         eptr += slength;
                   2774:         }
                   2775:       /* Control never gets here */
                   2776:       }
                   2777: 
                   2778:     /* If maximizing, find the longest string and work backwards */
                   2779: 
                   2780:     else
                   2781:       {
                   2782:       pp = eptr;
                   2783:       for (i = min; i < max; i++)
                   2784:         {
                   2785:         int slength;
                   2786:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2787:           {
1.1.1.3 ! misho    2788:           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
        !          2789:           the soft partial matching case. */
        !          2790: 
        !          2791:           if (slength == -2 && md->partial != 0 &&
        !          2792:               md->end_subject > md->start_used_ptr)
        !          2793:             {
        !          2794:             md->hitend = TRUE;
        !          2795:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2796:             }
1.1       misho    2797:           break;
                   2798:           }
                   2799:         eptr += slength;
                   2800:         }
1.1.1.3 ! misho    2801: 
1.1       misho    2802:       while (eptr >= pp)
                   2803:         {
                   2804:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
                   2805:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2806:         eptr -= length;
                   2807:         }
                   2808:       RRETURN(MATCH_NOMATCH);
                   2809:       }
                   2810:     /* Control never gets here */
                   2811: 
                   2812:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2813:     used when all the characters in the class have values in the range 0-255,
                   2814:     and either the matching is caseful, or the characters are in the range
                   2815:     0-127 when UTF-8 processing is enabled. The only difference between
                   2816:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2817:     encountered.
                   2818: 
                   2819:     First, look past the end of the item to see if there is repeat information
                   2820:     following. Then obey similar code to character type repeats - written out
                   2821:     again for speed. */
                   2822: 
                   2823:     case OP_NCLASS:
                   2824:     case OP_CLASS:
                   2825:       {
1.1.1.2   misho    2826:       /* The data variable is saved across frames, so the byte map needs to
                   2827:       be stored there. */
                   2828: #define BYTE_MAP ((pcre_uint8 *)data)
1.1       misho    2829:       data = ecode + 1;                /* Save for matching */
1.1.1.2   misho    2830:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1       misho    2831: 
                   2832:       switch (*ecode)
                   2833:         {
                   2834:         case OP_CRSTAR:
                   2835:         case OP_CRMINSTAR:
                   2836:         case OP_CRPLUS:
                   2837:         case OP_CRMINPLUS:
                   2838:         case OP_CRQUERY:
                   2839:         case OP_CRMINQUERY:
                   2840:         c = *ecode++ - OP_CRSTAR;
                   2841:         minimize = (c & 1) != 0;
                   2842:         min = rep_min[c];                 /* Pick up values from tables; */
                   2843:         max = rep_max[c];                 /* zero for max => infinity */
                   2844:         if (max == 0) max = INT_MAX;
                   2845:         break;
                   2846: 
                   2847:         case OP_CRRANGE:
                   2848:         case OP_CRMINRANGE:
                   2849:         minimize = (*ecode == OP_CRMINRANGE);
                   2850:         min = GET2(ecode, 1);
1.1.1.2   misho    2851:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2852:         if (max == 0) max = INT_MAX;
1.1.1.2   misho    2853:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    2854:         break;
                   2855: 
                   2856:         default:               /* No repeat follows */
                   2857:         min = max = 1;
                   2858:         break;
                   2859:         }
                   2860: 
                   2861:       /* First, ensure the minimum number of matches are present. */
                   2862: 
1.1.1.2   misho    2863: #ifdef SUPPORT_UTF
                   2864:       if (utf)
1.1       misho    2865:         {
                   2866:         for (i = 1; i <= min; i++)
                   2867:           {
                   2868:           if (eptr >= md->end_subject)
                   2869:             {
                   2870:             SCHECK_PARTIAL();
                   2871:             RRETURN(MATCH_NOMATCH);
                   2872:             }
                   2873:           GETCHARINC(c, eptr);
                   2874:           if (c > 255)
                   2875:             {
                   2876:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2877:             }
                   2878:           else
1.1.1.2   misho    2879:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2880:           }
                   2881:         }
                   2882:       else
                   2883: #endif
1.1.1.2   misho    2884:       /* Not UTF mode */
1.1       misho    2885:         {
                   2886:         for (i = 1; i <= min; i++)
                   2887:           {
                   2888:           if (eptr >= md->end_subject)
                   2889:             {
                   2890:             SCHECK_PARTIAL();
                   2891:             RRETURN(MATCH_NOMATCH);
                   2892:             }
                   2893:           c = *eptr++;
1.1.1.2   misho    2894: #ifndef COMPILE_PCRE8
                   2895:           if (c > 255)
                   2896:             {
                   2897:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2898:             }
                   2899:           else
                   2900: #endif
                   2901:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2902:           }
                   2903:         }
                   2904: 
                   2905:       /* If max == min we can continue with the main loop without the
                   2906:       need to recurse. */
                   2907: 
                   2908:       if (min == max) continue;
                   2909: 
                   2910:       /* If minimizing, keep testing the rest of the expression and advancing
                   2911:       the pointer while it matches the class. */
                   2912: 
                   2913:       if (minimize)
                   2914:         {
1.1.1.2   misho    2915: #ifdef SUPPORT_UTF
                   2916:         if (utf)
1.1       misho    2917:           {
                   2918:           for (fi = min;; fi++)
                   2919:             {
                   2920:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
                   2921:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2922:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2923:             if (eptr >= md->end_subject)
                   2924:               {
                   2925:               SCHECK_PARTIAL();
                   2926:               RRETURN(MATCH_NOMATCH);
                   2927:               }
                   2928:             GETCHARINC(c, eptr);
                   2929:             if (c > 255)
                   2930:               {
                   2931:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2932:               }
                   2933:             else
1.1.1.2   misho    2934:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2935:             }
                   2936:           }
                   2937:         else
                   2938: #endif
1.1.1.2   misho    2939:         /* Not UTF mode */
1.1       misho    2940:           {
                   2941:           for (fi = min;; fi++)
                   2942:             {
                   2943:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
                   2944:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2945:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2946:             if (eptr >= md->end_subject)
                   2947:               {
                   2948:               SCHECK_PARTIAL();
                   2949:               RRETURN(MATCH_NOMATCH);
                   2950:               }
                   2951:             c = *eptr++;
1.1.1.2   misho    2952: #ifndef COMPILE_PCRE8
                   2953:             if (c > 255)
                   2954:               {
                   2955:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2956:               }
                   2957:             else
                   2958: #endif
                   2959:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2960:             }
                   2961:           }
                   2962:         /* Control never gets here */
                   2963:         }
                   2964: 
                   2965:       /* If maximizing, find the longest possible run, then work backwards. */
                   2966: 
                   2967:       else
                   2968:         {
                   2969:         pp = eptr;
                   2970: 
1.1.1.2   misho    2971: #ifdef SUPPORT_UTF
                   2972:         if (utf)
1.1       misho    2973:           {
                   2974:           for (i = min; i < max; i++)
                   2975:             {
                   2976:             int len = 1;
                   2977:             if (eptr >= md->end_subject)
                   2978:               {
                   2979:               SCHECK_PARTIAL();
                   2980:               break;
                   2981:               }
                   2982:             GETCHARLEN(c, eptr, len);
                   2983:             if (c > 255)
                   2984:               {
                   2985:               if (op == OP_CLASS) break;
                   2986:               }
                   2987:             else
1.1.1.2   misho    2988:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    2989:             eptr += len;
                   2990:             }
                   2991:           for (;;)
                   2992:             {
                   2993:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
                   2994:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2995:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2996:             BACKCHAR(eptr);
                   2997:             }
                   2998:           }
                   2999:         else
                   3000: #endif
1.1.1.2   misho    3001:           /* Not UTF mode */
1.1       misho    3002:           {
                   3003:           for (i = min; i < max; i++)
                   3004:             {
                   3005:             if (eptr >= md->end_subject)
                   3006:               {
                   3007:               SCHECK_PARTIAL();
                   3008:               break;
                   3009:               }
                   3010:             c = *eptr;
1.1.1.2   misho    3011: #ifndef COMPILE_PCRE8
                   3012:             if (c > 255)
                   3013:               {
                   3014:               if (op == OP_CLASS) break;
                   3015:               }
                   3016:             else
                   3017: #endif
                   3018:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    3019:             eptr++;
                   3020:             }
                   3021:           while (eptr >= pp)
                   3022:             {
                   3023:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
                   3024:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3025:             eptr--;
                   3026:             }
                   3027:           }
                   3028: 
                   3029:         RRETURN(MATCH_NOMATCH);
                   3030:         }
1.1.1.2   misho    3031: #undef BYTE_MAP
1.1       misho    3032:       }
                   3033:     /* Control never gets here */
                   3034: 
                   3035: 
                   3036:     /* Match an extended character class. This opcode is encountered only
                   3037:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   3038:     mode, because Unicode properties are supported in non-UTF-8 mode. */
                   3039: 
1.1.1.2   misho    3040: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1       misho    3041:     case OP_XCLASS:
                   3042:       {
                   3043:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   3044:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   3045: 
                   3046:       switch (*ecode)
                   3047:         {
                   3048:         case OP_CRSTAR:
                   3049:         case OP_CRMINSTAR:
                   3050:         case OP_CRPLUS:
                   3051:         case OP_CRMINPLUS:
                   3052:         case OP_CRQUERY:
                   3053:         case OP_CRMINQUERY:
                   3054:         c = *ecode++ - OP_CRSTAR;
                   3055:         minimize = (c & 1) != 0;
                   3056:         min = rep_min[c];                 /* Pick up values from tables; */
                   3057:         max = rep_max[c];                 /* zero for max => infinity */
                   3058:         if (max == 0) max = INT_MAX;
                   3059:         break;
                   3060: 
                   3061:         case OP_CRRANGE:
                   3062:         case OP_CRMINRANGE:
                   3063:         minimize = (*ecode == OP_CRMINRANGE);
                   3064:         min = GET2(ecode, 1);
1.1.1.2   misho    3065:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    3066:         if (max == 0) max = INT_MAX;
1.1.1.2   misho    3067:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    3068:         break;
                   3069: 
                   3070:         default:               /* No repeat follows */
                   3071:         min = max = 1;
                   3072:         break;
                   3073:         }
                   3074: 
                   3075:       /* First, ensure the minimum number of matches are present. */
                   3076: 
                   3077:       for (i = 1; i <= min; i++)
                   3078:         {
                   3079:         if (eptr >= md->end_subject)
                   3080:           {
                   3081:           SCHECK_PARTIAL();
                   3082:           RRETURN(MATCH_NOMATCH);
                   3083:           }
                   3084:         GETCHARINCTEST(c, eptr);
1.1.1.2   misho    3085:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3086:         }
                   3087: 
                   3088:       /* If max == min we can continue with the main loop without the
                   3089:       need to recurse. */
                   3090: 
                   3091:       if (min == max) continue;
                   3092: 
                   3093:       /* If minimizing, keep testing the rest of the expression and advancing
                   3094:       the pointer while it matches the class. */
                   3095: 
                   3096:       if (minimize)
                   3097:         {
                   3098:         for (fi = min;; fi++)
                   3099:           {
                   3100:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
                   3101:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3102:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3103:           if (eptr >= md->end_subject)
                   3104:             {
                   3105:             SCHECK_PARTIAL();
                   3106:             RRETURN(MATCH_NOMATCH);
                   3107:             }
                   3108:           GETCHARINCTEST(c, eptr);
1.1.1.2   misho    3109:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3110:           }
                   3111:         /* Control never gets here */
                   3112:         }
                   3113: 
                   3114:       /* If maximizing, find the longest possible run, then work backwards. */
                   3115: 
                   3116:       else
                   3117:         {
                   3118:         pp = eptr;
                   3119:         for (i = min; i < max; i++)
                   3120:           {
                   3121:           int len = 1;
                   3122:           if (eptr >= md->end_subject)
                   3123:             {
                   3124:             SCHECK_PARTIAL();
                   3125:             break;
                   3126:             }
1.1.1.2   misho    3127: #ifdef SUPPORT_UTF
1.1       misho    3128:           GETCHARLENTEST(c, eptr, len);
1.1.1.2   misho    3129: #else
                   3130:           c = *eptr;
                   3131: #endif
                   3132:           if (!PRIV(xclass)(c, data, utf)) break;
1.1       misho    3133:           eptr += len;
                   3134:           }
                   3135:         for(;;)
                   3136:           {
                   3137:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
                   3138:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3139:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.1.1.2   misho    3140: #ifdef SUPPORT_UTF
                   3141:           if (utf) BACKCHAR(eptr);
                   3142: #endif
1.1       misho    3143:           }
                   3144:         RRETURN(MATCH_NOMATCH);
                   3145:         }
                   3146: 
                   3147:       /* Control never gets here */
                   3148:       }
                   3149: #endif    /* End of XCLASS */
                   3150: 
                   3151:     /* Match a single character, casefully */
                   3152: 
                   3153:     case OP_CHAR:
1.1.1.2   misho    3154: #ifdef SUPPORT_UTF
                   3155:     if (utf)
1.1       misho    3156:       {
                   3157:       length = 1;
                   3158:       ecode++;
                   3159:       GETCHARLEN(fc, ecode, length);
                   3160:       if (length > md->end_subject - eptr)
                   3161:         {
                   3162:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   3163:         RRETURN(MATCH_NOMATCH);
                   3164:         }
                   3165:       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
                   3166:       }
                   3167:     else
                   3168: #endif
1.1.1.2   misho    3169:     /* Not UTF mode */
1.1       misho    3170:       {
                   3171:       if (md->end_subject - eptr < 1)
                   3172:         {
                   3173:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   3174:         RRETURN(MATCH_NOMATCH);
                   3175:         }
                   3176:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
                   3177:       ecode += 2;
                   3178:       }
                   3179:     break;
                   3180: 
                   3181:     /* Match a single character, caselessly. If we are at the end of the
                   3182:     subject, give up immediately. */
                   3183: 
                   3184:     case OP_CHARI:
                   3185:     if (eptr >= md->end_subject)
                   3186:       {
                   3187:       SCHECK_PARTIAL();
                   3188:       RRETURN(MATCH_NOMATCH);
                   3189:       }
                   3190: 
1.1.1.2   misho    3191: #ifdef SUPPORT_UTF
                   3192:     if (utf)
1.1       misho    3193:       {
                   3194:       length = 1;
                   3195:       ecode++;
                   3196:       GETCHARLEN(fc, ecode, length);
                   3197: 
                   3198:       /* If the pattern character's value is < 128, we have only one byte, and
                   3199:       we know that its other case must also be one byte long, so we can use the
                   3200:       fast lookup table. We know that there is at least one byte left in the
                   3201:       subject. */
                   3202: 
                   3203:       if (fc < 128)
                   3204:         {
1.1.1.2   misho    3205:         if (md->lcc[fc]
                   3206:             != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
                   3207:         ecode++;
                   3208:         eptr++;
1.1       misho    3209:         }
                   3210: 
                   3211:       /* Otherwise we must pick up the subject character. Note that we cannot
                   3212:       use the value of "length" to check for sufficient bytes left, because the
                   3213:       other case of the character may have more or fewer bytes.  */
                   3214: 
                   3215:       else
                   3216:         {
                   3217:         unsigned int dc;
                   3218:         GETCHARINC(dc, eptr);
                   3219:         ecode += length;
                   3220: 
                   3221:         /* If we have Unicode property support, we can use it to test the other
                   3222:         case of the character, if there is one. */
                   3223: 
                   3224:         if (fc != dc)
                   3225:           {
                   3226: #ifdef SUPPORT_UCP
                   3227:           if (dc != UCD_OTHERCASE(fc))
                   3228: #endif
                   3229:             RRETURN(MATCH_NOMATCH);
                   3230:           }
                   3231:         }
                   3232:       }
                   3233:     else
1.1.1.2   misho    3234: #endif   /* SUPPORT_UTF */
1.1       misho    3235: 
1.1.1.2   misho    3236:     /* Not UTF mode */
1.1       misho    3237:       {
1.1.1.2   misho    3238:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
                   3239:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
                   3240:       eptr++;
1.1       misho    3241:       ecode += 2;
                   3242:       }
                   3243:     break;
                   3244: 
                   3245:     /* Match a single character repeatedly. */
                   3246: 
                   3247:     case OP_EXACT:
                   3248:     case OP_EXACTI:
                   3249:     min = max = GET2(ecode, 1);
1.1.1.2   misho    3250:     ecode += 1 + IMM2_SIZE;
1.1       misho    3251:     goto REPEATCHAR;
                   3252: 
                   3253:     case OP_POSUPTO:
                   3254:     case OP_POSUPTOI:
                   3255:     possessive = TRUE;
                   3256:     /* Fall through */
                   3257: 
                   3258:     case OP_UPTO:
                   3259:     case OP_UPTOI:
                   3260:     case OP_MINUPTO:
                   3261:     case OP_MINUPTOI:
                   3262:     min = 0;
                   3263:     max = GET2(ecode, 1);
                   3264:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
1.1.1.2   misho    3265:     ecode += 1 + IMM2_SIZE;
1.1       misho    3266:     goto REPEATCHAR;
                   3267: 
                   3268:     case OP_POSSTAR:
                   3269:     case OP_POSSTARI:
                   3270:     possessive = TRUE;
                   3271:     min = 0;
                   3272:     max = INT_MAX;
                   3273:     ecode++;
                   3274:     goto REPEATCHAR;
                   3275: 
                   3276:     case OP_POSPLUS:
                   3277:     case OP_POSPLUSI:
                   3278:     possessive = TRUE;
                   3279:     min = 1;
                   3280:     max = INT_MAX;
                   3281:     ecode++;
                   3282:     goto REPEATCHAR;
                   3283: 
                   3284:     case OP_POSQUERY:
                   3285:     case OP_POSQUERYI:
                   3286:     possessive = TRUE;
                   3287:     min = 0;
                   3288:     max = 1;
                   3289:     ecode++;
                   3290:     goto REPEATCHAR;
                   3291: 
                   3292:     case OP_STAR:
                   3293:     case OP_STARI:
                   3294:     case OP_MINSTAR:
                   3295:     case OP_MINSTARI:
                   3296:     case OP_PLUS:
                   3297:     case OP_PLUSI:
                   3298:     case OP_MINPLUS:
                   3299:     case OP_MINPLUSI:
                   3300:     case OP_QUERY:
                   3301:     case OP_QUERYI:
                   3302:     case OP_MINQUERY:
                   3303:     case OP_MINQUERYI:
                   3304:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
                   3305:     minimize = (c & 1) != 0;
                   3306:     min = rep_min[c];                 /* Pick up values from tables; */
                   3307:     max = rep_max[c];                 /* zero for max => infinity */
                   3308:     if (max == 0) max = INT_MAX;
                   3309: 
                   3310:     /* Common code for all repeated single-character matches. */
                   3311: 
                   3312:     REPEATCHAR:
1.1.1.2   misho    3313: #ifdef SUPPORT_UTF
                   3314:     if (utf)
1.1       misho    3315:       {
                   3316:       length = 1;
                   3317:       charptr = ecode;
                   3318:       GETCHARLEN(fc, ecode, length);
                   3319:       ecode += length;
                   3320: 
                   3321:       /* Handle multibyte character matching specially here. There is
                   3322:       support for caseless matching if UCP support is present. */
                   3323: 
                   3324:       if (length > 1)
                   3325:         {
                   3326: #ifdef SUPPORT_UCP
                   3327:         unsigned int othercase;
                   3328:         if (op >= OP_STARI &&     /* Caseless */
                   3329:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1.1.2   misho    3330:           oclength = PRIV(ord2utf)(othercase, occhars);
1.1       misho    3331:         else oclength = 0;
                   3332: #endif  /* SUPPORT_UCP */
                   3333: 
                   3334:         for (i = 1; i <= min; i++)
                   3335:           {
                   3336:           if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3337:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3338: #ifdef SUPPORT_UCP
                   3339:           else if (oclength > 0 &&
                   3340:                    eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3341:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3342: #endif  /* SUPPORT_UCP */
                   3343:           else
                   3344:             {
                   3345:             CHECK_PARTIAL();
                   3346:             RRETURN(MATCH_NOMATCH);
                   3347:             }
                   3348:           }
                   3349: 
                   3350:         if (min == max) continue;
                   3351: 
                   3352:         if (minimize)
                   3353:           {
                   3354:           for (fi = min;; fi++)
                   3355:             {
                   3356:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
                   3357:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3358:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3359:             if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3360:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3361: #ifdef SUPPORT_UCP
                   3362:             else if (oclength > 0 &&
                   3363:                      eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3364:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3365: #endif  /* SUPPORT_UCP */
                   3366:             else
                   3367:               {
                   3368:               CHECK_PARTIAL();
                   3369:               RRETURN(MATCH_NOMATCH);
                   3370:               }
                   3371:             }
                   3372:           /* Control never gets here */
                   3373:           }
                   3374: 
                   3375:         else  /* Maximize */
                   3376:           {
                   3377:           pp = eptr;
                   3378:           for (i = min; i < max; i++)
                   3379:             {
                   3380:             if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3381:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3382: #ifdef SUPPORT_UCP
                   3383:             else if (oclength > 0 &&
                   3384:                      eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3385:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3386: #endif  /* SUPPORT_UCP */
                   3387:             else
                   3388:               {
                   3389:               CHECK_PARTIAL();
                   3390:               break;
                   3391:               }
                   3392:             }
                   3393: 
                   3394:           if (possessive) continue;
                   3395: 
                   3396:           for(;;)
                   3397:             {
                   3398:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
                   3399:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3400:             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
                   3401: #ifdef SUPPORT_UCP
                   3402:             eptr--;
                   3403:             BACKCHAR(eptr);
                   3404: #else   /* without SUPPORT_UCP */
                   3405:             eptr -= length;
                   3406: #endif  /* SUPPORT_UCP */
                   3407:             }
                   3408:           }
                   3409:         /* Control never gets here */
                   3410:         }
                   3411: 
                   3412:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3413:       obey the code as for non-UTF-8 characters below, though in this case the
                   3414:       value of fc will always be < 128. */
                   3415:       }
                   3416:     else
1.1.1.2   misho    3417: #endif  /* SUPPORT_UTF */
                   3418:       /* When not in UTF-8 mode, load a single-byte character. */
                   3419:       fc = *ecode++;
1.1       misho    3420: 
1.1.1.2   misho    3421:     /* The value of fc at this point is always one character, though we may
                   3422:     or may not be in UTF mode. The code is duplicated for the caseless and
1.1       misho    3423:     caseful cases, for speed, since matching characters is likely to be quite
                   3424:     common. First, ensure the minimum number of matches are present. If min =
                   3425:     max, continue at the same level without recursing. Otherwise, if
                   3426:     minimizing, keep trying the rest of the expression and advancing one
                   3427:     matching character if failing, up to the maximum. Alternatively, if
                   3428:     maximizing, find the maximum number of characters and work backwards. */
                   3429: 
                   3430:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.3 ! misho    3431:       max, (char *)eptr));
1.1       misho    3432: 
                   3433:     if (op >= OP_STARI)  /* Caseless */
                   3434:       {
1.1.1.2   misho    3435: #ifdef COMPILE_PCRE8
                   3436:       /* fc must be < 128 if UTF is enabled. */
                   3437:       foc = md->fcc[fc];
                   3438: #else
                   3439: #ifdef SUPPORT_UTF
                   3440: #ifdef SUPPORT_UCP
                   3441:       if (utf && fc > 127)
                   3442:         foc = UCD_OTHERCASE(fc);
                   3443: #else
                   3444:       if (utf && fc > 127)
                   3445:         foc = fc;
                   3446: #endif /* SUPPORT_UCP */
                   3447:       else
                   3448: #endif /* SUPPORT_UTF */
                   3449:         foc = TABLE_GET(fc, md->fcc, fc);
                   3450: #endif /* COMPILE_PCRE8 */
                   3451: 
1.1       misho    3452:       for (i = 1; i <= min; i++)
                   3453:         {
                   3454:         if (eptr >= md->end_subject)
                   3455:           {
                   3456:           SCHECK_PARTIAL();
                   3457:           RRETURN(MATCH_NOMATCH);
                   3458:           }
1.1.1.2   misho    3459:         if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
                   3460:         eptr++;
1.1       misho    3461:         }
                   3462:       if (min == max) continue;
                   3463:       if (minimize)
                   3464:         {
                   3465:         for (fi = min;; fi++)
                   3466:           {
                   3467:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
                   3468:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3469:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3470:           if (eptr >= md->end_subject)
                   3471:             {
                   3472:             SCHECK_PARTIAL();
                   3473:             RRETURN(MATCH_NOMATCH);
                   3474:             }
1.1.1.2   misho    3475:           if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
                   3476:           eptr++;
1.1       misho    3477:           }
                   3478:         /* Control never gets here */
                   3479:         }
                   3480:       else  /* Maximize */
                   3481:         {
                   3482:         pp = eptr;
                   3483:         for (i = min; i < max; i++)
                   3484:           {
                   3485:           if (eptr >= md->end_subject)
                   3486:             {
                   3487:             SCHECK_PARTIAL();
                   3488:             break;
                   3489:             }
1.1.1.2   misho    3490:           if (fc != *eptr && foc != *eptr) break;
1.1       misho    3491:           eptr++;
                   3492:           }
                   3493: 
                   3494:         if (possessive) continue;
                   3495: 
                   3496:         while (eptr >= pp)
                   3497:           {
                   3498:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
                   3499:           eptr--;
                   3500:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3501:           }
                   3502:         RRETURN(MATCH_NOMATCH);
                   3503:         }
                   3504:       /* Control never gets here */
                   3505:       }
                   3506: 
                   3507:     /* Caseful comparisons (includes all multi-byte characters) */
                   3508: 
                   3509:     else
                   3510:       {
                   3511:       for (i = 1; i <= min; i++)
                   3512:         {
                   3513:         if (eptr >= md->end_subject)
                   3514:           {
                   3515:           SCHECK_PARTIAL();
                   3516:           RRETURN(MATCH_NOMATCH);
                   3517:           }
                   3518:         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   3519:         }
                   3520: 
                   3521:       if (min == max) continue;
                   3522: 
                   3523:       if (minimize)
                   3524:         {
                   3525:         for (fi = min;; fi++)
                   3526:           {
                   3527:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
                   3528:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3529:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3530:           if (eptr >= md->end_subject)
                   3531:             {
                   3532:             SCHECK_PARTIAL();
                   3533:             RRETURN(MATCH_NOMATCH);
                   3534:             }
                   3535:           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   3536:           }
                   3537:         /* Control never gets here */
                   3538:         }
                   3539:       else  /* Maximize */
                   3540:         {
                   3541:         pp = eptr;
                   3542:         for (i = min; i < max; i++)
                   3543:           {
                   3544:           if (eptr >= md->end_subject)
                   3545:             {
                   3546:             SCHECK_PARTIAL();
                   3547:             break;
                   3548:             }
                   3549:           if (fc != *eptr) break;
                   3550:           eptr++;
                   3551:           }
                   3552:         if (possessive) continue;
                   3553: 
                   3554:         while (eptr >= pp)
                   3555:           {
                   3556:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
                   3557:           eptr--;
                   3558:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3559:           }
                   3560:         RRETURN(MATCH_NOMATCH);
                   3561:         }
                   3562:       }
                   3563:     /* Control never gets here */
                   3564: 
                   3565:     /* Match a negated single character. In UTF mode both the pattern
                   3566:     character and the subject character may be multibyte. */
                   3567: 
                   3568:     case OP_NOT:
                   3569:     case OP_NOTI:
                   3570:     if (eptr >= md->end_subject)
                   3571:       {
                   3572:       SCHECK_PARTIAL();
                   3573:       RRETURN(MATCH_NOMATCH);
                   3574:       }
1.1.1.3 ! misho    3575: #ifdef SUPPORT_UTF
        !          3576:     if (utf)
1.1       misho    3577:       {
1.1.1.2   misho    3578:       register unsigned int ch, och;
1.1.1.3 ! misho    3579: 
        !          3580:       ecode++;
        !          3581:       GETCHARINC(ch, ecode);
        !          3582:       GETCHARINC(c, eptr);
        !          3583: 
        !          3584:       if (op == OP_NOT)
        !          3585:         {
        !          3586:         if (ch == c) RRETURN(MATCH_NOMATCH);
        !          3587:         }
        !          3588:       else
        !          3589:         {
1.1.1.2   misho    3590: #ifdef SUPPORT_UCP
1.1.1.3 ! misho    3591:         if (ch > 127)
        !          3592:           och = UCD_OTHERCASE(ch);
1.1.1.2   misho    3593: #else
1.1.1.3 ! misho    3594:         if (ch > 127)
        !          3595:           och = ch;
1.1.1.2   misho    3596: #endif /* SUPPORT_UCP */
1.1.1.3 ! misho    3597:         else
        !          3598:           och = TABLE_GET(ch, md->fcc, ch);
        !          3599:         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
        !          3600:         }
1.1       misho    3601:       }
1.1.1.3 ! misho    3602:     else
        !          3603: #endif
1.1       misho    3604:       {
1.1.1.3 ! misho    3605:       register unsigned int ch = ecode[1];
        !          3606:       c = *eptr++;
        !          3607:       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
        !          3608:         RRETURN(MATCH_NOMATCH);
        !          3609:       ecode += 2;
1.1       misho    3610:       }
                   3611:     break;
                   3612: 
                   3613:     /* Match a negated single character repeatedly. This is almost a
                   3614:     repeat of the code for a repeated single character, but I haven't found a
                   3615:     nice way of commoning these up that doesn't require a test of the
                   3616:     positive/negative option for each character match. Maybe that wouldn't add
                   3617:     very much to the time taken, but character matching *is* what this is all
                   3618:     about... */
                   3619: 
                   3620:     case OP_NOTEXACT:
                   3621:     case OP_NOTEXACTI:
                   3622:     min = max = GET2(ecode, 1);
1.1.1.2   misho    3623:     ecode += 1 + IMM2_SIZE;
1.1       misho    3624:     goto REPEATNOTCHAR;
                   3625: 
                   3626:     case OP_NOTUPTO:
                   3627:     case OP_NOTUPTOI:
                   3628:     case OP_NOTMINUPTO:
                   3629:     case OP_NOTMINUPTOI:
                   3630:     min = 0;
                   3631:     max = GET2(ecode, 1);
                   3632:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
1.1.1.2   misho    3633:     ecode += 1 + IMM2_SIZE;
1.1       misho    3634:     goto REPEATNOTCHAR;
                   3635: 
                   3636:     case OP_NOTPOSSTAR:
                   3637:     case OP_NOTPOSSTARI:
                   3638:     possessive = TRUE;
                   3639:     min = 0;
                   3640:     max = INT_MAX;
                   3641:     ecode++;
                   3642:     goto REPEATNOTCHAR;
                   3643: 
                   3644:     case OP_NOTPOSPLUS:
                   3645:     case OP_NOTPOSPLUSI:
                   3646:     possessive = TRUE;
                   3647:     min = 1;
                   3648:     max = INT_MAX;
                   3649:     ecode++;
                   3650:     goto REPEATNOTCHAR;
                   3651: 
                   3652:     case OP_NOTPOSQUERY:
                   3653:     case OP_NOTPOSQUERYI:
                   3654:     possessive = TRUE;
                   3655:     min = 0;
                   3656:     max = 1;
                   3657:     ecode++;
                   3658:     goto REPEATNOTCHAR;
                   3659: 
                   3660:     case OP_NOTPOSUPTO:
                   3661:     case OP_NOTPOSUPTOI:
                   3662:     possessive = TRUE;
                   3663:     min = 0;
                   3664:     max = GET2(ecode, 1);
1.1.1.2   misho    3665:     ecode += 1 + IMM2_SIZE;
1.1       misho    3666:     goto REPEATNOTCHAR;
                   3667: 
                   3668:     case OP_NOTSTAR:
                   3669:     case OP_NOTSTARI:
                   3670:     case OP_NOTMINSTAR:
                   3671:     case OP_NOTMINSTARI:
                   3672:     case OP_NOTPLUS:
                   3673:     case OP_NOTPLUSI:
                   3674:     case OP_NOTMINPLUS:
                   3675:     case OP_NOTMINPLUSI:
                   3676:     case OP_NOTQUERY:
                   3677:     case OP_NOTQUERYI:
                   3678:     case OP_NOTMINQUERY:
                   3679:     case OP_NOTMINQUERYI:
                   3680:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
                   3681:     minimize = (c & 1) != 0;
                   3682:     min = rep_min[c];                 /* Pick up values from tables; */
                   3683:     max = rep_max[c];                 /* zero for max => infinity */
                   3684:     if (max == 0) max = INT_MAX;
                   3685: 
                   3686:     /* Common code for all repeated negated single-character matches. */
                   3687: 
                   3688:     REPEATNOTCHAR:
1.1.1.3 ! misho    3689:     GETCHARINCTEST(fc, ecode);
1.1       misho    3690: 
                   3691:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3692:     since matching characters is likely to be quite common. First, ensure the
                   3693:     minimum number of matches are present. If min = max, continue at the same
                   3694:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3695:     the expression and advancing one matching character if failing, up to the
                   3696:     maximum. Alternatively, if maximizing, find the maximum number of
                   3697:     characters and work backwards. */
                   3698: 
                   3699:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.3 ! misho    3700:       max, (char *)eptr));
1.1       misho    3701: 
                   3702:     if (op >= OP_NOTSTARI)     /* Caseless */
                   3703:       {
1.1.1.2   misho    3704: #ifdef SUPPORT_UTF
                   3705: #ifdef SUPPORT_UCP
                   3706:       if (utf && fc > 127)
                   3707:         foc = UCD_OTHERCASE(fc);
                   3708: #else
                   3709:       if (utf && fc > 127)
                   3710:         foc = fc;
                   3711: #endif /* SUPPORT_UCP */
                   3712:       else
                   3713: #endif /* SUPPORT_UTF */
                   3714:         foc = TABLE_GET(fc, md->fcc, fc);
1.1       misho    3715: 
1.1.1.2   misho    3716: #ifdef SUPPORT_UTF
                   3717:       if (utf)
1.1       misho    3718:         {
                   3719:         register unsigned int d;
                   3720:         for (i = 1; i <= min; i++)
                   3721:           {
                   3722:           if (eptr >= md->end_subject)
                   3723:             {
                   3724:             SCHECK_PARTIAL();
                   3725:             RRETURN(MATCH_NOMATCH);
                   3726:             }
                   3727:           GETCHARINC(d, eptr);
1.1.1.3 ! misho    3728:           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3729:           }
                   3730:         }
                   3731:       else
                   3732: #endif
1.1.1.2   misho    3733:       /* Not UTF mode */
1.1       misho    3734:         {
                   3735:         for (i = 1; i <= min; i++)
                   3736:           {
                   3737:           if (eptr >= md->end_subject)
                   3738:             {
                   3739:             SCHECK_PARTIAL();
                   3740:             RRETURN(MATCH_NOMATCH);
                   3741:             }
1.1.1.2   misho    3742:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3743:           eptr++;
1.1       misho    3744:           }
                   3745:         }
                   3746: 
                   3747:       if (min == max) continue;
                   3748: 
                   3749:       if (minimize)
                   3750:         {
1.1.1.2   misho    3751: #ifdef SUPPORT_UTF
                   3752:         if (utf)
1.1       misho    3753:           {
                   3754:           register unsigned int d;
                   3755:           for (fi = min;; fi++)
                   3756:             {
                   3757:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
                   3758:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3759:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3760:             if (eptr >= md->end_subject)
                   3761:               {
                   3762:               SCHECK_PARTIAL();
                   3763:               RRETURN(MATCH_NOMATCH);
                   3764:               }
                   3765:             GETCHARINC(d, eptr);
1.1.1.2   misho    3766:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3767:             }
                   3768:           }
                   3769:         else
                   3770: #endif
1.1.1.2   misho    3771:         /* Not UTF mode */
1.1       misho    3772:           {
                   3773:           for (fi = min;; fi++)
                   3774:             {
                   3775:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
                   3776:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3777:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3778:             if (eptr >= md->end_subject)
                   3779:               {
                   3780:               SCHECK_PARTIAL();
                   3781:               RRETURN(MATCH_NOMATCH);
                   3782:               }
1.1.1.2   misho    3783:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3784:             eptr++;
1.1       misho    3785:             }
                   3786:           }
                   3787:         /* Control never gets here */
                   3788:         }
                   3789: 
                   3790:       /* Maximize case */
                   3791: 
                   3792:       else
                   3793:         {
                   3794:         pp = eptr;
                   3795: 
1.1.1.2   misho    3796: #ifdef SUPPORT_UTF
                   3797:         if (utf)
1.1       misho    3798:           {
                   3799:           register unsigned int d;
                   3800:           for (i = min; i < max; i++)
                   3801:             {
                   3802:             int len = 1;
                   3803:             if (eptr >= md->end_subject)
                   3804:               {
                   3805:               SCHECK_PARTIAL();
                   3806:               break;
                   3807:               }
                   3808:             GETCHARLEN(d, eptr, len);
1.1.1.2   misho    3809:             if (fc == d || (unsigned int)foc == d) break;
1.1       misho    3810:             eptr += len;
                   3811:             }
1.1.1.2   misho    3812:           if (possessive) continue;
                   3813:           for(;;)
1.1       misho    3814:             {
                   3815:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
                   3816:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3817:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3818:             BACKCHAR(eptr);
                   3819:             }
                   3820:           }
                   3821:         else
                   3822: #endif
1.1.1.2   misho    3823:         /* Not UTF mode */
1.1       misho    3824:           {
                   3825:           for (i = min; i < max; i++)
                   3826:             {
                   3827:             if (eptr >= md->end_subject)
                   3828:               {
                   3829:               SCHECK_PARTIAL();
                   3830:               break;
                   3831:               }
1.1.1.2   misho    3832:             if (fc == *eptr || foc == *eptr) break;
1.1       misho    3833:             eptr++;
                   3834:             }
                   3835:           if (possessive) continue;
                   3836:           while (eptr >= pp)
                   3837:             {
                   3838:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
                   3839:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3840:             eptr--;
                   3841:             }
                   3842:           }
                   3843: 
                   3844:         RRETURN(MATCH_NOMATCH);
                   3845:         }
                   3846:       /* Control never gets here */
                   3847:       }
                   3848: 
                   3849:     /* Caseful comparisons */
                   3850: 
                   3851:     else
                   3852:       {
1.1.1.2   misho    3853: #ifdef SUPPORT_UTF
                   3854:       if (utf)
1.1       misho    3855:         {
                   3856:         register unsigned int d;
                   3857:         for (i = 1; i <= min; i++)
                   3858:           {
                   3859:           if (eptr >= md->end_subject)
                   3860:             {
                   3861:             SCHECK_PARTIAL();
                   3862:             RRETURN(MATCH_NOMATCH);
                   3863:             }
                   3864:           GETCHARINC(d, eptr);
                   3865:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   3866:           }
                   3867:         }
                   3868:       else
                   3869: #endif
1.1.1.2   misho    3870:       /* Not UTF mode */
1.1       misho    3871:         {
                   3872:         for (i = 1; i <= min; i++)
                   3873:           {
                   3874:           if (eptr >= md->end_subject)
                   3875:             {
                   3876:             SCHECK_PARTIAL();
                   3877:             RRETURN(MATCH_NOMATCH);
                   3878:             }
                   3879:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   3880:           }
                   3881:         }
                   3882: 
                   3883:       if (min == max) continue;
                   3884: 
                   3885:       if (minimize)
                   3886:         {
1.1.1.2   misho    3887: #ifdef SUPPORT_UTF
                   3888:         if (utf)
1.1       misho    3889:           {
                   3890:           register unsigned int d;
                   3891:           for (fi = min;; fi++)
                   3892:             {
                   3893:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
                   3894:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3895:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3896:             if (eptr >= md->end_subject)
                   3897:               {
                   3898:               SCHECK_PARTIAL();
                   3899:               RRETURN(MATCH_NOMATCH);
                   3900:               }
                   3901:             GETCHARINC(d, eptr);
                   3902:             if (fc == d) RRETURN(MATCH_NOMATCH);
                   3903:             }
                   3904:           }
                   3905:         else
                   3906: #endif
1.1.1.2   misho    3907:         /* Not UTF mode */
1.1       misho    3908:           {
                   3909:           for (fi = min;; fi++)
                   3910:             {
                   3911:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
                   3912:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3913:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3914:             if (eptr >= md->end_subject)
                   3915:               {
                   3916:               SCHECK_PARTIAL();
                   3917:               RRETURN(MATCH_NOMATCH);
                   3918:               }
                   3919:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   3920:             }
                   3921:           }
                   3922:         /* Control never gets here */
                   3923:         }
                   3924: 
                   3925:       /* Maximize case */
                   3926: 
                   3927:       else
                   3928:         {
                   3929:         pp = eptr;
                   3930: 
1.1.1.2   misho    3931: #ifdef SUPPORT_UTF
                   3932:         if (utf)
1.1       misho    3933:           {
                   3934:           register unsigned int d;
                   3935:           for (i = min; i < max; i++)
                   3936:             {
                   3937:             int len = 1;
                   3938:             if (eptr >= md->end_subject)
                   3939:               {
                   3940:               SCHECK_PARTIAL();
                   3941:               break;
                   3942:               }
                   3943:             GETCHARLEN(d, eptr, len);
                   3944:             if (fc == d) break;
                   3945:             eptr += len;
                   3946:             }
                   3947:           if (possessive) continue;
                   3948:           for(;;)
                   3949:             {
                   3950:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
                   3951:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3952:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3953:             BACKCHAR(eptr);
                   3954:             }
                   3955:           }
                   3956:         else
                   3957: #endif
1.1.1.2   misho    3958:         /* Not UTF mode */
1.1       misho    3959:           {
                   3960:           for (i = min; i < max; i++)
                   3961:             {
                   3962:             if (eptr >= md->end_subject)
                   3963:               {
                   3964:               SCHECK_PARTIAL();
                   3965:               break;
                   3966:               }
                   3967:             if (fc == *eptr) break;
                   3968:             eptr++;
                   3969:             }
                   3970:           if (possessive) continue;
                   3971:           while (eptr >= pp)
                   3972:             {
                   3973:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
                   3974:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3975:             eptr--;
                   3976:             }
                   3977:           }
                   3978: 
                   3979:         RRETURN(MATCH_NOMATCH);
                   3980:         }
                   3981:       }
                   3982:     /* Control never gets here */
                   3983: 
                   3984:     /* Match a single character type repeatedly; several different opcodes
                   3985:     share code. This is very similar to the code for single characters, but we
                   3986:     repeat it in the interests of efficiency. */
                   3987: 
                   3988:     case OP_TYPEEXACT:
                   3989:     min = max = GET2(ecode, 1);
                   3990:     minimize = TRUE;
1.1.1.2   misho    3991:     ecode += 1 + IMM2_SIZE;
1.1       misho    3992:     goto REPEATTYPE;
                   3993: 
                   3994:     case OP_TYPEUPTO:
                   3995:     case OP_TYPEMINUPTO:
                   3996:     min = 0;
                   3997:     max = GET2(ecode, 1);
                   3998:     minimize = *ecode == OP_TYPEMINUPTO;
1.1.1.2   misho    3999:     ecode += 1 + IMM2_SIZE;
1.1       misho    4000:     goto REPEATTYPE;
                   4001: 
                   4002:     case OP_TYPEPOSSTAR:
                   4003:     possessive = TRUE;
                   4004:     min = 0;
                   4005:     max = INT_MAX;
                   4006:     ecode++;
                   4007:     goto REPEATTYPE;
                   4008: 
                   4009:     case OP_TYPEPOSPLUS:
                   4010:     possessive = TRUE;
                   4011:     min = 1;
                   4012:     max = INT_MAX;
                   4013:     ecode++;
                   4014:     goto REPEATTYPE;
                   4015: 
                   4016:     case OP_TYPEPOSQUERY:
                   4017:     possessive = TRUE;
                   4018:     min = 0;
                   4019:     max = 1;
                   4020:     ecode++;
                   4021:     goto REPEATTYPE;
                   4022: 
                   4023:     case OP_TYPEPOSUPTO:
                   4024:     possessive = TRUE;
                   4025:     min = 0;
                   4026:     max = GET2(ecode, 1);
1.1.1.2   misho    4027:     ecode += 1 + IMM2_SIZE;
1.1       misho    4028:     goto REPEATTYPE;
                   4029: 
                   4030:     case OP_TYPESTAR:
                   4031:     case OP_TYPEMINSTAR:
                   4032:     case OP_TYPEPLUS:
                   4033:     case OP_TYPEMINPLUS:
                   4034:     case OP_TYPEQUERY:
                   4035:     case OP_TYPEMINQUERY:
                   4036:     c = *ecode++ - OP_TYPESTAR;
                   4037:     minimize = (c & 1) != 0;
                   4038:     min = rep_min[c];                 /* Pick up values from tables; */
                   4039:     max = rep_max[c];                 /* zero for max => infinity */
                   4040:     if (max == 0) max = INT_MAX;
                   4041: 
                   4042:     /* Common code for all repeated single character type matches. Note that
                   4043:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   4044:     character types, the valid characters are all one-byte long. */
                   4045: 
                   4046:     REPEATTYPE:
                   4047:     ctype = *ecode++;      /* Code for the character type */
                   4048: 
                   4049: #ifdef SUPPORT_UCP
                   4050:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   4051:       {
                   4052:       prop_fail_result = ctype == OP_NOTPROP;
                   4053:       prop_type = *ecode++;
                   4054:       prop_value = *ecode++;
                   4055:       }
                   4056:     else prop_type = -1;
                   4057: #endif
                   4058: 
                   4059:     /* First, ensure the minimum number of matches are present. Use inline
                   4060:     code for maximizing the speed, and do the type test once at the start
                   4061:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
                   4062:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   4063:     and single-bytes. */
                   4064: 
                   4065:     if (min > 0)
                   4066:       {
                   4067: #ifdef SUPPORT_UCP
                   4068:       if (prop_type >= 0)
                   4069:         {
                   4070:         switch(prop_type)
                   4071:           {
                   4072:           case PT_ANY:
                   4073:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4074:           for (i = 1; i <= min; i++)
                   4075:             {
                   4076:             if (eptr >= md->end_subject)
                   4077:               {
                   4078:               SCHECK_PARTIAL();
                   4079:               RRETURN(MATCH_NOMATCH);
                   4080:               }
                   4081:             GETCHARINCTEST(c, eptr);
                   4082:             }
                   4083:           break;
                   4084: 
                   4085:           case PT_LAMP:
                   4086:           for (i = 1; i <= min; i++)
                   4087:             {
                   4088:             int chartype;
                   4089:             if (eptr >= md->end_subject)
                   4090:               {
                   4091:               SCHECK_PARTIAL();
                   4092:               RRETURN(MATCH_NOMATCH);
                   4093:               }
                   4094:             GETCHARINCTEST(c, eptr);
                   4095:             chartype = UCD_CHARTYPE(c);
                   4096:             if ((chartype == ucp_Lu ||
                   4097:                  chartype == ucp_Ll ||
                   4098:                  chartype == ucp_Lt) == prop_fail_result)
                   4099:               RRETURN(MATCH_NOMATCH);
                   4100:             }
                   4101:           break;
                   4102: 
                   4103:           case PT_GC:
                   4104:           for (i = 1; i <= min; i++)
                   4105:             {
                   4106:             if (eptr >= md->end_subject)
                   4107:               {
                   4108:               SCHECK_PARTIAL();
                   4109:               RRETURN(MATCH_NOMATCH);
                   4110:               }
                   4111:             GETCHARINCTEST(c, eptr);
                   4112:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4113:               RRETURN(MATCH_NOMATCH);
                   4114:             }
                   4115:           break;
                   4116: 
                   4117:           case PT_PC:
                   4118:           for (i = 1; i <= min; i++)
                   4119:             {
                   4120:             if (eptr >= md->end_subject)
                   4121:               {
                   4122:               SCHECK_PARTIAL();
                   4123:               RRETURN(MATCH_NOMATCH);
                   4124:               }
                   4125:             GETCHARINCTEST(c, eptr);
                   4126:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4127:               RRETURN(MATCH_NOMATCH);
                   4128:             }
                   4129:           break;
                   4130: 
                   4131:           case PT_SC:
                   4132:           for (i = 1; i <= min; i++)
                   4133:             {
                   4134:             if (eptr >= md->end_subject)
                   4135:               {
                   4136:               SCHECK_PARTIAL();
                   4137:               RRETURN(MATCH_NOMATCH);
                   4138:               }
                   4139:             GETCHARINCTEST(c, eptr);
                   4140:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4141:               RRETURN(MATCH_NOMATCH);
                   4142:             }
                   4143:           break;
                   4144: 
                   4145:           case PT_ALNUM:
                   4146:           for (i = 1; i <= min; i++)
                   4147:             {
                   4148:             int category;
                   4149:             if (eptr >= md->end_subject)
                   4150:               {
                   4151:               SCHECK_PARTIAL();
                   4152:               RRETURN(MATCH_NOMATCH);
                   4153:               }
                   4154:             GETCHARINCTEST(c, eptr);
                   4155:             category = UCD_CATEGORY(c);
                   4156:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4157:               RRETURN(MATCH_NOMATCH);
                   4158:             }
                   4159:           break;
                   4160: 
                   4161:           case PT_SPACE:    /* Perl space */
                   4162:           for (i = 1; i <= min; i++)
                   4163:             {
                   4164:             if (eptr >= md->end_subject)
                   4165:               {
                   4166:               SCHECK_PARTIAL();
                   4167:               RRETURN(MATCH_NOMATCH);
                   4168:               }
                   4169:             GETCHARINCTEST(c, eptr);
                   4170:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4171:                  c == CHAR_FF || c == CHAR_CR)
                   4172:                    == prop_fail_result)
                   4173:               RRETURN(MATCH_NOMATCH);
                   4174:             }
                   4175:           break;
                   4176: 
                   4177:           case PT_PXSPACE:  /* POSIX space */
                   4178:           for (i = 1; i <= min; i++)
                   4179:             {
                   4180:             if (eptr >= md->end_subject)
                   4181:               {
                   4182:               SCHECK_PARTIAL();
                   4183:               RRETURN(MATCH_NOMATCH);
                   4184:               }
                   4185:             GETCHARINCTEST(c, eptr);
                   4186:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4187:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4188:                    == prop_fail_result)
                   4189:               RRETURN(MATCH_NOMATCH);
                   4190:             }
                   4191:           break;
                   4192: 
                   4193:           case PT_WORD:
                   4194:           for (i = 1; i <= min; i++)
                   4195:             {
                   4196:             int category;
                   4197:             if (eptr >= md->end_subject)
                   4198:               {
                   4199:               SCHECK_PARTIAL();
                   4200:               RRETURN(MATCH_NOMATCH);
                   4201:               }
                   4202:             GETCHARINCTEST(c, eptr);
                   4203:             category = UCD_CATEGORY(c);
                   4204:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                   4205:                    == prop_fail_result)
                   4206:               RRETURN(MATCH_NOMATCH);
                   4207:             }
                   4208:           break;
                   4209: 
                   4210:           /* This should not occur */
                   4211: 
                   4212:           default:
                   4213:           RRETURN(PCRE_ERROR_INTERNAL);
                   4214:           }
                   4215:         }
                   4216: 
                   4217:       /* Match extended Unicode sequences. We will get here only if the
                   4218:       support is in the binary; otherwise a compile-time error occurs. */
                   4219: 
                   4220:       else if (ctype == OP_EXTUNI)
                   4221:         {
                   4222:         for (i = 1; i <= min; i++)
                   4223:           {
                   4224:           if (eptr >= md->end_subject)
                   4225:             {
                   4226:             SCHECK_PARTIAL();
                   4227:             RRETURN(MATCH_NOMATCH);
                   4228:             }
                   4229:           GETCHARINCTEST(c, eptr);
                   4230:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   4231:           while (eptr < md->end_subject)
                   4232:             {
                   4233:             int len = 1;
1.1.1.2   misho    4234:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1       misho    4235:             if (UCD_CATEGORY(c) != ucp_M) break;
                   4236:             eptr += len;
                   4237:             }
1.1.1.3 ! misho    4238:           CHECK_PARTIAL();
1.1       misho    4239:           }
                   4240:         }
                   4241: 
                   4242:       else
                   4243: #endif     /* SUPPORT_UCP */
                   4244: 
                   4245: /* Handle all other cases when the coding is UTF-8 */
                   4246: 
1.1.1.2   misho    4247: #ifdef SUPPORT_UTF
                   4248:       if (utf) switch(ctype)
1.1       misho    4249:         {
                   4250:         case OP_ANY:
                   4251:         for (i = 1; i <= min; i++)
                   4252:           {
                   4253:           if (eptr >= md->end_subject)
                   4254:             {
                   4255:             SCHECK_PARTIAL();
                   4256:             RRETURN(MATCH_NOMATCH);
                   4257:             }
                   4258:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3 ! misho    4259:           if (md->partial != 0 &&
        !          4260:               eptr + 1 >= md->end_subject &&
        !          4261:               NLBLOCK->nltype == NLTYPE_FIXED &&
        !          4262:               NLBLOCK->nllen == 2 &&
        !          4263:               *eptr == NLBLOCK->nl[0])
        !          4264:             {
        !          4265:             md->hitend = TRUE;
        !          4266:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          4267:             }
1.1       misho    4268:           eptr++;
1.1.1.2   misho    4269:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4270:           }
                   4271:         break;
                   4272: 
                   4273:         case OP_ALLANY:
                   4274:         for (i = 1; i <= min; i++)
                   4275:           {
                   4276:           if (eptr >= md->end_subject)
                   4277:             {
                   4278:             SCHECK_PARTIAL();
                   4279:             RRETURN(MATCH_NOMATCH);
                   4280:             }
                   4281:           eptr++;
1.1.1.2   misho    4282:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4283:           }
                   4284:         break;
                   4285: 
                   4286:         case OP_ANYBYTE:
                   4287:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
                   4288:         eptr += min;
                   4289:         break;
                   4290: 
                   4291:         case OP_ANYNL:
                   4292:         for (i = 1; i <= min; i++)
                   4293:           {
                   4294:           if (eptr >= md->end_subject)
                   4295:             {
                   4296:             SCHECK_PARTIAL();
                   4297:             RRETURN(MATCH_NOMATCH);
                   4298:             }
                   4299:           GETCHARINC(c, eptr);
                   4300:           switch(c)
                   4301:             {
                   4302:             default: RRETURN(MATCH_NOMATCH);
                   4303: 
                   4304:             case 0x000d:
                   4305:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4306:             break;
                   4307: 
                   4308:             case 0x000a:
                   4309:             break;
                   4310: 
                   4311:             case 0x000b:
                   4312:             case 0x000c:
                   4313:             case 0x0085:
                   4314:             case 0x2028:
                   4315:             case 0x2029:
                   4316:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4317:             break;
                   4318:             }
                   4319:           }
                   4320:         break;
                   4321: 
                   4322:         case OP_NOT_HSPACE:
                   4323:         for (i = 1; i <= min; i++)
                   4324:           {
                   4325:           if (eptr >= md->end_subject)
                   4326:             {
                   4327:             SCHECK_PARTIAL();
                   4328:             RRETURN(MATCH_NOMATCH);
                   4329:             }
                   4330:           GETCHARINC(c, eptr);
                   4331:           switch(c)
                   4332:             {
                   4333:             default: break;
                   4334:             case 0x09:      /* HT */
                   4335:             case 0x20:      /* SPACE */
                   4336:             case 0xa0:      /* NBSP */
                   4337:             case 0x1680:    /* OGHAM SPACE MARK */
                   4338:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4339:             case 0x2000:    /* EN QUAD */
                   4340:             case 0x2001:    /* EM QUAD */
                   4341:             case 0x2002:    /* EN SPACE */
                   4342:             case 0x2003:    /* EM SPACE */
                   4343:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4344:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4345:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4346:             case 0x2007:    /* FIGURE SPACE */
                   4347:             case 0x2008:    /* PUNCTUATION SPACE */
                   4348:             case 0x2009:    /* THIN SPACE */
                   4349:             case 0x200A:    /* HAIR SPACE */
                   4350:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4351:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4352:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4353:             RRETURN(MATCH_NOMATCH);
                   4354:             }
                   4355:           }
                   4356:         break;
                   4357: 
                   4358:         case OP_HSPACE:
                   4359:         for (i = 1; i <= min; i++)
                   4360:           {
                   4361:           if (eptr >= md->end_subject)
                   4362:             {
                   4363:             SCHECK_PARTIAL();
                   4364:             RRETURN(MATCH_NOMATCH);
                   4365:             }
                   4366:           GETCHARINC(c, eptr);
                   4367:           switch(c)
                   4368:             {
                   4369:             default: RRETURN(MATCH_NOMATCH);
                   4370:             case 0x09:      /* HT */
                   4371:             case 0x20:      /* SPACE */
                   4372:             case 0xa0:      /* NBSP */
                   4373:             case 0x1680:    /* OGHAM SPACE MARK */
                   4374:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4375:             case 0x2000:    /* EN QUAD */
                   4376:             case 0x2001:    /* EM QUAD */
                   4377:             case 0x2002:    /* EN SPACE */
                   4378:             case 0x2003:    /* EM SPACE */
                   4379:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4380:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4381:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4382:             case 0x2007:    /* FIGURE SPACE */
                   4383:             case 0x2008:    /* PUNCTUATION SPACE */
                   4384:             case 0x2009:    /* THIN SPACE */
                   4385:             case 0x200A:    /* HAIR SPACE */
                   4386:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4387:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4388:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4389:             break;
                   4390:             }
                   4391:           }
                   4392:         break;
                   4393: 
                   4394:         case OP_NOT_VSPACE:
                   4395:         for (i = 1; i <= min; i++)
                   4396:           {
                   4397:           if (eptr >= md->end_subject)
                   4398:             {
                   4399:             SCHECK_PARTIAL();
                   4400:             RRETURN(MATCH_NOMATCH);
                   4401:             }
                   4402:           GETCHARINC(c, eptr);
                   4403:           switch(c)
                   4404:             {
                   4405:             default: break;
                   4406:             case 0x0a:      /* LF */
                   4407:             case 0x0b:      /* VT */
                   4408:             case 0x0c:      /* FF */
                   4409:             case 0x0d:      /* CR */
                   4410:             case 0x85:      /* NEL */
                   4411:             case 0x2028:    /* LINE SEPARATOR */
                   4412:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4413:             RRETURN(MATCH_NOMATCH);
                   4414:             }
                   4415:           }
                   4416:         break;
                   4417: 
                   4418:         case OP_VSPACE:
                   4419:         for (i = 1; i <= min; i++)
                   4420:           {
                   4421:           if (eptr >= md->end_subject)
                   4422:             {
                   4423:             SCHECK_PARTIAL();
                   4424:             RRETURN(MATCH_NOMATCH);
                   4425:             }
                   4426:           GETCHARINC(c, eptr);
                   4427:           switch(c)
                   4428:             {
                   4429:             default: RRETURN(MATCH_NOMATCH);
                   4430:             case 0x0a:      /* LF */
                   4431:             case 0x0b:      /* VT */
                   4432:             case 0x0c:      /* FF */
                   4433:             case 0x0d:      /* CR */
                   4434:             case 0x85:      /* NEL */
                   4435:             case 0x2028:    /* LINE SEPARATOR */
                   4436:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4437:             break;
                   4438:             }
                   4439:           }
                   4440:         break;
                   4441: 
                   4442:         case OP_NOT_DIGIT:
                   4443:         for (i = 1; i <= min; i++)
                   4444:           {
                   4445:           if (eptr >= md->end_subject)
                   4446:             {
                   4447:             SCHECK_PARTIAL();
                   4448:             RRETURN(MATCH_NOMATCH);
                   4449:             }
                   4450:           GETCHARINC(c, eptr);
                   4451:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   4452:             RRETURN(MATCH_NOMATCH);
                   4453:           }
                   4454:         break;
                   4455: 
                   4456:         case OP_DIGIT:
                   4457:         for (i = 1; i <= min; i++)
                   4458:           {
                   4459:           if (eptr >= md->end_subject)
                   4460:             {
                   4461:             SCHECK_PARTIAL();
                   4462:             RRETURN(MATCH_NOMATCH);
                   4463:             }
1.1.1.2   misho    4464:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
1.1       misho    4465:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4466:           eptr++;
1.1       misho    4467:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4468:           }
                   4469:         break;
                   4470: 
                   4471:         case OP_NOT_WHITESPACE:
                   4472:         for (i = 1; i <= min; i++)
                   4473:           {
                   4474:           if (eptr >= md->end_subject)
                   4475:             {
                   4476:             SCHECK_PARTIAL();
                   4477:             RRETURN(MATCH_NOMATCH);
                   4478:             }
                   4479:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
                   4480:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4481:           eptr++;
                   4482:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4483:           }
                   4484:         break;
                   4485: 
                   4486:         case OP_WHITESPACE:
                   4487:         for (i = 1; i <= min; i++)
                   4488:           {
                   4489:           if (eptr >= md->end_subject)
                   4490:             {
                   4491:             SCHECK_PARTIAL();
                   4492:             RRETURN(MATCH_NOMATCH);
                   4493:             }
1.1.1.2   misho    4494:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
1.1       misho    4495:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4496:           eptr++;
1.1       misho    4497:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4498:           }
                   4499:         break;
                   4500: 
                   4501:         case OP_NOT_WORDCHAR:
                   4502:         for (i = 1; i <= min; i++)
                   4503:           {
                   4504:           if (eptr >= md->end_subject)
                   4505:             {
                   4506:             SCHECK_PARTIAL();
                   4507:             RRETURN(MATCH_NOMATCH);
                   4508:             }
                   4509:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
                   4510:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4511:           eptr++;
                   4512:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4513:           }
                   4514:         break;
                   4515: 
                   4516:         case OP_WORDCHAR:
                   4517:         for (i = 1; i <= min; i++)
                   4518:           {
                   4519:           if (eptr >= md->end_subject)
                   4520:             {
                   4521:             SCHECK_PARTIAL();
                   4522:             RRETURN(MATCH_NOMATCH);
                   4523:             }
1.1.1.2   misho    4524:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
1.1       misho    4525:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4526:           eptr++;
1.1       misho    4527:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4528:           }
                   4529:         break;
                   4530: 
                   4531:         default:
                   4532:         RRETURN(PCRE_ERROR_INTERNAL);
                   4533:         }  /* End switch(ctype) */
                   4534: 
                   4535:       else
1.1.1.2   misho    4536: #endif     /* SUPPORT_UTF */
1.1       misho    4537: 
                   4538:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   4539:       than OP_PROP and OP_NOTPROP. */
                   4540: 
                   4541:       switch(ctype)
                   4542:         {
                   4543:         case OP_ANY:
                   4544:         for (i = 1; i <= min; i++)
                   4545:           {
                   4546:           if (eptr >= md->end_subject)
                   4547:             {
                   4548:             SCHECK_PARTIAL();
                   4549:             RRETURN(MATCH_NOMATCH);
                   4550:             }
                   4551:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3 ! misho    4552:           if (md->partial != 0 &&
        !          4553:               eptr + 1 >= md->end_subject &&
        !          4554:               NLBLOCK->nltype == NLTYPE_FIXED &&
        !          4555:               NLBLOCK->nllen == 2 &&
        !          4556:               *eptr == NLBLOCK->nl[0])
        !          4557:             {
        !          4558:             md->hitend = TRUE;
        !          4559:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          4560:             }
1.1       misho    4561:           eptr++;
                   4562:           }
                   4563:         break;
                   4564: 
                   4565:         case OP_ALLANY:
                   4566:         if (eptr > md->end_subject - min)
                   4567:           {
                   4568:           SCHECK_PARTIAL();
                   4569:           RRETURN(MATCH_NOMATCH);
                   4570:           }
                   4571:         eptr += min;
                   4572:         break;
                   4573: 
                   4574:         case OP_ANYBYTE:
                   4575:         if (eptr > md->end_subject - min)
                   4576:           {
                   4577:           SCHECK_PARTIAL();
                   4578:           RRETURN(MATCH_NOMATCH);
                   4579:           }
                   4580:         eptr += min;
                   4581:         break;
                   4582: 
                   4583:         case OP_ANYNL:
                   4584:         for (i = 1; i <= min; i++)
                   4585:           {
                   4586:           if (eptr >= md->end_subject)
                   4587:             {
                   4588:             SCHECK_PARTIAL();
                   4589:             RRETURN(MATCH_NOMATCH);
                   4590:             }
                   4591:           switch(*eptr++)
                   4592:             {
                   4593:             default: RRETURN(MATCH_NOMATCH);
                   4594: 
                   4595:             case 0x000d:
                   4596:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4597:             break;
                   4598: 
                   4599:             case 0x000a:
                   4600:             break;
                   4601: 
                   4602:             case 0x000b:
                   4603:             case 0x000c:
                   4604:             case 0x0085:
1.1.1.2   misho    4605: #ifdef COMPILE_PCRE16
                   4606:             case 0x2028:
                   4607:             case 0x2029:
                   4608: #endif
1.1       misho    4609:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4610:             break;
                   4611:             }
                   4612:           }
                   4613:         break;
                   4614: 
                   4615:         case OP_NOT_HSPACE:
                   4616:         for (i = 1; i <= min; i++)
                   4617:           {
                   4618:           if (eptr >= md->end_subject)
                   4619:             {
                   4620:             SCHECK_PARTIAL();
                   4621:             RRETURN(MATCH_NOMATCH);
                   4622:             }
                   4623:           switch(*eptr++)
                   4624:             {
                   4625:             default: break;
                   4626:             case 0x09:      /* HT */
                   4627:             case 0x20:      /* SPACE */
                   4628:             case 0xa0:      /* NBSP */
1.1.1.2   misho    4629: #ifdef COMPILE_PCRE16
                   4630:             case 0x1680:    /* OGHAM SPACE MARK */
                   4631:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4632:             case 0x2000:    /* EN QUAD */
                   4633:             case 0x2001:    /* EM QUAD */
                   4634:             case 0x2002:    /* EN SPACE */
                   4635:             case 0x2003:    /* EM SPACE */
                   4636:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4637:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4638:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4639:             case 0x2007:    /* FIGURE SPACE */
                   4640:             case 0x2008:    /* PUNCTUATION SPACE */
                   4641:             case 0x2009:    /* THIN SPACE */
                   4642:             case 0x200A:    /* HAIR SPACE */
                   4643:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4644:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4645:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4646: #endif
1.1       misho    4647:             RRETURN(MATCH_NOMATCH);
                   4648:             }
                   4649:           }
                   4650:         break;
                   4651: 
                   4652:         case OP_HSPACE:
                   4653:         for (i = 1; i <= min; i++)
                   4654:           {
                   4655:           if (eptr >= md->end_subject)
                   4656:             {
                   4657:             SCHECK_PARTIAL();
                   4658:             RRETURN(MATCH_NOMATCH);
                   4659:             }
                   4660:           switch(*eptr++)
                   4661:             {
                   4662:             default: RRETURN(MATCH_NOMATCH);
                   4663:             case 0x09:      /* HT */
                   4664:             case 0x20:      /* SPACE */
                   4665:             case 0xa0:      /* NBSP */
1.1.1.2   misho    4666: #ifdef COMPILE_PCRE16
                   4667:             case 0x1680:    /* OGHAM SPACE MARK */
                   4668:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4669:             case 0x2000:    /* EN QUAD */
                   4670:             case 0x2001:    /* EM QUAD */
                   4671:             case 0x2002:    /* EN SPACE */
                   4672:             case 0x2003:    /* EM SPACE */
                   4673:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4674:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4675:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4676:             case 0x2007:    /* FIGURE SPACE */
                   4677:             case 0x2008:    /* PUNCTUATION SPACE */
                   4678:             case 0x2009:    /* THIN SPACE */
                   4679:             case 0x200A:    /* HAIR SPACE */
                   4680:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4681:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4682:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4683: #endif
1.1       misho    4684:             break;
                   4685:             }
                   4686:           }
                   4687:         break;
                   4688: 
                   4689:         case OP_NOT_VSPACE:
                   4690:         for (i = 1; i <= min; i++)
                   4691:           {
                   4692:           if (eptr >= md->end_subject)
                   4693:             {
                   4694:             SCHECK_PARTIAL();
                   4695:             RRETURN(MATCH_NOMATCH);
                   4696:             }
                   4697:           switch(*eptr++)
                   4698:             {
                   4699:             default: break;
                   4700:             case 0x0a:      /* LF */
                   4701:             case 0x0b:      /* VT */
                   4702:             case 0x0c:      /* FF */
                   4703:             case 0x0d:      /* CR */
                   4704:             case 0x85:      /* NEL */
1.1.1.2   misho    4705: #ifdef COMPILE_PCRE16
                   4706:             case 0x2028:    /* LINE SEPARATOR */
                   4707:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4708: #endif
1.1       misho    4709:             RRETURN(MATCH_NOMATCH);
                   4710:             }
                   4711:           }
                   4712:         break;
                   4713: 
                   4714:         case OP_VSPACE:
                   4715:         for (i = 1; i <= min; i++)
                   4716:           {
                   4717:           if (eptr >= md->end_subject)
                   4718:             {
                   4719:             SCHECK_PARTIAL();
                   4720:             RRETURN(MATCH_NOMATCH);
                   4721:             }
                   4722:           switch(*eptr++)
                   4723:             {
                   4724:             default: RRETURN(MATCH_NOMATCH);
                   4725:             case 0x0a:      /* LF */
                   4726:             case 0x0b:      /* VT */
                   4727:             case 0x0c:      /* FF */
                   4728:             case 0x0d:      /* CR */
                   4729:             case 0x85:      /* NEL */
1.1.1.2   misho    4730: #ifdef COMPILE_PCRE16
                   4731:             case 0x2028:    /* LINE SEPARATOR */
                   4732:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4733: #endif
1.1       misho    4734:             break;
                   4735:             }
                   4736:           }
                   4737:         break;
                   4738: 
                   4739:         case OP_NOT_DIGIT:
                   4740:         for (i = 1; i <= min; i++)
                   4741:           {
                   4742:           if (eptr >= md->end_subject)
                   4743:             {
                   4744:             SCHECK_PARTIAL();
                   4745:             RRETURN(MATCH_NOMATCH);
                   4746:             }
1.1.1.2   misho    4747:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
                   4748:             RRETURN(MATCH_NOMATCH);
                   4749:           eptr++;
1.1       misho    4750:           }
                   4751:         break;
                   4752: 
                   4753:         case OP_DIGIT:
                   4754:         for (i = 1; i <= min; i++)
                   4755:           {
                   4756:           if (eptr >= md->end_subject)
                   4757:             {
                   4758:             SCHECK_PARTIAL();
                   4759:             RRETURN(MATCH_NOMATCH);
                   4760:             }
1.1.1.2   misho    4761:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
                   4762:             RRETURN(MATCH_NOMATCH);
                   4763:           eptr++;
1.1       misho    4764:           }
                   4765:         break;
                   4766: 
                   4767:         case OP_NOT_WHITESPACE:
                   4768:         for (i = 1; i <= min; i++)
                   4769:           {
                   4770:           if (eptr >= md->end_subject)
                   4771:             {
                   4772:             SCHECK_PARTIAL();
                   4773:             RRETURN(MATCH_NOMATCH);
                   4774:             }
1.1.1.2   misho    4775:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
                   4776:             RRETURN(MATCH_NOMATCH);
                   4777:           eptr++;
1.1       misho    4778:           }
                   4779:         break;
                   4780: 
                   4781:         case OP_WHITESPACE:
                   4782:         for (i = 1; i <= min; i++)
                   4783:           {
                   4784:           if (eptr >= md->end_subject)
                   4785:             {
                   4786:             SCHECK_PARTIAL();
                   4787:             RRETURN(MATCH_NOMATCH);
                   4788:             }
1.1.1.2   misho    4789:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
                   4790:             RRETURN(MATCH_NOMATCH);
                   4791:           eptr++;
1.1       misho    4792:           }
                   4793:         break;
                   4794: 
                   4795:         case OP_NOT_WORDCHAR:
                   4796:         for (i = 1; i <= min; i++)
                   4797:           {
                   4798:           if (eptr >= md->end_subject)
                   4799:             {
                   4800:             SCHECK_PARTIAL();
                   4801:             RRETURN(MATCH_NOMATCH);
                   4802:             }
1.1.1.2   misho    4803:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
1.1       misho    4804:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4805:           eptr++;
1.1       misho    4806:           }
                   4807:         break;
                   4808: 
                   4809:         case OP_WORDCHAR:
                   4810:         for (i = 1; i <= min; i++)
                   4811:           {
                   4812:           if (eptr >= md->end_subject)
                   4813:             {
                   4814:             SCHECK_PARTIAL();
                   4815:             RRETURN(MATCH_NOMATCH);
                   4816:             }
1.1.1.2   misho    4817:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
1.1       misho    4818:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4819:           eptr++;
1.1       misho    4820:           }
                   4821:         break;
                   4822: 
                   4823:         default:
                   4824:         RRETURN(PCRE_ERROR_INTERNAL);
                   4825:         }
                   4826:       }
                   4827: 
                   4828:     /* If min = max, continue at the same level without recursing */
                   4829: 
                   4830:     if (min == max) continue;
                   4831: 
                   4832:     /* If minimizing, we have to test the rest of the pattern before each
                   4833:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4834:     separate the UCP cases. */
                   4835: 
                   4836:     if (minimize)
                   4837:       {
                   4838: #ifdef SUPPORT_UCP
                   4839:       if (prop_type >= 0)
                   4840:         {
                   4841:         switch(prop_type)
                   4842:           {
                   4843:           case PT_ANY:
                   4844:           for (fi = min;; fi++)
                   4845:             {
                   4846:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
                   4847:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4848:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4849:             if (eptr >= md->end_subject)
                   4850:               {
                   4851:               SCHECK_PARTIAL();
                   4852:               RRETURN(MATCH_NOMATCH);
                   4853:               }
                   4854:             GETCHARINCTEST(c, eptr);
                   4855:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4856:             }
                   4857:           /* Control never gets here */
                   4858: 
                   4859:           case PT_LAMP:
                   4860:           for (fi = min;; fi++)
                   4861:             {
                   4862:             int chartype;
                   4863:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
                   4864:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4865:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4866:             if (eptr >= md->end_subject)
                   4867:               {
                   4868:               SCHECK_PARTIAL();
                   4869:               RRETURN(MATCH_NOMATCH);
                   4870:               }
                   4871:             GETCHARINCTEST(c, eptr);
                   4872:             chartype = UCD_CHARTYPE(c);
                   4873:             if ((chartype == ucp_Lu ||
                   4874:                  chartype == ucp_Ll ||
                   4875:                  chartype == ucp_Lt) == prop_fail_result)
                   4876:               RRETURN(MATCH_NOMATCH);
                   4877:             }
                   4878:           /* Control never gets here */
                   4879: 
                   4880:           case PT_GC:
                   4881:           for (fi = min;; fi++)
                   4882:             {
                   4883:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
                   4884:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4885:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4886:             if (eptr >= md->end_subject)
                   4887:               {
                   4888:               SCHECK_PARTIAL();
                   4889:               RRETURN(MATCH_NOMATCH);
                   4890:               }
                   4891:             GETCHARINCTEST(c, eptr);
                   4892:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4893:               RRETURN(MATCH_NOMATCH);
                   4894:             }
                   4895:           /* Control never gets here */
                   4896: 
                   4897:           case PT_PC:
                   4898:           for (fi = min;; fi++)
                   4899:             {
                   4900:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
                   4901:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4902:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4903:             if (eptr >= md->end_subject)
                   4904:               {
                   4905:               SCHECK_PARTIAL();
                   4906:               RRETURN(MATCH_NOMATCH);
                   4907:               }
                   4908:             GETCHARINCTEST(c, eptr);
                   4909:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4910:               RRETURN(MATCH_NOMATCH);
                   4911:             }
                   4912:           /* Control never gets here */
                   4913: 
                   4914:           case PT_SC:
                   4915:           for (fi = min;; fi++)
                   4916:             {
                   4917:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
                   4918:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4919:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4920:             if (eptr >= md->end_subject)
                   4921:               {
                   4922:               SCHECK_PARTIAL();
                   4923:               RRETURN(MATCH_NOMATCH);
                   4924:               }
                   4925:             GETCHARINCTEST(c, eptr);
                   4926:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4927:               RRETURN(MATCH_NOMATCH);
                   4928:             }
                   4929:           /* Control never gets here */
                   4930: 
                   4931:           case PT_ALNUM:
                   4932:           for (fi = min;; fi++)
                   4933:             {
                   4934:             int category;
                   4935:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
                   4936:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4937:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4938:             if (eptr >= md->end_subject)
                   4939:               {
                   4940:               SCHECK_PARTIAL();
                   4941:               RRETURN(MATCH_NOMATCH);
                   4942:               }
                   4943:             GETCHARINCTEST(c, eptr);
                   4944:             category = UCD_CATEGORY(c);
                   4945:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4946:               RRETURN(MATCH_NOMATCH);
                   4947:             }
                   4948:           /* Control never gets here */
                   4949: 
                   4950:           case PT_SPACE:    /* Perl space */
                   4951:           for (fi = min;; fi++)
                   4952:             {
                   4953:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
                   4954:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4955:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4956:             if (eptr >= md->end_subject)
                   4957:               {
                   4958:               SCHECK_PARTIAL();
                   4959:               RRETURN(MATCH_NOMATCH);
                   4960:               }
                   4961:             GETCHARINCTEST(c, eptr);
                   4962:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4963:                  c == CHAR_FF || c == CHAR_CR)
                   4964:                    == prop_fail_result)
                   4965:               RRETURN(MATCH_NOMATCH);
                   4966:             }
                   4967:           /* Control never gets here */
                   4968: 
                   4969:           case PT_PXSPACE:  /* POSIX space */
                   4970:           for (fi = min;; fi++)
                   4971:             {
                   4972:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
                   4973:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4974:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4975:             if (eptr >= md->end_subject)
                   4976:               {
                   4977:               SCHECK_PARTIAL();
                   4978:               RRETURN(MATCH_NOMATCH);
                   4979:               }
                   4980:             GETCHARINCTEST(c, eptr);
                   4981:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4982:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4983:                    == prop_fail_result)
                   4984:               RRETURN(MATCH_NOMATCH);
                   4985:             }
                   4986:           /* Control never gets here */
                   4987: 
                   4988:           case PT_WORD:
                   4989:           for (fi = min;; fi++)
                   4990:             {
                   4991:             int category;
                   4992:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
                   4993:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4994:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4995:             if (eptr >= md->end_subject)
                   4996:               {
                   4997:               SCHECK_PARTIAL();
                   4998:               RRETURN(MATCH_NOMATCH);
                   4999:               }
                   5000:             GETCHARINCTEST(c, eptr);
                   5001:             category = UCD_CATEGORY(c);
                   5002:             if ((category == ucp_L ||
                   5003:                  category == ucp_N ||
                   5004:                  c == CHAR_UNDERSCORE)
                   5005:                    == prop_fail_result)
                   5006:               RRETURN(MATCH_NOMATCH);
                   5007:             }
                   5008:           /* Control never gets here */
                   5009: 
                   5010:           /* This should never occur */
                   5011: 
                   5012:           default:
                   5013:           RRETURN(PCRE_ERROR_INTERNAL);
                   5014:           }
                   5015:         }
                   5016: 
                   5017:       /* Match extended Unicode sequences. We will get here only if the
                   5018:       support is in the binary; otherwise a compile-time error occurs. */
                   5019: 
                   5020:       else if (ctype == OP_EXTUNI)
                   5021:         {
                   5022:         for (fi = min;; fi++)
                   5023:           {
                   5024:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
                   5025:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5026:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5027:           if (eptr >= md->end_subject)
                   5028:             {
                   5029:             SCHECK_PARTIAL();
                   5030:             RRETURN(MATCH_NOMATCH);
                   5031:             }
                   5032:           GETCHARINCTEST(c, eptr);
                   5033:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
                   5034:           while (eptr < md->end_subject)
                   5035:             {
                   5036:             int len = 1;
1.1.1.2   misho    5037:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1       misho    5038:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5039:             eptr += len;
                   5040:             }
1.1.1.3 ! misho    5041:           CHECK_PARTIAL();
1.1       misho    5042:           }
                   5043:         }
                   5044:       else
                   5045: #endif     /* SUPPORT_UCP */
                   5046: 
1.1.1.2   misho    5047: #ifdef SUPPORT_UTF
                   5048:       if (utf)
1.1       misho    5049:         {
                   5050:         for (fi = min;; fi++)
                   5051:           {
                   5052:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
                   5053:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5054:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5055:           if (eptr >= md->end_subject)
                   5056:             {
                   5057:             SCHECK_PARTIAL();
                   5058:             RRETURN(MATCH_NOMATCH);
                   5059:             }
                   5060:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   5061:             RRETURN(MATCH_NOMATCH);
                   5062:           GETCHARINC(c, eptr);
                   5063:           switch(ctype)
                   5064:             {
1.1.1.3 ! misho    5065:             case OP_ANY:               /* This is the non-NL case */
        !          5066:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5067:                 eptr >= md->end_subject &&
        !          5068:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5069:                 NLBLOCK->nllen == 2 &&
        !          5070:                 c == NLBLOCK->nl[0])
        !          5071:               {
        !          5072:               md->hitend = TRUE;
        !          5073:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5074:               }
        !          5075:             break;
        !          5076: 
1.1       misho    5077:             case OP_ALLANY:
                   5078:             case OP_ANYBYTE:
                   5079:             break;
                   5080: 
                   5081:             case OP_ANYNL:
                   5082:             switch(c)
                   5083:               {
                   5084:               default: RRETURN(MATCH_NOMATCH);
                   5085:               case 0x000d:
                   5086:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   5087:               break;
                   5088:               case 0x000a:
                   5089:               break;
                   5090: 
                   5091:               case 0x000b:
                   5092:               case 0x000c:
                   5093:               case 0x0085:
                   5094:               case 0x2028:
                   5095:               case 0x2029:
                   5096:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   5097:               break;
                   5098:               }
                   5099:             break;
                   5100: 
                   5101:             case OP_NOT_HSPACE:
                   5102:             switch(c)
                   5103:               {
                   5104:               default: break;
                   5105:               case 0x09:      /* HT */
                   5106:               case 0x20:      /* SPACE */
                   5107:               case 0xa0:      /* NBSP */
                   5108:               case 0x1680:    /* OGHAM SPACE MARK */
                   5109:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5110:               case 0x2000:    /* EN QUAD */
                   5111:               case 0x2001:    /* EM QUAD */
                   5112:               case 0x2002:    /* EN SPACE */
                   5113:               case 0x2003:    /* EM SPACE */
                   5114:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5115:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5116:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5117:               case 0x2007:    /* FIGURE SPACE */
                   5118:               case 0x2008:    /* PUNCTUATION SPACE */
                   5119:               case 0x2009:    /* THIN SPACE */
                   5120:               case 0x200A:    /* HAIR SPACE */
                   5121:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5122:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5123:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5124:               RRETURN(MATCH_NOMATCH);
                   5125:               }
                   5126:             break;
                   5127: 
                   5128:             case OP_HSPACE:
                   5129:             switch(c)
                   5130:               {
                   5131:               default: RRETURN(MATCH_NOMATCH);
                   5132:               case 0x09:      /* HT */
                   5133:               case 0x20:      /* SPACE */
                   5134:               case 0xa0:      /* NBSP */
                   5135:               case 0x1680:    /* OGHAM SPACE MARK */
                   5136:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5137:               case 0x2000:    /* EN QUAD */
                   5138:               case 0x2001:    /* EM QUAD */
                   5139:               case 0x2002:    /* EN SPACE */
                   5140:               case 0x2003:    /* EM SPACE */
                   5141:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5142:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5143:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5144:               case 0x2007:    /* FIGURE SPACE */
                   5145:               case 0x2008:    /* PUNCTUATION SPACE */
                   5146:               case 0x2009:    /* THIN SPACE */
                   5147:               case 0x200A:    /* HAIR SPACE */
                   5148:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5149:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5150:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5151:               break;
                   5152:               }
                   5153:             break;
                   5154: 
                   5155:             case OP_NOT_VSPACE:
                   5156:             switch(c)
                   5157:               {
                   5158:               default: break;
                   5159:               case 0x0a:      /* LF */
                   5160:               case 0x0b:      /* VT */
                   5161:               case 0x0c:      /* FF */
                   5162:               case 0x0d:      /* CR */
                   5163:               case 0x85:      /* NEL */
                   5164:               case 0x2028:    /* LINE SEPARATOR */
                   5165:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5166:               RRETURN(MATCH_NOMATCH);
                   5167:               }
                   5168:             break;
                   5169: 
                   5170:             case OP_VSPACE:
                   5171:             switch(c)
                   5172:               {
                   5173:               default: RRETURN(MATCH_NOMATCH);
                   5174:               case 0x0a:      /* LF */
                   5175:               case 0x0b:      /* VT */
                   5176:               case 0x0c:      /* FF */
                   5177:               case 0x0d:      /* CR */
                   5178:               case 0x85:      /* NEL */
                   5179:               case 0x2028:    /* LINE SEPARATOR */
                   5180:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5181:               break;
                   5182:               }
                   5183:             break;
                   5184: 
                   5185:             case OP_NOT_DIGIT:
                   5186:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   5187:               RRETURN(MATCH_NOMATCH);
                   5188:             break;
                   5189: 
                   5190:             case OP_DIGIT:
                   5191:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   5192:               RRETURN(MATCH_NOMATCH);
                   5193:             break;
                   5194: 
                   5195:             case OP_NOT_WHITESPACE:
                   5196:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   5197:               RRETURN(MATCH_NOMATCH);
                   5198:             break;
                   5199: 
                   5200:             case OP_WHITESPACE:
1.1.1.2   misho    5201:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
1.1       misho    5202:               RRETURN(MATCH_NOMATCH);
                   5203:             break;
                   5204: 
                   5205:             case OP_NOT_WORDCHAR:
                   5206:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   5207:               RRETURN(MATCH_NOMATCH);
                   5208:             break;
                   5209: 
                   5210:             case OP_WORDCHAR:
                   5211:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   5212:               RRETURN(MATCH_NOMATCH);
                   5213:             break;
                   5214: 
                   5215:             default:
                   5216:             RRETURN(PCRE_ERROR_INTERNAL);
                   5217:             }
                   5218:           }
                   5219:         }
                   5220:       else
                   5221: #endif
1.1.1.2   misho    5222:       /* Not UTF mode */
1.1       misho    5223:         {
                   5224:         for (fi = min;; fi++)
                   5225:           {
                   5226:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
                   5227:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5228:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5229:           if (eptr >= md->end_subject)
                   5230:             {
                   5231:             SCHECK_PARTIAL();
                   5232:             RRETURN(MATCH_NOMATCH);
                   5233:             }
                   5234:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   5235:             RRETURN(MATCH_NOMATCH);
                   5236:           c = *eptr++;
                   5237:           switch(ctype)
                   5238:             {
1.1.1.3 ! misho    5239:             case OP_ANY:               /* This is the non-NL case */
        !          5240:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5241:                 eptr >= md->end_subject &&
        !          5242:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5243:                 NLBLOCK->nllen == 2 &&
        !          5244:                 c == NLBLOCK->nl[0])
        !          5245:               {
        !          5246:               md->hitend = TRUE;
        !          5247:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5248:               }
        !          5249:             break;
        !          5250: 
1.1       misho    5251:             case OP_ALLANY:
                   5252:             case OP_ANYBYTE:
                   5253:             break;
                   5254: 
                   5255:             case OP_ANYNL:
                   5256:             switch(c)
                   5257:               {
                   5258:               default: RRETURN(MATCH_NOMATCH);
                   5259:               case 0x000d:
                   5260:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   5261:               break;
                   5262: 
                   5263:               case 0x000a:
                   5264:               break;
                   5265: 
                   5266:               case 0x000b:
                   5267:               case 0x000c:
                   5268:               case 0x0085:
1.1.1.2   misho    5269: #ifdef COMPILE_PCRE16
                   5270:               case 0x2028:
                   5271:               case 0x2029:
                   5272: #endif
1.1       misho    5273:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   5274:               break;
                   5275:               }
                   5276:             break;
                   5277: 
                   5278:             case OP_NOT_HSPACE:
                   5279:             switch(c)
                   5280:               {
                   5281:               default: break;
                   5282:               case 0x09:      /* HT */
                   5283:               case 0x20:      /* SPACE */
                   5284:               case 0xa0:      /* NBSP */
1.1.1.2   misho    5285: #ifdef COMPILE_PCRE16
                   5286:               case 0x1680:    /* OGHAM SPACE MARK */
                   5287:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5288:               case 0x2000:    /* EN QUAD */
                   5289:               case 0x2001:    /* EM QUAD */
                   5290:               case 0x2002:    /* EN SPACE */
                   5291:               case 0x2003:    /* EM SPACE */
                   5292:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5293:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5294:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5295:               case 0x2007:    /* FIGURE SPACE */
                   5296:               case 0x2008:    /* PUNCTUATION SPACE */
                   5297:               case 0x2009:    /* THIN SPACE */
                   5298:               case 0x200A:    /* HAIR SPACE */
                   5299:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5300:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5301:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5302: #endif
1.1       misho    5303:               RRETURN(MATCH_NOMATCH);
                   5304:               }
                   5305:             break;
                   5306: 
                   5307:             case OP_HSPACE:
                   5308:             switch(c)
                   5309:               {
                   5310:               default: RRETURN(MATCH_NOMATCH);
                   5311:               case 0x09:      /* HT */
                   5312:               case 0x20:      /* SPACE */
                   5313:               case 0xa0:      /* NBSP */
1.1.1.2   misho    5314: #ifdef COMPILE_PCRE16
                   5315:               case 0x1680:    /* OGHAM SPACE MARK */
                   5316:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5317:               case 0x2000:    /* EN QUAD */
                   5318:               case 0x2001:    /* EM QUAD */
                   5319:               case 0x2002:    /* EN SPACE */
                   5320:               case 0x2003:    /* EM SPACE */
                   5321:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5322:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5323:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5324:               case 0x2007:    /* FIGURE SPACE */
                   5325:               case 0x2008:    /* PUNCTUATION SPACE */
                   5326:               case 0x2009:    /* THIN SPACE */
                   5327:               case 0x200A:    /* HAIR SPACE */
                   5328:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5329:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5330:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5331: #endif
1.1       misho    5332:               break;
                   5333:               }
                   5334:             break;
                   5335: 
                   5336:             case OP_NOT_VSPACE:
                   5337:             switch(c)
                   5338:               {
                   5339:               default: break;
                   5340:               case 0x0a:      /* LF */
                   5341:               case 0x0b:      /* VT */
                   5342:               case 0x0c:      /* FF */
                   5343:               case 0x0d:      /* CR */
                   5344:               case 0x85:      /* NEL */
1.1.1.2   misho    5345: #ifdef COMPILE_PCRE16
                   5346:               case 0x2028:    /* LINE SEPARATOR */
                   5347:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5348: #endif
1.1       misho    5349:               RRETURN(MATCH_NOMATCH);
                   5350:               }
                   5351:             break;
                   5352: 
                   5353:             case OP_VSPACE:
                   5354:             switch(c)
                   5355:               {
                   5356:               default: RRETURN(MATCH_NOMATCH);
                   5357:               case 0x0a:      /* LF */
                   5358:               case 0x0b:      /* VT */
                   5359:               case 0x0c:      /* FF */
                   5360:               case 0x0d:      /* CR */
                   5361:               case 0x85:      /* NEL */
1.1.1.2   misho    5362: #ifdef COMPILE_PCRE16
                   5363:               case 0x2028:    /* LINE SEPARATOR */
                   5364:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5365: #endif
1.1       misho    5366:               break;
                   5367:               }
                   5368:             break;
                   5369: 
                   5370:             case OP_NOT_DIGIT:
1.1.1.2   misho    5371:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5372:             break;
                   5373: 
                   5374:             case OP_DIGIT:
1.1.1.2   misho    5375:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5376:             break;
                   5377: 
                   5378:             case OP_NOT_WHITESPACE:
1.1.1.2   misho    5379:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5380:             break;
                   5381: 
                   5382:             case OP_WHITESPACE:
1.1.1.2   misho    5383:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5384:             break;
                   5385: 
                   5386:             case OP_NOT_WORDCHAR:
1.1.1.2   misho    5387:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5388:             break;
                   5389: 
                   5390:             case OP_WORDCHAR:
1.1.1.2   misho    5391:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5392:             break;
                   5393: 
                   5394:             default:
                   5395:             RRETURN(PCRE_ERROR_INTERNAL);
                   5396:             }
                   5397:           }
                   5398:         }
                   5399:       /* Control never gets here */
                   5400:       }
                   5401: 
                   5402:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5403:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5404:     UTF and UCP stuff separate. */
                   5405: 
                   5406:     else
                   5407:       {
                   5408:       pp = eptr;  /* Remember where we started */
                   5409: 
                   5410: #ifdef SUPPORT_UCP
                   5411:       if (prop_type >= 0)
                   5412:         {
                   5413:         switch(prop_type)
                   5414:           {
                   5415:           case PT_ANY:
                   5416:           for (i = min; i < max; i++)
                   5417:             {
                   5418:             int len = 1;
                   5419:             if (eptr >= md->end_subject)
                   5420:               {
                   5421:               SCHECK_PARTIAL();
                   5422:               break;
                   5423:               }
                   5424:             GETCHARLENTEST(c, eptr, len);
                   5425:             if (prop_fail_result) break;
                   5426:             eptr+= len;
                   5427:             }
                   5428:           break;
                   5429: 
                   5430:           case PT_LAMP:
                   5431:           for (i = min; i < max; i++)
                   5432:             {
                   5433:             int chartype;
                   5434:             int len = 1;
                   5435:             if (eptr >= md->end_subject)
                   5436:               {
                   5437:               SCHECK_PARTIAL();
                   5438:               break;
                   5439:               }
                   5440:             GETCHARLENTEST(c, eptr, len);
                   5441:             chartype = UCD_CHARTYPE(c);
                   5442:             if ((chartype == ucp_Lu ||
                   5443:                  chartype == ucp_Ll ||
                   5444:                  chartype == ucp_Lt) == prop_fail_result)
                   5445:               break;
                   5446:             eptr+= len;
                   5447:             }
                   5448:           break;
                   5449: 
                   5450:           case PT_GC:
                   5451:           for (i = min; i < max; i++)
                   5452:             {
                   5453:             int len = 1;
                   5454:             if (eptr >= md->end_subject)
                   5455:               {
                   5456:               SCHECK_PARTIAL();
                   5457:               break;
                   5458:               }
                   5459:             GETCHARLENTEST(c, eptr, len);
                   5460:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
                   5461:             eptr+= len;
                   5462:             }
                   5463:           break;
                   5464: 
                   5465:           case PT_PC:
                   5466:           for (i = min; i < max; i++)
                   5467:             {
                   5468:             int len = 1;
                   5469:             if (eptr >= md->end_subject)
                   5470:               {
                   5471:               SCHECK_PARTIAL();
                   5472:               break;
                   5473:               }
                   5474:             GETCHARLENTEST(c, eptr, len);
                   5475:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
                   5476:             eptr+= len;
                   5477:             }
                   5478:           break;
                   5479: 
                   5480:           case PT_SC:
                   5481:           for (i = min; i < max; i++)
                   5482:             {
                   5483:             int len = 1;
                   5484:             if (eptr >= md->end_subject)
                   5485:               {
                   5486:               SCHECK_PARTIAL();
                   5487:               break;
                   5488:               }
                   5489:             GETCHARLENTEST(c, eptr, len);
                   5490:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
                   5491:             eptr+= len;
                   5492:             }
                   5493:           break;
                   5494: 
                   5495:           case PT_ALNUM:
                   5496:           for (i = min; i < max; i++)
                   5497:             {
                   5498:             int category;
                   5499:             int len = 1;
                   5500:             if (eptr >= md->end_subject)
                   5501:               {
                   5502:               SCHECK_PARTIAL();
                   5503:               break;
                   5504:               }
                   5505:             GETCHARLENTEST(c, eptr, len);
                   5506:             category = UCD_CATEGORY(c);
                   5507:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   5508:               break;
                   5509:             eptr+= len;
                   5510:             }
                   5511:           break;
                   5512: 
                   5513:           case PT_SPACE:    /* Perl space */
                   5514:           for (i = min; i < max; i++)
                   5515:             {
                   5516:             int len = 1;
                   5517:             if (eptr >= md->end_subject)
                   5518:               {
                   5519:               SCHECK_PARTIAL();
                   5520:               break;
                   5521:               }
                   5522:             GETCHARLENTEST(c, eptr, len);
                   5523:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5524:                  c == CHAR_FF || c == CHAR_CR)
                   5525:                  == prop_fail_result)
                   5526:               break;
                   5527:             eptr+= len;
                   5528:             }
                   5529:           break;
                   5530: 
                   5531:           case PT_PXSPACE:  /* POSIX space */
                   5532:           for (i = min; i < max; i++)
                   5533:             {
                   5534:             int len = 1;
                   5535:             if (eptr >= md->end_subject)
                   5536:               {
                   5537:               SCHECK_PARTIAL();
                   5538:               break;
                   5539:               }
                   5540:             GETCHARLENTEST(c, eptr, len);
                   5541:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5542:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5543:                  == prop_fail_result)
                   5544:               break;
                   5545:             eptr+= len;
                   5546:             }
                   5547:           break;
                   5548: 
                   5549:           case PT_WORD:
                   5550:           for (i = min; i < max; i++)
                   5551:             {
                   5552:             int category;
                   5553:             int len = 1;
                   5554:             if (eptr >= md->end_subject)
                   5555:               {
                   5556:               SCHECK_PARTIAL();
                   5557:               break;
                   5558:               }
                   5559:             GETCHARLENTEST(c, eptr, len);
                   5560:             category = UCD_CATEGORY(c);
                   5561:             if ((category == ucp_L || category == ucp_N ||
                   5562:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5563:               break;
                   5564:             eptr+= len;
                   5565:             }
                   5566:           break;
                   5567: 
                   5568:           default:
                   5569:           RRETURN(PCRE_ERROR_INTERNAL);
                   5570:           }
                   5571: 
                   5572:         /* eptr is now past the end of the maximum run */
                   5573: 
                   5574:         if (possessive) continue;
                   5575:         for(;;)
                   5576:           {
                   5577:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
                   5578:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5579:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.1.1.2   misho    5580:           if (utf) BACKCHAR(eptr);
1.1       misho    5581:           }
                   5582:         }
                   5583: 
                   5584:       /* Match extended Unicode sequences. We will get here only if the
                   5585:       support is in the binary; otherwise a compile-time error occurs. */
                   5586: 
                   5587:       else if (ctype == OP_EXTUNI)
                   5588:         {
                   5589:         for (i = min; i < max; i++)
                   5590:           {
                   5591:           int len = 1;
                   5592:           if (eptr >= md->end_subject)
                   5593:             {
                   5594:             SCHECK_PARTIAL();
                   5595:             break;
                   5596:             }
1.1.1.2   misho    5597:           if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1       misho    5598:           if (UCD_CATEGORY(c) == ucp_M) break;
                   5599:           eptr += len;
                   5600:           while (eptr < md->end_subject)
                   5601:             {
                   5602:             len = 1;
1.1.1.2   misho    5603:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1       misho    5604:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5605:             eptr += len;
                   5606:             }
1.1.1.3 ! misho    5607:           CHECK_PARTIAL();
1.1       misho    5608:           }
                   5609: 
                   5610:         /* eptr is now past the end of the maximum run */
                   5611: 
                   5612:         if (possessive) continue;
                   5613: 
                   5614:         for(;;)
                   5615:           {
                   5616:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
                   5617:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5618:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5619:           for (;;)                        /* Move back over one extended */
                   5620:             {
1.1.1.2   misho    5621:             if (!utf) c = *eptr; else
1.1       misho    5622:               {
                   5623:               BACKCHAR(eptr);
                   5624:               GETCHAR(c, eptr);
                   5625:               }
                   5626:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5627:             eptr--;
                   5628:             }
                   5629:           }
                   5630:         }
                   5631: 
                   5632:       else
                   5633: #endif   /* SUPPORT_UCP */
                   5634: 
1.1.1.2   misho    5635: #ifdef SUPPORT_UTF
                   5636:       if (utf)
1.1       misho    5637:         {
                   5638:         switch(ctype)
                   5639:           {
                   5640:           case OP_ANY:
                   5641:           if (max < INT_MAX)
                   5642:             {
                   5643:             for (i = min; i < max; i++)
                   5644:               {
                   5645:               if (eptr >= md->end_subject)
                   5646:                 {
                   5647:                 SCHECK_PARTIAL();
                   5648:                 break;
                   5649:                 }
                   5650:               if (IS_NEWLINE(eptr)) break;
1.1.1.3 ! misho    5651:               if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5652:                   eptr + 1 >= md->end_subject &&
        !          5653:                   NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5654:                   NLBLOCK->nllen == 2 &&
        !          5655:                   *eptr == NLBLOCK->nl[0])
        !          5656:                 {
        !          5657:                 md->hitend = TRUE;
        !          5658:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5659:                 }
1.1       misho    5660:               eptr++;
1.1.1.2   misho    5661:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5662:               }
                   5663:             }
                   5664: 
                   5665:           /* Handle unlimited UTF repeat */
                   5666: 
                   5667:           else
                   5668:             {
                   5669:             for (i = min; i < max; i++)
                   5670:               {
                   5671:               if (eptr >= md->end_subject)
                   5672:                 {
                   5673:                 SCHECK_PARTIAL();
                   5674:                 break;
                   5675:                 }
                   5676:               if (IS_NEWLINE(eptr)) break;
1.1.1.3 ! misho    5677:               if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5678:                   eptr + 1 >= md->end_subject &&
        !          5679:                   NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5680:                   NLBLOCK->nllen == 2 &&
        !          5681:                   *eptr == NLBLOCK->nl[0])
        !          5682:                 {
        !          5683:                 md->hitend = TRUE;
        !          5684:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5685:                 }
1.1       misho    5686:               eptr++;
1.1.1.2   misho    5687:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5688:               }
                   5689:             }
                   5690:           break;
                   5691: 
                   5692:           case OP_ALLANY:
                   5693:           if (max < INT_MAX)
                   5694:             {
                   5695:             for (i = min; i < max; i++)
                   5696:               {
                   5697:               if (eptr >= md->end_subject)
                   5698:                 {
                   5699:                 SCHECK_PARTIAL();
                   5700:                 break;
                   5701:                 }
                   5702:               eptr++;
1.1.1.2   misho    5703:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5704:               }
                   5705:             }
                   5706:           else
                   5707:             {
                   5708:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5709:             SCHECK_PARTIAL();
                   5710:             }
                   5711:           break;
                   5712: 
                   5713:           /* The byte case is the same as in non-UTF mode */
                   5714: 
                   5715:           case OP_ANYBYTE:
                   5716:           c = max - min;
                   5717:           if (c > (unsigned int)(md->end_subject - eptr))
                   5718:             {
                   5719:             eptr = md->end_subject;
                   5720:             SCHECK_PARTIAL();
                   5721:             }
                   5722:           else eptr += c;
                   5723:           break;
                   5724: 
                   5725:           case OP_ANYNL:
                   5726:           for (i = min; i < max; i++)
                   5727:             {
                   5728:             int len = 1;
                   5729:             if (eptr >= md->end_subject)
                   5730:               {
                   5731:               SCHECK_PARTIAL();
                   5732:               break;
                   5733:               }
                   5734:             GETCHARLEN(c, eptr, len);
                   5735:             if (c == 0x000d)
                   5736:               {
                   5737:               if (++eptr >= md->end_subject) break;
                   5738:               if (*eptr == 0x000a) eptr++;
                   5739:               }
                   5740:             else
                   5741:               {
                   5742:               if (c != 0x000a &&
                   5743:                   (md->bsr_anycrlf ||
                   5744:                    (c != 0x000b && c != 0x000c &&
                   5745:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5746:                 break;
                   5747:               eptr += len;
                   5748:               }
                   5749:             }
                   5750:           break;
                   5751: 
                   5752:           case OP_NOT_HSPACE:
                   5753:           case OP_HSPACE:
                   5754:           for (i = min; i < max; i++)
                   5755:             {
                   5756:             BOOL gotspace;
                   5757:             int len = 1;
                   5758:             if (eptr >= md->end_subject)
                   5759:               {
                   5760:               SCHECK_PARTIAL();
                   5761:               break;
                   5762:               }
                   5763:             GETCHARLEN(c, eptr, len);
                   5764:             switch(c)
                   5765:               {
                   5766:               default: gotspace = FALSE; break;
                   5767:               case 0x09:      /* HT */
                   5768:               case 0x20:      /* SPACE */
                   5769:               case 0xa0:      /* NBSP */
                   5770:               case 0x1680:    /* OGHAM SPACE MARK */
                   5771:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5772:               case 0x2000:    /* EN QUAD */
                   5773:               case 0x2001:    /* EM QUAD */
                   5774:               case 0x2002:    /* EN SPACE */
                   5775:               case 0x2003:    /* EM SPACE */
                   5776:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5777:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5778:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5779:               case 0x2007:    /* FIGURE SPACE */
                   5780:               case 0x2008:    /* PUNCTUATION SPACE */
                   5781:               case 0x2009:    /* THIN SPACE */
                   5782:               case 0x200A:    /* HAIR SPACE */
                   5783:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5784:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5785:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5786:               gotspace = TRUE;
                   5787:               break;
                   5788:               }
                   5789:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5790:             eptr += len;
                   5791:             }
                   5792:           break;
                   5793: 
                   5794:           case OP_NOT_VSPACE:
                   5795:           case OP_VSPACE:
                   5796:           for (i = min; i < max; i++)
                   5797:             {
                   5798:             BOOL gotspace;
                   5799:             int len = 1;
                   5800:             if (eptr >= md->end_subject)
                   5801:               {
                   5802:               SCHECK_PARTIAL();
                   5803:               break;
                   5804:               }
                   5805:             GETCHARLEN(c, eptr, len);
                   5806:             switch(c)
                   5807:               {
                   5808:               default: gotspace = FALSE; break;
                   5809:               case 0x0a:      /* LF */
                   5810:               case 0x0b:      /* VT */
                   5811:               case 0x0c:      /* FF */
                   5812:               case 0x0d:      /* CR */
                   5813:               case 0x85:      /* NEL */
                   5814:               case 0x2028:    /* LINE SEPARATOR */
                   5815:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5816:               gotspace = TRUE;
                   5817:               break;
                   5818:               }
                   5819:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5820:             eptr += len;
                   5821:             }
                   5822:           break;
                   5823: 
                   5824:           case OP_NOT_DIGIT:
                   5825:           for (i = min; i < max; i++)
                   5826:             {
                   5827:             int len = 1;
                   5828:             if (eptr >= md->end_subject)
                   5829:               {
                   5830:               SCHECK_PARTIAL();
                   5831:               break;
                   5832:               }
                   5833:             GETCHARLEN(c, eptr, len);
                   5834:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5835:             eptr+= len;
                   5836:             }
                   5837:           break;
                   5838: 
                   5839:           case OP_DIGIT:
                   5840:           for (i = min; i < max; i++)
                   5841:             {
                   5842:             int len = 1;
                   5843:             if (eptr >= md->end_subject)
                   5844:               {
                   5845:               SCHECK_PARTIAL();
                   5846:               break;
                   5847:               }
                   5848:             GETCHARLEN(c, eptr, len);
                   5849:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5850:             eptr+= len;
                   5851:             }
                   5852:           break;
                   5853: 
                   5854:           case OP_NOT_WHITESPACE:
                   5855:           for (i = min; i < max; i++)
                   5856:             {
                   5857:             int len = 1;
                   5858:             if (eptr >= md->end_subject)
                   5859:               {
                   5860:               SCHECK_PARTIAL();
                   5861:               break;
                   5862:               }
                   5863:             GETCHARLEN(c, eptr, len);
                   5864:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5865:             eptr+= len;
                   5866:             }
                   5867:           break;
                   5868: 
                   5869:           case OP_WHITESPACE:
                   5870:           for (i = min; i < max; i++)
                   5871:             {
                   5872:             int len = 1;
                   5873:             if (eptr >= md->end_subject)
                   5874:               {
                   5875:               SCHECK_PARTIAL();
                   5876:               break;
                   5877:               }
                   5878:             GETCHARLEN(c, eptr, len);
                   5879:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5880:             eptr+= len;
                   5881:             }
                   5882:           break;
                   5883: 
                   5884:           case OP_NOT_WORDCHAR:
                   5885:           for (i = min; i < max; i++)
                   5886:             {
                   5887:             int len = 1;
                   5888:             if (eptr >= md->end_subject)
                   5889:               {
                   5890:               SCHECK_PARTIAL();
                   5891:               break;
                   5892:               }
                   5893:             GETCHARLEN(c, eptr, len);
                   5894:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5895:             eptr+= len;
                   5896:             }
                   5897:           break;
                   5898: 
                   5899:           case OP_WORDCHAR:
                   5900:           for (i = min; i < max; i++)
                   5901:             {
                   5902:             int len = 1;
                   5903:             if (eptr >= md->end_subject)
                   5904:               {
                   5905:               SCHECK_PARTIAL();
                   5906:               break;
                   5907:               }
                   5908:             GETCHARLEN(c, eptr, len);
                   5909:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5910:             eptr+= len;
                   5911:             }
                   5912:           break;
                   5913: 
                   5914:           default:
                   5915:           RRETURN(PCRE_ERROR_INTERNAL);
                   5916:           }
                   5917: 
                   5918:         /* eptr is now past the end of the maximum run. If possessive, we are
                   5919:         done (no backing up). Otherwise, match at this position; anything other
                   5920:         than no match is immediately returned. For nomatch, back up one
                   5921:         character, unless we are matching \R and the last thing matched was
                   5922:         \r\n, in which case, back up two bytes. */
                   5923: 
                   5924:         if (possessive) continue;
                   5925:         for(;;)
                   5926:           {
                   5927:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
                   5928:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5929:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5930:           BACKCHAR(eptr);
                   5931:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
                   5932:               eptr[-1] == '\r') eptr--;
                   5933:           }
                   5934:         }
                   5935:       else
1.1.1.2   misho    5936: #endif  /* SUPPORT_UTF */
                   5937:       /* Not UTF mode */
1.1       misho    5938:         {
                   5939:         switch(ctype)
                   5940:           {
                   5941:           case OP_ANY:
                   5942:           for (i = min; i < max; i++)
                   5943:             {
                   5944:             if (eptr >= md->end_subject)
                   5945:               {
                   5946:               SCHECK_PARTIAL();
                   5947:               break;
                   5948:               }
                   5949:             if (IS_NEWLINE(eptr)) break;
1.1.1.3 ! misho    5950:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5951:                 eptr + 1 >= md->end_subject &&
        !          5952:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5953:                 NLBLOCK->nllen == 2 &&
        !          5954:                 *eptr == NLBLOCK->nl[0])
        !          5955:               {
        !          5956:               md->hitend = TRUE;
        !          5957:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5958:               }
1.1       misho    5959:             eptr++;
                   5960:             }
                   5961:           break;
                   5962: 
                   5963:           case OP_ALLANY:
                   5964:           case OP_ANYBYTE:
                   5965:           c = max - min;
                   5966:           if (c > (unsigned int)(md->end_subject - eptr))
                   5967:             {
                   5968:             eptr = md->end_subject;
                   5969:             SCHECK_PARTIAL();
                   5970:             }
                   5971:           else eptr += c;
                   5972:           break;
                   5973: 
                   5974:           case OP_ANYNL:
                   5975:           for (i = min; i < max; i++)
                   5976:             {
                   5977:             if (eptr >= md->end_subject)
                   5978:               {
                   5979:               SCHECK_PARTIAL();
                   5980:               break;
                   5981:               }
                   5982:             c = *eptr;
                   5983:             if (c == 0x000d)
                   5984:               {
                   5985:               if (++eptr >= md->end_subject) break;
                   5986:               if (*eptr == 0x000a) eptr++;
                   5987:               }
                   5988:             else
                   5989:               {
1.1.1.2   misho    5990:               if (c != 0x000a && (md->bsr_anycrlf ||
                   5991:                 (c != 0x000b && c != 0x000c && c != 0x0085
                   5992: #ifdef COMPILE_PCRE16
                   5993:                 && c != 0x2028 && c != 0x2029
                   5994: #endif
                   5995:                 ))) break;
1.1       misho    5996:               eptr++;
                   5997:               }
                   5998:             }
                   5999:           break;
                   6000: 
                   6001:           case OP_NOT_HSPACE:
                   6002:           for (i = min; i < max; i++)
                   6003:             {
                   6004:             if (eptr >= md->end_subject)
                   6005:               {
                   6006:               SCHECK_PARTIAL();
                   6007:               break;
                   6008:               }
                   6009:             c = *eptr;
1.1.1.2   misho    6010:             if (c == 0x09 || c == 0x20 || c == 0xa0
                   6011: #ifdef COMPILE_PCRE16
                   6012:               || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
                   6013:               || c == 0x202f || c == 0x205f || c == 0x3000
                   6014: #endif
                   6015:               ) break;
1.1       misho    6016:             eptr++;
                   6017:             }
                   6018:           break;
                   6019: 
                   6020:           case OP_HSPACE:
                   6021:           for (i = min; i < max; i++)
                   6022:             {
                   6023:             if (eptr >= md->end_subject)
                   6024:               {
                   6025:               SCHECK_PARTIAL();
                   6026:               break;
                   6027:               }
                   6028:             c = *eptr;
1.1.1.2   misho    6029:             if (c != 0x09 && c != 0x20 && c != 0xa0
                   6030: #ifdef COMPILE_PCRE16
                   6031:               && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
                   6032:               && c != 0x202f && c != 0x205f && c != 0x3000
                   6033: #endif
                   6034:               ) break;
1.1       misho    6035:             eptr++;
                   6036:             }
                   6037:           break;
                   6038: 
                   6039:           case OP_NOT_VSPACE:
                   6040:           for (i = min; i < max; i++)
                   6041:             {
                   6042:             if (eptr >= md->end_subject)
                   6043:               {
                   6044:               SCHECK_PARTIAL();
                   6045:               break;
                   6046:               }
                   6047:             c = *eptr;
1.1.1.2   misho    6048:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
                   6049: #ifdef COMPILE_PCRE16
                   6050:               || c == 0x2028 || c == 0x2029
                   6051: #endif
                   6052:               ) break;
1.1       misho    6053:             eptr++;
                   6054:             }
                   6055:           break;
                   6056: 
                   6057:           case OP_VSPACE:
                   6058:           for (i = min; i < max; i++)
                   6059:             {
                   6060:             if (eptr >= md->end_subject)
                   6061:               {
                   6062:               SCHECK_PARTIAL();
                   6063:               break;
                   6064:               }
                   6065:             c = *eptr;
1.1.1.2   misho    6066:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
                   6067: #ifdef COMPILE_PCRE16
                   6068:               && c != 0x2028 && c != 0x2029
                   6069: #endif
                   6070:               ) break;
1.1       misho    6071:             eptr++;
                   6072:             }
                   6073:           break;
                   6074: 
                   6075:           case OP_NOT_DIGIT:
                   6076:           for (i = min; i < max; i++)
                   6077:             {
                   6078:             if (eptr >= md->end_subject)
                   6079:               {
                   6080:               SCHECK_PARTIAL();
                   6081:               break;
                   6082:               }
1.1.1.2   misho    6083:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misho    6084:             eptr++;
                   6085:             }
                   6086:           break;
                   6087: 
                   6088:           case OP_DIGIT:
                   6089:           for (i = min; i < max; i++)
                   6090:             {
                   6091:             if (eptr >= md->end_subject)
                   6092:               {
                   6093:               SCHECK_PARTIAL();
                   6094:               break;
                   6095:               }
1.1.1.2   misho    6096:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misho    6097:             eptr++;
                   6098:             }
                   6099:           break;
                   6100: 
                   6101:           case OP_NOT_WHITESPACE:
                   6102:           for (i = min; i < max; i++)
                   6103:             {
                   6104:             if (eptr >= md->end_subject)
                   6105:               {
                   6106:               SCHECK_PARTIAL();
                   6107:               break;
                   6108:               }
1.1.1.2   misho    6109:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misho    6110:             eptr++;
                   6111:             }
                   6112:           break;
                   6113: 
                   6114:           case OP_WHITESPACE:
                   6115:           for (i = min; i < max; i++)
                   6116:             {
                   6117:             if (eptr >= md->end_subject)
                   6118:               {
                   6119:               SCHECK_PARTIAL();
                   6120:               break;
                   6121:               }
1.1.1.2   misho    6122:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misho    6123:             eptr++;
                   6124:             }
                   6125:           break;
                   6126: 
                   6127:           case OP_NOT_WORDCHAR:
                   6128:           for (i = min; i < max; i++)
                   6129:             {
                   6130:             if (eptr >= md->end_subject)
                   6131:               {
                   6132:               SCHECK_PARTIAL();
                   6133:               break;
                   6134:               }
1.1.1.2   misho    6135:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misho    6136:             eptr++;
                   6137:             }
                   6138:           break;
                   6139: 
                   6140:           case OP_WORDCHAR:
                   6141:           for (i = min; i < max; i++)
                   6142:             {
                   6143:             if (eptr >= md->end_subject)
                   6144:               {
                   6145:               SCHECK_PARTIAL();
                   6146:               break;
                   6147:               }
1.1.1.2   misho    6148:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misho    6149:             eptr++;
                   6150:             }
                   6151:           break;
                   6152: 
                   6153:           default:
                   6154:           RRETURN(PCRE_ERROR_INTERNAL);
                   6155:           }
                   6156: 
                   6157:         /* eptr is now past the end of the maximum run. If possessive, we are
                   6158:         done (no backing up). Otherwise, match at this position; anything other
                   6159:         than no match is immediately returned. For nomatch, back up one
                   6160:         character (byte), unless we are matching \R and the last thing matched
                   6161:         was \r\n, in which case, back up two bytes. */
                   6162: 
                   6163:         if (possessive) continue;
                   6164:         while (eptr >= pp)
                   6165:           {
                   6166:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
                   6167:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   6168:           eptr--;
                   6169:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
                   6170:               eptr[-1] == '\r') eptr--;
                   6171:           }
                   6172:         }
                   6173: 
                   6174:       /* Get here if we can't make it match with any permitted repetitions */
                   6175: 
                   6176:       RRETURN(MATCH_NOMATCH);
                   6177:       }
                   6178:     /* Control never gets here */
                   6179: 
                   6180:     /* There's been some horrible disaster. Arrival here can only mean there is
                   6181:     something seriously wrong in the code above or the OP_xxx definitions. */
                   6182: 
                   6183:     default:
                   6184:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   6185:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   6186:     }
                   6187: 
                   6188:   /* Do not stick any code in here without much thought; it is assumed
                   6189:   that "continue" in the code above comes out to here to repeat the main
                   6190:   loop. */
                   6191: 
                   6192:   }             /* End of main loop */
                   6193: /* Control never reaches here */
                   6194: 
                   6195: 
                   6196: /* When compiling to use the heap rather than the stack for recursive calls to
                   6197: match(), the RRETURN() macro jumps here. The number that is saved in
                   6198: frame->Xwhere indicates which label we actually want to return to. */
                   6199: 
                   6200: #ifdef NO_RECURSE
                   6201: #define LBL(val) case val: goto L_RM##val;
                   6202: HEAP_RETURN:
                   6203: switch (frame->Xwhere)
                   6204:   {
                   6205:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   6206:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   6207:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   6208:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   6209:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
                   6210:   LBL(65) LBL(66)
1.1.1.2   misho    6211: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
                   6212:   LBL(21)
                   6213: #endif
                   6214: #ifdef SUPPORT_UTF
                   6215:   LBL(16) LBL(18) LBL(20)
                   6216:   LBL(22) LBL(23) LBL(28) LBL(30)
1.1       misho    6217:   LBL(32) LBL(34) LBL(42) LBL(46)
                   6218: #ifdef SUPPORT_UCP
                   6219:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
                   6220:   LBL(59) LBL(60) LBL(61) LBL(62)
                   6221: #endif  /* SUPPORT_UCP */
1.1.1.2   misho    6222: #endif  /* SUPPORT_UTF */
1.1       misho    6223:   default:
                   6224:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
1.1.1.2   misho    6225: 
                   6226: printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
                   6227: 
1.1       misho    6228:   return PCRE_ERROR_INTERNAL;
                   6229:   }
                   6230: #undef LBL
                   6231: #endif  /* NO_RECURSE */
                   6232: }
                   6233: 
                   6234: 
                   6235: /***************************************************************************
                   6236: ****************************************************************************
                   6237:                    RECURSION IN THE match() FUNCTION
                   6238: 
                   6239: Undefine all the macros that were defined above to handle this. */
                   6240: 
                   6241: #ifdef NO_RECURSE
                         /* NO_RECURSE selects the build that uses the heap rather than the
                         stack for recursive calls to match(). Per the banner above, the names
                         that follow were defined as macros for that build; they are removed
                         here so that later code can use them as ordinary identifiers.
                         NOTE(review): the blank-line groups presumably mirror the matching
                         #define list earlier in the file - confirm against it. */
                   6242: #undef eptr
                   6243: #undef ecode
                   6244: #undef mstart
                   6245: #undef offset_top
                   6246: #undef eptrb
                   6247: #undef flags
                   6248: 
                   6249: #undef callpat
                   6250: #undef charptr
                   6251: #undef data
                   6252: #undef next
                   6253: #undef pp
                   6254: #undef prev
                   6255: #undef saved_eptr
                   6256: 
                   6257: #undef new_recursive
                   6258: 
                   6259: #undef cur_is_word
                   6260: #undef condition
                   6261: #undef prev_is_word
                   6262: 
                   6263: #undef ctype
                   6264: #undef length  /* pcre_exec() below has a parameter named length */
                   6265: #undef max
                   6266: #undef min
                   6267: #undef number
                   6268: #undef offset
                   6269: #undef op
                   6270: #undef save_capture_last
                   6271: #undef save_offset1
                   6272: #undef save_offset2
                   6273: #undef save_offset3
                   6274: #undef stacksave
                   6275: 
                   6276: #undef newptrb
                   6277: 
                   6278: #endif  /* NO_RECURSE */
                   6279: 
                   6280: /* These two (fc, fi) are defined as macros in both cases - presumably with and without NO_RECURSE - hence they are undefined outside the #ifdef above */
                   6281: 
                   6282: #undef fc
                   6283: #undef fi
                   6284: 
                   6285: /***************************************************************************
                   6286: ***************************************************************************/
                   6287: 
                   6288: 
#ifdef NO_RECURSE
/*************************************************
*          Release allocated heap frames         *
*************************************************/

/* This function releases every heap frame chained after the base frame by
calling PUBL(stack_free) on each one. The base frame is on the machine stack
(pcre_exec() declares it as a local variable), and so must not be freed.

Before anything is freed, the base frame's Xnextframe link is reset to NULL.
That is the value the caller stores to mean "none are allocated yet", so once
this function returns the base frame no longer points at released memory and a
second call on the same base frame frees nothing.

Argument: the address of the base frame; a NULL address is ignored
Returns:  nothing
*/

static void
release_match_heapframes (heapframe *frame_base)
{
heapframe *nextframe;

if (frame_base == NULL) return;

nextframe = frame_base->Xnextframe;
frame_base->Xnextframe = NULL;          /* Detach the chain from the base */

while (nextframe != NULL)
  {
  heapframe *oldframe = nextframe;
  nextframe = nextframe->Xnextframe;    /* Fetch the link before the free */
  (PUBL(stack_free))(oldframe);
  }
}
#endif  /* NO_RECURSE */
        !          6313: 
1.1       misho    6314: 
                   6315: /*************************************************
                   6316: *         Execute a Regular Expression           *
                   6317: *************************************************/
                   6318: 
                   6319: /* This function applies a compiled re to a subject string and picks out
                   6320: portions of the string if it matches. Two elements in the vector are set for
                   6321: each substring: the offsets to the start and end of the substring.
                   6322: 
                   6323: Arguments:
                   6324:   argument_re     points to the compiled expression
                   6325:   extra_data      points to extra data or is NULL
                   6326:   subject         points to the subject string
                   6327:   length          length of subject string (may contain binary zeros)
                   6328:   start_offset    where to start in the subject string
                   6329:   options         option bits
                   6330:   offsets         points to a vector of ints to be filled in with offsets
                   6331:   offsetcount     the number of elements in the vector
                   6332: 
                   6333: Returns:          > 0 => success; value is the number of elements filled in
                   6334:                   = 0 => success, but offsets is not big enough
                   6335:                    -1 => failed to match
                   6336:                  < -1 => some kind of unexpected problem
                   6337: */
                   6338: 
1.1.1.2   misho    6339: #ifdef COMPILE_PCRE8
1.1       misho    6340: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6341: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   6342:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   6343:   int offsetcount)
1.1.1.2   misho    6344: #else
                   6345: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6346: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   6347:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   6348:   int offsetcount)
                   6349: #endif
1.1       misho    6350: {
                   6351: int rc, ocount, arg_offset_max;
                   6352: int newline;
                   6353: BOOL using_temporary_offsets = FALSE;
                   6354: BOOL anchored;
                   6355: BOOL startline;
                   6356: BOOL firstline;
1.1.1.2   misho    6357: BOOL utf;
                   6358: BOOL has_first_char = FALSE;
                   6359: BOOL has_req_char = FALSE;
                   6360: pcre_uchar first_char = 0;
                   6361: pcre_uchar first_char2 = 0;
                   6362: pcre_uchar req_char = 0;
                   6363: pcre_uchar req_char2 = 0;
1.1       misho    6364: match_data match_block;
                   6365: match_data *md = &match_block;
1.1.1.2   misho    6366: const pcre_uint8 *tables;
                   6367: const pcre_uint8 *start_bits = NULL;
                   6368: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
                   6369: PCRE_PUCHAR end_subject;
                   6370: PCRE_PUCHAR start_partial = NULL;
                   6371: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1       misho    6372: 
                   6373: const pcre_study_data *study;
1.1.1.2   misho    6374: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
1.1       misho    6375: 
1.1.1.3 ! misho    6376: #ifdef NO_RECURSE
        !          6377: heapframe frame_zero;
        !          6378: frame_zero.Xprevframe = NULL;            /* Marks the top level */
        !          6379: frame_zero.Xnextframe = NULL;            /* None are allocated yet */
        !          6380: md->match_frames_base = &frame_zero;
        !          6381: #endif
        !          6382: 
1.1.1.2   misho    6383: /* Check for the special magic call that measures the size of the stack used
1.1.1.3 ! misho    6384: per recursive call of match(). Without the funny casting for sizeof, a Windows
        !          6385: compiler gave this error: "unary minus operator applied to unsigned type,
        !          6386: result still unsigned". Hopefully the cast fixes that. */
1.1.1.2   misho    6387: 
                   6388: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
                   6389:     start_offset == -999)
                   6390: #ifdef NO_RECURSE
1.1.1.3 ! misho    6391:   return -((int)sizeof(heapframe));
1.1.1.2   misho    6392: #else
                   6393:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
                   6394: #endif
1.1       misho    6395: 
                   6396: /* Plausibility checks */
                   6397: 
                   6398: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.1.1.2   misho    6399: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
                   6400:   return PCRE_ERROR_NULL;
1.1       misho    6401: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   6402: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   6403: 
1.1.1.2   misho    6404: /* Check that the first field in the block is the magic number. If it is not,
                   6405: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
                   6406: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
                   6407: means that the pattern is likely compiled with different endianness. */
                   6408: 
                   6409: if (re->magic_number != MAGIC_NUMBER)
                   6410:   return re->magic_number == REVERSED_MAGIC_NUMBER?
                   6411:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
                   6412: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
                   6413: 
1.1       misho    6414: /* These two settings are used in the code for checking a UTF-8 string that
                   6415: follows immediately afterwards. Other values in the md block are used only
                   6416: during "normal" pcre_exec() processing, not when the JIT support is in use,
                   6417: so they are set up later. */
                   6418: 
1.1.1.2   misho    6419: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
                   6420: utf = md->utf = (re->options & PCRE_UTF8) != 0;
1.1       misho    6421: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   6422:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
                   6423: 
                   6424: /* Check a UTF-8 string if required. Pass back the character offset and error
                   6425: code for an invalid string if a results vector is available. */
                   6426: 
1.1.1.2   misho    6427: #ifdef SUPPORT_UTF
                   6428: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1       misho    6429:   {
                   6430:   int erroroffset;
1.1.1.2   misho    6431:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
1.1       misho    6432:   if (errorcode != 0)
                   6433:     {
                   6434:     if (offsetcount >= 2)
                   6435:       {
                   6436:       offsets[0] = erroroffset;
                   6437:       offsets[1] = errorcode;
                   6438:       }
1.1.1.2   misho    6439: #ifdef COMPILE_PCRE16
                   6440:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
                   6441:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
                   6442: #else
1.1       misho    6443:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
                   6444:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1.1.2   misho    6445: #endif
1.1       misho    6446:     }
                   6447: 
1.1.1.2   misho    6448:   /* Check that a start_offset points to the start of a UTF character. */
1.1       misho    6449:   if (start_offset > 0 && start_offset < length &&
1.1.1.2   misho    6450:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1       misho    6451:     return PCRE_ERROR_BADUTF8_OFFSET;
                   6452:   }
                   6453: #endif
                   6454: 
                   6455: /* If the pattern was successfully studied with JIT support, run the JIT
                   6456: executable instead of the rest of this function. Most options must be set at
                   6457: compile time for the JIT code to be usable. Fall back to the normal code path if
1.1.1.3 ! misho    6458: an unsupported flag is set. */
1.1       misho    6459: 
                   6460: #ifdef SUPPORT_JIT
                   6461: if (extra_data != NULL
1.1.1.3 ! misho    6462:     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
        !          6463:                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
1.1       misho    6464:     && extra_data->executable_jit != NULL
                   6465:     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
1.1.1.3 ! misho    6466:                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
        !          6467:                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
        !          6468:   {
        !          6469:   rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
        !          6470:        start_offset, options, offsets, offsetcount);
        !          6471: 
        !          6472:   /* PCRE_ERROR_NULL means that the selected normal or partial matching
        !          6473:   mode is not compiled. In this case we simply fall back to the interpreter. */
        !          6474: 
        !          6475:   if (rc != PCRE_ERROR_NULL) return rc;
        !          6476:   }
1.1       misho    6477: #endif
                   6478: 
                   6479: /* Carry on with non-JIT matching. This information is for finding all the
                   6480: numbers associated with a given name, for condition testing. */
                   6481: 
1.1.1.2   misho    6482: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.1       misho    6483: md->name_count = re->name_count;
                   6484: md->name_entry_size = re->name_entry_size;
                   6485: 
                   6486: /* Fish out the optional data from the extra_data structure, first setting
                   6487: the default values. */
                   6488: 
                   6489: study = NULL;
                   6490: md->match_limit = MATCH_LIMIT;
                   6491: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6492: md->callout_data = NULL;
                   6493: 
                   6494: /* The table pointer is always in native byte order. */
                   6495: 
1.1.1.2   misho    6496: tables = re->tables;
1.1       misho    6497: 
                   6498: if (extra_data != NULL)
                   6499:   {
                   6500:   register unsigned int flags = extra_data->flags;
                   6501:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6502:     study = (const pcre_study_data *)extra_data->study_data;
                   6503:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6504:     md->match_limit = extra_data->match_limit;
                   6505:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6506:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6507:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6508:     md->callout_data = extra_data->callout_data;
                   6509:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6510:   }
                   6511: 
                   6512: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6513: is a feature that makes it possible to save compiled regex and re-use them
                   6514: in other programs later. */
                   6515: 
1.1.1.2   misho    6516: if (tables == NULL) tables = PRIV(default_tables);
1.1       misho    6517: 
                   6518: /* Set up other data */
                   6519: 
                   6520: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6521: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6522: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6523: 
                   6524: /* The code starts after the real_pcre block and the capture name table. */
                   6525: 
1.1.1.2   misho    6526: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1       misho    6527:   re->name_count * re->name_entry_size;
                   6528: 
1.1.1.2   misho    6529: md->start_subject = (PCRE_PUCHAR)subject;
1.1       misho    6530: md->start_offset = start_offset;
                   6531: md->end_subject = md->start_subject + length;
                   6532: end_subject = md->end_subject;
                   6533: 
                   6534: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   6535: md->use_ucp = (re->options & PCRE_UCP) != 0;
                   6536: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   6537: md->ignore_skip_arg = FALSE;
                   6538: 
                   6539: /* Some options are unpacked into BOOL variables in the hope that testing
                   6540: them will be faster than individual option bits. */
                   6541: 
                   6542: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6543: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6544: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   6545: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
                   6546: 
                   6547: md->hitend = FALSE;
                   6548: md->mark = md->nomatch_mark = NULL;     /* In case never set */
                   6549: 
                   6550: md->recursive = NULL;                   /* No recursion at top level */
                   6551: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
                   6552: 
                   6553: md->lcc = tables + lcc_offset;
1.1.1.2   misho    6554: md->fcc = tables + fcc_offset;
1.1       misho    6555: md->ctypes = tables + ctypes_offset;
                   6556: 
                   6557: /* Handle different \R options. */
                   6558: 
                   6559: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6560:   {
                   6561:   case 0:
                   6562:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6563:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6564:   else
                   6565: #ifdef BSR_ANYCRLF
                   6566:   md->bsr_anycrlf = TRUE;
                   6567: #else
                   6568:   md->bsr_anycrlf = FALSE;
                   6569: #endif
                   6570:   break;
                   6571: 
                   6572:   case PCRE_BSR_ANYCRLF:
                   6573:   md->bsr_anycrlf = TRUE;
                   6574:   break;
                   6575: 
                   6576:   case PCRE_BSR_UNICODE:
                   6577:   md->bsr_anycrlf = FALSE;
                   6578:   break;
                   6579: 
                   6580:   default: return PCRE_ERROR_BADNEWLINE;
                   6581:   }
                   6582: 
                   6583: /* Handle different types of newline. The three bits give eight cases. If
                   6584: nothing is set at run time, whatever was used at compile time applies. */
                   6585: 
                   6586: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6587:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6588:   {
                   6589:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   6590:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6591:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   6592:   case PCRE_NEWLINE_CR+
                   6593:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   6594:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6595:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6596:   default: return PCRE_ERROR_BADNEWLINE;
                   6597:   }
                   6598: 
                   6599: if (newline == -2)
                   6600:   {
                   6601:   md->nltype = NLTYPE_ANYCRLF;
                   6602:   }
                   6603: else if (newline < 0)
                   6604:   {
                   6605:   md->nltype = NLTYPE_ANY;
                   6606:   }
                   6607: else
                   6608:   {
                   6609:   md->nltype = NLTYPE_FIXED;
                   6610:   if (newline > 255)
                   6611:     {
                   6612:     md->nllen = 2;
                   6613:     md->nl[0] = (newline >> 8) & 255;
                   6614:     md->nl[1] = newline & 255;
                   6615:     }
                   6616:   else
                   6617:     {
                   6618:     md->nllen = 1;
                   6619:     md->nl[0] = newline;
                   6620:     }
                   6621:   }
                   6622: 
                   6623: /* Partial matching was originally supported only for a restricted set of
                   6624: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6625: defined (though never set). So there's no harm in leaving this code. */
                   6626: 
                   6627: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6628:   return PCRE_ERROR_BADPARTIAL;
                   6629: 
                   6630: /* If the expression has got more back references than the offsets supplied can
                   6631: hold, we get a temporary chunk of working store to use during the matching.
                   6632: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6633: of 3. */
                   6634: 
                   6635: ocount = offsetcount - (offsetcount % 3);
                   6636: arg_offset_max = (2*ocount)/3;
                   6637: 
                   6638: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6639:   {
                   6640:   ocount = re->top_backref * 3 + 3;
1.1.1.2   misho    6641:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1       misho    6642:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6643:   using_temporary_offsets = TRUE;
                   6644:   DPRINTF(("Got memory to hold back references\n"));
                   6645:   }
                   6646: else md->offset_vector = offsets;
                   6647: 
                   6648: md->offset_end = ocount;
                   6649: md->offset_max = (2*ocount)/3;
                   6650: md->offset_overflow = FALSE;
                   6651: md->capture_last = -1;
                   6652: 
                   6653: /* Reset the working variable associated with each extraction. These should
                   6654: never be used unless previously set, but they get saved and restored, and so we
                   6655: initialize them to avoid reading uninitialized locations. Also, unset the
                   6656: offsets for the matched string. This is really just for tidiness with callouts,
                   6657: in case they inspect these fields. */
                   6658: 
                   6659: if (md->offset_vector != NULL)
                   6660:   {
                   6661:   register int *iptr = md->offset_vector + ocount;
                   6662:   register int *iend = iptr - re->top_bracket;
                   6663:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
                   6664:   while (--iptr >= iend) *iptr = -1;
                   6665:   md->offset_vector[0] = md->offset_vector[1] = -1;
                   6666:   }
                   6667: 
1.1.1.2   misho    6668: /* Set up the first character to match, if available. The first_char value is
1.1       misho    6669: never set for an anchored regular expression, but the anchoring may be forced
                   6670: at run time, so we have to test for anchoring. The first char may be unset for
                   6671: an unanchored pattern, of course. If there's no first char and the pattern was
                   6672: studied, there may be a bitmap of possible first characters. */
                   6673: 
                   6674: if (!anchored)
                   6675:   {
                   6676:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6677:     {
1.1.1.2   misho    6678:     has_first_char = TRUE;
                   6679:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   6680:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   6681:       {
                   6682:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
                   6683: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6684:       if (utf && first_char > 127)
                   6685:         first_char2 = UCD_OTHERCASE(first_char);
                   6686: #endif
                   6687:       }
1.1       misho    6688:     }
                   6689:   else
                   6690:     if (!startline && study != NULL &&
                   6691:       (study->flags & PCRE_STUDY_MAPPED) != 0)
                   6692:         start_bits = study->start_bits;
                   6693:   }
                   6694: 
                   6695: /* For anchored or unanchored matches, there may be a "last known required
                   6696: character" set. */
                   6697: 
                   6698: if ((re->flags & PCRE_REQCHSET) != 0)
                   6699:   {
1.1.1.2   misho    6700:   has_req_char = TRUE;
                   6701:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   6702:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   6703:     {
                   6704:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
                   6705: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6706:     if (utf && req_char > 127)
                   6707:       req_char2 = UCD_OTHERCASE(req_char);
                   6708: #endif
                   6709:     }
1.1       misho    6710:   }
                   6711: 
                   6712: 
                   6713: /* ==========================================================================*/
                   6714: 
                   6715: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   6716: the loop runs just once. */
                   6717: 
                   6718: for(;;)
                   6719:   {
1.1.1.2   misho    6720:   PCRE_PUCHAR save_end_subject = end_subject;
                   6721:   PCRE_PUCHAR new_start_match;
1.1       misho    6722: 
                   6723:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6724:   line of a multiline string. That is, the match must be before or at the first
                   6725:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6726:   scanning at a newline. If the match fails at the newline, later code breaks
                   6727:   this loop. */
                   6728: 
                   6729:   if (firstline)
                   6730:     {
1.1.1.2   misho    6731:     PCRE_PUCHAR t = start_match;
                   6732: #ifdef SUPPORT_UTF
                   6733:     if (utf)
1.1       misho    6734:       {
                   6735:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6736:         {
                   6737:         t++;
1.1.1.2   misho    6738:         ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    6739:         }
                   6740:       }
                   6741:     else
                   6742: #endif
                   6743:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6744:     end_subject = t;
                   6745:     }
                   6746: 
                   6747:   /* There are some optimizations that avoid running the match if a known
                   6748:   starting point is not found, or if a known later character is not present.
                   6749:   However, there is an option that disables these, for testing and for ensuring
                   6750:   that all callouts do actually occur. The option can be set in the regex by
                   6751:   (*NO_START_OPT) or passed in match-time options. */
                   6752: 
                   6753:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   6754:     {
1.1.1.2   misho    6755:     /* Advance to a unique first char if there is one. */
1.1       misho    6756: 
1.1.1.2   misho    6757:     if (has_first_char)
1.1       misho    6758:       {
1.1.1.2   misho    6759:       if (first_char != first_char2)
                   6760:         while (start_match < end_subject &&
                   6761:             *start_match != first_char && *start_match != first_char2)
1.1       misho    6762:           start_match++;
                   6763:       else
1.1.1.2   misho    6764:         while (start_match < end_subject && *start_match != first_char)
1.1       misho    6765:           start_match++;
                   6766:       }
                   6767: 
                   6768:     /* Or to just after a linebreak for a multiline match */
                   6769: 
                   6770:     else if (startline)
                   6771:       {
                   6772:       if (start_match > md->start_subject + start_offset)
                   6773:         {
1.1.1.2   misho    6774: #ifdef SUPPORT_UTF
                   6775:         if (utf)
1.1       misho    6776:           {
                   6777:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6778:             {
                   6779:             start_match++;
1.1.1.2   misho    6780:             ACROSSCHAR(start_match < end_subject, *start_match,
                   6781:               start_match++);
1.1       misho    6782:             }
                   6783:           }
                   6784:         else
                   6785: #endif
                   6786:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6787:           start_match++;
                   6788: 
                   6789:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6790:         and we are now at a LF, advance the match position by one more character.
                   6791:         */
                   6792: 
                   6793:         if (start_match[-1] == CHAR_CR &&
                   6794:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6795:              start_match < end_subject &&
                   6796:              *start_match == CHAR_NL)
                   6797:           start_match++;
                   6798:         }
                   6799:       }
                   6800: 
                   6801:     /* Or to a non-unique first byte after study */
                   6802: 
                   6803:     else if (start_bits != NULL)
                   6804:       {
                   6805:       while (start_match < end_subject)
                   6806:         {
                   6807:         register unsigned int c = *start_match;
1.1.1.2   misho    6808: #ifndef COMPILE_PCRE8
                   6809:         if (c > 255) c = 255;
                   6810: #endif
1.1       misho    6811:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   6812:           {
                   6813:           start_match++;
1.1.1.2   misho    6814: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
                   6815:           /* In non 8-bit mode, the iteration will stop for
                   6816:           characters > 255 at the beginning or not stop at all. */
                   6817:           if (utf)
                   6818:             ACROSSCHAR(start_match < end_subject, *start_match,
                   6819:               start_match++);
1.1       misho    6820: #endif
                   6821:           }
                   6822:         else break;
                   6823:         }
                   6824:       }
                   6825:     }   /* Starting optimizations */
                   6826: 
                   6827:   /* Restore fudged end_subject */
                   6828: 
                   6829:   end_subject = save_end_subject;
                   6830: 
                   6831:   /* The following two optimizations are disabled for partial matching or if
                   6832:   disabling is explicitly requested. */
                   6833: 
                   6834:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
                   6835:     {
                   6836:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6837:     a lower bound; no actual string of that length may actually match the
                   6838:     pattern. Although the value is, strictly, in characters, we treat it as
                   6839:     bytes to avoid spending too much time in this optimization. */
                   6840: 
                   6841:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6842:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6843:       {
                   6844:       rc = MATCH_NOMATCH;
                   6845:       break;
                   6846:       }
                   6847: 
1.1.1.2   misho    6848:     /* If req_char is set, we know that that character must appear in the
                   6849:     subject for the match to succeed. If the first character is set, req_char
1.1       misho    6850:     must be later in the subject; otherwise the test starts at the match point.
                   6851:     This optimization can save a huge amount of backtracking in patterns with
                   6852:     nested unlimited repeats that aren't going to match. Writing separate code
                   6853:     for cased/caseless versions makes it go faster, as does using an
                   6854:     autoincrement and backing off on a match.
                   6855: 
                   6856:     HOWEVER: when the subject string is very, very long, searching to its end
                   6857:     can take a long time, and give bad performance on quite ordinary patterns.
                   6858:     This showed up when somebody was matching something like /^\d+C/ on a
                   6859:     32-megabyte string... so we don't do this when the string is sufficiently
                   6860:     long. */
                   6861: 
1.1.1.2   misho    6862:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1       misho    6863:       {
1.1.1.2   misho    6864:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.1       misho    6865: 
                   6866:       /* We don't need to repeat the search if we haven't yet reached the
                   6867:       place we found it at last time. */
                   6868: 
1.1.1.2   misho    6869:       if (p > req_char_ptr)
1.1       misho    6870:         {
1.1.1.2   misho    6871:         if (req_char != req_char2)
1.1       misho    6872:           {
                   6873:           while (p < end_subject)
                   6874:             {
                   6875:             register int pp = *p++;
1.1.1.2   misho    6876:             if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    6877:             }
                   6878:           }
                   6879:         else
                   6880:           {
                   6881:           while (p < end_subject)
                   6882:             {
1.1.1.2   misho    6883:             if (*p++ == req_char) { p--; break; }
1.1       misho    6884:             }
                   6885:           }
                   6886: 
                   6887:         /* If we can't find the required character, break the matching loop,
                   6888:         forcing a match failure. */
                   6889: 
                   6890:         if (p >= end_subject)
                   6891:           {
                   6892:           rc = MATCH_NOMATCH;
                   6893:           break;
                   6894:           }
                   6895: 
                   6896:         /* If we have found the required character, save the point where we
                   6897:         found it, so that we don't search again next time round the loop if
                   6898:         the start hasn't passed this character yet. */
                   6899: 
1.1.1.2   misho    6900:         req_char_ptr = p;
1.1       misho    6901:         }
                   6902:       }
                   6903:     }
                   6904: 
                   6905: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6906:   printf(">>>> Match against: ");
                   6907:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6908:   printf("\n");
                   6909: #endif
                   6910: 
                   6911:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6912:   first starting point for which a partial match was found. */
                   6913: 
                   6914:   md->start_match_ptr = start_match;
                   6915:   md->start_used_ptr = start_match;
                   6916:   md->match_call_count = 0;
                   6917:   md->match_function_type = 0;
                   6918:   md->end_offset_top = 0;
                   6919:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
                   6920:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
                   6921: 
                   6922:   switch(rc)
                   6923:     {
                   6924:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6925:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
                   6926:     entirely. The only way we can do that is to re-do the match at the same
                   6927:     point, with a flag to force SKIP with an argument to be ignored. Just
                   6928:     treating this case as NOMATCH does not work because it does not check other
                   6929:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
                   6930: 
                   6931:     case MATCH_SKIP_ARG:
                   6932:     new_start_match = start_match;
                   6933:     md->ignore_skip_arg = TRUE;
                   6934:     break;
                   6935: 
                   6936:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6937:     same as the match we have just done, treat it as NOMATCH. */
                   6938: 
                   6939:     case MATCH_SKIP:
                   6940:     if (md->start_match_ptr != start_match)
                   6941:       {
                   6942:       new_start_match = md->start_match_ptr;
                   6943:       break;
                   6944:       }
                   6945:     /* Fall through */
                   6946: 
                   6947:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   6948:     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
                   6949: 
                   6950:     case MATCH_NOMATCH:
                   6951:     case MATCH_PRUNE:
                   6952:     case MATCH_THEN:
                   6953:     md->ignore_skip_arg = FALSE;
                   6954:     new_start_match = start_match + 1;
1.1.1.2   misho    6955: #ifdef SUPPORT_UTF
                   6956:     if (utf)
                   6957:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
                   6958:         new_start_match++);
1.1       misho    6959: #endif
                   6960:     break;
                   6961: 
                   6962:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6963: 
                   6964:     case MATCH_COMMIT:
                   6965:     rc = MATCH_NOMATCH;
                   6966:     goto ENDLOOP;
                   6967: 
                   6968:     /* Any other return is either a match, or some kind of error. */
                   6969: 
                   6970:     default:
                   6971:     goto ENDLOOP;
                   6972:     }
                   6973: 
                   6974:   /* Control reaches here for the various types of "no match at this point"
                   6975:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6976: 
                   6977:   rc = MATCH_NOMATCH;
                   6978: 
                   6979:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6980:   newline in the subject (though it may continue over the newline). Therefore,
                   6981:   if we have just failed to match, starting at a newline, do not continue. */
                   6982: 
                   6983:   if (firstline && IS_NEWLINE(start_match)) break;
                   6984: 
                   6985:   /* Advance to new matching position */
                   6986: 
                   6987:   start_match = new_start_match;
                   6988: 
                   6989:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6990:   the subject. */
                   6991: 
                   6992:   if (anchored || start_match > end_subject) break;
                   6993: 
                   6994:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6995:   not contain any explicit matches for \r or \n, and the newline option is CRLF
1.1.1.2   misho    6996:   or ANY or ANYCRLF, advance the match position by one more character. In
                   6997:   normal matching start_match will always be greater than the first position at
                   6998:   this stage, but a failed *SKIP can cause a return at the same point, which is
                   6999:   why the first test exists. */
1.1       misho    7000: 
1.1.1.2   misho    7001:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
                   7002:       start_match[-1] == CHAR_CR &&
1.1       misho    7003:       start_match < end_subject &&
                   7004:       *start_match == CHAR_NL &&
                   7005:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   7006:         (md->nltype == NLTYPE_ANY ||
                   7007:          md->nltype == NLTYPE_ANYCRLF ||
                   7008:          md->nllen == 2))
                   7009:     start_match++;
                   7010: 
                   7011:   md->mark = NULL;   /* Reset for start of next match attempt */
                   7012:   }                  /* End of for(;;) "bumpalong" loop */
                   7013: 
                   7014: /* ==========================================================================*/
                   7015: 
                   7016: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   7017: conditions is true:
                   7018: 
                   7019: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   7020: 
                   7021: (2) We are past the end of the subject;
                   7022: 
                   7023: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   7024:     this option requests that a match occur at or before the first newline in
                   7025:     the subject.
                   7026: 
                   7027: When we have a match and the offset vector is big enough to deal with any
                   7028: backreferences, captured substring offsets will already be set up. In the case
                   7029: where we had to get some local store to hold offsets for backreference
                   7030: processing, copy those that we can. In this case there need not be overflow if
                   7031: certain parts of the pattern were not used, even though there are more
                   7032: capturing parentheses than vector slots. */
                   7033: 
                   7034: ENDLOOP:
                   7035: 
                   7036: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
                   7037:   {
                   7038:   if (using_temporary_offsets)
                   7039:     {
                   7040:     if (arg_offset_max >= 4)
                   7041:       {
                   7042:       memcpy(offsets + 2, md->offset_vector + 2,
                   7043:         (arg_offset_max - 2) * sizeof(int));
                   7044:       DPRINTF(("Copied offsets from temporary memory\n"));
                   7045:       }
                   7046:     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
                   7047:     DPRINTF(("Freeing temporary memory\n"));
1.1.1.2   misho    7048:     (PUBL(free))(md->offset_vector);
1.1       misho    7049:     }
                   7050: 
                   7051:   /* Set the return code to the number of captured strings, or 0 if there were
                   7052:   too many to fit into the vector. */
                   7053: 
                   7054:   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
                   7055:     0 : md->end_offset_top/2;
                   7056: 
                   7057:   /* If there is space in the offset vector, set any unused pairs at the end of
                   7058:   the pattern to -1 for backwards compatibility. It is documented that this
                   7059:   happens. In earlier versions, the whole set of potential capturing offsets
                   7060:   was set to -1 each time round the loop, but this is handled differently now.
                   7061:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
                   7062:   those at the end that need unsetting here. We can't just unset them all at
                   7063:   the start of the whole thing because they may get set in one branch that is
                   7064:   not the final matching branch. */
                   7065: 
                   7066:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
                   7067:     {
                   7068:     register int *iptr, *iend;
                   7069:     int resetcount = 2 + re->top_bracket * 2;
1.1.1.3 ! misho    7070:     if (resetcount > offsetcount) resetcount = offsetcount;
1.1       misho    7071:     iptr = offsets + md->end_offset_top;
                   7072:     iend = offsets + resetcount;
                   7073:     while (iptr < iend) *iptr++ = -1;
                   7074:     }
                   7075: 
                   7076:   /* If there is space, set up the whole thing as substring 0. The value of
                   7077:   md->start_match_ptr might be modified if \K was encountered on the success
                   7078:   matching path. */
                   7079: 
                   7080:   if (offsetcount < 2) rc = 0; else
                   7081:     {
                   7082:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   7083:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
                   7084:     }
                   7085: 
                   7086:   /* Return MARK data if requested */
                   7087: 
                   7088:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2   misho    7089:     *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1       misho    7090:   DPRINTF((">>>> returning %d\n", rc));
1.1.1.3 ! misho    7091: #ifdef NO_RECURSE
        !          7092:   release_match_heapframes(&frame_zero);
        !          7093: #endif
1.1       misho    7094:   return rc;
                   7095:   }
                   7096: 
                   7097: /* Control gets here if there has been an error, or if the overall match
                   7098: attempt has failed at all permitted starting positions. */
                   7099: 
                   7100: if (using_temporary_offsets)
                   7101:   {
                   7102:   DPRINTF(("Freeing temporary memory\n"));
1.1.1.2   misho    7103:   (PUBL(free))(md->offset_vector);
1.1       misho    7104:   }
                   7105: 
                   7106: /* For anything other than nomatch or partial match, just return the code. */
                   7107: 
                   7108: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
                   7109:   {
                   7110:   DPRINTF((">>>> error: returning %d\n", rc));
1.1.1.3 ! misho    7111: #ifdef NO_RECURSE
        !          7112:   release_match_heapframes(&frame_zero);
        !          7113: #endif
1.1       misho    7114:   return rc;
                   7115:   }
                   7116: 
                   7117: /* Handle partial matches - disable any mark data */
                   7118: 
                   7119: if (start_partial != NULL)
                   7120:   {
                   7121:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   7122:   md->mark = NULL;
                   7123:   if (offsetcount > 1)
                   7124:     {
1.1.1.2   misho    7125:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
                   7126:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.1       misho    7127:     }
                   7128:   rc = PCRE_ERROR_PARTIAL;
                   7129:   }
                   7130: 
                   7131: /* This is the classic nomatch case */
                   7132: 
                   7133: else
                   7134:   {
                   7135:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   7136:   rc = PCRE_ERROR_NOMATCH;
                   7137:   }
                   7138: 
                   7139: /* Return the MARK data if it has been requested. */
                   7140: 
                   7141: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2   misho    7142:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
1.1.1.3 ! misho    7143: #ifdef NO_RECURSE
        !          7144:   release_match_heapframes(&frame_zero);
        !          7145: #endif
1.1       misho    7146: return rc;
                   7147: }
                   7148: 
                   7149: /* End of pcre_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>