embedaddon/pcre/pcre_exec.c - annotate

Return to pcre_exec.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre
Annotation of embedaddon/pcre/pcre_exec.c, revision 1.1.1.4

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.1.1.4 ! misho       9:            Copyright (c) 1997-2013 University of Cambridge
1.1       misho      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: /* This module contains pcre_exec(), the externally visible function that does
                     41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     42: possible. There are also some static supporting functions. */
                     43: 
                     44: #ifdef HAVE_CONFIG_H
                     45: #include "config.h"
                     46: #endif
                     47: 
                     48: #define NLBLOCK md             /* Block containing newline information */
                     49: #define PSSTART start_subject  /* Field containing processed string start */
                     50: #define PSEND   end_subject    /* Field containing processed string end */
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: /* Undefine some potentially clashing cpp symbols */
                     55: 
                     56: #undef min
                     57: #undef max
                     58: 
1.1.1.4 ! misho      59: /* The md->capture_last field uses the lower 16 bits for the last captured
        !            60: substring (which can never be greater than 65535) and a bit in the top half
        !            61: to mean "capture vector overflowed". This odd way of doing things was
        !            62: implemented when it was realized that preserving and restoring the overflow bit
        !            63: whenever the last capture number was saved/restored made for a neater
        !            64: interface, and doing it this way saved on (a) another variable, which would
        !            65: have increased the stack frame size (a big NO-NO in PCRE) and (b) another
        !            66: separate set of save/restore instructions. The following defines are used in
        !            67: implementing this. */
        !            68: 
        !            69: #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
        !            70: #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
        !            71: #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
        !            72: 
1.1       misho      73: /* Values for setting in md->match_function_type to indicate two special types
                     74: of call to match(). We do it this way to save on using another stack variable,
                     75: as stack usage is to be discouraged. */
                     76: 
                     77: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
                     78: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
                     79: 
                     80: /* Non-error returns from the match() function. Error returns are externally
                     81: defined PCRE_ERROR_xxx codes, which are all negative. */
                     82: 
                     83: #define MATCH_MATCH        1
                     84: #define MATCH_NOMATCH      0
                     85: 
                     86: /* Special internal returns from the match() function. Make them sufficiently
                     87: negative to avoid the external error codes. */
                     88: 
                     89: #define MATCH_ACCEPT       (-999)
1.1.1.4 ! misho      90: #define MATCH_KETRPOS      (-998)
        !            91: #define MATCH_ONCE         (-997)
        !            92: /* The next 5 must be kept together and in sequence so that a test that checks
        !            93: for any one of them can use a range. */
        !            94: #define MATCH_COMMIT       (-996)
1.1       misho      95: #define MATCH_PRUNE        (-995)
                     96: #define MATCH_SKIP         (-994)
                     97: #define MATCH_SKIP_ARG     (-993)
                     98: #define MATCH_THEN         (-992)
1.1.1.4 ! misho      99: #define MATCH_BACKTRACK_MAX MATCH_THEN
        !           100: #define MATCH_BACKTRACK_MIN MATCH_COMMIT
1.1       misho     101: 
                    102: /* Maximum number of ints of offset to save on the stack for recursive calls.
                    103: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                    104: because the offset vector is always a multiple of 3 long. */
                    105: 
                    106: #define REC_STACK_SAVE_MAX 30
                    107: 
                    108: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                    109: 
                    110: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
                    111: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
                    112: 
                    113: #ifdef PCRE_DEBUG
                    114: /*************************************************
                    115: *        Debugging function to print chars       *
                    116: *************************************************/
                    117: 
                    118: /* Print a sequence of chars in printable format, stopping at the end of the
                    119: subject if the requested.
                    120: 
                    121: Arguments:
                    122:   p           points to characters
                    123:   length      number to print
                    124:   is_subject  TRUE if printing from within md->start_subject
                    125:   md          pointer to matching data block, if is_subject is TRUE
                    126: 
                    127: Returns:     nothing
                    128: */
                    129: 
                    130: static void
1.1.1.2   misho     131: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1       misho     132: {
1.1.1.4 ! misho     133: pcre_uint32 c;
        !           134: BOOL utf = md->utf;
1.1       misho     135: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
                    136: while (length-- > 0)
1.1.1.4 ! misho     137:   if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
1.1       misho     138: }
                    139: #endif
                    140: 
                    141: 
                    142: 
                    143: /*************************************************
                    144: *          Match a back-reference                *
                    145: *************************************************/
                    146: 
                    147: /* Normally, if a back reference hasn't been set, the length that is passed is
                    148: negative, so the match always fails. However, in JavaScript compatibility mode,
                    149: the length passed is zero. Note that in caseless UTF-8 mode, the number of
                    150: subject bytes matched may be different to the number of reference bytes.
                    151: 
                    152: Arguments:
                    153:   offset      index into the offset vector
                    154:   eptr        pointer into the subject
                    155:   length      length of reference to be matched (number of bytes)
                    156:   md          points to match data block
                    157:   caseless    TRUE if caseless
                    158: 
1.1.1.3   misho     159: Returns:      >= 0 the number of subject bytes matched
                    160:               -1 no match
                    161:               -2 partial match; always given if at end subject
1.1       misho     162: */
                    163: 
                    164: static int
1.1.1.2   misho     165: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
1.1       misho     166:   BOOL caseless)
                    167: {
1.1.1.2   misho     168: PCRE_PUCHAR eptr_start = eptr;
                    169: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
1.1.1.4 ! misho     170: #ifdef SUPPORT_UTF
        !           171: BOOL utf = md->utf;
        !           172: #endif
1.1       misho     173: 
                    174: #ifdef PCRE_DEBUG
                    175: if (eptr >= md->end_subject)
                    176:   printf("matching subject <null>");
                    177: else
                    178:   {
                    179:   printf("matching subject ");
                    180:   pchars(eptr, length, TRUE, md);
                    181:   }
                    182: printf(" against backref ");
                    183: pchars(p, length, FALSE, md);
                    184: printf("\n");
                    185: #endif
                    186: 
1.1.1.3   misho     187: /* Always fail if reference not set (and not JavaScript compatible - in that
                    188: case the length is passed as zero). */
1.1       misho     189: 
                    190: if (length < 0) return -1;
                    191: 
                    192: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    193: properly if Unicode properties are supported. Otherwise, we can check only
                    194: ASCII characters. */
                    195: 
                    196: if (caseless)
                    197:   {
1.1.1.2   misho     198: #ifdef SUPPORT_UTF
1.1       misho     199: #ifdef SUPPORT_UCP
1.1.1.4 ! misho     200:   if (utf)
1.1       misho     201:     {
                    202:     /* Match characters up to the end of the reference. NOTE: the number of
1.1.1.4 ! misho     203:     data units matched may differ, because in UTF-8 there are some characters
        !           204:     whose upper and lower case versions code have different numbers of bytes.
        !           205:     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
        !           206:     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
        !           207:     sequence of two of the latter. It is important, therefore, to check the
        !           208:     length along the reference, not along the subject (earlier code did this
        !           209:     wrong). */
1.1       misho     210: 
1.1.1.2   misho     211:     PCRE_PUCHAR endptr = p + length;
1.1       misho     212:     while (p < endptr)
                    213:       {
1.1.1.4 ! misho     214:       pcre_uint32 c, d;
        !           215:       const ucd_record *ur;
1.1.1.3   misho     216:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1       misho     217:       GETCHARINC(c, eptr);
                    218:       GETCHARINC(d, p);
1.1.1.4 ! misho     219:       ur = GET_UCD(d);
        !           220:       if (c != d && c != d + ur->other_case)
        !           221:         {
        !           222:         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
        !           223:         for (;;)
        !           224:           {
        !           225:           if (c < *pp) return -1;
        !           226:           if (c == *pp++) break;
        !           227:           }
        !           228:         }
1.1       misho     229:       }
                    230:     }
                    231:   else
                    232: #endif
                    233: #endif
                    234: 
                    235:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    236:   is no UCP support. */
                    237:     {
                    238:     while (length-- > 0)
1.1.1.2   misho     239:       {
1.1.1.4 ! misho     240:       pcre_uint32 cc, cp;
1.1.1.3   misho     241:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1.1.4 ! misho     242:       cc = RAWUCHARTEST(eptr);
        !           243:       cp = RAWUCHARTEST(p);
        !           244:       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
1.1.1.2   misho     245:       p++;
                    246:       eptr++;
                    247:       }
1.1       misho     248:     }
                    249:   }
                    250: 
                    251: /* In the caseful case, we can just compare the bytes, whether or not we
                    252: are in UTF-8 mode. */
                    253: 
                    254: else
                    255:   {
1.1.1.3   misho     256:   while (length-- > 0)
                    257:     {
                    258:     if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1.1.4 ! misho     259:     if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
1.1.1.3   misho     260:     }
1.1       misho     261:   }
                    262: 
                    263: return (int)(eptr - eptr_start);
                    264: }
                    265: 
                    266: 
                    267: 
                    268: /***************************************************************************
                    269: ****************************************************************************
                    270:                    RECURSION IN THE match() FUNCTION
                    271: 
                    272: The match() function is highly recursive, though not every recursive call
                    273: increases the recursive depth. Nevertheless, some regular expressions can cause
                    274: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    275: itself recursively. This uses the stack for saving everything that has to be
                    276: saved for a recursive call. On Unix, the stack can be large, and this works
                    277: fine.
                    278: 
                    279: It turns out that on some non-Unix-like systems there are problems with
                    280: programs that use a lot of stack. (This despite the fact that every last chip
                    281: has oodles of memory these days, and techniques for extending the stack have
                    282: been known for decades.) So....
                    283: 
                    284: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    285: calls by keeping local variables that need to be preserved in blocks of memory
                    286: obtained from malloc() instead of on the stack. Macros are used to
                    287: achieve this so that the actual code doesn't look very different to what it
                    288: always used to.
                    289: 
                    290: The original heap-recursive code used longjmp(). However, it seems that this
                    291: can be very slow on some operating systems. Following a suggestion from Stan
                    292: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    293: provide a unique number for each call to RMATCH. There is no way of generating
                    294: a sequence of numbers at compile time in C. I have given them names, to make
                    295: them stand out more clearly.
                    296: 
                    297: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    298: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    299: tests. Furthermore, not using longjmp() means that local dynamic variables
                    300: don't have indeterminate values; this has meant that the frame size can be
                    301: reduced because the result can be "passed back" by straight setting of the
                    302: variable instead of being passed in the frame.
                    303: ****************************************************************************
                    304: ***************************************************************************/
                    305: 
                    306: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    307: below must be updated in sync.  */
                    308: 
                    309: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    310:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    311:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    312:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    313:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    314:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
1.1.1.4 ! misho     315:        RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
1.1       misho     316: 
                    317: /* These versions of the macros use the stack, as normal. There are debugging
                    318: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    319: actually used in this definition. */
                    320: 
                    321: #ifndef NO_RECURSE
                    322: #define REGISTER register
                    323: 
                    324: #ifdef PCRE_DEBUG
                    325: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    326:   { \
                    327:   printf("match() called in line %d\n", __LINE__); \
                    328:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
                    329:   printf("to line %d\n", __LINE__); \
                    330:   }
                    331: #define RRETURN(ra) \
                    332:   { \
1.1.1.4 ! misho     333:   printf("match() returned %d from line %d\n", ra, __LINE__); \
1.1       misho     334:   return ra; \
                    335:   }
                    336: #else
                    337: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    338:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
                    339: #define RRETURN(ra) return ra
                    340: #endif
                    341: 
                    342: #else
                    343: 
                    344: 
                    345: /* These versions of the macros manage a private stack on the heap. Note that
                    346: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    347: argument of match(), which never changes. */
                    348: 
                    349: #define REGISTER
                    350: 
                    351: #define RMATCH(ra,rb,rc,rd,re,rw)\
                    352:   {\
1.1.1.3   misho     353:   heapframe *newframe = frame->Xnextframe;\
                    354:   if (newframe == NULL)\
                    355:     {\
                    356:     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
                    357:     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
                    358:     newframe->Xnextframe = NULL;\
                    359:     frame->Xnextframe = newframe;\
                    360:     }\
                    361:   frame->Xwhere = rw;\
1.1       misho     362:   newframe->Xeptr = ra;\
                    363:   newframe->Xecode = rb;\
                    364:   newframe->Xmstart = mstart;\
                    365:   newframe->Xoffset_top = rc;\
                    366:   newframe->Xeptrb = re;\
                    367:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    368:   newframe->Xprevframe = frame;\
                    369:   frame = newframe;\
                    370:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    371:   goto HEAP_RECURSE;\
                    372:   L_##rw:\
                    373:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    374:   }
                    375: 
                    376: #define RRETURN(ra)\
                    377:   {\
                    378:   heapframe *oldframe = frame;\
                    379:   frame = oldframe->Xprevframe;\
                    380:   if (frame != NULL)\
                    381:     {\
                    382:     rrc = ra;\
                    383:     goto HEAP_RETURN;\
                    384:     }\
                    385:   return ra;\
                    386:   }
                    387: 
                    388: 
                    389: /* Structure for remembering the local variables in a private frame */
                    390: 
                    391: typedef struct heapframe {
                    392:   struct heapframe *Xprevframe;
1.1.1.3   misho     393:   struct heapframe *Xnextframe;
1.1       misho     394: 
                    395:   /* Function arguments that may change */
                    396: 
1.1.1.2   misho     397:   PCRE_PUCHAR Xeptr;
                    398:   const pcre_uchar *Xecode;
                    399:   PCRE_PUCHAR Xmstart;
1.1       misho     400:   int Xoffset_top;
                    401:   eptrblock *Xeptrb;
                    402:   unsigned int Xrdepth;
                    403: 
                    404:   /* Function local variables */
                    405: 
1.1.1.2   misho     406:   PCRE_PUCHAR Xcallpat;
                    407: #ifdef SUPPORT_UTF
                    408:   PCRE_PUCHAR Xcharptr;
                    409: #endif
                    410:   PCRE_PUCHAR Xdata;
                    411:   PCRE_PUCHAR Xnext;
                    412:   PCRE_PUCHAR Xpp;
                    413:   PCRE_PUCHAR Xprev;
                    414:   PCRE_PUCHAR Xsaved_eptr;
1.1       misho     415: 
                    416:   recursion_info Xnew_recursive;
                    417: 
                    418:   BOOL Xcur_is_word;
                    419:   BOOL Xcondition;
                    420:   BOOL Xprev_is_word;
                    421: 
                    422: #ifdef SUPPORT_UCP
                    423:   int Xprop_type;
1.1.1.4 ! misho     424:   unsigned int Xprop_value;
1.1       misho     425:   int Xprop_fail_result;
                    426:   int Xoclength;
1.1.1.2   misho     427:   pcre_uchar Xocchars[6];
1.1       misho     428: #endif
                    429: 
                    430:   int Xcodelink;
                    431:   int Xctype;
                    432:   unsigned int Xfc;
                    433:   int Xfi;
                    434:   int Xlength;
                    435:   int Xmax;
                    436:   int Xmin;
1.1.1.4 ! misho     437:   unsigned int Xnumber;
1.1       misho     438:   int Xoffset;
1.1.1.4 ! misho     439:   unsigned int Xop;
        !           440:   pcre_int32 Xsave_capture_last;
1.1       misho     441:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    442:   int Xstacksave[REC_STACK_SAVE_MAX];
                    443: 
                    444:   eptrblock Xnewptrb;
                    445: 
                    446:   /* Where to jump back to */
                    447: 
                    448:   int Xwhere;
                    449: 
                    450: } heapframe;
                    451: 
                    452: #endif
                    453: 
                    454: 
                    455: /***************************************************************************
                    456: ***************************************************************************/
                    457: 
                    458: 
                    459: 
                    460: /*************************************************
                    461: *         Match from current position            *
                    462: *************************************************/
                    463: 
                    464: /* This function is called recursively in many circumstances. Whenever it
                    465: returns a negative (error) response, the outer incarnation must also return the
                    466: same response. */
                    467: 
                    468: /* These macros pack up tests that are used for partial matching, and which
                    469: appear several times in the code. We set the "hit end" flag if the pointer is
                    470: at the end of the subject and also past the start of the subject (i.e.
                    471: something has been matched). For hard partial matching, we then return
                    472: immediately. The second one is used when we already know we are past the end of
                    473: the subject. */
                    474: 
                    475: #define CHECK_PARTIAL()\
                    476:   if (md->partial != 0 && eptr >= md->end_subject && \
                    477:       eptr > md->start_used_ptr) \
                    478:     { \
                    479:     md->hitend = TRUE; \
                    480:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
                    481:     }
                    482: 
                    483: #define SCHECK_PARTIAL()\
                    484:   if (md->partial != 0 && eptr > md->start_used_ptr) \
                    485:     { \
                    486:     md->hitend = TRUE; \
                    487:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
                    488:     }
                    489: 
                    490: 
                    491: /* Performance note: It might be tempting to extract commonly used fields from
1.1.1.2   misho     492: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1       misho     493: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    494: made performance worse.
                    495: 
                    496: Arguments:
                    497:    eptr        pointer to current character in subject
                    498:    ecode       pointer to current position in compiled code
                    499:    mstart      pointer to the current match start position (can be modified
                    500:                  by encountering \K)
                    501:    offset_top  current top pointer
                    502:    md          pointer to "static" info for the match
                    503:    eptrb       pointer to chain of blocks containing eptr at start of
                    504:                  brackets - for testing for empty matches
                    505:    rdepth      the recursion depth
                    506: 
                    507: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    508:                MATCH_NOMATCH if failed to match  )
                    509:                a negative MATCH_xxx value for PRUNE, SKIP, etc
                    510:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    511:                  (e.g. stopped by repeated call or recursion limit)
                    512: */
                    513: 
                    514: static int
1.1.1.2   misho     515: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
                    516:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
                    517:   unsigned int rdepth)
1.1       misho     518: {
                    519: /* These variables do not need to be preserved over recursion in this function,
                    520: so they can be ordinary variables in all cases. Mark some of them with
                    521: "register" because they are used a lot in loops. */
                    522: 
                    523: register int  rrc;         /* Returns from recursive calls */
                    524: register int  i;           /* Used for loops not involving calls to RMATCH() */
1.1.1.4 ! misho     525: register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
1.1.1.2   misho     526: register BOOL utf;         /* Local copy of UTF flag for speed */
1.1       misho     527: 
                    528: BOOL minimize, possessive; /* Quantifier options */
                    529: BOOL caseless;
                    530: int condcode;
                    531: 
                    532: /* When recursion is not being used, all "local" variables that have to be
1.1.1.2   misho     533: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
                    534: frame on the stack here; subsequent instantiations are obtained from the heap
                    535: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
                    536: the top-level on the stack rather than malloc-ing them all gives a performance
                    537: boost in many cases where there is not much "recursion". */
1.1       misho     538: 
                    539: #ifdef NO_RECURSE
1.1.1.3   misho     540: heapframe *frame = (heapframe *)md->match_frames_base;
1.1       misho     541: 
                    542: /* Copy in the original argument variables */
                    543: 
                    544: frame->Xeptr = eptr;
                    545: frame->Xecode = ecode;
                    546: frame->Xmstart = mstart;
                    547: frame->Xoffset_top = offset_top;
                    548: frame->Xeptrb = eptrb;
                    549: frame->Xrdepth = rdepth;
                    550: 
                    551: /* This is where control jumps back to in order to effect "recursion" */
                    552: 
                    553: HEAP_RECURSE:
                    554: 
                    555: /* Macros make the argument variables come from the current frame */
                    556: 
                    557: #define eptr               frame->Xeptr
                    558: #define ecode              frame->Xecode
                    559: #define mstart             frame->Xmstart
                    560: #define offset_top         frame->Xoffset_top
                    561: #define eptrb              frame->Xeptrb
                    562: #define rdepth             frame->Xrdepth
                    563: 
                    564: /* Ditto for the local variables */
                    565: 
1.1.1.2   misho     566: #ifdef SUPPORT_UTF
1.1       misho     567: #define charptr            frame->Xcharptr
                    568: #endif
                    569: #define callpat            frame->Xcallpat
                    570: #define codelink           frame->Xcodelink
                    571: #define data               frame->Xdata
                    572: #define next               frame->Xnext
                    573: #define pp                 frame->Xpp
                    574: #define prev               frame->Xprev
                    575: #define saved_eptr         frame->Xsaved_eptr
                    576: 
                    577: #define new_recursive      frame->Xnew_recursive
                    578: 
                    579: #define cur_is_word        frame->Xcur_is_word
                    580: #define condition          frame->Xcondition
                    581: #define prev_is_word       frame->Xprev_is_word
                    582: 
                    583: #ifdef SUPPORT_UCP
                    584: #define prop_type          frame->Xprop_type
                    585: #define prop_value         frame->Xprop_value
                    586: #define prop_fail_result   frame->Xprop_fail_result
                    587: #define oclength           frame->Xoclength
                    588: #define occhars            frame->Xocchars
                    589: #endif
                    590: 
                    591: #define ctype              frame->Xctype
                    592: #define fc                 frame->Xfc
                    593: #define fi                 frame->Xfi
                    594: #define length             frame->Xlength
                    595: #define max                frame->Xmax
                    596: #define min                frame->Xmin
                    597: #define number             frame->Xnumber
                    598: #define offset             frame->Xoffset
                    599: #define op                 frame->Xop
                    600: #define save_capture_last  frame->Xsave_capture_last
                    601: #define save_offset1       frame->Xsave_offset1
                    602: #define save_offset2       frame->Xsave_offset2
                    603: #define save_offset3       frame->Xsave_offset3
                    604: #define stacksave          frame->Xstacksave
                    605: 
                    606: #define newptrb            frame->Xnewptrb
                    607: 
                    608: /* When recursion is being used, local variables are allocated on the stack and
                    609: get preserved during recursion in the normal way. In this environment, fi and
                    610: i, and fc and c, can be the same variables. */
                    611: 
                    612: #else         /* NO_RECURSE not defined */
                    613: #define fi i
                    614: #define fc c
                    615: 
                    616: /* Many of the following variables are used only in small blocks of the code.
                    617: My normal style of coding would have declared them within each of those blocks.
                    618: However, in order to accommodate the version of this code that uses an external
                    619: "stack" implemented on the heap, it is easier to declare them all here, so the
                    620: declarations can be cut out in a block. The only declarations within blocks
                    621: below are for variables that do not have to be preserved over a recursive call
                    622: to RMATCH(). */
                    623: 
1.1.1.2   misho     624: #ifdef SUPPORT_UTF
                    625: const pcre_uchar *charptr;
1.1       misho     626: #endif
1.1.1.2   misho     627: const pcre_uchar *callpat;
                    628: const pcre_uchar *data;
                    629: const pcre_uchar *next;
                    630: PCRE_PUCHAR       pp;
                    631: const pcre_uchar *prev;
                    632: PCRE_PUCHAR       saved_eptr;
1.1       misho     633: 
                    634: recursion_info new_recursive;
                    635: 
                    636: BOOL cur_is_word;
                    637: BOOL condition;
                    638: BOOL prev_is_word;
                    639: 
                    640: #ifdef SUPPORT_UCP
                    641: int prop_type;
1.1.1.4 ! misho     642: unsigned int prop_value;
1.1       misho     643: int prop_fail_result;
                    644: int oclength;
1.1.1.2   misho     645: pcre_uchar occhars[6];
1.1       misho     646: #endif
                    647: 
                    648: int codelink;
                    649: int ctype;
                    650: int length;
                    651: int max;
                    652: int min;
1.1.1.4 ! misho     653: unsigned int number;
1.1       misho     654: int offset;
1.1.1.4 ! misho     655: unsigned int op;
        !           656: pcre_int32 save_capture_last;
1.1       misho     657: int save_offset1, save_offset2, save_offset3;
                    658: int stacksave[REC_STACK_SAVE_MAX];
                    659: 
                    660: eptrblock newptrb;
1.1.1.2   misho     661: 
                    662: /* There is a special fudge for calling match() in a way that causes it to
                    663: measure the size of its basic stack frame when the stack is being used for
                    664: recursion. The second argument (ecode) being NULL triggers this behaviour. It
                    665: cannot normally ever be NULL. The return is the negated value of the frame
                    666: size. */
                    667: 
                    668: if (ecode == NULL)
                    669:   {
                    670:   if (rdepth == 0)
                    671:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
                    672:   else
                    673:     {
                    674:     int len = (char *)&rdepth - (char *)eptr;
                    675:     return (len > 0)? -len : len;
                    676:     }
                    677:   }
1.1       misho     678: #endif     /* NO_RECURSE */
                    679: 
                    680: /* To save space on the stack and in the heap frame, I have doubled up on some
                    681: of the local variables that are used only in localised parts of the code, but
                    682: still need to be preserved over recursive calls of match(). These macros define
                    683: the alternative names that are used. */
                    684: 
                    685: #define allow_zero    cur_is_word
                    686: #define cbegroup      condition
                    687: #define code_offset   codelink
                    688: #define condassert    condition
                    689: #define matched_once  prev_is_word
1.1.1.2   misho     690: #define foc           number
                    691: #define save_mark     data
1.1       misho     692: 
                    693: /* These statements are here to stop the compiler complaining about uninitialized
                    694: variables. */
                    695: 
                    696: #ifdef SUPPORT_UCP
                    697: prop_value = 0;
                    698: prop_fail_result = 0;
                    699: #endif
                    700: 
                    701: 
                    702: /* This label is used for tail recursion, which is used in a few cases even
                    703: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    704: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    705: original patch. */
                    706: 
                    707: TAIL_RECURSE:
                    708: 
                    709: /* OK, now we can get on with the real code of the function. Recursive calls
                    710: are specified by the macro RMATCH and RRETURN is used to return. When
                    711: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    712: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
                    713: defined). However, RMATCH isn't like a function call because it's quite a
                    714: complicated macro. It has to be used in one particular way. This shouldn't,
                    715: however, impact performance when true recursion is being used. */
                    716: 
1.1.1.2   misho     717: #ifdef SUPPORT_UTF
                    718: utf = md->utf;       /* Local copy of the flag */
1.1       misho     719: #else
1.1.1.2   misho     720: utf = FALSE;
1.1       misho     721: #endif
                    722: 
                    723: /* First check that we haven't called match() too many times, or that we
                    724: haven't exceeded the recursive call limit. */
                    725: 
                    726: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    727: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    728: 
                    729: /* At the start of a group with an unlimited repeat that may match an empty
                    730: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
                    731: done this way to save having to use another function argument, which would take
                    732: up space on the stack. See also MATCH_CONDASSERT below.
                    733: 
                    734: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
                    735: such remembered pointers, to be checked when we hit the closing ket, in order
                    736: to break infinite loops that match no characters. When match() is called in
                    737: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
                    738: NOT be used with tail recursion, because the memory block that is used is on
                    739: the stack, so a new one may be required for each match(). */
                    740: 
                    741: if (md->match_function_type == MATCH_CBEGROUP)
                    742:   {
                    743:   newptrb.epb_saved_eptr = eptr;
                    744:   newptrb.epb_prev = eptrb;
                    745:   eptrb = &newptrb;
                    746:   md->match_function_type = 0;
                    747:   }
                    748: 
                    749: /* Now start processing the opcodes. */
                    750: 
                    751: for (;;)
                    752:   {
                    753:   minimize = possessive = FALSE;
                    754:   op = *ecode;
                    755: 
                    756:   switch(op)
                    757:     {
                    758:     case OP_MARK:
                    759:     md->nomatch_mark = ecode + 2;
                    760:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     761:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     762:       eptrb, RM55);
                    763:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    764:          md->mark == NULL) md->mark = ecode + 2;
                    765: 
                    766:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    767:     argument, and we must check whether that argument matches this MARK's
                    768:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    769:     variable). If it does match, we reset that variable to the current subject
                    770:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    771:     unaltered. */
                    772: 
                    773:     else if (rrc == MATCH_SKIP_ARG &&
1.1.1.4 ! misho     774:         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
1.1       misho     775:       {
                    776:       md->start_match_ptr = eptr;
                    777:       RRETURN(MATCH_SKIP);
                    778:       }
                    779:     RRETURN(rrc);
                    780: 
                    781:     case OP_FAIL:
                    782:     RRETURN(MATCH_NOMATCH);
                    783: 
                    784:     case OP_COMMIT:
1.1.1.2   misho     785:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     786:       eptrb, RM52);
1.1.1.4 ! misho     787:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     788:     RRETURN(MATCH_COMMIT);
                    789: 
                    790:     case OP_PRUNE:
1.1.1.2   misho     791:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     792:       eptrb, RM51);
1.1.1.4 ! misho     793:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     794:     RRETURN(MATCH_PRUNE);
                    795: 
                    796:     case OP_PRUNE_ARG:
                    797:     md->nomatch_mark = ecode + 2;
                    798:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     799:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     800:       eptrb, RM56);
                    801:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    802:          md->mark == NULL) md->mark = ecode + 2;
1.1.1.4 ! misho     803:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     804:     RRETURN(MATCH_PRUNE);
                    805: 
                    806:     case OP_SKIP:
1.1.1.2   misho     807:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     808:       eptrb, RM53);
1.1.1.4 ! misho     809:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     810:     md->start_match_ptr = eptr;   /* Pass back current position */
                    811:     RRETURN(MATCH_SKIP);
                    812: 
                    813:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
1.1.1.4 ! misho     814:     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
        !           815:     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
        !           816:     that failed and any that precede it (either they also failed, or were not
        !           817:     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
        !           818:     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
        !           819:     set to the count of the one that failed. */
1.1       misho     820: 
                    821:     case OP_SKIP_ARG:
1.1.1.4 ! misho     822:     md->skip_arg_count++;
        !           823:     if (md->skip_arg_count <= md->ignore_skip_arg)
1.1       misho     824:       {
1.1.1.2   misho     825:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
1.1       misho     826:       break;
                    827:       }
1.1.1.2   misho     828:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1       misho     829:       eptrb, RM57);
1.1.1.4 ! misho     830:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     831: 
                    832:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    833:     returning the special MATCH_SKIP_ARG return code. This will either be
                    834:     caught by a matching MARK, or get to the top, where it causes a rematch
1.1.1.4 ! misho     835:     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
1.1       misho     836: 
                    837:     md->start_match_ptr = ecode + 2;
                    838:     RRETURN(MATCH_SKIP_ARG);
                    839: 
                    840:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
                    841:     the branch in which it occurs can be determined. Overload the start of
                    842:     match pointer to do this. */
                    843: 
                    844:     case OP_THEN:
1.1.1.2   misho     845:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     846:       eptrb, RM54);
                    847:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    848:     md->start_match_ptr = ecode;
                    849:     RRETURN(MATCH_THEN);
                    850: 
                    851:     case OP_THEN_ARG:
                    852:     md->nomatch_mark = ecode + 2;
                    853:     md->mark = NULL;    /* In case previously set by assertion */
1.1.1.2   misho     854:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
1.1       misho     855:       md, eptrb, RM58);
                    856:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    857:          md->mark == NULL) md->mark = ecode + 2;
                    858:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    859:     md->start_match_ptr = ecode;
                    860:     RRETURN(MATCH_THEN);
                    861: 
                    862:     /* Handle an atomic group that does not contain any capturing parentheses.
                    863:     This can be handled like an assertion. Prior to 8.13, all atomic groups
                    864:     were handled this way. In 8.13, the code was changed as below for ONCE, so
                    865:     that backups pass through the group and thereby reset captured values.
                    866:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
                    867:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
                    868:     less stack intensive way.
                    869: 
                    870:     Check the alternative branches in turn - the matching won't pass the KET
                    871:     for this kind of subpattern. If any one branch matches, we carry on as at
                    872:     the end of a normal bracket, leaving the subject pointer, but resetting
                    873:     the start-of-match value in case it was changed by \K. */
                    874: 
                    875:     case OP_ONCE_NC:
                    876:     prev = ecode;
                    877:     saved_eptr = eptr;
1.1.1.2   misho     878:     save_mark = md->mark;
1.1       misho     879:     do
                    880:       {
                    881:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
                    882:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                    883:         {
                    884:         mstart = md->start_match_ptr;
                    885:         break;
                    886:         }
                    887:       if (rrc == MATCH_THEN)
                    888:         {
                    889:         next = ecode + GET(ecode,1);
                    890:         if (md->start_match_ptr < next &&
                    891:             (*ecode == OP_ALT || *next == OP_ALT))
                    892:           rrc = MATCH_NOMATCH;
                    893:         }
                    894: 
                    895:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    896:       ecode += GET(ecode,1);
1.1.1.2   misho     897:       md->mark = save_mark;
1.1       misho     898:       }
                    899:     while (*ecode == OP_ALT);
                    900: 
                    901:     /* If hit the end of the group (which could be repeated), fail */
                    902: 
                    903:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                    904: 
                    905:     /* Continue as from after the group, updating the offsets high water
                    906:     mark, since extracts may have been taken. */
                    907: 
                    908:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                    909: 
                    910:     offset_top = md->end_offset_top;
                    911:     eptr = md->end_match_ptr;
                    912: 
                    913:     /* For a non-repeating ket, just continue at this level. This also
                    914:     happens for a repeating ket if no characters were matched in the group.
                    915:     This is the forcible breaking of infinite loops as implemented in Perl
                    916:     5.005. */
                    917: 
                    918:     if (*ecode == OP_KET || eptr == saved_eptr)
                    919:       {
                    920:       ecode += 1+LINK_SIZE;
                    921:       break;
                    922:       }
                    923: 
                    924:     /* The repeating kets try the rest of the pattern or restart from the
                    925:     preceding bracket, in the appropriate order. The second "call" of match()
                    926:     uses tail recursion, to avoid using another stack frame. */
                    927: 
                    928:     if (*ecode == OP_KETRMIN)
                    929:       {
                    930:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
                    931:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    932:       ecode = prev;
                    933:       goto TAIL_RECURSE;
                    934:       }
                    935:     else  /* OP_KETRMAX */
                    936:       {
                    937:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
                    938:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    939:       ecode += 1 + LINK_SIZE;
                    940:       goto TAIL_RECURSE;
                    941:       }
                    942:     /* Control never gets here */
                    943: 
                    944:     /* Handle a capturing bracket, other than those that are possessive with an
                    945:     unlimited repeat. If there is space in the offset vector, save the current
                    946:     subject position in the working slot at the top of the vector. We mustn't
                    947:     change the current values of the data slot, because they may be set from a
                    948:     previous iteration of this group, and be referred to by a reference inside
                    949:     the group. A failure to match might occur after the group has succeeded,
                    950:     if something later on doesn't match. For this reason, we need to restore
                    951:     the working value and also the values of the final offsets, in case they
                    952:     were set by a previous iteration of the same bracket.
                    953: 
                    954:     If there isn't enough space in the offset vector, treat this as if it were
                    955:     a non-capturing bracket. Don't worry about setting the flag for the error
                    956:     case here; that is handled in the code for KET. */
                    957: 
                    958:     case OP_CBRA:
                    959:     case OP_SCBRA:
                    960:     number = GET2(ecode, 1+LINK_SIZE);
                    961:     offset = number << 1;
                    962: 
                    963: #ifdef PCRE_DEBUG
                    964:     printf("start bracket %d\n", number);
                    965:     printf("subject=");
                    966:     pchars(eptr, 16, TRUE, md);
                    967:     printf("\n");
                    968: #endif
                    969: 
                    970:     if (offset < md->offset_max)
                    971:       {
                    972:       save_offset1 = md->offset_vector[offset];
                    973:       save_offset2 = md->offset_vector[offset+1];
                    974:       save_offset3 = md->offset_vector[md->offset_end - number];
                    975:       save_capture_last = md->capture_last;
1.1.1.2   misho     976:       save_mark = md->mark;
1.1       misho     977: 
                    978:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    979:       md->offset_vector[md->offset_end - number] =
                    980:         (int)(eptr - md->start_subject);
                    981: 
                    982:       for (;;)
                    983:         {
                    984:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho     985:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho     986:           eptrb, RM1);
                    987:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
                    988: 
                    989:         /* If we backed up to a THEN, check whether it is within the current
                    990:         branch by comparing the address of the THEN that is passed back with
                    991:         the end of the branch. If it is within the current branch, and the
                    992:         branch is one of two or more alternatives (it either starts or ends
                    993:         with OP_ALT), we have reached the limit of THEN's action, so convert
                    994:         the return code to NOMATCH, which will cause normal backtracking to
                    995:         happen from now on. Otherwise, THEN is passed back to an outer
                    996:         alternative. This implements Perl's treatment of parenthesized groups,
                    997:         where a group not containing | does not affect the current alternative,
                    998:         that is, (X) is NOT the same as (X|(*F)). */
                    999: 
                   1000:         if (rrc == MATCH_THEN)
                   1001:           {
                   1002:           next = ecode + GET(ecode,1);
                   1003:           if (md->start_match_ptr < next &&
                   1004:               (*ecode == OP_ALT || *next == OP_ALT))
                   1005:             rrc = MATCH_NOMATCH;
                   1006:           }
                   1007: 
                   1008:         /* Anything other than NOMATCH is passed back. */
                   1009: 
                   1010:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1011:         md->capture_last = save_capture_last;
                   1012:         ecode += GET(ecode, 1);
1.1.1.2   misho    1013:         md->mark = save_mark;
1.1       misho    1014:         if (*ecode != OP_ALT) break;
                   1015:         }
                   1016: 
                   1017:       DPRINTF(("bracket %d failed\n", number));
                   1018:       md->offset_vector[offset] = save_offset1;
                   1019:       md->offset_vector[offset+1] = save_offset2;
                   1020:       md->offset_vector[md->offset_end - number] = save_offset3;
                   1021: 
                   1022:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
                   1023: 
                   1024:       RRETURN(rrc);
                   1025:       }
                   1026: 
                   1027:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1028:     as a non-capturing bracket. */
                   1029: 
                   1030:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1031:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1032: 
                   1033:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1034: 
                   1035:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1036:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1037: 
                   1038:     /* Non-capturing or atomic group, except for possessive with unlimited
                   1039:     repeat and ONCE group with no captures. Loop for all the alternatives.
                   1040: 
                   1041:     When we get to the final alternative within the brackets, we used to return
                   1042:     the result of a recursive call to match() whatever happened so it was
                   1043:     possible to reduce stack usage by turning this into a tail recursion,
                   1044:     except in the case of a possibly empty group. However, now that there is
                   1045:     the possibility of (*THEN) occurring in the final alternative, this
                   1046:     optimization is no longer always possible.
                   1047: 
                   1048:     We can optimize if we know there are no (*THEN)s in the pattern; at present
                   1049:     this is the best that can be done.
                   1050: 
                   1051:     MATCH_ONCE is returned when the end of an atomic group is successfully
                   1052:     reached, but subsequent matching fails. It passes back up the tree (causing
                   1053:     captured values to be reset) until the original atomic group level is
                   1054:     reached. This is tested by comparing md->once_target with the start of the
                   1055:     group. At this point, the return is converted into MATCH_NOMATCH so that
                   1056:     previous backup points can be taken. */
                   1057: 
                   1058:     case OP_ONCE:
                   1059:     case OP_BRA:
                   1060:     case OP_SBRA:
                   1061:     DPRINTF(("start non-capturing bracket\n"));
                   1062: 
                   1063:     for (;;)
                   1064:       {
1.1.1.3   misho    1065:       if (op >= OP_SBRA || op == OP_ONCE)
                   1066:         md->match_function_type = MATCH_CBEGROUP;
1.1       misho    1067: 
                   1068:       /* If this is not a possibly empty group, and there are no (*THEN)s in
                   1069:       the pattern, and this is the final alternative, optimize as described
                   1070:       above. */
                   1071: 
                   1072:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
                   1073:         {
1.1.1.2   misho    1074:         ecode += PRIV(OP_lengths)[*ecode];
1.1       misho    1075:         goto TAIL_RECURSE;
                   1076:         }
                   1077: 
                   1078:       /* In all other cases, we have to make another call to match(). */
                   1079: 
1.1.1.2   misho    1080:       save_mark = md->mark;
1.1.1.4 ! misho    1081:       save_capture_last = md->capture_last;
1.1.1.2   misho    1082:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1.1       misho    1083:         RM2);
                   1084: 
                   1085:       /* See comment in the code for capturing groups above about handling
                   1086:       THEN. */
                   1087: 
                   1088:       if (rrc == MATCH_THEN)
                   1089:         {
                   1090:         next = ecode + GET(ecode,1);
                   1091:         if (md->start_match_ptr < next &&
                   1092:             (*ecode == OP_ALT || *next == OP_ALT))
                   1093:           rrc = MATCH_NOMATCH;
                   1094:         }
                   1095: 
                   1096:       if (rrc != MATCH_NOMATCH)
                   1097:         {
                   1098:         if (rrc == MATCH_ONCE)
                   1099:           {
1.1.1.2   misho    1100:           const pcre_uchar *scode = ecode;
1.1       misho    1101:           if (*scode != OP_ONCE)           /* If not at start, find it */
                   1102:             {
                   1103:             while (*scode == OP_ALT) scode += GET(scode, 1);
                   1104:             scode -= GET(scode, 1);
                   1105:             }
                   1106:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
                   1107:           }
                   1108:         RRETURN(rrc);
                   1109:         }
                   1110:       ecode += GET(ecode, 1);
1.1.1.2   misho    1111:       md->mark = save_mark;
1.1       misho    1112:       if (*ecode != OP_ALT) break;
1.1.1.4 ! misho    1113:       md->capture_last = save_capture_last;
1.1       misho    1114:       }
                   1115: 
                   1116:     RRETURN(MATCH_NOMATCH);
                   1117: 
                   1118:     /* Handle possessive capturing brackets with an unlimited repeat. We come
                   1119:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
                   1120:     handled similarly to the normal case above. However, the matching is
                   1121:     different. The end of these brackets will always be OP_KETRPOS, which
                   1122:     returns MATCH_KETRPOS without going further in the pattern. By this means
                   1123:     we can handle the group by iteration rather than recursion, thereby
                   1124:     reducing the amount of stack needed. */
                   1125: 
                   1126:     case OP_CBRAPOS:
                   1127:     case OP_SCBRAPOS:
                   1128:     allow_zero = FALSE;
                   1129: 
                   1130:     POSSESSIVE_CAPTURE:
                   1131:     number = GET2(ecode, 1+LINK_SIZE);
                   1132:     offset = number << 1;
                   1133: 
                   1134: #ifdef PCRE_DEBUG
                   1135:     printf("start possessive bracket %d\n", number);
                   1136:     printf("subject=");
                   1137:     pchars(eptr, 16, TRUE, md);
                   1138:     printf("\n");
                   1139: #endif
                   1140: 
                   1141:     if (offset < md->offset_max)
                   1142:       {
                   1143:       matched_once = FALSE;
                   1144:       code_offset = (int)(ecode - md->start_code);
                   1145: 
                   1146:       save_offset1 = md->offset_vector[offset];
                   1147:       save_offset2 = md->offset_vector[offset+1];
                   1148:       save_offset3 = md->offset_vector[md->offset_end - number];
                   1149:       save_capture_last = md->capture_last;
                   1150: 
                   1151:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                   1152: 
                   1153:       /* Each time round the loop, save the current subject position for use
                   1154:       when the group matches. For MATCH_MATCH, the group has matched, so we
                   1155:       restart it with a new subject starting position, remembering that we had
                   1156:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
                   1157:       usual. If we haven't matched any alternatives in any iteration, check to
                   1158:       see if a previous iteration matched. If so, the group has matched;
                   1159:       continue from afterwards. Otherwise it has failed; restore the previous
                   1160:       capture values before returning NOMATCH. */
                   1161: 
                   1162:       for (;;)
                   1163:         {
                   1164:         md->offset_vector[md->offset_end - number] =
                   1165:           (int)(eptr - md->start_subject);
                   1166:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1167:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho    1168:           eptrb, RM63);
                   1169:         if (rrc == MATCH_KETRPOS)
                   1170:           {
                   1171:           offset_top = md->end_offset_top;
                   1172:           eptr = md->end_match_ptr;
                   1173:           ecode = md->start_code + code_offset;
                   1174:           save_capture_last = md->capture_last;
                   1175:           matched_once = TRUE;
                   1176:           continue;
                   1177:           }
                   1178: 
                   1179:         /* See comment in the code for capturing groups above about handling
                   1180:         THEN. */
                   1181: 
                   1182:         if (rrc == MATCH_THEN)
                   1183:           {
                   1184:           next = ecode + GET(ecode,1);
                   1185:           if (md->start_match_ptr < next &&
                   1186:               (*ecode == OP_ALT || *next == OP_ALT))
                   1187:             rrc = MATCH_NOMATCH;
                   1188:           }
                   1189: 
                   1190:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1191:         md->capture_last = save_capture_last;
                   1192:         ecode += GET(ecode, 1);
                   1193:         if (*ecode != OP_ALT) break;
                   1194:         }
                   1195: 
                   1196:       if (!matched_once)
                   1197:         {
                   1198:         md->offset_vector[offset] = save_offset1;
                   1199:         md->offset_vector[offset+1] = save_offset2;
                   1200:         md->offset_vector[md->offset_end - number] = save_offset3;
                   1201:         }
                   1202: 
                   1203:       if (allow_zero || matched_once)
                   1204:         {
                   1205:         ecode += 1 + LINK_SIZE;
                   1206:         break;
                   1207:         }
                   1208: 
                   1209:       RRETURN(MATCH_NOMATCH);
                   1210:       }
                   1211: 
                   1212:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1213:     as a non-capturing bracket. */
                   1214: 
                   1215:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1216:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1217: 
                   1218:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1219: 
                   1220:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1221:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1222: 
                   1223:     /* Non-capturing possessive bracket with unlimited repeat. We come here
                   1224:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
                   1225:     without the capturing complication. It is written out separately for speed
                   1226:     and cleanliness. */
                   1227: 
                   1228:     case OP_BRAPOS:
                   1229:     case OP_SBRAPOS:
                   1230:     allow_zero = FALSE;
                   1231: 
                   1232:     POSSESSIVE_NON_CAPTURE:
                   1233:     matched_once = FALSE;
                   1234:     code_offset = (int)(ecode - md->start_code);
1.1.1.4 ! misho    1235:     save_capture_last = md->capture_last;
1.1       misho    1236: 
                   1237:     for (;;)
                   1238:       {
                   1239:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1240:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1       misho    1241:         eptrb, RM48);
                   1242:       if (rrc == MATCH_KETRPOS)
                   1243:         {
                   1244:         offset_top = md->end_offset_top;
                   1245:         eptr = md->end_match_ptr;
                   1246:         ecode = md->start_code + code_offset;
                   1247:         matched_once = TRUE;
                   1248:         continue;
                   1249:         }
                   1250: 
                   1251:       /* See comment in the code for capturing groups above about handling
                   1252:       THEN. */
                   1253: 
                   1254:       if (rrc == MATCH_THEN)
                   1255:         {
                   1256:         next = ecode + GET(ecode,1);
                   1257:         if (md->start_match_ptr < next &&
                   1258:             (*ecode == OP_ALT || *next == OP_ALT))
                   1259:           rrc = MATCH_NOMATCH;
                   1260:         }
                   1261: 
                   1262:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1263:       ecode += GET(ecode, 1);
                   1264:       if (*ecode != OP_ALT) break;
1.1.1.4 ! misho    1265:       md->capture_last = save_capture_last;
1.1       misho    1266:       }
                   1267: 
                   1268:     if (matched_once || allow_zero)
                   1269:       {
                   1270:       ecode += 1 + LINK_SIZE;
                   1271:       break;
                   1272:       }
                   1273:     RRETURN(MATCH_NOMATCH);
                   1274: 
                   1275:     /* Control never reaches here. */
                   1276: 
                   1277:     /* Conditional group: compilation checked that there are no more than
                   1278:     two branches. If the condition is false, skipping the first branch takes us
                   1279:     past the end if there is only one branch, but that's OK because that is
                   1280:     exactly what going to the ket would do. */
                   1281: 
                   1282:     case OP_COND:
                   1283:     case OP_SCOND:
                   1284:     codelink = GET(ecode, 1);
                   1285: 
                   1286:     /* Because of the way auto-callout works during compile, a callout item is
                   1287:     inserted between OP_COND and an assertion condition. */
                   1288: 
                   1289:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                   1290:       {
1.1.1.2   misho    1291:       if (PUBL(callout) != NULL)
1.1       misho    1292:         {
1.1.1.2   misho    1293:         PUBL(callout_block) cb;
1.1       misho    1294:         cb.version          = 2;   /* Version 2 of the callout block */
                   1295:         cb.callout_number   = ecode[LINK_SIZE+2];
                   1296:         cb.offset_vector    = md->offset_vector;
1.1.1.4 ! misho    1297: #if defined COMPILE_PCRE8
1.1       misho    1298:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.4 ! misho    1299: #elif defined COMPILE_PCRE16
1.1.1.2   misho    1300:         cb.subject          = (PCRE_SPTR16)md->start_subject;
1.1.1.4 ! misho    1301: #elif defined COMPILE_PCRE32
        !          1302:         cb.subject          = (PCRE_SPTR32)md->start_subject;
1.1.1.2   misho    1303: #endif
1.1       misho    1304:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1305:         cb.start_match      = (int)(mstart - md->start_subject);
                   1306:         cb.current_position = (int)(eptr - md->start_subject);
                   1307:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                   1308:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                   1309:         cb.capture_top      = offset_top/2;
1.1.1.4 ! misho    1310:         cb.capture_last     = md->capture_last & CAPLMASK;
        !          1311:         /* Internal change requires this for API compatibility. */
        !          1312:         if (cb.capture_last == 0) cb.capture_last = -1;
1.1       misho    1313:         cb.callout_data     = md->callout_data;
                   1314:         cb.mark             = md->nomatch_mark;
1.1.1.2   misho    1315:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1316:         if (rrc < 0) RRETURN(rrc);
                   1317:         }
1.1.1.2   misho    1318:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.1.1.4 ! misho    1319:       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1.1       misho    1320:       }
                   1321: 
                   1322:     condcode = ecode[LINK_SIZE+1];
                   1323: 
                   1324:     /* Now see what the actual condition is */
                   1325: 
                   1326:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
                   1327:       {
                   1328:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                   1329:         {
                   1330:         condition = FALSE;
                   1331:         ecode += GET(ecode, 1);
                   1332:         }
                   1333:       else
                   1334:         {
1.1.1.4 ! misho    1335:         unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1.1       misho    1336:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
                   1337: 
                   1338:         /* If the test is for recursion into a specific subpattern, and it is
                   1339:         false, but the test was set up by name, scan the table to see if the
                   1340:         name refers to any other numbers, and test them. The condition is true
                   1341:         if any one is set. */
                   1342: 
                   1343:         if (!condition && condcode == OP_NRREF)
                   1344:           {
1.1.1.2   misho    1345:           pcre_uchar *slotA = md->name_table;
1.1       misho    1346:           for (i = 0; i < md->name_count; i++)
                   1347:             {
                   1348:             if (GET2(slotA, 0) == recno) break;
                   1349:             slotA += md->name_entry_size;
                   1350:             }
                   1351: 
                   1352:           /* Found a name for the number - there can be only one; duplicate
                   1353:           names for different numbers are allowed, but not vice versa. First
                   1354:           scan down for duplicates. */
                   1355: 
                   1356:           if (i < md->name_count)
                   1357:             {
1.1.1.2   misho    1358:             pcre_uchar *slotB = slotA;
1.1       misho    1359:             while (slotB > md->name_table)
                   1360:               {
                   1361:               slotB -= md->name_entry_size;
1.1.1.2   misho    1362:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1363:                 {
                   1364:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                   1365:                 if (condition) break;
                   1366:                 }
                   1367:               else break;
                   1368:               }
                   1369: 
                   1370:             /* Scan up for duplicates */
                   1371: 
                   1372:             if (!condition)
                   1373:               {
                   1374:               slotB = slotA;
                   1375:               for (i++; i < md->name_count; i++)
                   1376:                 {
                   1377:                 slotB += md->name_entry_size;
1.1.1.2   misho    1378:                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1379:                   {
                   1380:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   1381:                   if (condition) break;
                   1382:                   }
                   1383:                 else break;
                   1384:                 }
                   1385:               }
                   1386:             }
                   1387:           }
                   1388: 
                   1389:         /* Choose branch according to the condition */
                   1390: 
1.1.1.2   misho    1391:         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1392:         }
                   1393:       }
                   1394: 
                   1395:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
                   1396:       {
                   1397:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1398:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1399: 
                   1400:       /* If the numbered capture is unset, but the reference was by name,
                   1401:       scan the table to see if the name refers to any other numbers, and test
                   1402:       them. The condition is true if any one is set. This is tediously similar
                   1403:       to the code above, but not close enough to try to amalgamate. */
                   1404: 
                   1405:       if (!condition && condcode == OP_NCREF)
                   1406:         {
1.1.1.4 ! misho    1407:         unsigned int refno = offset >> 1;
1.1.1.2   misho    1408:         pcre_uchar *slotA = md->name_table;
1.1       misho    1409: 
                   1410:         for (i = 0; i < md->name_count; i++)
                   1411:           {
                   1412:           if (GET2(slotA, 0) == refno) break;
                   1413:           slotA += md->name_entry_size;
                   1414:           }
                   1415: 
                   1416:         /* Found a name for the number - there can be only one; duplicate names
                   1417:         for different numbers are allowed, but not vice versa. First scan down
                   1418:         for duplicates. */
                   1419: 
                   1420:         if (i < md->name_count)
                   1421:           {
1.1.1.2   misho    1422:           pcre_uchar *slotB = slotA;
1.1       misho    1423:           while (slotB > md->name_table)
                   1424:             {
                   1425:             slotB -= md->name_entry_size;
1.1.1.2   misho    1426:             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1427:               {
                   1428:               offset = GET2(slotB, 0) << 1;
                   1429:               condition = offset < offset_top &&
                   1430:                 md->offset_vector[offset] >= 0;
                   1431:               if (condition) break;
                   1432:               }
                   1433:             else break;
                   1434:             }
                   1435: 
                   1436:           /* Scan up for duplicates */
                   1437: 
                   1438:           if (!condition)
                   1439:             {
                   1440:             slotB = slotA;
                   1441:             for (i++; i < md->name_count; i++)
                   1442:               {
                   1443:               slotB += md->name_entry_size;
1.1.1.2   misho    1444:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1445:                 {
                   1446:                 offset = GET2(slotB, 0) << 1;
                   1447:                 condition = offset < offset_top &&
                   1448:                   md->offset_vector[offset] >= 0;
                   1449:                 if (condition) break;
                   1450:                 }
                   1451:               else break;
                   1452:               }
                   1453:             }
                   1454:           }
                   1455:         }
                   1456: 
                   1457:       /* Choose branch according to the condition */
                   1458: 
1.1.1.2   misho    1459:       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1460:       }
                   1461: 
                   1462:     else if (condcode == OP_DEF)     /* DEFINE - always false */
                   1463:       {
                   1464:       condition = FALSE;
                   1465:       ecode += GET(ecode, 1);
                   1466:       }
                   1467: 
                   1468:     /* The condition is an assertion. Call match() to evaluate it - setting
                   1469:     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
                   1470:     an assertion. */
                   1471: 
                   1472:     else
                   1473:       {
                   1474:       md->match_function_type = MATCH_CONDASSERT;
                   1475:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
                   1476:       if (rrc == MATCH_MATCH)
                   1477:         {
                   1478:         if (md->end_offset_top > offset_top)
                   1479:           offset_top = md->end_offset_top;  /* Captures may have happened */
                   1480:         condition = TRUE;
                   1481:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1482:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1483:         }
                   1484: 
                   1485:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
                   1486:       assertion; it is therefore treated as NOMATCH. */
                   1487: 
                   1488:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1489:         {
                   1490:         RRETURN(rrc);         /* Need braces because of following else */
                   1491:         }
                   1492:       else
                   1493:         {
                   1494:         condition = FALSE;
                   1495:         ecode += codelink;
                   1496:         }
                   1497:       }
                   1498: 
                   1499:     /* We are now at the branch that is to be obeyed. As there is only one, can
                   1500:     use tail recursion to avoid using another stack frame, except when there is
                   1501:     unlimited repeat of a possibly empty group. In the latter case, a recursive
                   1502:     call to match() is always required, unless the second alternative doesn't
                   1503:     exist, in which case we can just plough on. Note that, for compatibility
                   1504:     with Perl, the | in a conditional group is NOT treated as creating two
                   1505:     alternatives. If a THEN is encountered in the branch, it propagates out to
                   1506:     the enclosing alternative (unless nested in a deeper set of alternatives,
                   1507:     of course). */
                   1508: 
                   1509:     if (condition || *ecode == OP_ALT)
                   1510:       {
                   1511:       if (op != OP_SCOND)
                   1512:         {
                   1513:         ecode += 1 + LINK_SIZE;
                   1514:         goto TAIL_RECURSE;
                   1515:         }
                   1516: 
                   1517:       md->match_function_type = MATCH_CBEGROUP;
                   1518:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
                   1519:       RRETURN(rrc);
                   1520:       }
                   1521: 
                   1522:      /* Condition false & no alternative; continue after the group. */
                   1523: 
                   1524:     else
                   1525:       {
                   1526:       ecode += 1 + LINK_SIZE;
                   1527:       }
                   1528:     break;
                   1529: 
                   1530: 
                   1531:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1532:     to close any currently open capturing brackets. */
                   1533: 
                   1534:     case OP_CLOSE:
1.1.1.4 ! misho    1535:     number = GET2(ecode, 1);   /* Must be less than 65536 */
1.1       misho    1536:     offset = number << 1;
                   1537: 
                   1538: #ifdef PCRE_DEBUG
                   1539:       printf("end bracket %d at *ACCEPT", number);
                   1540:       printf("\n");
                   1541: #endif
                   1542: 
1.1.1.4 ! misho    1543:     md->capture_last = (md->capture_last & OVFLMASK) | number;
        !          1544:     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1.1       misho    1545:       {
                   1546:       md->offset_vector[offset] =
                   1547:         md->offset_vector[md->offset_end - number];
                   1548:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1549:       if (offset_top <= offset) offset_top = offset + 2;
                   1550:       }
1.1.1.2   misho    1551:     ecode += 1 + IMM2_SIZE;
1.1       misho    1552:     break;
                   1553: 
                   1554: 
                   1555:     /* End of the pattern, either real or forced. */
                   1556: 
                   1557:     case OP_END:
                   1558:     case OP_ACCEPT:
                   1559:     case OP_ASSERT_ACCEPT:
                   1560: 
                   1561:     /* If we have matched an empty string, fail if not in an assertion and not
                   1562:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
                   1563:     is set and we have matched at the start of the subject. In both cases,
                   1564:     backtracking will then try other alternatives, if any. */
                   1565: 
                   1566:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
                   1567:          md->recursive == NULL &&
                   1568:          (md->notempty ||
                   1569:            (md->notempty_atstart &&
                   1570:              mstart == md->start_subject + md->start_offset)))
                   1571:       RRETURN(MATCH_NOMATCH);
                   1572: 
                   1573:     /* Otherwise, we have a match. */
                   1574: 
                   1575:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1576:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1577:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                   1578: 
                   1579:     /* For some reason, the macros don't work properly if an expression is
                   1580:     given as the argument to RRETURN when the heap is in use. */
                   1581: 
                   1582:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
                   1583:     RRETURN(rrc);
                   1584: 
                   1585:     /* Assertion brackets. Check the alternative branches in turn - the
                   1586:     matching won't pass the KET for an assertion. If any one branch matches,
                   1587:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1588:     start of each branch to move the current point backwards, so the code at
                   1589:     this level is identical to the lookahead case. When the assertion is part
                   1590:     of a condition, we want to return immediately afterwards. The caller of
                   1591:     this incarnation of the match() function will have set MATCH_CONDASSERT in
                   1592:     md->match_function_type, and one of these opcodes will be the first opcode
                   1593:     that is processed. We use a local variable that is preserved over calls to
                   1594:     match() to remember this case. */
                   1595: 
                   1596:     case OP_ASSERT:
                   1597:     case OP_ASSERTBACK:
1.1.1.2   misho    1598:     save_mark = md->mark;
1.1       misho    1599:     if (md->match_function_type == MATCH_CONDASSERT)
                   1600:       {
                   1601:       condassert = TRUE;
                   1602:       md->match_function_type = 0;
                   1603:       }
                   1604:     else condassert = FALSE;
                   1605: 
1.1.1.4 ! misho    1606:     /* Loop for each branch */
        !          1607: 
1.1       misho    1608:     do
                   1609:       {
                   1610:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1.1.1.4 ! misho    1611: 
        !          1612:       /* A match means that the assertion is true; break out of the loop
        !          1613:       that matches its alternatives. */
        !          1614: 
1.1       misho    1615:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1616:         {
                   1617:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1618:         break;
                   1619:         }
1.1.1.4 ! misho    1620: 
        !          1621:       /* If not matched, restore the previous mark setting. */
        !          1622: 
1.1.1.3   misho    1623:       md->mark = save_mark;
1.1       misho    1624: 
1.1.1.4 ! misho    1625:       /* See comment in the code for capturing groups above about handling
        !          1626:       THEN. */
1.1.1.3   misho    1627: 
1.1.1.4 ! misho    1628:       if (rrc == MATCH_THEN)
        !          1629:         {
        !          1630:         next = ecode + GET(ecode,1);
        !          1631:         if (md->start_match_ptr < next &&
        !          1632:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1633:           rrc = MATCH_NOMATCH;
        !          1634:         }
1.1.1.3   misho    1635: 
1.1.1.4 ! misho    1636:       /* Anything other than NOMATCH causes the entire assertion to fail,
        !          1637:       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
        !          1638:       uncaptured THEN, which means they take their normal effect. This
        !          1639:       consistent approach does not always have exactly the same effect as in
        !          1640:       Perl. */
1.1       misho    1641: 
1.1.1.4 ! misho    1642:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho    1643:       ecode += GET(ecode, 1);
                   1644:       }
1.1.1.4 ! misho    1645:     while (*ecode == OP_ALT);   /* Continue for next alternative */
        !          1646: 
        !          1647:     /* If we have tried all the alternative branches, the assertion has
        !          1648:     failed. If not, we broke out after a match. */
1.1       misho    1649: 
                   1650:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
                   1651: 
                   1652:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1653: 
                   1654:     if (condassert) RRETURN(MATCH_MATCH);
                   1655: 
1.1.1.4 ! misho    1656:     /* Continue from after a successful assertion, updating the offsets high
        !          1657:     water mark, since extracts may have been taken during the assertion. */
1.1       misho    1658: 
                   1659:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1660:     ecode += 1 + LINK_SIZE;
                   1661:     offset_top = md->end_offset_top;
                   1662:     continue;
                   1663: 
1.1.1.4 ! misho    1664:     /* Negative assertion: all branches must fail to match for the assertion to
        !          1665:     succeed. */
1.1       misho    1666: 
                   1667:     case OP_ASSERT_NOT:
                   1668:     case OP_ASSERTBACK_NOT:
1.1.1.2   misho    1669:     save_mark = md->mark;
1.1       misho    1670:     if (md->match_function_type == MATCH_CONDASSERT)
                   1671:       {
                   1672:       condassert = TRUE;
                   1673:       md->match_function_type = 0;
                   1674:       }
                   1675:     else condassert = FALSE;
                   1676: 
1.1.1.4 ! misho    1677:     /* Loop for each alternative branch. */
        !          1678: 
1.1       misho    1679:     do
                   1680:       {
                   1681:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1.1.1.4 ! misho    1682:       md->mark = save_mark;   /* Always restore the mark setting */
        !          1683: 
        !          1684:       switch(rrc)
1.1       misho    1685:         {
1.1.1.4 ! misho    1686:         case MATCH_MATCH:            /* A successful match means */
        !          1687:         case MATCH_ACCEPT:           /* the assertion has failed. */
        !          1688:         RRETURN(MATCH_NOMATCH);
        !          1689: 
        !          1690:         case MATCH_NOMATCH:          /* Carry on with next branch */
1.1       misho    1691:         break;
1.1.1.4 ! misho    1692: 
        !          1693:         /* See comment in the code for capturing groups above about handling
        !          1694:         THEN. */
        !          1695: 
        !          1696:         case MATCH_THEN:
        !          1697:         next = ecode + GET(ecode,1);
        !          1698:         if (md->start_match_ptr < next &&
        !          1699:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1700:           {
        !          1701:           rrc = MATCH_NOMATCH;
        !          1702:           break;
        !          1703:           }
        !          1704:         /* Otherwise fall through. */
        !          1705: 
        !          1706:         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
        !          1707:         assertion to fail to match, without considering any more alternatives.
        !          1708:         Failing to match means the assertion is true. This is a consistent
        !          1709:         approach, but does not always have the same effect as in Perl. */
        !          1710: 
        !          1711:         case MATCH_COMMIT:
        !          1712:         case MATCH_SKIP:
        !          1713:         case MATCH_SKIP_ARG:
        !          1714:         case MATCH_PRUNE:
        !          1715:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
        !          1716:         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
        !          1717: 
        !          1718:         /* Anything else is an error */
        !          1719: 
        !          1720:         default:
        !          1721:         RRETURN(rrc);
1.1       misho    1722:         }
                   1723: 
1.1.1.4 ! misho    1724:       /* Continue with next branch */
1.1       misho    1725: 
                   1726:       ecode += GET(ecode,1);
                   1727:       }
                   1728:     while (*ecode == OP_ALT);
                   1729: 
1.1.1.4 ! misho    1730:     /* All branches in the assertion failed to match. */
1.1       misho    1731: 
1.1.1.4 ! misho    1732:     NEG_ASSERT_TRUE:
        !          1733:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
        !          1734:     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1.1       misho    1735:     continue;
                   1736: 
                   1737:     /* Move the subject pointer back. This occurs only at the start of
                   1738:     each branch of a lookbehind assertion. If we are too close to the start to
                   1739:     move back, this match function fails. When working with UTF-8 we move
                   1740:     back a number of characters, not bytes. */
                   1741: 
                   1742:     case OP_REVERSE:
1.1.1.2   misho    1743: #ifdef SUPPORT_UTF
                   1744:     if (utf)
1.1       misho    1745:       {
                   1746:       i = GET(ecode, 1);
                   1747:       while (i-- > 0)
                   1748:         {
                   1749:         eptr--;
                   1750:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1751:         BACKCHAR(eptr);
                   1752:         }
                   1753:       }
                   1754:     else
                   1755: #endif
                   1756: 
                   1757:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1758: 
                   1759:       {
                   1760:       eptr -= GET(ecode, 1);
                   1761:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1762:       }
                   1763: 
                   1764:     /* Save the earliest consulted character, then skip to next op code */
                   1765: 
                   1766:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
                   1767:     ecode += 1 + LINK_SIZE;
                   1768:     break;
                   1769: 
                   1770:     /* The callout item calls an external function, if one is provided, passing
                   1771:     details of the match so far. This is mainly for debugging, though the
                   1772:     function is able to force a failure. */
                   1773: 
                   1774:     case OP_CALLOUT:
1.1.1.2   misho    1775:     if (PUBL(callout) != NULL)
1.1       misho    1776:       {
1.1.1.2   misho    1777:       PUBL(callout_block) cb;
1.1       misho    1778:       cb.version          = 2;   /* Version 1 of the callout block */
                   1779:       cb.callout_number   = ecode[1];
                   1780:       cb.offset_vector    = md->offset_vector;
1.1.1.4 ! misho    1781: #if defined COMPILE_PCRE8
1.1       misho    1782:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.4 ! misho    1783: #elif defined COMPILE_PCRE16
1.1.1.2   misho    1784:       cb.subject          = (PCRE_SPTR16)md->start_subject;
1.1.1.4 ! misho    1785: #elif defined COMPILE_PCRE32
        !          1786:       cb.subject          = (PCRE_SPTR32)md->start_subject;
1.1.1.2   misho    1787: #endif
1.1       misho    1788:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1789:       cb.start_match      = (int)(mstart - md->start_subject);
                   1790:       cb.current_position = (int)(eptr - md->start_subject);
                   1791:       cb.pattern_position = GET(ecode, 2);
                   1792:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1793:       cb.capture_top      = offset_top/2;
1.1.1.4 ! misho    1794:       cb.capture_last     = md->capture_last & CAPLMASK;
        !          1795:       /* Internal change requires this for API compatibility. */
        !          1796:       if (cb.capture_last == 0) cb.capture_last = -1;
1.1       misho    1797:       cb.callout_data     = md->callout_data;
                   1798:       cb.mark             = md->nomatch_mark;
1.1.1.2   misho    1799:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1800:       if (rrc < 0) RRETURN(rrc);
                   1801:       }
                   1802:     ecode += 2 + 2*LINK_SIZE;
                   1803:     break;
                   1804: 
                   1805:     /* Recursion either matches the current regex, or some subexpression. The
                   1806:     offset data is the offset to the starting bracket from the start of the
                   1807:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1808: 
                   1809:     The state of the capturing groups is preserved over recursion, and
                   1810:     re-instated afterwards. We don't know how many are started and not yet
                   1811:     finished (offset_top records the completed total) so we just have to save
                   1812:     all the potential data. There may be up to 65535 such values, which is too
                   1813:     large to put on the stack, but using malloc for small numbers seems
                   1814:     expensive. As a compromise, the stack is used when there are no more than
                   1815:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
                   1816: 
                   1817:     There are also other values that have to be saved. We use a chained
                   1818:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1819:     for the original version of this logic. It has, however, been hacked around
                   1820:     a lot, so he is not to blame for the current way it works. */
                   1821: 
                   1822:     case OP_RECURSE:
                   1823:       {
                   1824:       recursion_info *ri;
1.1.1.4 ! misho    1825:       unsigned int recno;
1.1       misho    1826: 
                   1827:       callpat = md->start_code + GET(ecode, 1);
                   1828:       recno = (callpat == md->start_code)? 0 :
                   1829:         GET2(callpat, 1 + LINK_SIZE);
                   1830: 
                   1831:       /* Check for repeating a recursion without advancing the subject pointer.
                   1832:       This should catch convoluted mutual recursions. (Some simple cases are
                   1833:       caught at compile time.) */
                   1834: 
                   1835:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   1836:         if (recno == ri->group_num && eptr == ri->subject_position)
                   1837:           RRETURN(PCRE_ERROR_RECURSELOOP);
                   1838: 
                   1839:       /* Add to "recursing stack" */
                   1840: 
                   1841:       new_recursive.group_num = recno;
1.1.1.4 ! misho    1842:       new_recursive.saved_capture_last = md->capture_last;
1.1       misho    1843:       new_recursive.subject_position = eptr;
                   1844:       new_recursive.prevrec = md->recursive;
                   1845:       md->recursive = &new_recursive;
                   1846: 
                   1847:       /* Where to continue from afterwards */
                   1848: 
                   1849:       ecode += 1 + LINK_SIZE;
                   1850: 
                   1851:       /* Now save the offset data */
                   1852: 
                   1853:       new_recursive.saved_max = md->offset_end;
                   1854:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1855:         new_recursive.offset_save = stacksave;
                   1856:       else
                   1857:         {
                   1858:         new_recursive.offset_save =
1.1.1.2   misho    1859:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1       misho    1860:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1861:         }
                   1862:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1863:             new_recursive.saved_max * sizeof(int));
                   1864: 
                   1865:       /* OK, now we can do the recursion. After processing each alternative,
1.1.1.4 ! misho    1866:       restore the offset data and the last captured value. If there were nested
        !          1867:       recursions, md->recursive might be changed, so reset it before looping.
        !          1868:       */
1.1       misho    1869: 
                   1870:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1871:       cbegroup = (*callpat >= OP_SBRA);
                   1872:       do
                   1873:         {
                   1874:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2   misho    1875:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1.1       misho    1876:           md, eptrb, RM6);
                   1877:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1878:             new_recursive.saved_max * sizeof(int));
1.1.1.4 ! misho    1879:         md->capture_last = new_recursive.saved_capture_last;
1.1       misho    1880:         md->recursive = new_recursive.prevrec;
                   1881:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1882:           {
                   1883:           DPRINTF(("Recursion matched\n"));
                   1884:           if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1885:             (PUBL(free))(new_recursive.offset_save);
1.1       misho    1886: 
                   1887:           /* Set where we got to in the subject, and reset the start in case
                   1888:           it was changed by \K. This *is* propagated back out of a recursion,
                   1889:           for Perl compatibility. */
                   1890: 
                   1891:           eptr = md->end_match_ptr;
                   1892:           mstart = md->start_match_ptr;
                   1893:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
                   1894:           }
                   1895: 
1.1.1.4 ! misho    1896:         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
        !          1897:         recursion; they cause a NOMATCH for the entire recursion. These codes
        !          1898:         are defined in a range that can be tested for. */
        !          1899: 
        !          1900:         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
        !          1901:           RRETURN(MATCH_NOMATCH);
        !          1902: 
        !          1903:         /* Any return code other than NOMATCH is an error. */
1.1       misho    1904: 
1.1.1.4 ! misho    1905:         if (rrc != MATCH_NOMATCH)
1.1       misho    1906:           {
                   1907:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1908:           if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1909:             (PUBL(free))(new_recursive.offset_save);
1.1       misho    1910:           RRETURN(rrc);
                   1911:           }
                   1912: 
                   1913:         md->recursive = &new_recursive;
                   1914:         callpat += GET(callpat, 1);
                   1915:         }
                   1916:       while (*callpat == OP_ALT);
                   1917: 
                   1918:       DPRINTF(("Recursion didn't match\n"));
                   1919:       md->recursive = new_recursive.prevrec;
                   1920:       if (new_recursive.offset_save != stacksave)
1.1.1.2   misho    1921:         (PUBL(free))(new_recursive.offset_save);
1.1       misho    1922:       RRETURN(MATCH_NOMATCH);
                   1923:       }
                   1924: 
                   1925:     RECURSION_MATCHED:
                   1926:     break;
                   1927: 
                   1928:     /* An alternation is the end of a branch; scan along to find the end of the
                   1929:     bracketed group and go to there. */
                   1930: 
                   1931:     case OP_ALT:
                   1932:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1933:     break;
                   1934: 
                   1935:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1936:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1937:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1938:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1939:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1940: 
                   1941:     case OP_BRAZERO:
                   1942:     next = ecode + 1;
                   1943:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
                   1944:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1945:     do next += GET(next, 1); while (*next == OP_ALT);
                   1946:     ecode = next + 1 + LINK_SIZE;
                   1947:     break;
                   1948: 
                   1949:     case OP_BRAMINZERO:
                   1950:     next = ecode + 1;
                   1951:     do next += GET(next, 1); while (*next == OP_ALT);
                   1952:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
                   1953:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1954:     ecode++;
                   1955:     break;
                   1956: 
                   1957:     case OP_SKIPZERO:
                   1958:     next = ecode+1;
                   1959:     do next += GET(next,1); while (*next == OP_ALT);
                   1960:     ecode = next + 1 + LINK_SIZE;
                   1961:     break;
                   1962: 
                   1963:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
                   1964:     here; just jump to the group, with allow_zero set TRUE. */
                   1965: 
                   1966:     case OP_BRAPOSZERO:
                   1967:     op = *(++ecode);
                   1968:     allow_zero = TRUE;
                   1969:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
                   1970:       goto POSSESSIVE_NON_CAPTURE;
                   1971: 
                   1972:     /* End of a group, repeated or non-repeating. */
                   1973: 
                   1974:     case OP_KET:
                   1975:     case OP_KETRMIN:
                   1976:     case OP_KETRMAX:
                   1977:     case OP_KETRPOS:
                   1978:     prev = ecode - GET(ecode, 1);
                   1979: 
                   1980:     /* If this was a group that remembered the subject start, in order to break
                   1981:     infinite repeats of empty string matches, retrieve the subject start from
                   1982:     the chain. Otherwise, set it NULL. */
                   1983: 
                   1984:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
                   1985:       {
                   1986:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1987:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1988:       }
                   1989:     else saved_eptr = NULL;
                   1990: 
                   1991:     /* If we are at the end of an assertion group or a non-capturing atomic
                   1992:     group, stop matching and return MATCH_MATCH, but record the current high
                   1993:     water mark for use by positive assertions. We also need to record the match
                   1994:     start in case it was changed by \K. */
                   1995: 
                   1996:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
                   1997:          *prev == OP_ONCE_NC)
                   1998:       {
                   1999:       md->end_match_ptr = eptr;      /* For ONCE_NC */
                   2000:       md->end_offset_top = offset_top;
                   2001:       md->start_match_ptr = mstart;
                   2002:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
                   2003:       }
                   2004: 
                   2005:     /* For capturing groups we have to check the group number back at the start
                   2006:     and if necessary complete handling an extraction by setting the offsets and
                   2007:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
                   2008:     into group 0, so it won't be picked up here. Instead, we catch it when the
                   2009:     OP_END is reached. Other recursion is handled here. We just have to record
                   2010:     the current subject position and start match pointer and give a MATCH
                   2011:     return. */
                   2012: 
                   2013:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
                   2014:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
                   2015:       {
                   2016:       number = GET2(prev, 1+LINK_SIZE);
                   2017:       offset = number << 1;
                   2018: 
                   2019: #ifdef PCRE_DEBUG
                   2020:       printf("end bracket %d", number);
                   2021:       printf("\n");
                   2022: #endif
                   2023: 
                   2024:       /* Handle a recursively called group. */
                   2025: 
                   2026:       if (md->recursive != NULL && md->recursive->group_num == number)
                   2027:         {
                   2028:         md->end_match_ptr = eptr;
                   2029:         md->start_match_ptr = mstart;
                   2030:         RRETURN(MATCH_MATCH);
                   2031:         }
                   2032: 
                   2033:       /* Deal with capturing */
                   2034: 
1.1.1.4 ! misho    2035:       md->capture_last = (md->capture_last & OVFLMASK) | number;
        !          2036:       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1.1       misho    2037:         {
                   2038:         /* If offset is greater than offset_top, it means that we are
                   2039:         "skipping" a capturing group, and that group's offsets must be marked
                   2040:         unset. In earlier versions of PCRE, all the offsets were unset at the
                   2041:         start of matching, but this doesn't work because atomic groups and
                   2042:         assertions can cause a value to be set that should later be unset.
                   2043:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
                   2044:         part of the atomic group, but this is not on the final matching path,
                   2045:         so must be unset when 2 is set. (If there is no group 2, there is no
                   2046:         problem, because offset_top will then be 2, indicating no capture.) */
                   2047: 
                   2048:         if (offset > offset_top)
                   2049:           {
                   2050:           register int *iptr = md->offset_vector + offset_top;
                   2051:           register int *iend = md->offset_vector + offset;
                   2052:           while (iptr < iend) *iptr++ = -1;
                   2053:           }
                   2054: 
                   2055:         /* Now make the extraction */
                   2056: 
                   2057:         md->offset_vector[offset] =
                   2058:           md->offset_vector[md->offset_end - number];
                   2059:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   2060:         if (offset_top <= offset) offset_top = offset + 2;
                   2061:         }
                   2062:       }
                   2063: 
                   2064:     /* For an ordinary non-repeating ket, just continue at this level. This
                   2065:     also happens for a repeating ket if no characters were matched in the
                   2066:     group. This is the forcible breaking of infinite loops as implemented in
                   2067:     Perl 5.005. For a non-repeating atomic group that includes captures,
                   2068:     establish a backup point by processing the rest of the pattern at a lower
                   2069:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
                   2070:     original OP_ONCE level, thereby bypassing intermediate backup points, but
                   2071:     resetting any captures that happened along the way. */
                   2072: 
                   2073:     if (*ecode == OP_KET || eptr == saved_eptr)
                   2074:       {
                   2075:       if (*prev == OP_ONCE)
                   2076:         {
                   2077:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
                   2078:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2079:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   2080:         RRETURN(MATCH_ONCE);
                   2081:         }
                   2082:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
                   2083:       break;
                   2084:       }
                   2085: 
                   2086:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
                   2087:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
                   2088:     at a time from the outer level, thus saving stack. */
                   2089: 
                   2090:     if (*ecode == OP_KETRPOS)
                   2091:       {
                   2092:       md->end_match_ptr = eptr;
                   2093:       md->end_offset_top = offset_top;
                   2094:       RRETURN(MATCH_KETRPOS);
                   2095:       }
                   2096: 
                   2097:     /* The normal repeating kets try the rest of the pattern or restart from
                   2098:     the preceding bracket, in the appropriate order. In the second case, we can
                   2099:     use tail recursion to avoid using another stack frame, unless we have an
                   2100:     atomic group or an unlimited repeat of a group that can match an empty
                   2101:     string. */
                   2102: 
                   2103:     if (*ecode == OP_KETRMIN)
                   2104:       {
                   2105:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
                   2106:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2107:       if (*prev == OP_ONCE)
                   2108:         {
                   2109:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
                   2110:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2111:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   2112:         RRETURN(MATCH_ONCE);
                   2113:         }
                   2114:       if (*prev >= OP_SBRA)    /* Could match an empty string */
                   2115:         {
                   2116:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
                   2117:         RRETURN(rrc);
                   2118:         }
                   2119:       ecode = prev;
                   2120:       goto TAIL_RECURSE;
                   2121:       }
                   2122:     else  /* OP_KETRMAX */
                   2123:       {
                   2124:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
                   2125:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
                   2126:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2127:       if (*prev == OP_ONCE)
                   2128:         {
                   2129:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
                   2130:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2131:         md->once_target = prev;
                   2132:         RRETURN(MATCH_ONCE);
                   2133:         }
                   2134:       ecode += 1 + LINK_SIZE;
                   2135:       goto TAIL_RECURSE;
                   2136:       }
                   2137:     /* Control never gets here */
                   2138: 
                   2139:     /* Not multiline mode: start of subject assertion, unless notbol. */
                   2140: 
                   2141:     case OP_CIRC:
                   2142:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   2143: 
                   2144:     /* Start of subject assertion */
                   2145: 
                   2146:     case OP_SOD:
                   2147:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   2148:     ecode++;
                   2149:     break;
                   2150: 
                   2151:     /* Multiline mode: start of subject unless notbol, or after any newline. */
                   2152: 
                   2153:     case OP_CIRCM:
                   2154:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   2155:     if (eptr != md->start_subject &&
                   2156:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   2157:       RRETURN(MATCH_NOMATCH);
                   2158:     ecode++;
                   2159:     break;
                   2160: 
                   2161:     /* Start of match assertion */
                   2162: 
                   2163:     case OP_SOM:
                   2164:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
                   2165:     ecode++;
                   2166:     break;
                   2167: 
                   2168:     /* Reset the start of match point */
                   2169: 
                   2170:     case OP_SET_SOM:
                   2171:     mstart = eptr;
                   2172:     ecode++;
                   2173:     break;
                   2174: 
                   2175:     /* Multiline mode: assert before any newline, or before end of subject
                   2176:     unless noteol is set. */
                   2177: 
                   2178:     case OP_DOLLM:
                   2179:     if (eptr < md->end_subject)
1.1.1.3   misho    2180:       {
                   2181:       if (!IS_NEWLINE(eptr))
                   2182:         {
                   2183:         if (md->partial != 0 &&
                   2184:             eptr + 1 >= md->end_subject &&
                   2185:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   2186:             NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    2187:             RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    2188:           {
                   2189:           md->hitend = TRUE;
                   2190:           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2191:           }
                   2192:         RRETURN(MATCH_NOMATCH);
                   2193:         }
                   2194:       }
1.1       misho    2195:     else
                   2196:       {
                   2197:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2198:       SCHECK_PARTIAL();
                   2199:       }
                   2200:     ecode++;
                   2201:     break;
                   2202: 
                   2203:     /* Not multiline mode: assert before a terminating newline or before end of
                   2204:     subject unless noteol is set. */
                   2205: 
                   2206:     case OP_DOLL:
                   2207:     if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2208:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
                   2209: 
                   2210:     /* ... else fall through for endonly */
                   2211: 
                   2212:     /* End of subject assertion (\z) */
                   2213: 
                   2214:     case OP_EOD:
                   2215:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
                   2216:     SCHECK_PARTIAL();
                   2217:     ecode++;
                   2218:     break;
                   2219: 
                   2220:     /* End of subject or ending \n assertion (\Z) */
                   2221: 
                   2222:     case OP_EODN:
                   2223:     ASSERT_NL_OR_EOS:
                   2224:     if (eptr < md->end_subject &&
                   2225:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.1.1.3   misho    2226:       {
                   2227:       if (md->partial != 0 &&
                   2228:           eptr + 1 >= md->end_subject &&
                   2229:           NLBLOCK->nltype == NLTYPE_FIXED &&
                   2230:           NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    2231:           RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    2232:         {
                   2233:         md->hitend = TRUE;
                   2234:         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2235:         }
1.1       misho    2236:       RRETURN(MATCH_NOMATCH);
1.1.1.3   misho    2237:       }
1.1       misho    2238: 
                   2239:     /* Either at end of string or \n before end. */
                   2240: 
                   2241:     SCHECK_PARTIAL();
                   2242:     ecode++;
                   2243:     break;
                   2244: 
                   2245:     /* Word boundary assertions */
                   2246: 
                   2247:     case OP_NOT_WORD_BOUNDARY:
                   2248:     case OP_WORD_BOUNDARY:
                   2249:       {
                   2250: 
                   2251:       /* Find out if the previous and current characters are "word" characters.
                   2252:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   2253:       be "non-word" characters. Remember the earliest consulted character for
                   2254:       partial matching. */
                   2255: 
1.1.1.2   misho    2256: #ifdef SUPPORT_UTF
                   2257:       if (utf)
1.1       misho    2258:         {
                   2259:         /* Get status of previous character */
                   2260: 
                   2261:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2262:           {
1.1.1.2   misho    2263:           PCRE_PUCHAR lastptr = eptr - 1;
                   2264:           BACKCHAR(lastptr);
1.1       misho    2265:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
                   2266:           GETCHAR(c, lastptr);
                   2267: #ifdef SUPPORT_UCP
                   2268:           if (md->use_ucp)
                   2269:             {
                   2270:             if (c == '_') prev_is_word = TRUE; else
                   2271:               {
                   2272:               int cat = UCD_CATEGORY(c);
                   2273:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2274:               }
                   2275:             }
                   2276:           else
                   2277: #endif
                   2278:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2279:           }
                   2280: 
                   2281:         /* Get status of next character */
                   2282: 
                   2283:         if (eptr >= md->end_subject)
                   2284:           {
                   2285:           SCHECK_PARTIAL();
                   2286:           cur_is_word = FALSE;
                   2287:           }
                   2288:         else
                   2289:           {
                   2290:           GETCHAR(c, eptr);
                   2291: #ifdef SUPPORT_UCP
                   2292:           if (md->use_ucp)
                   2293:             {
                   2294:             if (c == '_') cur_is_word = TRUE; else
                   2295:               {
                   2296:               int cat = UCD_CATEGORY(c);
                   2297:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2298:               }
                   2299:             }
                   2300:           else
                   2301: #endif
                   2302:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2303:           }
                   2304:         }
                   2305:       else
                   2306: #endif
                   2307: 
                   2308:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2309:       consistency with the behaviour of \w we do use it in this case. */
                   2310: 
                   2311:         {
                   2312:         /* Get status of previous character */
                   2313: 
                   2314:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2315:           {
                   2316:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2317: #ifdef SUPPORT_UCP
                   2318:           if (md->use_ucp)
                   2319:             {
                   2320:             c = eptr[-1];
                   2321:             if (c == '_') prev_is_word = TRUE; else
                   2322:               {
                   2323:               int cat = UCD_CATEGORY(c);
                   2324:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2325:               }
                   2326:             }
                   2327:           else
                   2328: #endif
1.1.1.2   misho    2329:           prev_is_word = MAX_255(eptr[-1])
                   2330:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.1       misho    2331:           }
                   2332: 
                   2333:         /* Get status of next character */
                   2334: 
                   2335:         if (eptr >= md->end_subject)
                   2336:           {
                   2337:           SCHECK_PARTIAL();
                   2338:           cur_is_word = FALSE;
                   2339:           }
                   2340:         else
                   2341: #ifdef SUPPORT_UCP
                   2342:         if (md->use_ucp)
                   2343:           {
                   2344:           c = *eptr;
                   2345:           if (c == '_') cur_is_word = TRUE; else
                   2346:             {
                   2347:             int cat = UCD_CATEGORY(c);
                   2348:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2349:             }
                   2350:           }
                   2351:         else
                   2352: #endif
1.1.1.2   misho    2353:         cur_is_word = MAX_255(*eptr)
                   2354:           && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misho    2355:         }
                   2356: 
                   2357:       /* Now see if the situation is what we want */
                   2358: 
                   2359:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2360:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   2361:         RRETURN(MATCH_NOMATCH);
                   2362:       }
                   2363:     break;
                   2364: 
1.1.1.3   misho    2365:     /* Match any single character type except newline; have to take care with
                   2366:     CRLF newlines and partial matching. */
1.1       misho    2367: 
                   2368:     case OP_ANY:
                   2369:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3   misho    2370:     if (md->partial != 0 &&
                   2371:         eptr + 1 >= md->end_subject &&
                   2372:         NLBLOCK->nltype == NLTYPE_FIXED &&
                   2373:         NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    2374:         RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    2375:       {
                   2376:       md->hitend = TRUE;
                   2377:       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2378:       }
                   2379: 
1.1       misho    2380:     /* Fall through */
                   2381: 
1.1.1.3   misho    2382:     /* Match any single character whatsoever. */
                   2383: 
1.1       misho    2384:     case OP_ALLANY:
                   2385:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2386:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2387:       SCHECK_PARTIAL();
                   2388:       RRETURN(MATCH_NOMATCH);
                   2389:       }
                   2390:     eptr++;
1.1.1.2   misho    2391: #ifdef SUPPORT_UTF
                   2392:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
                   2393: #endif
1.1       misho    2394:     ecode++;
                   2395:     break;
                   2396: 
                   2397:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2398:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2399: 
                   2400:     case OP_ANYBYTE:
                   2401:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2402:       {                            /* not be updated before SCHECK_PARTIAL. */
                   2403:       SCHECK_PARTIAL();
                   2404:       RRETURN(MATCH_NOMATCH);
                   2405:       }
                   2406:     eptr++;
                   2407:     ecode++;
                   2408:     break;
                   2409: 
                   2410:     case OP_NOT_DIGIT:
                   2411:     if (eptr >= md->end_subject)
                   2412:       {
                   2413:       SCHECK_PARTIAL();
                   2414:       RRETURN(MATCH_NOMATCH);
                   2415:       }
                   2416:     GETCHARINCTEST(c, eptr);
                   2417:     if (
1.1.1.2   misho    2418: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2419:        c < 256 &&
                   2420: #endif
                   2421:        (md->ctypes[c] & ctype_digit) != 0
                   2422:        )
                   2423:       RRETURN(MATCH_NOMATCH);
                   2424:     ecode++;
                   2425:     break;
                   2426: 
                   2427:     case OP_DIGIT:
                   2428:     if (eptr >= md->end_subject)
                   2429:       {
                   2430:       SCHECK_PARTIAL();
                   2431:       RRETURN(MATCH_NOMATCH);
                   2432:       }
                   2433:     GETCHARINCTEST(c, eptr);
                   2434:     if (
1.1.1.2   misho    2435: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2436:        c > 255 ||
1.1       misho    2437: #endif
                   2438:        (md->ctypes[c] & ctype_digit) == 0
                   2439:        )
                   2440:       RRETURN(MATCH_NOMATCH);
                   2441:     ecode++;
                   2442:     break;
                   2443: 
                   2444:     case OP_NOT_WHITESPACE:
                   2445:     if (eptr >= md->end_subject)
                   2446:       {
                   2447:       SCHECK_PARTIAL();
                   2448:       RRETURN(MATCH_NOMATCH);
                   2449:       }
                   2450:     GETCHARINCTEST(c, eptr);
                   2451:     if (
1.1.1.2   misho    2452: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2453:        c < 256 &&
                   2454: #endif
                   2455:        (md->ctypes[c] & ctype_space) != 0
                   2456:        )
                   2457:       RRETURN(MATCH_NOMATCH);
                   2458:     ecode++;
                   2459:     break;
                   2460: 
                   2461:     case OP_WHITESPACE:
                   2462:     if (eptr >= md->end_subject)
                   2463:       {
                   2464:       SCHECK_PARTIAL();
                   2465:       RRETURN(MATCH_NOMATCH);
                   2466:       }
                   2467:     GETCHARINCTEST(c, eptr);
                   2468:     if (
1.1.1.2   misho    2469: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2470:        c > 255 ||
1.1       misho    2471: #endif
                   2472:        (md->ctypes[c] & ctype_space) == 0
                   2473:        )
                   2474:       RRETURN(MATCH_NOMATCH);
                   2475:     ecode++;
                   2476:     break;
                   2477: 
                   2478:     case OP_NOT_WORDCHAR:
                   2479:     if (eptr >= md->end_subject)
                   2480:       {
                   2481:       SCHECK_PARTIAL();
                   2482:       RRETURN(MATCH_NOMATCH);
                   2483:       }
                   2484:     GETCHARINCTEST(c, eptr);
                   2485:     if (
1.1.1.2   misho    2486: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2487:        c < 256 &&
                   2488: #endif
                   2489:        (md->ctypes[c] & ctype_word) != 0
                   2490:        )
                   2491:       RRETURN(MATCH_NOMATCH);
                   2492:     ecode++;
                   2493:     break;
                   2494: 
                   2495:     case OP_WORDCHAR:
                   2496:     if (eptr >= md->end_subject)
                   2497:       {
                   2498:       SCHECK_PARTIAL();
                   2499:       RRETURN(MATCH_NOMATCH);
                   2500:       }
                   2501:     GETCHARINCTEST(c, eptr);
                   2502:     if (
1.1.1.2   misho    2503: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2504:        c > 255 ||
1.1       misho    2505: #endif
                   2506:        (md->ctypes[c] & ctype_word) == 0
                   2507:        )
                   2508:       RRETURN(MATCH_NOMATCH);
                   2509:     ecode++;
                   2510:     break;
                   2511: 
                   2512:     case OP_ANYNL:
                   2513:     if (eptr >= md->end_subject)
                   2514:       {
                   2515:       SCHECK_PARTIAL();
                   2516:       RRETURN(MATCH_NOMATCH);
                   2517:       }
                   2518:     GETCHARINCTEST(c, eptr);
                   2519:     switch(c)
                   2520:       {
                   2521:       default: RRETURN(MATCH_NOMATCH);
                   2522: 
1.1.1.4 ! misho    2523:       case CHAR_CR:
1.1.1.3   misho    2524:       if (eptr >= md->end_subject)
                   2525:         {
                   2526:         SCHECK_PARTIAL();
                   2527:         }
1.1.1.4 ! misho    2528:       else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
1.1       misho    2529:       break;
                   2530: 
1.1.1.4 ! misho    2531:       case CHAR_LF:
1.1       misho    2532:       break;
                   2533: 
1.1.1.4 ! misho    2534:       case CHAR_VT:
        !          2535:       case CHAR_FF:
        !          2536:       case CHAR_NEL:
        !          2537: #ifndef EBCDIC
1.1       misho    2538:       case 0x2028:
                   2539:       case 0x2029:
1.1.1.4 ! misho    2540: #endif  /* Not EBCDIC */
1.1       misho    2541:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   2542:       break;
                   2543:       }
                   2544:     ecode++;
                   2545:     break;
                   2546: 
                   2547:     case OP_NOT_HSPACE:
                   2548:     if (eptr >= md->end_subject)
                   2549:       {
                   2550:       SCHECK_PARTIAL();
                   2551:       RRETURN(MATCH_NOMATCH);
                   2552:       }
                   2553:     GETCHARINCTEST(c, eptr);
                   2554:     switch(c)
                   2555:       {
1.1.1.4 ! misho    2556:       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misho    2557:       default: break;
                   2558:       }
                   2559:     ecode++;
                   2560:     break;
                   2561: 
                   2562:     case OP_HSPACE:
                   2563:     if (eptr >= md->end_subject)
                   2564:       {
                   2565:       SCHECK_PARTIAL();
                   2566:       RRETURN(MATCH_NOMATCH);
                   2567:       }
                   2568:     GETCHARINCTEST(c, eptr);
                   2569:     switch(c)
                   2570:       {
1.1.1.4 ! misho    2571:       HSPACE_CASES: break;  /* Byte and multibyte cases */
1.1       misho    2572:       default: RRETURN(MATCH_NOMATCH);
                   2573:       }
                   2574:     ecode++;
                   2575:     break;
                   2576: 
                   2577:     case OP_NOT_VSPACE:
                   2578:     if (eptr >= md->end_subject)
                   2579:       {
                   2580:       SCHECK_PARTIAL();
                   2581:       RRETURN(MATCH_NOMATCH);
                   2582:       }
                   2583:     GETCHARINCTEST(c, eptr);
                   2584:     switch(c)
                   2585:       {
1.1.1.4 ! misho    2586:       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    2587:       default: break;
                   2588:       }
                   2589:     ecode++;
                   2590:     break;
                   2591: 
                   2592:     case OP_VSPACE:
                   2593:     if (eptr >= md->end_subject)
                   2594:       {
                   2595:       SCHECK_PARTIAL();
                   2596:       RRETURN(MATCH_NOMATCH);
                   2597:       }
                   2598:     GETCHARINCTEST(c, eptr);
                   2599:     switch(c)
                   2600:       {
1.1.1.4 ! misho    2601:       VSPACE_CASES: break;
1.1       misho    2602:       default: RRETURN(MATCH_NOMATCH);
                   2603:       }
                   2604:     ecode++;
                   2605:     break;
                   2606: 
                   2607: #ifdef SUPPORT_UCP
                   2608:     /* Check the next character by Unicode property. We will get here only
                   2609:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2610: 
                   2611:     case OP_PROP:
                   2612:     case OP_NOTPROP:
                   2613:     if (eptr >= md->end_subject)
                   2614:       {
                   2615:       SCHECK_PARTIAL();
                   2616:       RRETURN(MATCH_NOMATCH);
                   2617:       }
                   2618:     GETCHARINCTEST(c, eptr);
                   2619:       {
1.1.1.4 ! misho    2620:       const pcre_uint32 *cp;
1.1       misho    2621:       const ucd_record *prop = GET_UCD(c);
                   2622: 
                   2623:       switch(ecode[1])
                   2624:         {
                   2625:         case PT_ANY:
                   2626:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   2627:         break;
                   2628: 
                   2629:         case PT_LAMP:
                   2630:         if ((prop->chartype == ucp_Lu ||
                   2631:              prop->chartype == ucp_Ll ||
                   2632:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
                   2633:           RRETURN(MATCH_NOMATCH);
                   2634:         break;
                   2635: 
                   2636:         case PT_GC:
1.1.1.2   misho    2637:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
1.1       misho    2638:           RRETURN(MATCH_NOMATCH);
                   2639:         break;
                   2640: 
                   2641:         case PT_PC:
                   2642:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
                   2643:           RRETURN(MATCH_NOMATCH);
                   2644:         break;
                   2645: 
                   2646:         case PT_SC:
                   2647:         if ((ecode[2] != prop->script) == (op == OP_PROP))
                   2648:           RRETURN(MATCH_NOMATCH);
                   2649:         break;
                   2650: 
                   2651:         /* These are specials */
                   2652: 
                   2653:         case PT_ALNUM:
1.1.1.2   misho    2654:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2655:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
1.1       misho    2656:           RRETURN(MATCH_NOMATCH);
                   2657:         break;
                   2658: 
                   2659:         case PT_SPACE:    /* Perl space */
1.1.1.2   misho    2660:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2661:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2662:                == (op == OP_NOTPROP))
                   2663:           RRETURN(MATCH_NOMATCH);
                   2664:         break;
                   2665: 
                   2666:         case PT_PXSPACE:  /* POSIX space */
1.1.1.2   misho    2667:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2668:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2669:              c == CHAR_FF || c == CHAR_CR)
                   2670:                == (op == OP_NOTPROP))
                   2671:           RRETURN(MATCH_NOMATCH);
                   2672:         break;
                   2673: 
                   2674:         case PT_WORD:
1.1.1.2   misho    2675:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2676:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    2677:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
                   2678:           RRETURN(MATCH_NOMATCH);
                   2679:         break;
                   2680: 
1.1.1.4 ! misho    2681:         case PT_CLIST:
        !          2682:         cp = PRIV(ucd_caseless_sets) + ecode[2];
        !          2683:         for (;;)
        !          2684:           {
        !          2685:           if (c < *cp)
        !          2686:             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
        !          2687:           if (c == *cp++)
        !          2688:             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
        !          2689:           }
        !          2690:         break;
        !          2691: 
        !          2692:         case PT_UCNC:
        !          2693:         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          2694:              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          2695:              c >= 0xe000) == (op == OP_NOTPROP))
        !          2696:           RRETURN(MATCH_NOMATCH);
        !          2697:         break;
        !          2698: 
1.1       misho    2699:         /* This should never occur */
                   2700: 
                   2701:         default:
                   2702:         RRETURN(PCRE_ERROR_INTERNAL);
                   2703:         }
                   2704: 
                   2705:       ecode += 3;
                   2706:       }
                   2707:     break;
                   2708: 
                   2709:     /* Match an extended Unicode sequence. We will get here only if the support
                   2710:     is in the binary; otherwise a compile-time error occurs. */
                   2711: 
                   2712:     case OP_EXTUNI:
                   2713:     if (eptr >= md->end_subject)
                   2714:       {
                   2715:       SCHECK_PARTIAL();
                   2716:       RRETURN(MATCH_NOMATCH);
                   2717:       }
1.1.1.4 ! misho    2718:     else
1.1       misho    2719:       {
1.1.1.4 ! misho    2720:       int lgb, rgb;
        !          2721:       GETCHARINCTEST(c, eptr);
        !          2722:       lgb = UCD_GRAPHBREAK(c);
        !          2723:       while (eptr < md->end_subject)
        !          2724:         {
        !          2725:         int len = 1;
        !          2726:         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          2727:         rgb = UCD_GRAPHBREAK(c);
        !          2728:         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          2729:         lgb = rgb;
        !          2730:         eptr += len;
        !          2731:         }
1.1       misho    2732:       }
1.1.1.3   misho    2733:     CHECK_PARTIAL();
1.1       misho    2734:     ecode++;
                   2735:     break;
1.1.1.4 ! misho    2736: #endif  /* SUPPORT_UCP */
1.1       misho    2737: 
                   2738: 
                   2739:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2740:     item to see if there is repeat information following. The code is similar
                   2741:     to that for character classes, but repeated for efficiency. Then obey
                   2742:     similar code to character type repeats - written out again for speed.
                   2743:     However, if the referenced string is the empty string, always treat
                   2744:     it as matched, any number of times (otherwise there could be infinite
                   2745:     loops). */
                   2746: 
                   2747:     case OP_REF:
                   2748:     case OP_REFI:
                   2749:     caseless = op == OP_REFI;
                   2750:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1.1.1.2   misho    2751:     ecode += 1 + IMM2_SIZE;
1.1       misho    2752: 
                   2753:     /* If the reference is unset, there are two possibilities:
                   2754: 
                   2755:     (a) In the default, Perl-compatible state, set the length negative;
                   2756:     this ensures that every attempt at a match fails. We can't just fail
                   2757:     here, because of the possibility of quantifiers with zero minima.
                   2758: 
                   2759:     (b) If the JavaScript compatibility flag is set, set the length to zero
                   2760:     so that the back reference matches an empty string.
                   2761: 
                   2762:     Otherwise, set the length to the length of what was matched by the
                   2763:     referenced subpattern. */
                   2764: 
                   2765:     if (offset >= offset_top || md->offset_vector[offset] < 0)
                   2766:       length = (md->jscript_compat)? 0 : -1;
                   2767:     else
                   2768:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2769: 
                   2770:     /* Set up for repetition, or handle the non-repeated case */
                   2771: 
                   2772:     switch (*ecode)
                   2773:       {
                   2774:       case OP_CRSTAR:
                   2775:       case OP_CRMINSTAR:
                   2776:       case OP_CRPLUS:
                   2777:       case OP_CRMINPLUS:
                   2778:       case OP_CRQUERY:
                   2779:       case OP_CRMINQUERY:
                   2780:       c = *ecode++ - OP_CRSTAR;
                   2781:       minimize = (c & 1) != 0;
                   2782:       min = rep_min[c];                 /* Pick up values from tables; */
                   2783:       max = rep_max[c];                 /* zero for max => infinity */
                   2784:       if (max == 0) max = INT_MAX;
                   2785:       break;
                   2786: 
                   2787:       case OP_CRRANGE:
                   2788:       case OP_CRMINRANGE:
                   2789:       minimize = (*ecode == OP_CRMINRANGE);
                   2790:       min = GET2(ecode, 1);
1.1.1.2   misho    2791:       max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2792:       if (max == 0) max = INT_MAX;
1.1.1.2   misho    2793:       ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    2794:       break;
                   2795: 
                   2796:       default:               /* No repeat follows */
                   2797:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2798:         {
1.1.1.3   misho    2799:         if (length == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2800:         CHECK_PARTIAL();
                   2801:         RRETURN(MATCH_NOMATCH);
                   2802:         }
                   2803:       eptr += length;
                   2804:       continue;              /* With the main loop */
                   2805:       }
                   2806: 
                   2807:     /* Handle repeated back references. If the length of the reference is
1.1.1.2   misho    2808:     zero, just continue with the main loop. If the length is negative, it
                   2809:     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
                   2810:     zero, we can continue at the same level without recursion. For any other
                   2811:     minimum, carrying on will result in NOMATCH. */
1.1       misho    2812: 
                   2813:     if (length == 0) continue;
1.1.1.2   misho    2814:     if (length < 0 && min == 0) continue;
1.1       misho    2815: 
                   2816:     /* First, ensure the minimum number of matches are present. We get back
                   2817:     the length of the reference string explicitly rather than passing the
                   2818:     address of eptr, so that eptr can be a register variable. */
                   2819: 
                   2820:     for (i = 1; i <= min; i++)
                   2821:       {
                   2822:       int slength;
                   2823:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2824:         {
1.1.1.3   misho    2825:         if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2826:         CHECK_PARTIAL();
                   2827:         RRETURN(MATCH_NOMATCH);
                   2828:         }
                   2829:       eptr += slength;
                   2830:       }
                   2831: 
                   2832:     /* If min = max, continue at the same level without recursion.
                   2833:     They are not both allowed to be zero. */
                   2834: 
                   2835:     if (min == max) continue;
                   2836: 
                   2837:     /* If minimizing, keep trying and advancing the pointer */
                   2838: 
                   2839:     if (minimize)
                   2840:       {
                   2841:       for (fi = min;; fi++)
                   2842:         {
                   2843:         int slength;
                   2844:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
                   2845:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2846:         if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2847:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2848:           {
1.1.1.3   misho    2849:           if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.1       misho    2850:           CHECK_PARTIAL();
                   2851:           RRETURN(MATCH_NOMATCH);
                   2852:           }
                   2853:         eptr += slength;
                   2854:         }
                   2855:       /* Control never gets here */
                   2856:       }
                   2857: 
                   2858:     /* If maximizing, find the longest string and work backwards */
                   2859: 
                   2860:     else
                   2861:       {
                   2862:       pp = eptr;
                   2863:       for (i = min; i < max; i++)
                   2864:         {
                   2865:         int slength;
                   2866:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2867:           {
1.1.1.3   misho    2868:           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
                   2869:           the soft partial matching case. */
                   2870: 
                   2871:           if (slength == -2 && md->partial != 0 &&
                   2872:               md->end_subject > md->start_used_ptr)
                   2873:             {
                   2874:             md->hitend = TRUE;
                   2875:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2876:             }
1.1       misho    2877:           break;
                   2878:           }
                   2879:         eptr += slength;
                   2880:         }
1.1.1.3   misho    2881: 
1.1       misho    2882:       while (eptr >= pp)
                   2883:         {
                   2884:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
                   2885:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2886:         eptr -= length;
                   2887:         }
                   2888:       RRETURN(MATCH_NOMATCH);
                   2889:       }
                   2890:     /* Control never gets here */
                   2891: 
                   2892:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2893:     used when all the characters in the class have values in the range 0-255,
                   2894:     and either the matching is caseful, or the characters are in the range
                   2895:     0-127 when UTF-8 processing is enabled. The only difference between
                   2896:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2897:     encountered.
                   2898: 
                   2899:     First, look past the end of the item to see if there is repeat information
                   2900:     following. Then obey similar code to character type repeats - written out
                   2901:     again for speed. */
                   2902: 
                   2903:     case OP_NCLASS:
                   2904:     case OP_CLASS:
                   2905:       {
1.1.1.2   misho    2906:       /* The data variable is saved across frames, so the byte map needs to
                   2907:       be stored there. */
                   2908: #define BYTE_MAP ((pcre_uint8 *)data)
1.1       misho    2909:       data = ecode + 1;                /* Save for matching */
1.1.1.2   misho    2910:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1       misho    2911: 
                   2912:       switch (*ecode)
                   2913:         {
                   2914:         case OP_CRSTAR:
                   2915:         case OP_CRMINSTAR:
                   2916:         case OP_CRPLUS:
                   2917:         case OP_CRMINPLUS:
                   2918:         case OP_CRQUERY:
                   2919:         case OP_CRMINQUERY:
                   2920:         c = *ecode++ - OP_CRSTAR;
                   2921:         minimize = (c & 1) != 0;
                   2922:         min = rep_min[c];                 /* Pick up values from tables; */
                   2923:         max = rep_max[c];                 /* zero for max => infinity */
                   2924:         if (max == 0) max = INT_MAX;
                   2925:         break;
                   2926: 
                   2927:         case OP_CRRANGE:
                   2928:         case OP_CRMINRANGE:
                   2929:         minimize = (*ecode == OP_CRMINRANGE);
                   2930:         min = GET2(ecode, 1);
1.1.1.2   misho    2931:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2932:         if (max == 0) max = INT_MAX;
1.1.1.2   misho    2933:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    2934:         break;
                   2935: 
                   2936:         default:               /* No repeat follows */
                   2937:         min = max = 1;
                   2938:         break;
                   2939:         }
                   2940: 
                   2941:       /* First, ensure the minimum number of matches are present. */
                   2942: 
1.1.1.2   misho    2943: #ifdef SUPPORT_UTF
                   2944:       if (utf)
1.1       misho    2945:         {
                   2946:         for (i = 1; i <= min; i++)
                   2947:           {
                   2948:           if (eptr >= md->end_subject)
                   2949:             {
                   2950:             SCHECK_PARTIAL();
                   2951:             RRETURN(MATCH_NOMATCH);
                   2952:             }
                   2953:           GETCHARINC(c, eptr);
                   2954:           if (c > 255)
                   2955:             {
                   2956:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2957:             }
                   2958:           else
1.1.1.2   misho    2959:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2960:           }
                   2961:         }
                   2962:       else
                   2963: #endif
1.1.1.2   misho    2964:       /* Not UTF mode */
1.1       misho    2965:         {
                   2966:         for (i = 1; i <= min; i++)
                   2967:           {
                   2968:           if (eptr >= md->end_subject)
                   2969:             {
                   2970:             SCHECK_PARTIAL();
                   2971:             RRETURN(MATCH_NOMATCH);
                   2972:             }
                   2973:           c = *eptr++;
1.1.1.2   misho    2974: #ifndef COMPILE_PCRE8
                   2975:           if (c > 255)
                   2976:             {
                   2977:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2978:             }
                   2979:           else
                   2980: #endif
                   2981:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2982:           }
                   2983:         }
                   2984: 
                   2985:       /* If max == min we can continue with the main loop without the
                   2986:       need to recurse. */
                   2987: 
                   2988:       if (min == max) continue;
                   2989: 
                   2990:       /* If minimizing, keep testing the rest of the expression and advancing
                   2991:       the pointer while it matches the class. */
                   2992: 
                   2993:       if (minimize)
                   2994:         {
1.1.1.2   misho    2995: #ifdef SUPPORT_UTF
                   2996:         if (utf)
1.1       misho    2997:           {
                   2998:           for (fi = min;; fi++)
                   2999:             {
                   3000:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
                   3001:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3002:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3003:             if (eptr >= md->end_subject)
                   3004:               {
                   3005:               SCHECK_PARTIAL();
                   3006:               RRETURN(MATCH_NOMATCH);
                   3007:               }
                   3008:             GETCHARINC(c, eptr);
                   3009:             if (c > 255)
                   3010:               {
                   3011:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   3012:               }
                   3013:             else
1.1.1.2   misho    3014:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    3015:             }
                   3016:           }
                   3017:         else
                   3018: #endif
1.1.1.2   misho    3019:         /* Not UTF mode */
1.1       misho    3020:           {
                   3021:           for (fi = min;; fi++)
                   3022:             {
                   3023:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
                   3024:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3025:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3026:             if (eptr >= md->end_subject)
                   3027:               {
                   3028:               SCHECK_PARTIAL();
                   3029:               RRETURN(MATCH_NOMATCH);
                   3030:               }
                   3031:             c = *eptr++;
1.1.1.2   misho    3032: #ifndef COMPILE_PCRE8
                   3033:             if (c > 255)
                   3034:               {
                   3035:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   3036:               }
                   3037:             else
                   3038: #endif
                   3039:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    3040:             }
                   3041:           }
                   3042:         /* Control never gets here */
                   3043:         }
                   3044: 
                   3045:       /* If maximizing, find the longest possible run, then work backwards. */
                   3046: 
                   3047:       else
                   3048:         {
                   3049:         pp = eptr;
                   3050: 
1.1.1.2   misho    3051: #ifdef SUPPORT_UTF
                   3052:         if (utf)
1.1       misho    3053:           {
                   3054:           for (i = min; i < max; i++)
                   3055:             {
                   3056:             int len = 1;
                   3057:             if (eptr >= md->end_subject)
                   3058:               {
                   3059:               SCHECK_PARTIAL();
                   3060:               break;
                   3061:               }
                   3062:             GETCHARLEN(c, eptr, len);
                   3063:             if (c > 255)
                   3064:               {
                   3065:               if (op == OP_CLASS) break;
                   3066:               }
                   3067:             else
1.1.1.2   misho    3068:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    3069:             eptr += len;
                   3070:             }
                   3071:           for (;;)
                   3072:             {
                   3073:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
                   3074:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3075:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3076:             BACKCHAR(eptr);
                   3077:             }
                   3078:           }
                   3079:         else
                   3080: #endif
1.1.1.2   misho    3081:           /* Not UTF mode */
1.1       misho    3082:           {
                   3083:           for (i = min; i < max; i++)
                   3084:             {
                   3085:             if (eptr >= md->end_subject)
                   3086:               {
                   3087:               SCHECK_PARTIAL();
                   3088:               break;
                   3089:               }
                   3090:             c = *eptr;
1.1.1.2   misho    3091: #ifndef COMPILE_PCRE8
                   3092:             if (c > 255)
                   3093:               {
                   3094:               if (op == OP_CLASS) break;
                   3095:               }
                   3096:             else
                   3097: #endif
                   3098:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    3099:             eptr++;
                   3100:             }
                   3101:           while (eptr >= pp)
                   3102:             {
                   3103:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
                   3104:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3105:             eptr--;
                   3106:             }
                   3107:           }
                   3108: 
                   3109:         RRETURN(MATCH_NOMATCH);
                   3110:         }
1.1.1.2   misho    3111: #undef BYTE_MAP
1.1       misho    3112:       }
                   3113:     /* Control never gets here */
                   3114: 
                   3115: 
                   3116:     /* Match an extended character class. This opcode is encountered only
                   3117:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   3118:     mode, because Unicode properties are supported in non-UTF-8 mode. */
                   3119: 
1.1.1.2   misho    3120: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1       misho    3121:     case OP_XCLASS:
                   3122:       {
                   3123:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   3124:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   3125: 
                   3126:       switch (*ecode)
                   3127:         {
                   3128:         case OP_CRSTAR:
                   3129:         case OP_CRMINSTAR:
                   3130:         case OP_CRPLUS:
                   3131:         case OP_CRMINPLUS:
                   3132:         case OP_CRQUERY:
                   3133:         case OP_CRMINQUERY:
                   3134:         c = *ecode++ - OP_CRSTAR;
                   3135:         minimize = (c & 1) != 0;
                   3136:         min = rep_min[c];                 /* Pick up values from tables; */
                   3137:         max = rep_max[c];                 /* zero for max => infinity */
                   3138:         if (max == 0) max = INT_MAX;
                   3139:         break;
                   3140: 
                   3141:         case OP_CRRANGE:
                   3142:         case OP_CRMINRANGE:
                   3143:         minimize = (*ecode == OP_CRMINRANGE);
                   3144:         min = GET2(ecode, 1);
1.1.1.2   misho    3145:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    3146:         if (max == 0) max = INT_MAX;
1.1.1.2   misho    3147:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    3148:         break;
                   3149: 
                   3150:         default:               /* No repeat follows */
                   3151:         min = max = 1;
                   3152:         break;
                   3153:         }
                   3154: 
                   3155:       /* First, ensure the minimum number of matches are present. */
                   3156: 
                   3157:       for (i = 1; i <= min; i++)
                   3158:         {
                   3159:         if (eptr >= md->end_subject)
                   3160:           {
                   3161:           SCHECK_PARTIAL();
                   3162:           RRETURN(MATCH_NOMATCH);
                   3163:           }
                   3164:         GETCHARINCTEST(c, eptr);
1.1.1.2   misho    3165:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3166:         }
                   3167: 
                   3168:       /* If max == min we can continue with the main loop without the
                   3169:       need to recurse. */
                   3170: 
                   3171:       if (min == max) continue;
                   3172: 
                   3173:       /* If minimizing, keep testing the rest of the expression and advancing
                   3174:       the pointer while it matches the class. */
                   3175: 
                   3176:       if (minimize)
                   3177:         {
                   3178:         for (fi = min;; fi++)
                   3179:           {
                   3180:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
                   3181:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3182:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3183:           if (eptr >= md->end_subject)
                   3184:             {
                   3185:             SCHECK_PARTIAL();
                   3186:             RRETURN(MATCH_NOMATCH);
                   3187:             }
                   3188:           GETCHARINCTEST(c, eptr);
1.1.1.2   misho    3189:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3190:           }
                   3191:         /* Control never gets here */
                   3192:         }
                   3193: 
                   3194:       /* If maximizing, find the longest possible run, then work backwards. */
                   3195: 
                   3196:       else
                   3197:         {
                   3198:         pp = eptr;
                   3199:         for (i = min; i < max; i++)
                   3200:           {
                   3201:           int len = 1;
                   3202:           if (eptr >= md->end_subject)
                   3203:             {
                   3204:             SCHECK_PARTIAL();
                   3205:             break;
                   3206:             }
1.1.1.2   misho    3207: #ifdef SUPPORT_UTF
1.1       misho    3208:           GETCHARLENTEST(c, eptr, len);
1.1.1.2   misho    3209: #else
                   3210:           c = *eptr;
                   3211: #endif
                   3212:           if (!PRIV(xclass)(c, data, utf)) break;
1.1       misho    3213:           eptr += len;
                   3214:           }
                   3215:         for(;;)
                   3216:           {
                   3217:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
                   3218:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3219:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.1.1.2   misho    3220: #ifdef SUPPORT_UTF
                   3221:           if (utf) BACKCHAR(eptr);
                   3222: #endif
1.1       misho    3223:           }
                   3224:         RRETURN(MATCH_NOMATCH);
                   3225:         }
                   3226: 
                   3227:       /* Control never gets here */
                   3228:       }
                   3229: #endif    /* End of XCLASS */
                   3230: 
                   3231:     /* Match a single character, casefully */
                   3232: 
                   3233:     case OP_CHAR:
1.1.1.2   misho    3234: #ifdef SUPPORT_UTF
                   3235:     if (utf)
1.1       misho    3236:       {
                   3237:       length = 1;
                   3238:       ecode++;
                   3239:       GETCHARLEN(fc, ecode, length);
                   3240:       if (length > md->end_subject - eptr)
                   3241:         {
                   3242:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   3243:         RRETURN(MATCH_NOMATCH);
                   3244:         }
1.1.1.4 ! misho    3245:       while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3246:       }
                   3247:     else
                   3248: #endif
1.1.1.2   misho    3249:     /* Not UTF mode */
1.1       misho    3250:       {
                   3251:       if (md->end_subject - eptr < 1)
                   3252:         {
                   3253:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   3254:         RRETURN(MATCH_NOMATCH);
                   3255:         }
                   3256:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
                   3257:       ecode += 2;
                   3258:       }
                   3259:     break;
                   3260: 
                   3261:     /* Match a single character, caselessly. If we are at the end of the
                   3262:     subject, give up immediately. */
                   3263: 
                   3264:     case OP_CHARI:
                   3265:     if (eptr >= md->end_subject)
                   3266:       {
                   3267:       SCHECK_PARTIAL();
                   3268:       RRETURN(MATCH_NOMATCH);
                   3269:       }
                   3270: 
1.1.1.2   misho    3271: #ifdef SUPPORT_UTF
                   3272:     if (utf)
1.1       misho    3273:       {
                   3274:       length = 1;
                   3275:       ecode++;
                   3276:       GETCHARLEN(fc, ecode, length);
                   3277: 
                   3278:       /* If the pattern character's value is < 128, we have only one byte, and
                   3279:       we know that its other case must also be one byte long, so we can use the
                   3280:       fast lookup table. We know that there is at least one byte left in the
                   3281:       subject. */
                   3282: 
                   3283:       if (fc < 128)
                   3284:         {
1.1.1.4 ! misho    3285:         pcre_uint32 cc = RAWUCHAR(eptr);
        !          3286:         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    3287:         ecode++;
                   3288:         eptr++;
1.1       misho    3289:         }
                   3290: 
                   3291:       /* Otherwise we must pick up the subject character. Note that we cannot
                   3292:       use the value of "length" to check for sufficient bytes left, because the
                   3293:       other case of the character may have more or fewer bytes.  */
                   3294: 
                   3295:       else
                   3296:         {
1.1.1.4 ! misho    3297:         pcre_uint32 dc;
1.1       misho    3298:         GETCHARINC(dc, eptr);
                   3299:         ecode += length;
                   3300: 
                   3301:         /* If we have Unicode property support, we can use it to test the other
                   3302:         case of the character, if there is one. */
                   3303: 
                   3304:         if (fc != dc)
                   3305:           {
                   3306: #ifdef SUPPORT_UCP
                   3307:           if (dc != UCD_OTHERCASE(fc))
                   3308: #endif
                   3309:             RRETURN(MATCH_NOMATCH);
                   3310:           }
                   3311:         }
                   3312:       }
                   3313:     else
1.1.1.2   misho    3314: #endif   /* SUPPORT_UTF */
1.1       misho    3315: 
1.1.1.2   misho    3316:     /* Not UTF mode */
1.1       misho    3317:       {
1.1.1.2   misho    3318:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
                   3319:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
                   3320:       eptr++;
1.1       misho    3321:       ecode += 2;
                   3322:       }
                   3323:     break;
                   3324: 
                   3325:     /* Match a single character repeatedly. */
                   3326: 
                   3327:     case OP_EXACT:
                   3328:     case OP_EXACTI:
                   3329:     min = max = GET2(ecode, 1);
1.1.1.2   misho    3330:     ecode += 1 + IMM2_SIZE;
1.1       misho    3331:     goto REPEATCHAR;
                   3332: 
                   3333:     case OP_POSUPTO:
                   3334:     case OP_POSUPTOI:
                   3335:     possessive = TRUE;
                   3336:     /* Fall through */
                   3337: 
                   3338:     case OP_UPTO:
                   3339:     case OP_UPTOI:
                   3340:     case OP_MINUPTO:
                   3341:     case OP_MINUPTOI:
                   3342:     min = 0;
                   3343:     max = GET2(ecode, 1);
                   3344:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
1.1.1.2   misho    3345:     ecode += 1 + IMM2_SIZE;
1.1       misho    3346:     goto REPEATCHAR;
                   3347: 
                   3348:     case OP_POSSTAR:
                   3349:     case OP_POSSTARI:
                   3350:     possessive = TRUE;
                   3351:     min = 0;
                   3352:     max = INT_MAX;
                   3353:     ecode++;
                   3354:     goto REPEATCHAR;
                   3355: 
                   3356:     case OP_POSPLUS:
                   3357:     case OP_POSPLUSI:
                   3358:     possessive = TRUE;
                   3359:     min = 1;
                   3360:     max = INT_MAX;
                   3361:     ecode++;
                   3362:     goto REPEATCHAR;
                   3363: 
                   3364:     case OP_POSQUERY:
                   3365:     case OP_POSQUERYI:
                   3366:     possessive = TRUE;
                   3367:     min = 0;
                   3368:     max = 1;
                   3369:     ecode++;
                   3370:     goto REPEATCHAR;
                   3371: 
                   3372:     case OP_STAR:
                   3373:     case OP_STARI:
                   3374:     case OP_MINSTAR:
                   3375:     case OP_MINSTARI:
                   3376:     case OP_PLUS:
                   3377:     case OP_PLUSI:
                   3378:     case OP_MINPLUS:
                   3379:     case OP_MINPLUSI:
                   3380:     case OP_QUERY:
                   3381:     case OP_QUERYI:
                   3382:     case OP_MINQUERY:
                   3383:     case OP_MINQUERYI:
                   3384:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
                   3385:     minimize = (c & 1) != 0;
                   3386:     min = rep_min[c];                 /* Pick up values from tables; */
                   3387:     max = rep_max[c];                 /* zero for max => infinity */
                   3388:     if (max == 0) max = INT_MAX;
                   3389: 
1.1.1.4 ! misho    3390:     /* Common code for all repeated single-character matches. We first check
        !          3391:     for the minimum number of characters. If the minimum equals the maximum, we
        !          3392:     are done. Otherwise, if minimizing, check the rest of the pattern for a
        !          3393:     match; if there isn't one, advance up to the maximum, one character at a
        !          3394:     time.
        !          3395: 
        !          3396:     If maximizing, advance up to the maximum number of matching characters,
        !          3397:     until eptr is past the end of the maximum run. If possessive, we are
        !          3398:     then done (no backing up). Otherwise, match at this position; anything
        !          3399:     other than no match is immediately returned. For nomatch, back up one
        !          3400:     character, unless we are matching \R and the last thing matched was
        !          3401:     \r\n, in which case, back up two bytes. When we reach the first optional
        !          3402:     character position, we can save stack by doing a tail recurse.
        !          3403: 
        !          3404:     The various UTF/non-UTF and caseful/caseless cases are handled separately,
        !          3405:     for speed. */
1.1       misho    3406: 
                   3407:     REPEATCHAR:
1.1.1.2   misho    3408: #ifdef SUPPORT_UTF
                   3409:     if (utf)
1.1       misho    3410:       {
                   3411:       length = 1;
                   3412:       charptr = ecode;
                   3413:       GETCHARLEN(fc, ecode, length);
                   3414:       ecode += length;
                   3415: 
                   3416:       /* Handle multibyte character matching specially here. There is
                   3417:       support for caseless matching if UCP support is present. */
                   3418: 
                   3419:       if (length > 1)
                   3420:         {
                   3421: #ifdef SUPPORT_UCP
1.1.1.4 ! misho    3422:         pcre_uint32 othercase;
1.1       misho    3423:         if (op >= OP_STARI &&     /* Caseless */
                   3424:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1.1.2   misho    3425:           oclength = PRIV(ord2utf)(othercase, occhars);
1.1       misho    3426:         else oclength = 0;
                   3427: #endif  /* SUPPORT_UCP */
                   3428: 
                   3429:         for (i = 1; i <= min; i++)
                   3430:           {
                   3431:           if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3432:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3433: #ifdef SUPPORT_UCP
                   3434:           else if (oclength > 0 &&
                   3435:                    eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3436:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3437: #endif  /* SUPPORT_UCP */
                   3438:           else
                   3439:             {
                   3440:             CHECK_PARTIAL();
                   3441:             RRETURN(MATCH_NOMATCH);
                   3442:             }
                   3443:           }
                   3444: 
                   3445:         if (min == max) continue;
                   3446: 
                   3447:         if (minimize)
                   3448:           {
                   3449:           for (fi = min;; fi++)
                   3450:             {
                   3451:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
                   3452:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3453:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3454:             if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3455:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3456: #ifdef SUPPORT_UCP
                   3457:             else if (oclength > 0 &&
                   3458:                      eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3459:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3460: #endif  /* SUPPORT_UCP */
                   3461:             else
                   3462:               {
                   3463:               CHECK_PARTIAL();
                   3464:               RRETURN(MATCH_NOMATCH);
                   3465:               }
                   3466:             }
                   3467:           /* Control never gets here */
                   3468:           }
                   3469: 
                   3470:         else  /* Maximize */
                   3471:           {
                   3472:           pp = eptr;
                   3473:           for (i = min; i < max; i++)
                   3474:             {
                   3475:             if (eptr <= md->end_subject - length &&
1.1.1.2   misho    3476:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3477: #ifdef SUPPORT_UCP
                   3478:             else if (oclength > 0 &&
                   3479:                      eptr <= md->end_subject - oclength &&
1.1.1.2   misho    3480:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3481: #endif  /* SUPPORT_UCP */
                   3482:             else
                   3483:               {
                   3484:               CHECK_PARTIAL();
                   3485:               break;
                   3486:               }
                   3487:             }
                   3488: 
1.1.1.4 ! misho    3489:           if (possessive) continue;    /* No backtracking */
1.1       misho    3490:           for(;;)
                   3491:             {
1.1.1.4 ! misho    3492:             if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    3493:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
                   3494:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3495: #ifdef SUPPORT_UCP
                   3496:             eptr--;
                   3497:             BACKCHAR(eptr);
                   3498: #else   /* without SUPPORT_UCP */
                   3499:             eptr -= length;
                   3500: #endif  /* SUPPORT_UCP */
                   3501:             }
                   3502:           }
                   3503:         /* Control never gets here */
                   3504:         }
                   3505: 
                   3506:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3507:       obey the code as for non-UTF-8 characters below, though in this case the
                   3508:       value of fc will always be < 128. */
                   3509:       }
                   3510:     else
1.1.1.2   misho    3511: #endif  /* SUPPORT_UTF */
                   3512:       /* When not in UTF-8 mode, load a single-byte character. */
                   3513:       fc = *ecode++;
1.1       misho    3514: 
1.1.1.2   misho    3515:     /* The value of fc at this point is always one character, though we may
                   3516:     or may not be in UTF mode. The code is duplicated for the caseless and
1.1       misho    3517:     caseful cases, for speed, since matching characters is likely to be quite
                   3518:     common. First, ensure the minimum number of matches are present. If min =
                   3519:     max, continue at the same level without recursing. Otherwise, if
                   3520:     minimizing, keep trying the rest of the expression and advancing one
                   3521:     matching character if failing, up to the maximum. Alternatively, if
                   3522:     maximizing, find the maximum number of characters and work backwards. */
                   3523: 
                   3524:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.3   misho    3525:       max, (char *)eptr));
1.1       misho    3526: 
                   3527:     if (op >= OP_STARI)  /* Caseless */
                   3528:       {
1.1.1.2   misho    3529: #ifdef COMPILE_PCRE8
                   3530:       /* fc must be < 128 if UTF is enabled. */
                   3531:       foc = md->fcc[fc];
                   3532: #else
                   3533: #ifdef SUPPORT_UTF
                   3534: #ifdef SUPPORT_UCP
                   3535:       if (utf && fc > 127)
                   3536:         foc = UCD_OTHERCASE(fc);
                   3537: #else
                   3538:       if (utf && fc > 127)
                   3539:         foc = fc;
                   3540: #endif /* SUPPORT_UCP */
                   3541:       else
                   3542: #endif /* SUPPORT_UTF */
                   3543:         foc = TABLE_GET(fc, md->fcc, fc);
                   3544: #endif /* COMPILE_PCRE8 */
                   3545: 
1.1       misho    3546:       for (i = 1; i <= min; i++)
                   3547:         {
1.1.1.4 ! misho    3548:         pcre_uint32 cc;                 /* Faster than pcre_uchar */
1.1       misho    3549:         if (eptr >= md->end_subject)
                   3550:           {
                   3551:           SCHECK_PARTIAL();
                   3552:           RRETURN(MATCH_NOMATCH);
                   3553:           }
1.1.1.4 ! misho    3554:         cc = RAWUCHARTEST(eptr);
        !          3555:         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    3556:         eptr++;
1.1       misho    3557:         }
                   3558:       if (min == max) continue;
                   3559:       if (minimize)
                   3560:         {
                   3561:         for (fi = min;; fi++)
                   3562:           {
1.1.1.4 ! misho    3563:           pcre_uint32 cc;               /* Faster than pcre_uchar */
1.1       misho    3564:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
                   3565:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3566:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3567:           if (eptr >= md->end_subject)
                   3568:             {
                   3569:             SCHECK_PARTIAL();
                   3570:             RRETURN(MATCH_NOMATCH);
                   3571:             }
1.1.1.4 ! misho    3572:           cc = RAWUCHARTEST(eptr);
        !          3573:           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    3574:           eptr++;
1.1       misho    3575:           }
                   3576:         /* Control never gets here */
                   3577:         }
                   3578:       else  /* Maximize */
                   3579:         {
                   3580:         pp = eptr;
                   3581:         for (i = min; i < max; i++)
                   3582:           {
1.1.1.4 ! misho    3583:           pcre_uint32 cc;               /* Faster than pcre_uchar */
1.1       misho    3584:           if (eptr >= md->end_subject)
                   3585:             {
                   3586:             SCHECK_PARTIAL();
                   3587:             break;
                   3588:             }
1.1.1.4 ! misho    3589:           cc = RAWUCHARTEST(eptr);
        !          3590:           if (fc != cc && foc != cc) break;
1.1       misho    3591:           eptr++;
                   3592:           }
                   3593: 
1.1.1.4 ! misho    3594:         if (possessive) continue;       /* No backtracking */
        !          3595:         for (;;)
1.1       misho    3596:           {
1.1.1.4 ! misho    3597:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    3598:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
                   3599:           eptr--;
                   3600:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3601:           }
                   3602:         RRETURN(MATCH_NOMATCH);
                   3603:         }
                   3604:       /* Control never gets here */
                   3605:       }
                   3606: 
                   3607:     /* Caseful comparisons (includes all multi-byte characters) */
                   3608: 
                   3609:     else
                   3610:       {
                   3611:       for (i = 1; i <= min; i++)
                   3612:         {
                   3613:         if (eptr >= md->end_subject)
                   3614:           {
                   3615:           SCHECK_PARTIAL();
                   3616:           RRETURN(MATCH_NOMATCH);
                   3617:           }
1.1.1.4 ! misho    3618:         if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3619:         }
                   3620: 
                   3621:       if (min == max) continue;
                   3622: 
                   3623:       if (minimize)
                   3624:         {
                   3625:         for (fi = min;; fi++)
                   3626:           {
                   3627:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
                   3628:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3629:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3630:           if (eptr >= md->end_subject)
                   3631:             {
                   3632:             SCHECK_PARTIAL();
                   3633:             RRETURN(MATCH_NOMATCH);
                   3634:             }
1.1.1.4 ! misho    3635:           if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3636:           }
                   3637:         /* Control never gets here */
                   3638:         }
                   3639:       else  /* Maximize */
                   3640:         {
                   3641:         pp = eptr;
                   3642:         for (i = min; i < max; i++)
                   3643:           {
                   3644:           if (eptr >= md->end_subject)
                   3645:             {
                   3646:             SCHECK_PARTIAL();
                   3647:             break;
                   3648:             }
1.1.1.4 ! misho    3649:           if (fc != RAWUCHARTEST(eptr)) break;
1.1       misho    3650:           eptr++;
                   3651:           }
1.1.1.4 ! misho    3652:         if (possessive) continue;    /* No backtracking */
        !          3653:         for (;;)
1.1       misho    3654:           {
1.1.1.4 ! misho    3655:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    3656:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
                   3657:           eptr--;
                   3658:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3659:           }
                   3660:         RRETURN(MATCH_NOMATCH);
                   3661:         }
                   3662:       }
                   3663:     /* Control never gets here */
                   3664: 
                   3665:     /* Match a negated single character. In UTF mode both the pattern
                   3666:     character and the subject character can be multibyte. */
                   3667: 
                   3668:     case OP_NOT:
                   3669:     case OP_NOTI:
                   3670:     if (eptr >= md->end_subject)
                   3671:       {
                   3672:       SCHECK_PARTIAL();
                   3673:       RRETURN(MATCH_NOMATCH);
                   3674:       }
1.1.1.3   misho    3675: #ifdef SUPPORT_UTF
                   3676:     if (utf)
1.1       misho    3677:       {
1.1.1.4 ! misho    3678:       register pcre_uint32 ch, och;
1.1.1.3   misho    3679: 
                   3680:       ecode++;
                   3681:       GETCHARINC(ch, ecode);
                   3682:       GETCHARINC(c, eptr);
                   3683: 
                   3684:       if (op == OP_NOT)
                   3685:         {
                   3686:         if (ch == c) RRETURN(MATCH_NOMATCH);
                   3687:         }
                   3688:       else
                   3689:         {
1.1.1.2   misho    3690: #ifdef SUPPORT_UCP
1.1.1.3   misho    3691:         if (ch > 127)
                   3692:           och = UCD_OTHERCASE(ch);
1.1.1.2   misho    3693: #else
1.1.1.3   misho    3694:         if (ch > 127)
                   3695:           och = ch;
1.1.1.2   misho    3696: #endif /* SUPPORT_UCP */
1.1.1.3   misho    3697:         else
                   3698:           och = TABLE_GET(ch, md->fcc, ch);
                   3699:         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
                   3700:         }
1.1       misho    3701:       }
1.1.1.3   misho    3702:     else
                   3703: #endif
1.1       misho    3704:       {
1.1.1.4 ! misho    3705:       register pcre_uint32 ch = ecode[1];
1.1.1.3   misho    3706:       c = *eptr++;
                   3707:       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
                   3708:         RRETURN(MATCH_NOMATCH);
                   3709:       ecode += 2;
1.1       misho    3710:       }
                   3711:     break;
                   3712: 
                   3713:     /* Match a negated single character repeatedly. This is almost a
                   3714:     repeat of the code for a repeated single character, but I haven't found a
                   3715:     nice way of commoning these up that doesn't require a test of the
                   3716:     positive/negative option for each character match. Maybe that wouldn't add
                   3717:     very much to the time taken, but character matching *is* what this is all
                   3718:     about... */
                   3719: 
                   3720:     case OP_NOTEXACT:
                   3721:     case OP_NOTEXACTI:
                   3722:     min = max = GET2(ecode, 1);
1.1.1.2   misho    3723:     ecode += 1 + IMM2_SIZE;
1.1       misho    3724:     goto REPEATNOTCHAR;
                   3725: 
                   3726:     case OP_NOTUPTO:
                   3727:     case OP_NOTUPTOI:
                   3728:     case OP_NOTMINUPTO:
                   3729:     case OP_NOTMINUPTOI:
                   3730:     min = 0;
                   3731:     max = GET2(ecode, 1);
                   3732:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
1.1.1.2   misho    3733:     ecode += 1 + IMM2_SIZE;
1.1       misho    3734:     goto REPEATNOTCHAR;
                   3735: 
                   3736:     case OP_NOTPOSSTAR:
                   3737:     case OP_NOTPOSSTARI:
                   3738:     possessive = TRUE;
                   3739:     min = 0;
                   3740:     max = INT_MAX;
                   3741:     ecode++;
                   3742:     goto REPEATNOTCHAR;
                   3743: 
                   3744:     case OP_NOTPOSPLUS:
                   3745:     case OP_NOTPOSPLUSI:
                   3746:     possessive = TRUE;
                   3747:     min = 1;
                   3748:     max = INT_MAX;
                   3749:     ecode++;
                   3750:     goto REPEATNOTCHAR;
                   3751: 
                   3752:     case OP_NOTPOSQUERY:
                   3753:     case OP_NOTPOSQUERYI:
                   3754:     possessive = TRUE;
                   3755:     min = 0;
                   3756:     max = 1;
                   3757:     ecode++;
                   3758:     goto REPEATNOTCHAR;
                   3759: 
                   3760:     case OP_NOTPOSUPTO:
                   3761:     case OP_NOTPOSUPTOI:
                   3762:     possessive = TRUE;
                   3763:     min = 0;
                   3764:     max = GET2(ecode, 1);
1.1.1.2   misho    3765:     ecode += 1 + IMM2_SIZE;
1.1       misho    3766:     goto REPEATNOTCHAR;
                   3767: 
                   3768:     case OP_NOTSTAR:
                   3769:     case OP_NOTSTARI:
                   3770:     case OP_NOTMINSTAR:
                   3771:     case OP_NOTMINSTARI:
                   3772:     case OP_NOTPLUS:
                   3773:     case OP_NOTPLUSI:
                   3774:     case OP_NOTMINPLUS:
                   3775:     case OP_NOTMINPLUSI:
                   3776:     case OP_NOTQUERY:
                   3777:     case OP_NOTQUERYI:
                   3778:     case OP_NOTMINQUERY:
                   3779:     case OP_NOTMINQUERYI:
                   3780:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
                   3781:     minimize = (c & 1) != 0;
                   3782:     min = rep_min[c];                 /* Pick up values from tables; */
                   3783:     max = rep_max[c];                 /* zero for max => infinity */
                   3784:     if (max == 0) max = INT_MAX;
                   3785: 
                   3786:     /* Common code for all repeated negated single-character matches. */
                   3787: 
                   3788:     REPEATNOTCHAR:
1.1.1.3   misho    3789:     GETCHARINCTEST(fc, ecode);
1.1       misho    3790: 
                   3791:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3792:     since matching characters is likely to be quite common. First, ensure the
                   3793:     minimum number of matches are present. If min = max, continue at the same
                   3794:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3795:     the expression and advancing one matching character if failing, up to the
                   3796:     maximum. Alternatively, if maximizing, find the maximum number of
                   3797:     characters and work backwards. */
                   3798: 
                   3799:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.3   misho    3800:       max, (char *)eptr));
1.1       misho    3801: 
                   3802:     if (op >= OP_NOTSTARI)     /* Caseless */
                   3803:       {
1.1.1.2   misho    3804: #ifdef SUPPORT_UTF
                   3805: #ifdef SUPPORT_UCP
                   3806:       if (utf && fc > 127)
                   3807:         foc = UCD_OTHERCASE(fc);
                   3808: #else
                   3809:       if (utf && fc > 127)
                   3810:         foc = fc;
                   3811: #endif /* SUPPORT_UCP */
                   3812:       else
                   3813: #endif /* SUPPORT_UTF */
                   3814:         foc = TABLE_GET(fc, md->fcc, fc);
1.1       misho    3815: 
1.1.1.2   misho    3816: #ifdef SUPPORT_UTF
                   3817:       if (utf)
1.1       misho    3818:         {
1.1.1.4 ! misho    3819:         register pcre_uint32 d;
1.1       misho    3820:         for (i = 1; i <= min; i++)
                   3821:           {
                   3822:           if (eptr >= md->end_subject)
                   3823:             {
                   3824:             SCHECK_PARTIAL();
                   3825:             RRETURN(MATCH_NOMATCH);
                   3826:             }
                   3827:           GETCHARINC(d, eptr);
1.1.1.3   misho    3828:           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3829:           }
                   3830:         }
                   3831:       else
1.1.1.4 ! misho    3832: #endif  /* SUPPORT_UTF */
1.1.1.2   misho    3833:       /* Not UTF mode */
1.1       misho    3834:         {
                   3835:         for (i = 1; i <= min; i++)
                   3836:           {
                   3837:           if (eptr >= md->end_subject)
                   3838:             {
                   3839:             SCHECK_PARTIAL();
                   3840:             RRETURN(MATCH_NOMATCH);
                   3841:             }
1.1.1.2   misho    3842:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3843:           eptr++;
1.1       misho    3844:           }
                   3845:         }
                   3846: 
                   3847:       if (min == max) continue;
                   3848: 
                   3849:       if (minimize)
                   3850:         {
1.1.1.2   misho    3851: #ifdef SUPPORT_UTF
                   3852:         if (utf)
1.1       misho    3853:           {
1.1.1.4 ! misho    3854:           register pcre_uint32 d;
1.1       misho    3855:           for (fi = min;; fi++)
                   3856:             {
                   3857:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
                   3858:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3859:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3860:             if (eptr >= md->end_subject)
                   3861:               {
                   3862:               SCHECK_PARTIAL();
                   3863:               RRETURN(MATCH_NOMATCH);
                   3864:               }
                   3865:             GETCHARINC(d, eptr);
1.1.1.2   misho    3866:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3867:             }
                   3868:           }
                   3869:         else
1.1.1.4 ! misho    3870: #endif  /* SUPPORT_UTF */
1.1.1.2   misho    3871:         /* Not UTF mode */
1.1       misho    3872:           {
                   3873:           for (fi = min;; fi++)
                   3874:             {
                   3875:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
                   3876:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3877:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3878:             if (eptr >= md->end_subject)
                   3879:               {
                   3880:               SCHECK_PARTIAL();
                   3881:               RRETURN(MATCH_NOMATCH);
                   3882:               }
1.1.1.2   misho    3883:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3884:             eptr++;
1.1       misho    3885:             }
                   3886:           }
                   3887:         /* Control never gets here */
                   3888:         }
                   3889: 
                   3890:       /* Maximize case */
                   3891: 
                   3892:       else
                   3893:         {
                   3894:         pp = eptr;
                   3895: 
1.1.1.2   misho    3896: #ifdef SUPPORT_UTF
                   3897:         if (utf)
1.1       misho    3898:           {
1.1.1.4 ! misho    3899:           register pcre_uint32 d;
1.1       misho    3900:           for (i = min; i < max; i++)
                   3901:             {
                   3902:             int len = 1;
                   3903:             if (eptr >= md->end_subject)
                   3904:               {
                   3905:               SCHECK_PARTIAL();
                   3906:               break;
                   3907:               }
                   3908:             GETCHARLEN(d, eptr, len);
1.1.1.2   misho    3909:             if (fc == d || (unsigned int)foc == d) break;
1.1       misho    3910:             eptr += len;
                   3911:             }
1.1.1.4 ! misho    3912:           if (possessive) continue;    /* No backtracking */
1.1.1.2   misho    3913:           for(;;)
1.1       misho    3914:             {
1.1.1.4 ! misho    3915:             if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    3916:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
                   3917:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.4 ! misho    3918:             eptr--;
1.1       misho    3919:             BACKCHAR(eptr);
                   3920:             }
                   3921:           }
                   3922:         else
1.1.1.4 ! misho    3923: #endif  /* SUPPORT_UTF */
1.1.1.2   misho    3924:         /* Not UTF mode */
1.1       misho    3925:           {
                   3926:           for (i = min; i < max; i++)
                   3927:             {
                   3928:             if (eptr >= md->end_subject)
                   3929:               {
                   3930:               SCHECK_PARTIAL();
                   3931:               break;
                   3932:               }
1.1.1.2   misho    3933:             if (fc == *eptr || foc == *eptr) break;
1.1       misho    3934:             eptr++;
                   3935:             }
1.1.1.4 ! misho    3936:           if (possessive) continue;    /* No backtracking */
        !          3937:           for (;;)
1.1       misho    3938:             {
1.1.1.4 ! misho    3939:             if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    3940:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
                   3941:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3942:             eptr--;
                   3943:             }
                   3944:           }
                   3945: 
                   3946:         RRETURN(MATCH_NOMATCH);
                   3947:         }
                   3948:       /* Control never gets here */
                   3949:       }
                   3950: 
                   3951:     /* Caseful comparisons */
                   3952: 
                   3953:     else
                   3954:       {
1.1.1.2   misho    3955: #ifdef SUPPORT_UTF
                   3956:       if (utf)
1.1       misho    3957:         {
1.1.1.4 ! misho    3958:         register pcre_uint32 d;
1.1       misho    3959:         for (i = 1; i <= min; i++)
                   3960:           {
                   3961:           if (eptr >= md->end_subject)
                   3962:             {
                   3963:             SCHECK_PARTIAL();
                   3964:             RRETURN(MATCH_NOMATCH);
                   3965:             }
                   3966:           GETCHARINC(d, eptr);
                   3967:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   3968:           }
                   3969:         }
                   3970:       else
                   3971: #endif
1.1.1.2   misho    3972:       /* Not UTF mode */
1.1       misho    3973:         {
                   3974:         for (i = 1; i <= min; i++)
                   3975:           {
                   3976:           if (eptr >= md->end_subject)
                   3977:             {
                   3978:             SCHECK_PARTIAL();
                   3979:             RRETURN(MATCH_NOMATCH);
                   3980:             }
                   3981:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   3982:           }
                   3983:         }
                   3984: 
                   3985:       if (min == max) continue;
                   3986: 
                   3987:       if (minimize)
                   3988:         {
1.1.1.2   misho    3989: #ifdef SUPPORT_UTF
                   3990:         if (utf)
1.1       misho    3991:           {
1.1.1.4 ! misho    3992:           register pcre_uint32 d;
1.1       misho    3993:           for (fi = min;; fi++)
                   3994:             {
                   3995:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
                   3996:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3997:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   3998:             if (eptr >= md->end_subject)
                   3999:               {
                   4000:               SCHECK_PARTIAL();
                   4001:               RRETURN(MATCH_NOMATCH);
                   4002:               }
                   4003:             GETCHARINC(d, eptr);
                   4004:             if (fc == d) RRETURN(MATCH_NOMATCH);
                   4005:             }
                   4006:           }
                   4007:         else
                   4008: #endif
1.1.1.2   misho    4009:         /* Not UTF mode */
1.1       misho    4010:           {
                   4011:           for (fi = min;; fi++)
                   4012:             {
                   4013:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
                   4014:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4015:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4016:             if (eptr >= md->end_subject)
                   4017:               {
                   4018:               SCHECK_PARTIAL();
                   4019:               RRETURN(MATCH_NOMATCH);
                   4020:               }
                   4021:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   4022:             }
                   4023:           }
                   4024:         /* Control never gets here */
                   4025:         }
                   4026: 
                   4027:       /* Maximize case */
                   4028: 
                   4029:       else
                   4030:         {
                   4031:         pp = eptr;
                   4032: 
1.1.1.2   misho    4033: #ifdef SUPPORT_UTF
                   4034:         if (utf)
1.1       misho    4035:           {
1.1.1.4 ! misho    4036:           register pcre_uint32 d;
1.1       misho    4037:           for (i = min; i < max; i++)
                   4038:             {
                   4039:             int len = 1;
                   4040:             if (eptr >= md->end_subject)
                   4041:               {
                   4042:               SCHECK_PARTIAL();
                   4043:               break;
                   4044:               }
                   4045:             GETCHARLEN(d, eptr, len);
                   4046:             if (fc == d) break;
                   4047:             eptr += len;
                   4048:             }
1.1.1.4 ! misho    4049:           if (possessive) continue;    /* No backtracking */
1.1       misho    4050:           for(;;)
                   4051:             {
1.1.1.4 ! misho    4052:             if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    4053:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
                   4054:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.4 ! misho    4055:             eptr--;
1.1       misho    4056:             BACKCHAR(eptr);
                   4057:             }
                   4058:           }
                   4059:         else
                   4060: #endif
1.1.1.2   misho    4061:         /* Not UTF mode */
1.1       misho    4062:           {
                   4063:           for (i = min; i < max; i++)
                   4064:             {
                   4065:             if (eptr >= md->end_subject)
                   4066:               {
                   4067:               SCHECK_PARTIAL();
                   4068:               break;
                   4069:               }
                   4070:             if (fc == *eptr) break;
                   4071:             eptr++;
                   4072:             }
1.1.1.4 ! misho    4073:           if (possessive) continue;    /* No backtracking */
        !          4074:           for (;;)
1.1       misho    4075:             {
1.1.1.4 ! misho    4076:             if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    4077:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
                   4078:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4079:             eptr--;
                   4080:             }
                   4081:           }
                   4082: 
                   4083:         RRETURN(MATCH_NOMATCH);
                   4084:         }
                   4085:       }
                   4086:     /* Control never gets here */
                   4087: 
                   4088:     /* Match a single character type repeatedly; several different opcodes
                   4089:     share code. This is very similar to the code for single characters, but we
                   4090:     repeat it in the interests of efficiency. */
                   4091: 
                   4092:     case OP_TYPEEXACT:
                   4093:     min = max = GET2(ecode, 1);
                   4094:     minimize = TRUE;
1.1.1.2   misho    4095:     ecode += 1 + IMM2_SIZE;
1.1       misho    4096:     goto REPEATTYPE;
                   4097: 
                   4098:     case OP_TYPEUPTO:
                   4099:     case OP_TYPEMINUPTO:
                   4100:     min = 0;
                   4101:     max = GET2(ecode, 1);
                   4102:     minimize = *ecode == OP_TYPEMINUPTO;
1.1.1.2   misho    4103:     ecode += 1 + IMM2_SIZE;
1.1       misho    4104:     goto REPEATTYPE;
                   4105: 
                   4106:     case OP_TYPEPOSSTAR:
                   4107:     possessive = TRUE;
                   4108:     min = 0;
                   4109:     max = INT_MAX;
                   4110:     ecode++;
                   4111:     goto REPEATTYPE;
                   4112: 
                   4113:     case OP_TYPEPOSPLUS:
                   4114:     possessive = TRUE;
                   4115:     min = 1;
                   4116:     max = INT_MAX;
                   4117:     ecode++;
                   4118:     goto REPEATTYPE;
                   4119: 
                   4120:     case OP_TYPEPOSQUERY:
                   4121:     possessive = TRUE;
                   4122:     min = 0;
                   4123:     max = 1;
                   4124:     ecode++;
                   4125:     goto REPEATTYPE;
                   4126: 
                   4127:     case OP_TYPEPOSUPTO:
                   4128:     possessive = TRUE;
                   4129:     min = 0;
                   4130:     max = GET2(ecode, 1);
1.1.1.2   misho    4131:     ecode += 1 + IMM2_SIZE;
1.1       misho    4132:     goto REPEATTYPE;
                   4133: 
                   4134:     case OP_TYPESTAR:
                   4135:     case OP_TYPEMINSTAR:
                   4136:     case OP_TYPEPLUS:
                   4137:     case OP_TYPEMINPLUS:
                   4138:     case OP_TYPEQUERY:
                   4139:     case OP_TYPEMINQUERY:
                   4140:     c = *ecode++ - OP_TYPESTAR;
                   4141:     minimize = (c & 1) != 0;
                   4142:     min = rep_min[c];                 /* Pick up values from tables; */
                   4143:     max = rep_max[c];                 /* zero for max => infinity */
                   4144:     if (max == 0) max = INT_MAX;
                   4145: 
                   4146:     /* Common code for all repeated single character type matches. Note that
                   4147:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   4148:     character types, the valid characters are all one-byte long. */
                   4149: 
                   4150:     REPEATTYPE:
                   4151:     ctype = *ecode++;      /* Code for the character type */
                   4152: 
                   4153: #ifdef SUPPORT_UCP
                   4154:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   4155:       {
                   4156:       prop_fail_result = ctype == OP_NOTPROP;
                   4157:       prop_type = *ecode++;
                   4158:       prop_value = *ecode++;
                   4159:       }
                   4160:     else prop_type = -1;
                   4161: #endif
                   4162: 
                   4163:     /* First, ensure the minimum number of matches are present. Use inline
                   4164:     code for maximizing the speed, and do the type test once at the start
                   4165:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
                   4166:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   4167:     and single-bytes. */
                   4168: 
                   4169:     if (min > 0)
                   4170:       {
                   4171: #ifdef SUPPORT_UCP
                   4172:       if (prop_type >= 0)
                   4173:         {
                   4174:         switch(prop_type)
                   4175:           {
                   4176:           case PT_ANY:
                   4177:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4178:           for (i = 1; i <= min; i++)
                   4179:             {
                   4180:             if (eptr >= md->end_subject)
                   4181:               {
                   4182:               SCHECK_PARTIAL();
                   4183:               RRETURN(MATCH_NOMATCH);
                   4184:               }
                   4185:             GETCHARINCTEST(c, eptr);
                   4186:             }
                   4187:           break;
                   4188: 
                   4189:           case PT_LAMP:
                   4190:           for (i = 1; i <= min; i++)
                   4191:             {
                   4192:             int chartype;
                   4193:             if (eptr >= md->end_subject)
                   4194:               {
                   4195:               SCHECK_PARTIAL();
                   4196:               RRETURN(MATCH_NOMATCH);
                   4197:               }
                   4198:             GETCHARINCTEST(c, eptr);
                   4199:             chartype = UCD_CHARTYPE(c);
                   4200:             if ((chartype == ucp_Lu ||
                   4201:                  chartype == ucp_Ll ||
                   4202:                  chartype == ucp_Lt) == prop_fail_result)
                   4203:               RRETURN(MATCH_NOMATCH);
                   4204:             }
                   4205:           break;
                   4206: 
                   4207:           case PT_GC:
                   4208:           for (i = 1; i <= min; i++)
                   4209:             {
                   4210:             if (eptr >= md->end_subject)
                   4211:               {
                   4212:               SCHECK_PARTIAL();
                   4213:               RRETURN(MATCH_NOMATCH);
                   4214:               }
                   4215:             GETCHARINCTEST(c, eptr);
                   4216:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4217:               RRETURN(MATCH_NOMATCH);
                   4218:             }
                   4219:           break;
                   4220: 
                   4221:           case PT_PC:
                   4222:           for (i = 1; i <= min; i++)
                   4223:             {
                   4224:             if (eptr >= md->end_subject)
                   4225:               {
                   4226:               SCHECK_PARTIAL();
                   4227:               RRETURN(MATCH_NOMATCH);
                   4228:               }
                   4229:             GETCHARINCTEST(c, eptr);
                   4230:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4231:               RRETURN(MATCH_NOMATCH);
                   4232:             }
                   4233:           break;
                   4234: 
                   4235:           case PT_SC:
                   4236:           for (i = 1; i <= min; i++)
                   4237:             {
                   4238:             if (eptr >= md->end_subject)
                   4239:               {
                   4240:               SCHECK_PARTIAL();
                   4241:               RRETURN(MATCH_NOMATCH);
                   4242:               }
                   4243:             GETCHARINCTEST(c, eptr);
                   4244:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4245:               RRETURN(MATCH_NOMATCH);
                   4246:             }
                   4247:           break;
                   4248: 
                   4249:           case PT_ALNUM:
                   4250:           for (i = 1; i <= min; i++)
                   4251:             {
                   4252:             int category;
                   4253:             if (eptr >= md->end_subject)
                   4254:               {
                   4255:               SCHECK_PARTIAL();
                   4256:               RRETURN(MATCH_NOMATCH);
                   4257:               }
                   4258:             GETCHARINCTEST(c, eptr);
                   4259:             category = UCD_CATEGORY(c);
                   4260:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4261:               RRETURN(MATCH_NOMATCH);
                   4262:             }
                   4263:           break;
                   4264: 
                   4265:           case PT_SPACE:    /* Perl space */
                   4266:           for (i = 1; i <= min; i++)
                   4267:             {
                   4268:             if (eptr >= md->end_subject)
                   4269:               {
                   4270:               SCHECK_PARTIAL();
                   4271:               RRETURN(MATCH_NOMATCH);
                   4272:               }
                   4273:             GETCHARINCTEST(c, eptr);
                   4274:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4275:                  c == CHAR_FF || c == CHAR_CR)
                   4276:                    == prop_fail_result)
                   4277:               RRETURN(MATCH_NOMATCH);
                   4278:             }
                   4279:           break;
                   4280: 
                   4281:           case PT_PXSPACE:  /* POSIX space */
                   4282:           for (i = 1; i <= min; i++)
                   4283:             {
                   4284:             if (eptr >= md->end_subject)
                   4285:               {
                   4286:               SCHECK_PARTIAL();
                   4287:               RRETURN(MATCH_NOMATCH);
                   4288:               }
                   4289:             GETCHARINCTEST(c, eptr);
                   4290:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4291:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4292:                    == prop_fail_result)
                   4293:               RRETURN(MATCH_NOMATCH);
                   4294:             }
                   4295:           break;
                   4296: 
                   4297:           case PT_WORD:
                   4298:           for (i = 1; i <= min; i++)
                   4299:             {
                   4300:             int category;
                   4301:             if (eptr >= md->end_subject)
                   4302:               {
                   4303:               SCHECK_PARTIAL();
                   4304:               RRETURN(MATCH_NOMATCH);
                   4305:               }
                   4306:             GETCHARINCTEST(c, eptr);
                   4307:             category = UCD_CATEGORY(c);
                   4308:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                   4309:                    == prop_fail_result)
                   4310:               RRETURN(MATCH_NOMATCH);
                   4311:             }
                   4312:           break;
                   4313: 
1.1.1.4 ! misho    4314:           case PT_CLIST:
        !          4315:           for (i = 1; i <= min; i++)
        !          4316:             {
        !          4317:             const pcre_uint32 *cp;
        !          4318:             if (eptr >= md->end_subject)
        !          4319:               {
        !          4320:               SCHECK_PARTIAL();
        !          4321:               RRETURN(MATCH_NOMATCH);
        !          4322:               }
        !          4323:             GETCHARINCTEST(c, eptr);
        !          4324:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          4325:             for (;;)
        !          4326:               {
        !          4327:               if (c < *cp)
        !          4328:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
        !          4329:               if (c == *cp++)
        !          4330:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
        !          4331:               }
        !          4332:             }
        !          4333:           break;
        !          4334: 
        !          4335:           case PT_UCNC:
        !          4336:           for (i = 1; i <= min; i++)
        !          4337:             {
        !          4338:             if (eptr >= md->end_subject)
        !          4339:               {
        !          4340:               SCHECK_PARTIAL();
        !          4341:               RRETURN(MATCH_NOMATCH);
        !          4342:               }
        !          4343:             GETCHARINCTEST(c, eptr);
        !          4344:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          4345:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          4346:                  c >= 0xe000) == prop_fail_result)
        !          4347:               RRETURN(MATCH_NOMATCH);
        !          4348:             }
        !          4349:           break;
        !          4350: 
1.1       misho    4351:           /* This should not occur */
                   4352: 
                   4353:           default:
                   4354:           RRETURN(PCRE_ERROR_INTERNAL);
                   4355:           }
                   4356:         }
                   4357: 
                   4358:       /* Match extended Unicode sequences. We will get here only if the
                   4359:       support is in the binary; otherwise a compile-time error occurs. */
                   4360: 
                   4361:       else if (ctype == OP_EXTUNI)
                   4362:         {
                   4363:         for (i = 1; i <= min; i++)
                   4364:           {
                   4365:           if (eptr >= md->end_subject)
                   4366:             {
                   4367:             SCHECK_PARTIAL();
                   4368:             RRETURN(MATCH_NOMATCH);
                   4369:             }
1.1.1.4 ! misho    4370:           else
1.1       misho    4371:             {
1.1.1.4 ! misho    4372:             int lgb, rgb;
        !          4373:             GETCHARINCTEST(c, eptr);
        !          4374:             lgb = UCD_GRAPHBREAK(c);
        !          4375:            while (eptr < md->end_subject)
        !          4376:               {
        !          4377:               int len = 1;
        !          4378:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          4379:               rgb = UCD_GRAPHBREAK(c);
        !          4380:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          4381:               lgb = rgb;
        !          4382:               eptr += len;
        !          4383:               }
1.1       misho    4384:             }
1.1.1.3   misho    4385:           CHECK_PARTIAL();
1.1       misho    4386:           }
                   4387:         }
                   4388: 
                   4389:       else
                   4390: #endif     /* SUPPORT_UCP */
                   4391: 
                   4392: /* Handle all other cases when the coding is UTF-8 */
                   4393: 
1.1.1.2   misho    4394: #ifdef SUPPORT_UTF
                   4395:       if (utf) switch(ctype)
1.1       misho    4396:         {
                   4397:         case OP_ANY:
                   4398:         for (i = 1; i <= min; i++)
                   4399:           {
                   4400:           if (eptr >= md->end_subject)
                   4401:             {
                   4402:             SCHECK_PARTIAL();
                   4403:             RRETURN(MATCH_NOMATCH);
                   4404:             }
                   4405:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3   misho    4406:           if (md->partial != 0 &&
                   4407:               eptr + 1 >= md->end_subject &&
                   4408:               NLBLOCK->nltype == NLTYPE_FIXED &&
                   4409:               NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    4410:               RAWUCHAR(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    4411:             {
                   4412:             md->hitend = TRUE;
                   4413:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   4414:             }
1.1       misho    4415:           eptr++;
1.1.1.2   misho    4416:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4417:           }
                   4418:         break;
                   4419: 
                   4420:         case OP_ALLANY:
                   4421:         for (i = 1; i <= min; i++)
                   4422:           {
                   4423:           if (eptr >= md->end_subject)
                   4424:             {
                   4425:             SCHECK_PARTIAL();
                   4426:             RRETURN(MATCH_NOMATCH);
                   4427:             }
                   4428:           eptr++;
1.1.1.2   misho    4429:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4430:           }
                   4431:         break;
                   4432: 
                   4433:         case OP_ANYBYTE:
                   4434:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
                   4435:         eptr += min;
                   4436:         break;
                   4437: 
                   4438:         case OP_ANYNL:
                   4439:         for (i = 1; i <= min; i++)
                   4440:           {
                   4441:           if (eptr >= md->end_subject)
                   4442:             {
                   4443:             SCHECK_PARTIAL();
                   4444:             RRETURN(MATCH_NOMATCH);
                   4445:             }
                   4446:           GETCHARINC(c, eptr);
                   4447:           switch(c)
                   4448:             {
                   4449:             default: RRETURN(MATCH_NOMATCH);
                   4450: 
1.1.1.4 ! misho    4451:             case CHAR_CR:
        !          4452:             if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    4453:             break;
                   4454: 
1.1.1.4 ! misho    4455:             case CHAR_LF:
1.1       misho    4456:             break;
                   4457: 
1.1.1.4 ! misho    4458:             case CHAR_VT:
        !          4459:             case CHAR_FF:
        !          4460:             case CHAR_NEL:
        !          4461: #ifndef EBCDIC
1.1       misho    4462:             case 0x2028:
                   4463:             case 0x2029:
1.1.1.4 ! misho    4464: #endif  /* Not EBCDIC */
1.1       misho    4465:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4466:             break;
                   4467:             }
                   4468:           }
                   4469:         break;
                   4470: 
                   4471:         case OP_NOT_HSPACE:
                   4472:         for (i = 1; i <= min; i++)
                   4473:           {
                   4474:           if (eptr >= md->end_subject)
                   4475:             {
                   4476:             SCHECK_PARTIAL();
                   4477:             RRETURN(MATCH_NOMATCH);
                   4478:             }
                   4479:           GETCHARINC(c, eptr);
                   4480:           switch(c)
                   4481:             {
1.1.1.4 ! misho    4482:             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misho    4483:             default: break;
                   4484:             }
                   4485:           }
                   4486:         break;
                   4487: 
                   4488:         case OP_HSPACE:
                   4489:         for (i = 1; i <= min; i++)
                   4490:           {
                   4491:           if (eptr >= md->end_subject)
                   4492:             {
                   4493:             SCHECK_PARTIAL();
                   4494:             RRETURN(MATCH_NOMATCH);
                   4495:             }
                   4496:           GETCHARINC(c, eptr);
                   4497:           switch(c)
                   4498:             {
1.1.1.4 ! misho    4499:             HSPACE_CASES: break;  /* Byte and multibyte cases */
1.1       misho    4500:             default: RRETURN(MATCH_NOMATCH);
                   4501:             }
                   4502:           }
                   4503:         break;
                   4504: 
                   4505:         case OP_NOT_VSPACE:
                   4506:         for (i = 1; i <= min; i++)
                   4507:           {
                   4508:           if (eptr >= md->end_subject)
                   4509:             {
                   4510:             SCHECK_PARTIAL();
                   4511:             RRETURN(MATCH_NOMATCH);
                   4512:             }
                   4513:           GETCHARINC(c, eptr);
                   4514:           switch(c)
                   4515:             {
1.1.1.4 ! misho    4516:             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    4517:             default: break;
                   4518:             }
                   4519:           }
                   4520:         break;
                   4521: 
                   4522:         case OP_VSPACE:
                   4523:         for (i = 1; i <= min; i++)
                   4524:           {
                   4525:           if (eptr >= md->end_subject)
                   4526:             {
                   4527:             SCHECK_PARTIAL();
                   4528:             RRETURN(MATCH_NOMATCH);
                   4529:             }
                   4530:           GETCHARINC(c, eptr);
                   4531:           switch(c)
                   4532:             {
1.1.1.4 ! misho    4533:             VSPACE_CASES: break;
1.1       misho    4534:             default: RRETURN(MATCH_NOMATCH);
                   4535:             }
                   4536:           }
                   4537:         break;
                   4538: 
                   4539:         case OP_NOT_DIGIT:
                   4540:         for (i = 1; i <= min; i++)
                   4541:           {
                   4542:           if (eptr >= md->end_subject)
                   4543:             {
                   4544:             SCHECK_PARTIAL();
                   4545:             RRETURN(MATCH_NOMATCH);
                   4546:             }
                   4547:           GETCHARINC(c, eptr);
                   4548:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   4549:             RRETURN(MATCH_NOMATCH);
                   4550:           }
                   4551:         break;
                   4552: 
                   4553:         case OP_DIGIT:
                   4554:         for (i = 1; i <= min; i++)
                   4555:           {
1.1.1.4 ! misho    4556:           pcre_uint32 cc;
1.1       misho    4557:           if (eptr >= md->end_subject)
                   4558:             {
                   4559:             SCHECK_PARTIAL();
                   4560:             RRETURN(MATCH_NOMATCH);
                   4561:             }
1.1.1.4 ! misho    4562:           cc = RAWUCHAR(eptr);
        !          4563:           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
1.1       misho    4564:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4565:           eptr++;
1.1       misho    4566:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4567:           }
                   4568:         break;
                   4569: 
                   4570:         case OP_NOT_WHITESPACE:
                   4571:         for (i = 1; i <= min; i++)
                   4572:           {
1.1.1.4 ! misho    4573:           pcre_uint32 cc;
1.1       misho    4574:           if (eptr >= md->end_subject)
                   4575:             {
                   4576:             SCHECK_PARTIAL();
                   4577:             RRETURN(MATCH_NOMATCH);
                   4578:             }
1.1.1.4 ! misho    4579:           cc = RAWUCHAR(eptr);
        !          4580:           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
1.1       misho    4581:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4582:           eptr++;
                   4583:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4584:           }
                   4585:         break;
                   4586: 
                   4587:         case OP_WHITESPACE:
                   4588:         for (i = 1; i <= min; i++)
                   4589:           {
1.1.1.4 ! misho    4590:           pcre_uint32 cc;
1.1       misho    4591:           if (eptr >= md->end_subject)
                   4592:             {
                   4593:             SCHECK_PARTIAL();
                   4594:             RRETURN(MATCH_NOMATCH);
                   4595:             }
1.1.1.4 ! misho    4596:           cc = RAWUCHAR(eptr);
        !          4597:           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
1.1       misho    4598:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4599:           eptr++;
1.1       misho    4600:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4601:           }
                   4602:         break;
                   4603: 
                   4604:         case OP_NOT_WORDCHAR:
                   4605:         for (i = 1; i <= min; i++)
                   4606:           {
1.1.1.4 ! misho    4607:           pcre_uint32 cc;
1.1       misho    4608:           if (eptr >= md->end_subject)
                   4609:             {
                   4610:             SCHECK_PARTIAL();
                   4611:             RRETURN(MATCH_NOMATCH);
                   4612:             }
1.1.1.4 ! misho    4613:           cc = RAWUCHAR(eptr);
        !          4614:           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
1.1       misho    4615:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4616:           eptr++;
                   4617:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4618:           }
                   4619:         break;
                   4620: 
                   4621:         case OP_WORDCHAR:
                   4622:         for (i = 1; i <= min; i++)
                   4623:           {
1.1.1.4 ! misho    4624:           pcre_uint32 cc;
1.1       misho    4625:           if (eptr >= md->end_subject)
                   4626:             {
                   4627:             SCHECK_PARTIAL();
                   4628:             RRETURN(MATCH_NOMATCH);
                   4629:             }
1.1.1.4 ! misho    4630:           cc = RAWUCHAR(eptr);
        !          4631:           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
1.1       misho    4632:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4633:           eptr++;
1.1       misho    4634:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4635:           }
                   4636:         break;
                   4637: 
                   4638:         default:
                   4639:         RRETURN(PCRE_ERROR_INTERNAL);
                   4640:         }  /* End switch(ctype) */
                   4641: 
                   4642:       else
1.1.1.2   misho    4643: #endif     /* SUPPORT_UTF */
1.1       misho    4644: 
                   4645:       /* Code for the non-UTF case for minimum matching of operators other
                   4646:       than OP_PROP and OP_NOTPROP. */
                   4647: 
                   4648:       switch(ctype)
                   4649:         {
                   4650:         case OP_ANY:
                   4651:         for (i = 1; i <= min; i++)
                   4652:           {
                   4653:           if (eptr >= md->end_subject)
                   4654:             {
                   4655:             SCHECK_PARTIAL();
                   4656:             RRETURN(MATCH_NOMATCH);
                   4657:             }
                   4658:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1.1.3   misho    4659:           if (md->partial != 0 &&
                   4660:               eptr + 1 >= md->end_subject &&
                   4661:               NLBLOCK->nltype == NLTYPE_FIXED &&
                   4662:               NLBLOCK->nllen == 2 &&
                   4663:               *eptr == NLBLOCK->nl[0])
                   4664:             {
                   4665:             md->hitend = TRUE;
                   4666:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   4667:             }
1.1       misho    4668:           eptr++;
                   4669:           }
                   4670:         break;
                   4671: 
                   4672:         case OP_ALLANY:
                   4673:         if (eptr > md->end_subject - min)
                   4674:           {
                   4675:           SCHECK_PARTIAL();
                   4676:           RRETURN(MATCH_NOMATCH);
                   4677:           }
                   4678:         eptr += min;
                   4679:         break;
                   4680: 
                   4681:         case OP_ANYBYTE:
                   4682:         if (eptr > md->end_subject - min)
                   4683:           {
                   4684:           SCHECK_PARTIAL();
                   4685:           RRETURN(MATCH_NOMATCH);
                   4686:           }
                   4687:         eptr += min;
                   4688:         break;
                   4689: 
                   4690:         case OP_ANYNL:
                   4691:         for (i = 1; i <= min; i++)
                   4692:           {
                   4693:           if (eptr >= md->end_subject)
                   4694:             {
                   4695:             SCHECK_PARTIAL();
                   4696:             RRETURN(MATCH_NOMATCH);
                   4697:             }
                   4698:           switch(*eptr++)
                   4699:             {
                   4700:             default: RRETURN(MATCH_NOMATCH);
                   4701: 
1.1.1.4 ! misho    4702:             case CHAR_CR:
        !          4703:             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misho    4704:             break;
                   4705: 
1.1.1.4 ! misho    4706:             case CHAR_LF:
1.1       misho    4707:             break;
                   4708: 
1.1.1.4 ! misho    4709:             case CHAR_VT:
        !          4710:             case CHAR_FF:
        !          4711:             case CHAR_NEL:
        !          4712: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1.1.2   misho    4713:             case 0x2028:
                   4714:             case 0x2029:
                   4715: #endif
1.1       misho    4716:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   4717:             break;
                   4718:             }
                   4719:           }
                   4720:         break;
                   4721: 
                   4722:         case OP_NOT_HSPACE:
                   4723:         for (i = 1; i <= min; i++)
                   4724:           {
                   4725:           if (eptr >= md->end_subject)
                   4726:             {
                   4727:             SCHECK_PARTIAL();
                   4728:             RRETURN(MATCH_NOMATCH);
                   4729:             }
                   4730:           switch(*eptr++)
                   4731:             {
                   4732:             default: break;
1.1.1.4 ! misho    4733:             HSPACE_BYTE_CASES:
        !          4734: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4735:             HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    4736: #endif
1.1       misho    4737:             RRETURN(MATCH_NOMATCH);
                   4738:             }
                   4739:           }
                   4740:         break;
                   4741: 
                   4742:         case OP_HSPACE:
                   4743:         for (i = 1; i <= min; i++)
                   4744:           {
                   4745:           if (eptr >= md->end_subject)
                   4746:             {
                   4747:             SCHECK_PARTIAL();
                   4748:             RRETURN(MATCH_NOMATCH);
                   4749:             }
                   4750:           switch(*eptr++)
                   4751:             {
                   4752:             default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    4753:             HSPACE_BYTE_CASES:
        !          4754: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4755:             HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    4756: #endif
1.1       misho    4757:             break;
                   4758:             }
                   4759:           }
                   4760:         break;
                   4761: 
                   4762:         case OP_NOT_VSPACE:
                   4763:         for (i = 1; i <= min; i++)
                   4764:           {
                   4765:           if (eptr >= md->end_subject)
                   4766:             {
                   4767:             SCHECK_PARTIAL();
                   4768:             RRETURN(MATCH_NOMATCH);
                   4769:             }
                   4770:           switch(*eptr++)
                   4771:             {
1.1.1.4 ! misho    4772:             VSPACE_BYTE_CASES:
        !          4773: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4774:             VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    4775: #endif
1.1       misho    4776:             RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    4777:             default: break;
1.1       misho    4778:             }
                   4779:           }
                   4780:         break;
                   4781: 
                   4782:         case OP_VSPACE:
                   4783:         for (i = 1; i <= min; i++)
                   4784:           {
                   4785:           if (eptr >= md->end_subject)
                   4786:             {
                   4787:             SCHECK_PARTIAL();
                   4788:             RRETURN(MATCH_NOMATCH);
                   4789:             }
                   4790:           switch(*eptr++)
                   4791:             {
                   4792:             default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    4793:             VSPACE_BYTE_CASES:
        !          4794: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4795:             VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    4796: #endif
1.1       misho    4797:             break;
                   4798:             }
                   4799:           }
                   4800:         break;
                   4801: 
                   4802:         case OP_NOT_DIGIT:
                   4803:         for (i = 1; i <= min; i++)
                   4804:           {
                   4805:           if (eptr >= md->end_subject)
                   4806:             {
                   4807:             SCHECK_PARTIAL();
                   4808:             RRETURN(MATCH_NOMATCH);
                   4809:             }
1.1.1.2   misho    4810:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
                   4811:             RRETURN(MATCH_NOMATCH);
                   4812:           eptr++;
1.1       misho    4813:           }
                   4814:         break;
                   4815: 
                   4816:         case OP_DIGIT:
                   4817:         for (i = 1; i <= min; i++)
                   4818:           {
                   4819:           if (eptr >= md->end_subject)
                   4820:             {
                   4821:             SCHECK_PARTIAL();
                   4822:             RRETURN(MATCH_NOMATCH);
                   4823:             }
1.1.1.2   misho    4824:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
                   4825:             RRETURN(MATCH_NOMATCH);
                   4826:           eptr++;
1.1       misho    4827:           }
                   4828:         break;
                   4829: 
                   4830:         case OP_NOT_WHITESPACE:
                   4831:         for (i = 1; i <= min; i++)
                   4832:           {
                   4833:           if (eptr >= md->end_subject)
                   4834:             {
                   4835:             SCHECK_PARTIAL();
                   4836:             RRETURN(MATCH_NOMATCH);
                   4837:             }
1.1.1.2   misho    4838:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
                   4839:             RRETURN(MATCH_NOMATCH);
                   4840:           eptr++;
1.1       misho    4841:           }
                   4842:         break;
                   4843: 
                   4844:         case OP_WHITESPACE:
                   4845:         for (i = 1; i <= min; i++)
                   4846:           {
                   4847:           if (eptr >= md->end_subject)
                   4848:             {
                   4849:             SCHECK_PARTIAL();
                   4850:             RRETURN(MATCH_NOMATCH);
                   4851:             }
1.1.1.2   misho    4852:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
                   4853:             RRETURN(MATCH_NOMATCH);
                   4854:           eptr++;
1.1       misho    4855:           }
                   4856:         break;
                   4857: 
                   4858:         case OP_NOT_WORDCHAR:
                   4859:         for (i = 1; i <= min; i++)
                   4860:           {
                   4861:           if (eptr >= md->end_subject)
                   4862:             {
                   4863:             SCHECK_PARTIAL();
                   4864:             RRETURN(MATCH_NOMATCH);
                   4865:             }
1.1.1.2   misho    4866:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
1.1       misho    4867:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4868:           eptr++;
1.1       misho    4869:           }
                   4870:         break;
                   4871: 
                   4872:         case OP_WORDCHAR:
                   4873:         for (i = 1; i <= min; i++)
                   4874:           {
                   4875:           if (eptr >= md->end_subject)
                   4876:             {
                   4877:             SCHECK_PARTIAL();
                   4878:             RRETURN(MATCH_NOMATCH);
                   4879:             }
1.1.1.2   misho    4880:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
1.1       misho    4881:             RRETURN(MATCH_NOMATCH);
1.1.1.2   misho    4882:           eptr++;
1.1       misho    4883:           }
                   4884:         break;
                   4885: 
                   4886:         default:
                   4887:         RRETURN(PCRE_ERROR_INTERNAL);
                   4888:         }
                   4889:       }
                   4890: 
                   4891:     /* If min = max, continue at the same level without recursing */
                   4892: 
                   4893:     if (min == max) continue;
                   4894: 
                   4895:     /* If minimizing, we have to test the rest of the pattern before each
                   4896:     subsequent match. Again, separate the UTF case for speed, and also
                   4897:     separate the UCP cases. */
                   4898: 
                   4899:     if (minimize)
                   4900:       {
                   4901: #ifdef SUPPORT_UCP
                   4902:       if (prop_type >= 0)
                   4903:         {
                   4904:         switch(prop_type)
                   4905:           {
                   4906:           case PT_ANY:
                   4907:           for (fi = min;; fi++)
                   4908:             {
                   4909:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
                   4910:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4911:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4912:             if (eptr >= md->end_subject)
                   4913:               {
                   4914:               SCHECK_PARTIAL();
                   4915:               RRETURN(MATCH_NOMATCH);
                   4916:               }
                   4917:             GETCHARINCTEST(c, eptr);
                   4918:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4919:             }
                   4920:           /* Control never gets here */
                   4921: 
                   4922:           case PT_LAMP:
                   4923:           for (fi = min;; fi++)
                   4924:             {
                   4925:             int chartype;
                   4926:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
                   4927:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4928:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4929:             if (eptr >= md->end_subject)
                   4930:               {
                   4931:               SCHECK_PARTIAL();
                   4932:               RRETURN(MATCH_NOMATCH);
                   4933:               }
                   4934:             GETCHARINCTEST(c, eptr);
                   4935:             chartype = UCD_CHARTYPE(c);
                   4936:             if ((chartype == ucp_Lu ||
                   4937:                  chartype == ucp_Ll ||
                   4938:                  chartype == ucp_Lt) == prop_fail_result)
                   4939:               RRETURN(MATCH_NOMATCH);
                   4940:             }
                   4941:           /* Control never gets here */
                   4942: 
                   4943:           case PT_GC:
                   4944:           for (fi = min;; fi++)
                   4945:             {
                   4946:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
                   4947:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4948:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4949:             if (eptr >= md->end_subject)
                   4950:               {
                   4951:               SCHECK_PARTIAL();
                   4952:               RRETURN(MATCH_NOMATCH);
                   4953:               }
                   4954:             GETCHARINCTEST(c, eptr);
                   4955:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4956:               RRETURN(MATCH_NOMATCH);
                   4957:             }
                   4958:           /* Control never gets here */
                   4959: 
                   4960:           case PT_PC:
                   4961:           for (fi = min;; fi++)
                   4962:             {
                   4963:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
                   4964:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4965:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4966:             if (eptr >= md->end_subject)
                   4967:               {
                   4968:               SCHECK_PARTIAL();
                   4969:               RRETURN(MATCH_NOMATCH);
                   4970:               }
                   4971:             GETCHARINCTEST(c, eptr);
                   4972:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4973:               RRETURN(MATCH_NOMATCH);
                   4974:             }
                   4975:           /* Control never gets here */
                   4976: 
                   4977:           case PT_SC:
                   4978:           for (fi = min;; fi++)
                   4979:             {
                   4980:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
                   4981:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4982:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   4983:             if (eptr >= md->end_subject)
                   4984:               {
                   4985:               SCHECK_PARTIAL();
                   4986:               RRETURN(MATCH_NOMATCH);
                   4987:               }
                   4988:             GETCHARINCTEST(c, eptr);
                   4989:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4990:               RRETURN(MATCH_NOMATCH);
                   4991:             }
                   4992:           /* Control never gets here */
                   4993: 
                   4994:           case PT_ALNUM:
                   4995:           for (fi = min;; fi++)
                   4996:             {
                   4997:             int category;
                   4998:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
                   4999:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5000:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5001:             if (eptr >= md->end_subject)
                   5002:               {
                   5003:               SCHECK_PARTIAL();
                   5004:               RRETURN(MATCH_NOMATCH);
                   5005:               }
                   5006:             GETCHARINCTEST(c, eptr);
                   5007:             category = UCD_CATEGORY(c);
                   5008:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   5009:               RRETURN(MATCH_NOMATCH);
                   5010:             }
                   5011:           /* Control never gets here */
                   5012: 
                   5013:           case PT_SPACE:    /* Perl space */
                   5014:           for (fi = min;; fi++)
                   5015:             {
                   5016:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
                   5017:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5018:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5019:             if (eptr >= md->end_subject)
                   5020:               {
                   5021:               SCHECK_PARTIAL();
                   5022:               RRETURN(MATCH_NOMATCH);
                   5023:               }
                   5024:             GETCHARINCTEST(c, eptr);
                   5025:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5026:                  c == CHAR_FF || c == CHAR_CR)
                   5027:                    == prop_fail_result)
                   5028:               RRETURN(MATCH_NOMATCH);
                   5029:             }
                   5030:           /* Control never gets here */
                   5031: 
                   5032:           case PT_PXSPACE:  /* POSIX space */
                   5033:           for (fi = min;; fi++)
                   5034:             {
                   5035:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
                   5036:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5037:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5038:             if (eptr >= md->end_subject)
                   5039:               {
                   5040:               SCHECK_PARTIAL();
                   5041:               RRETURN(MATCH_NOMATCH);
                   5042:               }
                   5043:             GETCHARINCTEST(c, eptr);
                   5044:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5045:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5046:                    == prop_fail_result)
                   5047:               RRETURN(MATCH_NOMATCH);
                   5048:             }
                   5049:           /* Control never gets here */
                   5050: 
                   5051:           case PT_WORD:
                   5052:           for (fi = min;; fi++)
                   5053:             {
                   5054:             int category;
                   5055:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
                   5056:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5057:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5058:             if (eptr >= md->end_subject)
                   5059:               {
                   5060:               SCHECK_PARTIAL();
                   5061:               RRETURN(MATCH_NOMATCH);
                   5062:               }
                   5063:             GETCHARINCTEST(c, eptr);
                   5064:             category = UCD_CATEGORY(c);
                   5065:             if ((category == ucp_L ||
                   5066:                  category == ucp_N ||
                   5067:                  c == CHAR_UNDERSCORE)
                   5068:                    == prop_fail_result)
                   5069:               RRETURN(MATCH_NOMATCH);
                   5070:             }
                   5071:           /* Control never gets here */
                   5072: 
1.1.1.4 ! misho    5073:           case PT_CLIST:
        !          5074:           for (fi = min;; fi++)
        !          5075:             {
        !          5076:             const pcre_uint32 *cp;
        !          5077:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
        !          5078:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          5079:             if (fi >= max) RRETURN(MATCH_NOMATCH);
        !          5080:             if (eptr >= md->end_subject)
        !          5081:               {
        !          5082:               SCHECK_PARTIAL();
        !          5083:               RRETURN(MATCH_NOMATCH);
        !          5084:               }
        !          5085:             GETCHARINCTEST(c, eptr);
        !          5086:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          5087:             for (;;)
        !          5088:               {
        !          5089:               if (c < *cp)
        !          5090:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
        !          5091:               if (c == *cp++)
        !          5092:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
        !          5093:               }
        !          5094:             }
        !          5095:           /* Control never gets here */
1.1       misho    5096: 
1.1.1.4 ! misho    5097:           case PT_UCNC:
        !          5098:           for (fi = min;; fi++)
        !          5099:             {
        !          5100:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
        !          5101:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          5102:             if (fi >= max) RRETURN(MATCH_NOMATCH);
        !          5103:             if (eptr >= md->end_subject)
        !          5104:               {
        !          5105:               SCHECK_PARTIAL();
        !          5106:               RRETURN(MATCH_NOMATCH);
        !          5107:               }
        !          5108:             GETCHARINCTEST(c, eptr);
        !          5109:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          5110:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          5111:                  c >= 0xe000) == prop_fail_result)
        !          5112:               RRETURN(MATCH_NOMATCH);
        !          5113:             }
        !          5114:           /* Control never gets here */
        !          5115: 
        !          5116:           /* This should never occur */
1.1       misho    5117:           default:
                   5118:           RRETURN(PCRE_ERROR_INTERNAL);
                   5119:           }
                   5120:         }
                   5121: 
                   5122:       /* Match extended Unicode sequences. We will get here only if the
                   5123:       support is in the binary; otherwise a compile-time error occurs. */
                   5124: 
                   5125:       else if (ctype == OP_EXTUNI)
                   5126:         {
                   5127:         for (fi = min;; fi++)
                   5128:           {
                   5129:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
                   5130:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5131:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5132:           if (eptr >= md->end_subject)
                   5133:             {
                   5134:             SCHECK_PARTIAL();
                   5135:             RRETURN(MATCH_NOMATCH);
                   5136:             }
1.1.1.4 ! misho    5137:           else
1.1       misho    5138:             {
1.1.1.4 ! misho    5139:             int lgb, rgb;
        !          5140:             GETCHARINCTEST(c, eptr);
        !          5141:             lgb = UCD_GRAPHBREAK(c);
        !          5142:             while (eptr < md->end_subject)
        !          5143:               {
        !          5144:               int len = 1;
        !          5145:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5146:               rgb = UCD_GRAPHBREAK(c);
        !          5147:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          5148:               lgb = rgb;
        !          5149:               eptr += len;
        !          5150:               }
1.1       misho    5151:             }
1.1.1.3   misho    5152:           CHECK_PARTIAL();
1.1       misho    5153:           }
                   5154:         }
                   5155:       else
                   5156: #endif     /* SUPPORT_UCP */
                   5157: 
1.1.1.2   misho    5158: #ifdef SUPPORT_UTF
                   5159:       if (utf)
1.1       misho    5160:         {
                   5161:         for (fi = min;; fi++)
                   5162:           {
                   5163:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
                   5164:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5165:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5166:           if (eptr >= md->end_subject)
                   5167:             {
                   5168:             SCHECK_PARTIAL();
                   5169:             RRETURN(MATCH_NOMATCH);
                   5170:             }
                   5171:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   5172:             RRETURN(MATCH_NOMATCH);
                   5173:           GETCHARINC(c, eptr);
                   5174:           switch(ctype)
                   5175:             {
1.1.1.3   misho    5176:             case OP_ANY:               /* This is the non-NL case */
                   5177:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5178:                 eptr >= md->end_subject &&
                   5179:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5180:                 NLBLOCK->nllen == 2 &&
                   5181:                 c == NLBLOCK->nl[0])
                   5182:               {
                   5183:               md->hitend = TRUE;
                   5184:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5185:               }
                   5186:             break;
                   5187: 
1.1       misho    5188:             case OP_ALLANY:
                   5189:             case OP_ANYBYTE:
                   5190:             break;
                   5191: 
                   5192:             case OP_ANYNL:
                   5193:             switch(c)
                   5194:               {
                   5195:               default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    5196:               case CHAR_CR:
        !          5197:               if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    5198:               break;
1.1.1.4 ! misho    5199: 
        !          5200:               case CHAR_LF:
1.1       misho    5201:               break;
                   5202: 
1.1.1.4 ! misho    5203:               case CHAR_VT:
        !          5204:               case CHAR_FF:
        !          5205:               case CHAR_NEL:
        !          5206: #ifndef EBCDIC
1.1       misho    5207:               case 0x2028:
                   5208:               case 0x2029:
1.1.1.4 ! misho    5209: #endif  /* Not EBCDIC */
1.1       misho    5210:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   5211:               break;
                   5212:               }
                   5213:             break;
                   5214: 
                   5215:             case OP_NOT_HSPACE:
                   5216:             switch(c)
                   5217:               {
1.1.1.4 ! misho    5218:               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    5219:               default: break;
                   5220:               }
                   5221:             break;
                   5222: 
                   5223:             case OP_HSPACE:
                   5224:             switch(c)
                   5225:               {
1.1.1.4 ! misho    5226:               HSPACE_CASES: break;
1.1       misho    5227:               default: RRETURN(MATCH_NOMATCH);
                   5228:               }
                   5229:             break;
                   5230: 
                   5231:             case OP_NOT_VSPACE:
                   5232:             switch(c)
                   5233:               {
1.1.1.4 ! misho    5234:               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    5235:               default: break;
                   5236:               }
                   5237:             break;
                   5238: 
                   5239:             case OP_VSPACE:
                   5240:             switch(c)
                   5241:               {
1.1.1.4 ! misho    5242:               VSPACE_CASES: break;
1.1       misho    5243:               default: RRETURN(MATCH_NOMATCH);
                   5244:               }
                   5245:             break;
                   5246: 
                   5247:             case OP_NOT_DIGIT:
                   5248:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   5249:               RRETURN(MATCH_NOMATCH);
                   5250:             break;
                   5251: 
                   5252:             case OP_DIGIT:
                   5253:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   5254:               RRETURN(MATCH_NOMATCH);
                   5255:             break;
                   5256: 
                   5257:             case OP_NOT_WHITESPACE:
                   5258:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   5259:               RRETURN(MATCH_NOMATCH);
                   5260:             break;
                   5261: 
                   5262:             case OP_WHITESPACE:
1.1.1.2   misho    5263:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
1.1       misho    5264:               RRETURN(MATCH_NOMATCH);
                   5265:             break;
                   5266: 
                   5267:             case OP_NOT_WORDCHAR:
                   5268:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   5269:               RRETURN(MATCH_NOMATCH);
                   5270:             break;
                   5271: 
                   5272:             case OP_WORDCHAR:
                   5273:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   5274:               RRETURN(MATCH_NOMATCH);
                   5275:             break;
                   5276: 
                   5277:             default:
                   5278:             RRETURN(PCRE_ERROR_INTERNAL);
                   5279:             }
                   5280:           }
                   5281:         }
                   5282:       else
                   5283: #endif
1.1.1.2   misho    5284:       /* Not UTF mode */
1.1       misho    5285:         {
                   5286:         for (fi = min;; fi++)
                   5287:           {
                   5288:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
                   5289:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5290:           if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5291:           if (eptr >= md->end_subject)
                   5292:             {
                   5293:             SCHECK_PARTIAL();
                   5294:             RRETURN(MATCH_NOMATCH);
                   5295:             }
                   5296:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   5297:             RRETURN(MATCH_NOMATCH);
                   5298:           c = *eptr++;
                   5299:           switch(ctype)
                   5300:             {
1.1.1.3   misho    5301:             case OP_ANY:               /* This is the non-NL case */
                   5302:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5303:                 eptr >= md->end_subject &&
                   5304:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5305:                 NLBLOCK->nllen == 2 &&
                   5306:                 c == NLBLOCK->nl[0])
                   5307:               {
                   5308:               md->hitend = TRUE;
                   5309:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5310:               }
                   5311:             break;
                   5312: 
1.1       misho    5313:             case OP_ALLANY:
                   5314:             case OP_ANYBYTE:
                   5315:             break;
                   5316: 
                   5317:             case OP_ANYNL:
                   5318:             switch(c)
                   5319:               {
                   5320:               default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    5321:               case CHAR_CR:
        !          5322:               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misho    5323:               break;
                   5324: 
1.1.1.4 ! misho    5325:               case CHAR_LF:
1.1       misho    5326:               break;
                   5327: 
1.1.1.4 ! misho    5328:               case CHAR_VT:
        !          5329:               case CHAR_FF:
        !          5330:               case CHAR_NEL:
        !          5331: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.1.1.2   misho    5332:               case 0x2028:
                   5333:               case 0x2029:
                   5334: #endif
1.1       misho    5335:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   5336:               break;
                   5337:               }
                   5338:             break;
                   5339: 
                   5340:             case OP_NOT_HSPACE:
                   5341:             switch(c)
                   5342:               {
                   5343:               default: break;
1.1.1.4 ! misho    5344:               HSPACE_BYTE_CASES:
        !          5345: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5346:               HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    5347: #endif
1.1       misho    5348:               RRETURN(MATCH_NOMATCH);
                   5349:               }
                   5350:             break;
                   5351: 
                   5352:             case OP_HSPACE:
                   5353:             switch(c)
                   5354:               {
                   5355:               default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    5356:               HSPACE_BYTE_CASES:
        !          5357: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5358:               HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    5359: #endif
1.1       misho    5360:               break;
                   5361:               }
                   5362:             break;
                   5363: 
                   5364:             case OP_NOT_VSPACE:
                   5365:             switch(c)
                   5366:               {
                   5367:               default: break;
1.1.1.4 ! misho    5368:               VSPACE_BYTE_CASES:
        !          5369: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5370:               VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    5371: #endif
1.1       misho    5372:               RRETURN(MATCH_NOMATCH);
                   5373:               }
                   5374:             break;
                   5375: 
                   5376:             case OP_VSPACE:
                   5377:             switch(c)
                   5378:               {
                   5379:               default: RRETURN(MATCH_NOMATCH);
1.1.1.4 ! misho    5380:               VSPACE_BYTE_CASES:
        !          5381: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5382:               VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    5383: #endif
1.1       misho    5384:               break;
                   5385:               }
                   5386:             break;
                   5387: 
                   5388:             case OP_NOT_DIGIT:
1.1.1.2   misho    5389:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5390:             break;
                   5391: 
                   5392:             case OP_DIGIT:
1.1.1.2   misho    5393:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5394:             break;
                   5395: 
                   5396:             case OP_NOT_WHITESPACE:
1.1.1.2   misho    5397:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5398:             break;
                   5399: 
                   5400:             case OP_WHITESPACE:
1.1.1.2   misho    5401:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5402:             break;
                   5403: 
                   5404:             case OP_NOT_WORDCHAR:
1.1.1.2   misho    5405:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5406:             break;
                   5407: 
                   5408:             case OP_WORDCHAR:
1.1.1.2   misho    5409:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5410:             break;
                   5411: 
                   5412:             default:
                   5413:             RRETURN(PCRE_ERROR_INTERNAL);
                   5414:             }
                   5415:           }
                   5416:         }
                   5417:       /* Control never gets here */
                   5418:       }
                   5419: 
                   5420:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5421:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5422:     UTF-8 and UCP stuff separate. */
                   5423: 
                   5424:     else
                   5425:       {
                   5426:       pp = eptr;  /* Remember where we started */
                   5427: 
                   5428: #ifdef SUPPORT_UCP
                   5429:       if (prop_type >= 0)
                   5430:         {
                   5431:         switch(prop_type)
                   5432:           {
                   5433:           case PT_ANY:
                   5434:           for (i = min; i < max; i++)
                   5435:             {
                   5436:             int len = 1;
                   5437:             if (eptr >= md->end_subject)
                   5438:               {
                   5439:               SCHECK_PARTIAL();
                   5440:               break;
                   5441:               }
                   5442:             GETCHARLENTEST(c, eptr, len);
                   5443:             if (prop_fail_result) break;
                   5444:             eptr+= len;
                   5445:             }
                   5446:           break;
                   5447: 
                   5448:           case PT_LAMP:
                   5449:           for (i = min; i < max; i++)
                   5450:             {
                   5451:             int chartype;
                   5452:             int len = 1;
                   5453:             if (eptr >= md->end_subject)
                   5454:               {
                   5455:               SCHECK_PARTIAL();
                   5456:               break;
                   5457:               }
                   5458:             GETCHARLENTEST(c, eptr, len);
                   5459:             chartype = UCD_CHARTYPE(c);
                   5460:             if ((chartype == ucp_Lu ||
                   5461:                  chartype == ucp_Ll ||
                   5462:                  chartype == ucp_Lt) == prop_fail_result)
                   5463:               break;
                   5464:             eptr+= len;
                   5465:             }
                   5466:           break;
                   5467: 
                   5468:           case PT_GC:
                   5469:           for (i = min; i < max; i++)
                   5470:             {
                   5471:             int len = 1;
                   5472:             if (eptr >= md->end_subject)
                   5473:               {
                   5474:               SCHECK_PARTIAL();
                   5475:               break;
                   5476:               }
                   5477:             GETCHARLENTEST(c, eptr, len);
                   5478:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
                   5479:             eptr+= len;
                   5480:             }
                   5481:           break;
                   5482: 
                   5483:           case PT_PC:
                   5484:           for (i = min; i < max; i++)
                   5485:             {
                   5486:             int len = 1;
                   5487:             if (eptr >= md->end_subject)
                   5488:               {
                   5489:               SCHECK_PARTIAL();
                   5490:               break;
                   5491:               }
                   5492:             GETCHARLENTEST(c, eptr, len);
                   5493:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
                   5494:             eptr+= len;
                   5495:             }
                   5496:           break;
                   5497: 
                   5498:           case PT_SC:
                   5499:           for (i = min; i < max; i++)
                   5500:             {
                   5501:             int len = 1;
                   5502:             if (eptr >= md->end_subject)
                   5503:               {
                   5504:               SCHECK_PARTIAL();
                   5505:               break;
                   5506:               }
                   5507:             GETCHARLENTEST(c, eptr, len);
                   5508:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
                   5509:             eptr+= len;
                   5510:             }
                   5511:           break;
                   5512: 
                   5513:           case PT_ALNUM:
                   5514:           for (i = min; i < max; i++)
                   5515:             {
                   5516:             int category;
                   5517:             int len = 1;
                   5518:             if (eptr >= md->end_subject)
                   5519:               {
                   5520:               SCHECK_PARTIAL();
                   5521:               break;
                   5522:               }
                   5523:             GETCHARLENTEST(c, eptr, len);
                   5524:             category = UCD_CATEGORY(c);
                   5525:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   5526:               break;
                   5527:             eptr+= len;
                   5528:             }
                   5529:           break;
                   5530: 
                   5531:           case PT_SPACE:    /* Perl space */
                   5532:           for (i = min; i < max; i++)
                   5533:             {
                   5534:             int len = 1;
                   5535:             if (eptr >= md->end_subject)
                   5536:               {
                   5537:               SCHECK_PARTIAL();
                   5538:               break;
                   5539:               }
                   5540:             GETCHARLENTEST(c, eptr, len);
                   5541:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5542:                  c == CHAR_FF || c == CHAR_CR)
                   5543:                  == prop_fail_result)
                   5544:               break;
                   5545:             eptr+= len;
                   5546:             }
                   5547:           break;
                   5548: 
                   5549:           case PT_PXSPACE:  /* POSIX space */
                   5550:           for (i = min; i < max; i++)
                   5551:             {
                   5552:             int len = 1;
                   5553:             if (eptr >= md->end_subject)
                   5554:               {
                   5555:               SCHECK_PARTIAL();
                   5556:               break;
                   5557:               }
                   5558:             GETCHARLENTEST(c, eptr, len);
                   5559:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   5560:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5561:                  == prop_fail_result)
                   5562:               break;
                   5563:             eptr+= len;
                   5564:             }
                   5565:           break;
                   5566: 
                   5567:           case PT_WORD:
                   5568:           for (i = min; i < max; i++)
                   5569:             {
                   5570:             int category;
                   5571:             int len = 1;
                   5572:             if (eptr >= md->end_subject)
                   5573:               {
                   5574:               SCHECK_PARTIAL();
                   5575:               break;
                   5576:               }
                   5577:             GETCHARLENTEST(c, eptr, len);
                   5578:             category = UCD_CATEGORY(c);
                   5579:             if ((category == ucp_L || category == ucp_N ||
                   5580:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5581:               break;
                   5582:             eptr+= len;
                   5583:             }
                   5584:           break;
                   5585: 
1.1.1.4 ! misho    5586:           case PT_CLIST:
        !          5587:           for (i = min; i < max; i++)
        !          5588:             {
        !          5589:             const pcre_uint32 *cp;
        !          5590:             int len = 1;
        !          5591:             if (eptr >= md->end_subject)
        !          5592:               {
        !          5593:               SCHECK_PARTIAL();
        !          5594:               break;
        !          5595:               }
        !          5596:             GETCHARLENTEST(c, eptr, len);
        !          5597:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          5598:             for (;;)
        !          5599:               {
        !          5600:               if (c < *cp)
        !          5601:                 { if (prop_fail_result) break; else goto GOT_MAX; }
        !          5602:               if (c == *cp++)
        !          5603:                 { if (prop_fail_result) goto GOT_MAX; else break; }
        !          5604:               }
        !          5605:             eptr += len;
        !          5606:             }
        !          5607:           GOT_MAX:
        !          5608:           break;
        !          5609: 
        !          5610:           case PT_UCNC:
        !          5611:           for (i = min; i < max; i++)
        !          5612:             {
        !          5613:             int len = 1;
        !          5614:             if (eptr >= md->end_subject)
        !          5615:               {
        !          5616:               SCHECK_PARTIAL();
        !          5617:               break;
        !          5618:               }
        !          5619:             GETCHARLENTEST(c, eptr, len);
        !          5620:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
        !          5621:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
        !          5622:                  c >= 0xe000) == prop_fail_result)
        !          5623:               break;
        !          5624:             eptr += len;
        !          5625:             }
        !          5626:           break;
        !          5627: 
1.1       misho    5628:           default:
                   5629:           RRETURN(PCRE_ERROR_INTERNAL);
                   5630:           }
                   5631: 
                   5632:         /* eptr is now past the end of the maximum run */
                   5633: 
1.1.1.4 ! misho    5634:         if (possessive) continue;    /* No backtracking */
1.1       misho    5635:         for(;;)
                   5636:           {
1.1.1.4 ! misho    5637:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    5638:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
                   5639:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.4 ! misho    5640:           eptr--;
1.1.1.2   misho    5641:           if (utf) BACKCHAR(eptr);
1.1       misho    5642:           }
                   5643:         }
                   5644: 
                   5645:       /* Match extended Unicode sequences. We will get here only if the
                   5646:       support is in the binary; otherwise a compile-time error occurs. */
                   5647: 
                   5648:       else if (ctype == OP_EXTUNI)
                   5649:         {
                   5650:         for (i = min; i < max; i++)
                   5651:           {
                   5652:           if (eptr >= md->end_subject)
                   5653:             {
                   5654:             SCHECK_PARTIAL();
                   5655:             break;
                   5656:             }
1.1.1.4 ! misho    5657:           else
1.1       misho    5658:             {
1.1.1.4 ! misho    5659:             int lgb, rgb;
        !          5660:             GETCHARINCTEST(c, eptr);
        !          5661:             lgb = UCD_GRAPHBREAK(c);
        !          5662:             while (eptr < md->end_subject)
        !          5663:               {
        !          5664:               int len = 1;
        !          5665:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5666:               rgb = UCD_GRAPHBREAK(c);
        !          5667:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          5668:               lgb = rgb;
        !          5669:               eptr += len;
        !          5670:               }
1.1       misho    5671:             }
1.1.1.3   misho    5672:           CHECK_PARTIAL();
1.1       misho    5673:           }
                   5674: 
                   5675:         /* eptr is now past the end of the maximum run */
                   5676: 
1.1.1.4 ! misho    5677:         if (possessive) continue;    /* No backtracking */
1.1       misho    5678:         for(;;)
                   5679:           {
1.1.1.4 ! misho    5680:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    5681:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
                   5682:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.4 ! misho    5683:           eptr--;
1.1       misho    5684:           for (;;)                        /* Move back over one extended */
                   5685:             {
1.1.1.2   misho    5686:             if (!utf) c = *eptr; else
1.1       misho    5687:               {
                   5688:               BACKCHAR(eptr);
                   5689:               GETCHAR(c, eptr);
                   5690:               }
                   5691:             if (UCD_CATEGORY(c) != ucp_M) break;
                   5692:             eptr--;
                   5693:             }
                   5694:           }
                   5695:         }
                   5696: 
                   5697:       else
                   5698: #endif   /* SUPPORT_UCP */
                   5699: 
1.1.1.2   misho    5700: #ifdef SUPPORT_UTF
                   5701:       if (utf)
1.1       misho    5702:         {
                   5703:         switch(ctype)
                   5704:           {
                   5705:           case OP_ANY:
                   5706:           if (max < INT_MAX)
                   5707:             {
                   5708:             for (i = min; i < max; i++)
                   5709:               {
                   5710:               if (eptr >= md->end_subject)
                   5711:                 {
                   5712:                 SCHECK_PARTIAL();
                   5713:                 break;
                   5714:                 }
                   5715:               if (IS_NEWLINE(eptr)) break;
1.1.1.3   misho    5716:               if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5717:                   eptr + 1 >= md->end_subject &&
                   5718:                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   5719:                   NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    5720:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    5721:                 {
                   5722:                 md->hitend = TRUE;
                   5723:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5724:                 }
1.1       misho    5725:               eptr++;
1.1.1.2   misho    5726:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5727:               }
                   5728:             }
                   5729: 
                   5730:           /* Handle unlimited UTF-8 repeat */
                   5731: 
                   5732:           else
                   5733:             {
                   5734:             for (i = min; i < max; i++)
                   5735:               {
                   5736:               if (eptr >= md->end_subject)
                   5737:                 {
                   5738:                 SCHECK_PARTIAL();
                   5739:                 break;
                   5740:                 }
                   5741:               if (IS_NEWLINE(eptr)) break;
1.1.1.3   misho    5742:               if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5743:                   eptr + 1 >= md->end_subject &&
                   5744:                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   5745:                   NLBLOCK->nllen == 2 &&
1.1.1.4 ! misho    5746:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
1.1.1.3   misho    5747:                 {
                   5748:                 md->hitend = TRUE;
                   5749:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5750:                 }
1.1       misho    5751:               eptr++;
1.1.1.2   misho    5752:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5753:               }
                   5754:             }
                   5755:           break;
                   5756: 
                   5757:           case OP_ALLANY:
                   5758:           if (max < INT_MAX)
                   5759:             {
                   5760:             for (i = min; i < max; i++)
                   5761:               {
                   5762:               if (eptr >= md->end_subject)
                   5763:                 {
                   5764:                 SCHECK_PARTIAL();
                   5765:                 break;
                   5766:                 }
                   5767:               eptr++;
1.1.1.2   misho    5768:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5769:               }
                   5770:             }
                   5771:           else
                   5772:             {
                   5773:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5774:             SCHECK_PARTIAL();
                   5775:             }
                   5776:           break;
                   5777: 
                   5778:           /* The byte case is the same as non-UTF8 */
                   5779: 
                   5780:           case OP_ANYBYTE:
                   5781:           c = max - min;
                   5782:           if (c > (unsigned int)(md->end_subject - eptr))
                   5783:             {
                   5784:             eptr = md->end_subject;
                   5785:             SCHECK_PARTIAL();
                   5786:             }
                   5787:           else eptr += c;
                   5788:           break;
                   5789: 
                   5790:           case OP_ANYNL:
                   5791:           for (i = min; i < max; i++)
                   5792:             {
                   5793:             int len = 1;
                   5794:             if (eptr >= md->end_subject)
                   5795:               {
                   5796:               SCHECK_PARTIAL();
                   5797:               break;
                   5798:               }
                   5799:             GETCHARLEN(c, eptr, len);
1.1.1.4 ! misho    5800:             if (c == CHAR_CR)
1.1       misho    5801:               {
                   5802:               if (++eptr >= md->end_subject) break;
1.1.1.4 ! misho    5803:               if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    5804:               }
                   5805:             else
                   5806:               {
1.1.1.4 ! misho    5807:               if (c != CHAR_LF &&
1.1       misho    5808:                   (md->bsr_anycrlf ||
1.1.1.4 ! misho    5809:                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
        !          5810: #ifndef EBCDIC
        !          5811:                     && c != 0x2028 && c != 0x2029
        !          5812: #endif  /* Not EBCDIC */
        !          5813:                     )))
1.1       misho    5814:                 break;
                   5815:               eptr += len;
                   5816:               }
                   5817:             }
                   5818:           break;
                   5819: 
                   5820:           case OP_NOT_HSPACE:
                   5821:           case OP_HSPACE:
                   5822:           for (i = min; i < max; i++)
                   5823:             {
                   5824:             BOOL gotspace;
                   5825:             int len = 1;
                   5826:             if (eptr >= md->end_subject)
                   5827:               {
                   5828:               SCHECK_PARTIAL();
                   5829:               break;
                   5830:               }
                   5831:             GETCHARLEN(c, eptr, len);
                   5832:             switch(c)
                   5833:               {
1.1.1.4 ! misho    5834:               HSPACE_CASES: gotspace = TRUE; break;
1.1       misho    5835:               default: gotspace = FALSE; break;
                   5836:               }
                   5837:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5838:             eptr += len;
                   5839:             }
                   5840:           break;
                   5841: 
                   5842:           case OP_NOT_VSPACE:
                   5843:           case OP_VSPACE:
                   5844:           for (i = min; i < max; i++)
                   5845:             {
                   5846:             BOOL gotspace;
                   5847:             int len = 1;
                   5848:             if (eptr >= md->end_subject)
                   5849:               {
                   5850:               SCHECK_PARTIAL();
                   5851:               break;
                   5852:               }
                   5853:             GETCHARLEN(c, eptr, len);
                   5854:             switch(c)
                   5855:               {
1.1.1.4 ! misho    5856:               VSPACE_CASES: gotspace = TRUE; break;
1.1       misho    5857:               default: gotspace = FALSE; break;
                   5858:               }
                   5859:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5860:             eptr += len;
                   5861:             }
                   5862:           break;
                   5863: 
                   5864:           case OP_NOT_DIGIT:
                   5865:           for (i = min; i < max; i++)
                   5866:             {
                   5867:             int len = 1;
                   5868:             if (eptr >= md->end_subject)
                   5869:               {
                   5870:               SCHECK_PARTIAL();
                   5871:               break;
                   5872:               }
                   5873:             GETCHARLEN(c, eptr, len);
                   5874:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5875:             eptr+= len;
                   5876:             }
                   5877:           break;
                   5878: 
                   5879:           case OP_DIGIT:
                   5880:           for (i = min; i < max; i++)
                   5881:             {
                   5882:             int len = 1;
                   5883:             if (eptr >= md->end_subject)
                   5884:               {
                   5885:               SCHECK_PARTIAL();
                   5886:               break;
                   5887:               }
                   5888:             GETCHARLEN(c, eptr, len);
                   5889:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5890:             eptr+= len;
                   5891:             }
                   5892:           break;
                   5893: 
                   5894:           case OP_NOT_WHITESPACE:
                   5895:           for (i = min; i < max; i++)
                   5896:             {
                   5897:             int len = 1;
                   5898:             if (eptr >= md->end_subject)
                   5899:               {
                   5900:               SCHECK_PARTIAL();
                   5901:               break;
                   5902:               }
                   5903:             GETCHARLEN(c, eptr, len);
                   5904:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5905:             eptr+= len;
                   5906:             }
                   5907:           break;
                   5908: 
                   5909:           case OP_WHITESPACE:
                   5910:           for (i = min; i < max; i++)
                   5911:             {
                   5912:             int len = 1;
                   5913:             if (eptr >= md->end_subject)
                   5914:               {
                   5915:               SCHECK_PARTIAL();
                   5916:               break;
                   5917:               }
                   5918:             GETCHARLEN(c, eptr, len);
                   5919:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5920:             eptr+= len;
                   5921:             }
                   5922:           break;
                   5923: 
                   5924:           case OP_NOT_WORDCHAR:
                   5925:           for (i = min; i < max; i++)
                   5926:             {
                   5927:             int len = 1;
                   5928:             if (eptr >= md->end_subject)
                   5929:               {
                   5930:               SCHECK_PARTIAL();
                   5931:               break;
                   5932:               }
                   5933:             GETCHARLEN(c, eptr, len);
                   5934:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5935:             eptr+= len;
                   5936:             }
                   5937:           break;
                   5938: 
                   5939:           case OP_WORDCHAR:
                   5940:           for (i = min; i < max; i++)
                   5941:             {
                   5942:             int len = 1;
                   5943:             if (eptr >= md->end_subject)
                   5944:               {
                   5945:               SCHECK_PARTIAL();
                   5946:               break;
                   5947:               }
                   5948:             GETCHARLEN(c, eptr, len);
                   5949:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5950:             eptr+= len;
                   5951:             }
                   5952:           break;
                   5953: 
                   5954:           default:
                   5955:           RRETURN(PCRE_ERROR_INTERNAL);
                   5956:           }
                   5957: 
1.1.1.4 ! misho    5958:         if (possessive) continue;    /* No backtracking */
1.1       misho    5959:         for(;;)
                   5960:           {
1.1.1.4 ! misho    5961:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    5962:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
                   5963:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.4 ! misho    5964:           eptr--;
1.1       misho    5965:           BACKCHAR(eptr);
1.1.1.4 ! misho    5966:           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
        !          5967:               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
1.1       misho    5968:           }
                   5969:         }
                   5970:       else
1.1.1.2   misho    5971: #endif  /* SUPPORT_UTF */
                   5972:       /* Not UTF mode */
1.1       misho    5973:         {
                   5974:         switch(ctype)
                   5975:           {
                   5976:           case OP_ANY:
                   5977:           for (i = min; i < max; i++)
                   5978:             {
                   5979:             if (eptr >= md->end_subject)
                   5980:               {
                   5981:               SCHECK_PARTIAL();
                   5982:               break;
                   5983:               }
                   5984:             if (IS_NEWLINE(eptr)) break;
1.1.1.3   misho    5985:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5986:                 eptr + 1 >= md->end_subject &&
                   5987:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5988:                 NLBLOCK->nllen == 2 &&
                   5989:                 *eptr == NLBLOCK->nl[0])
                   5990:               {
                   5991:               md->hitend = TRUE;
                   5992:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5993:               }
1.1       misho    5994:             eptr++;
                   5995:             }
                   5996:           break;
                   5997: 
                   5998:           case OP_ALLANY:
                   5999:           case OP_ANYBYTE:
                   6000:           c = max - min;
                   6001:           if (c > (unsigned int)(md->end_subject - eptr))
                   6002:             {
                   6003:             eptr = md->end_subject;
                   6004:             SCHECK_PARTIAL();
                   6005:             }
                   6006:           else eptr += c;
                   6007:           break;
                   6008: 
                   6009:           case OP_ANYNL:
                   6010:           for (i = min; i < max; i++)
                   6011:             {
                   6012:             if (eptr >= md->end_subject)
                   6013:               {
                   6014:               SCHECK_PARTIAL();
                   6015:               break;
                   6016:               }
                   6017:             c = *eptr;
1.1.1.4 ! misho    6018:             if (c == CHAR_CR)
1.1       misho    6019:               {
                   6020:               if (++eptr >= md->end_subject) break;
1.1.1.4 ! misho    6021:               if (*eptr == CHAR_LF) eptr++;
1.1       misho    6022:               }
                   6023:             else
                   6024:               {
1.1.1.4 ! misho    6025:               if (c != CHAR_LF && (md->bsr_anycrlf ||
        !          6026:                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
        !          6027: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          6028:                  && c != 0x2028 && c != 0x2029
1.1.1.2   misho    6029: #endif
1.1.1.4 ! misho    6030:                  ))) break;
1.1       misho    6031:               eptr++;
                   6032:               }
                   6033:             }
                   6034:           break;
                   6035: 
                   6036:           case OP_NOT_HSPACE:
                   6037:           for (i = min; i < max; i++)
                   6038:             {
                   6039:             if (eptr >= md->end_subject)
                   6040:               {
                   6041:               SCHECK_PARTIAL();
                   6042:               break;
                   6043:               }
1.1.1.4 ! misho    6044:             switch(*eptr)
        !          6045:               {
        !          6046:               default: eptr++; break;
        !          6047:               HSPACE_BYTE_CASES:
        !          6048: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          6049:               HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    6050: #endif
1.1.1.4 ! misho    6051:               goto ENDLOOP00;
        !          6052:               }
1.1       misho    6053:             }
1.1.1.4 ! misho    6054:           ENDLOOP00:
1.1       misho    6055:           break;
                   6056: 
                   6057:           case OP_HSPACE:
                   6058:           for (i = min; i < max; i++)
                   6059:             {
                   6060:             if (eptr >= md->end_subject)
                   6061:               {
                   6062:               SCHECK_PARTIAL();
                   6063:               break;
                   6064:               }
1.1.1.4 ! misho    6065:             switch(*eptr)
        !          6066:               {
        !          6067:               default: goto ENDLOOP01;
        !          6068:               HSPACE_BYTE_CASES:
        !          6069: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          6070:               HSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    6071: #endif
1.1.1.4 ! misho    6072:               eptr++; break;
        !          6073:               }
1.1       misho    6074:             }
1.1.1.4 ! misho    6075:           ENDLOOP01:
1.1       misho    6076:           break;
                   6077: 
                   6078:           case OP_NOT_VSPACE:
                   6079:           for (i = min; i < max; i++)
                   6080:             {
                   6081:             if (eptr >= md->end_subject)
                   6082:               {
                   6083:               SCHECK_PARTIAL();
                   6084:               break;
                   6085:               }
1.1.1.4 ! misho    6086:             switch(*eptr)
        !          6087:               {
        !          6088:               default: eptr++; break;
        !          6089:               VSPACE_BYTE_CASES:
        !          6090: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          6091:               VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    6092: #endif
1.1.1.4 ! misho    6093:               goto ENDLOOP02;
        !          6094:               }
1.1       misho    6095:             }
1.1.1.4 ! misho    6096:           ENDLOOP02:
1.1       misho    6097:           break;
                   6098: 
                   6099:           case OP_VSPACE:
                   6100:           for (i = min; i < max; i++)
                   6101:             {
                   6102:             if (eptr >= md->end_subject)
                   6103:               {
                   6104:               SCHECK_PARTIAL();
                   6105:               break;
                   6106:               }
1.1.1.4 ! misho    6107:             switch(*eptr)
        !          6108:               {
        !          6109:               default: goto ENDLOOP03;
        !          6110:               VSPACE_BYTE_CASES:
        !          6111: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          6112:               VSPACE_MULTIBYTE_CASES:
1.1.1.2   misho    6113: #endif
1.1.1.4 ! misho    6114:               eptr++; break;
        !          6115:               }
1.1       misho    6116:             }
1.1.1.4 ! misho    6117:           ENDLOOP03:
1.1       misho    6118:           break;
                   6119: 
                   6120:           case OP_NOT_DIGIT:
                   6121:           for (i = min; i < max; i++)
                   6122:             {
                   6123:             if (eptr >= md->end_subject)
                   6124:               {
                   6125:               SCHECK_PARTIAL();
                   6126:               break;
                   6127:               }
1.1.1.2   misho    6128:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misho    6129:             eptr++;
                   6130:             }
                   6131:           break;
                   6132: 
                   6133:           case OP_DIGIT:
                   6134:           for (i = min; i < max; i++)
                   6135:             {
                   6136:             if (eptr >= md->end_subject)
                   6137:               {
                   6138:               SCHECK_PARTIAL();
                   6139:               break;
                   6140:               }
1.1.1.2   misho    6141:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misho    6142:             eptr++;
                   6143:             }
                   6144:           break;
                   6145: 
                   6146:           case OP_NOT_WHITESPACE:
                   6147:           for (i = min; i < max; i++)
                   6148:             {
                   6149:             if (eptr >= md->end_subject)
                   6150:               {
                   6151:               SCHECK_PARTIAL();
                   6152:               break;
                   6153:               }
1.1.1.2   misho    6154:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misho    6155:             eptr++;
                   6156:             }
                   6157:           break;
                   6158: 
                   6159:           case OP_WHITESPACE:
                   6160:           for (i = min; i < max; i++)
                   6161:             {
                   6162:             if (eptr >= md->end_subject)
                   6163:               {
                   6164:               SCHECK_PARTIAL();
                   6165:               break;
                   6166:               }
1.1.1.2   misho    6167:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misho    6168:             eptr++;
                   6169:             }
                   6170:           break;
                   6171: 
                   6172:           case OP_NOT_WORDCHAR:
                   6173:           for (i = min; i < max; i++)
                   6174:             {
                   6175:             if (eptr >= md->end_subject)
                   6176:               {
                   6177:               SCHECK_PARTIAL();
                   6178:               break;
                   6179:               }
1.1.1.2   misho    6180:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misho    6181:             eptr++;
                   6182:             }
                   6183:           break;
                   6184: 
                   6185:           case OP_WORDCHAR:
                   6186:           for (i = min; i < max; i++)
                   6187:             {
                   6188:             if (eptr >= md->end_subject)
                   6189:               {
                   6190:               SCHECK_PARTIAL();
                   6191:               break;
                   6192:               }
1.1.1.2   misho    6193:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misho    6194:             eptr++;
                   6195:             }
                   6196:           break;
                   6197: 
                   6198:           default:
                   6199:           RRETURN(PCRE_ERROR_INTERNAL);
                   6200:           }
                   6201: 
1.1.1.4 ! misho    6202:         if (possessive) continue;    /* No backtracking */
        !          6203:         for (;;)
1.1       misho    6204:           {
1.1.1.4 ! misho    6205:           if (eptr == pp) goto TAIL_RECURSE;
1.1       misho    6206:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
                   6207:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   6208:           eptr--;
1.1.1.4 ! misho    6209:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
        !          6210:               eptr[-1] == CHAR_CR) eptr--;
1.1       misho    6211:           }
                   6212:         }
                   6213: 
                   6214:       /* Get here if we can't make it match with any permitted repetitions */
                   6215: 
                   6216:       RRETURN(MATCH_NOMATCH);
                   6217:       }
                   6218:     /* Control never gets here */
                   6219: 
                   6220:     /* There's been some horrible disaster. Arrival here can only mean there is
                   6221:     something seriously wrong in the code above or the OP_xxx definitions. */
                   6222: 
                   6223:     default:
                   6224:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   6225:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   6226:     }
                   6227: 
                   6228:   /* Do not stick any code in here without much thought; it is assumed
                   6229:   that "continue" in the code above comes out to here to repeat the main
                   6230:   loop. */
                   6231: 
                   6232:   }             /* End of main loop */
                   6233: /* Control never reaches here */
                   6234: 
                   6235: 
                   6236: /* When compiling to use the heap rather than the stack for recursive calls to
                   6237: match(), the RRETURN() macro jumps here. The number that is saved in
                   6238: frame->Xwhere indicates which label we actually want to return to. */
                   6239: 
                   6240: #ifdef NO_RECURSE
                   6241: #define LBL(val) case val: goto L_RM##val;
                   6242: HEAP_RETURN:
                   6243: switch (frame->Xwhere)
                   6244:   {
                   6245:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   6246:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   6247:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   6248:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   6249:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
                   6250:   LBL(65) LBL(66)
1.1.1.2   misho    6251: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
                   6252:   LBL(21)
                   6253: #endif
                   6254: #ifdef SUPPORT_UTF
                   6255:   LBL(16) LBL(18) LBL(20)
                   6256:   LBL(22) LBL(23) LBL(28) LBL(30)
1.1       misho    6257:   LBL(32) LBL(34) LBL(42) LBL(46)
                   6258: #ifdef SUPPORT_UCP
                   6259:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.1.1.4 ! misho    6260:   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
1.1       misho    6261: #endif  /* SUPPORT_UCP */
1.1.1.2   misho    6262: #endif  /* SUPPORT_UTF */
1.1       misho    6263:   default:
                   6264:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   6265:   return PCRE_ERROR_INTERNAL;
                   6266:   }
                   6267: #undef LBL
                   6268: #endif  /* NO_RECURSE */
                   6269: }
                   6270: 
                   6271: 
                   6272: /***************************************************************************
                   6273: ****************************************************************************
                   6274:                    RECURSION IN THE match() FUNCTION
                   6275: 
                   6276: Undefine all the macros that were defined above to handle this. */
                   6277: 
                   6278: #ifdef NO_RECURSE
                   6279: #undef eptr
                   6280: #undef ecode
                   6281: #undef mstart
                   6282: #undef offset_top
                   6283: #undef eptrb
                   6284: #undef flags
                   6285: 
                   6286: #undef callpat
                   6287: #undef charptr
                   6288: #undef data
                   6289: #undef next
                   6290: #undef pp
                   6291: #undef prev
                   6292: #undef saved_eptr
                   6293: 
                   6294: #undef new_recursive
                   6295: 
                   6296: #undef cur_is_word
                   6297: #undef condition
                   6298: #undef prev_is_word
                   6299: 
                   6300: #undef ctype
                   6301: #undef length
                   6302: #undef max
                   6303: #undef min
                   6304: #undef number
                   6305: #undef offset
                   6306: #undef op
                   6307: #undef save_capture_last
                   6308: #undef save_offset1
                   6309: #undef save_offset2
                   6310: #undef save_offset3
                   6311: #undef stacksave
                   6312: 
                   6313: #undef newptrb
                   6314: 
                   6315: #endif
                   6316: 
                   6317: /* These two are defined as macros in both cases */
                   6318: 
                   6319: #undef fc
                   6320: #undef fi
                   6321: 
                   6322: /***************************************************************************
                   6323: ***************************************************************************/
                   6324: 
                   6325: 
1.1.1.3   misho    6326: #ifdef NO_RECURSE
                   6327: /*************************************************
                   6328: *          Release allocated heap frames         *
                   6329: *************************************************/
                   6330: 
                   6331: /* This function releases all the allocated frames. The base frame is on the
                   6332: machine stack, and so must not be freed.
                   6333: 
                   6334: Argument: the address of the base frame
                   6335: Returns:  nothing
                   6336: */
                   6337: 
                   6338: static void                                       /* file-local helper; nothing is returned */
                   6339: release_match_heapframes (heapframe *frame_base)  /* frame_base: head of the Xnextframe chain */
                   6340: {
                   6341: heapframe *nextframe = frame_base->Xnextframe;    /* start AFTER the base frame, so the base itself is never released */
                   6342: while (nextframe != NULL)                         /* a NULL Xnextframe link ends the chain */
                   6343:   {
                   6344:   heapframe *oldframe = nextframe;                /* remember the frame that is about to be released */
                   6345:   nextframe = nextframe->Xnextframe;              /* fetch the link first: oldframe must not be read after release */
                   6346:   (PUBL(stack_free))(oldframe);                   /* release via the routine named by PUBL(stack_free) -- presumably the user-replaceable stack free hook; confirm against the PUBL definition */
                   6347:   }
                   6348: }                                                 /* NOTE: frame_base->Xnextframe is not reset here, so it still holds the old (now released) pointer */
                   6349: #endif
                   6350: 
1.1       misho    6351: 
                   6352: /*************************************************
                   6353: *         Execute a Regular Expression           *
                   6354: *************************************************/
                   6355: 
                   6356: /* This function applies a compiled re to a subject string and picks out
                   6357: portions of the string if it matches. Two elements in the vector are set for
                   6358: each substring: the offsets to the start and end of the substring.
                   6359: 
                   6360: Arguments:
                   6361:   argument_re     points to the compiled expression
                   6362:   extra_data      points to extra data or is NULL
                   6363:   subject         points to the subject string
                   6364:   length          length of subject string (may contain binary zeros)
                   6365:   start_offset    where to start in the subject string
                   6366:   options         option bits
                   6367:   offsets         points to a vector of ints to be filled in with offsets
                   6368:   offsetcount     the number of elements in the vector
                   6369: 
                   6370: Returns:          > 0 => success; value is the number of elements filled in
                   6371:                   = 0 => success, but offsets is not big enough
                   6372:                    -1 => failed to match
                   6373:                  < -1 => some kind of unexpected problem
                   6374: */
                   6375: 
1.1.1.4 ! misho    6376: #if defined COMPILE_PCRE8
1.1       misho    6377: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6378: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   6379:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   6380:   int offsetcount)
1.1.1.4 ! misho    6381: #elif defined COMPILE_PCRE16
1.1.1.2   misho    6382: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6383: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   6384:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   6385:   int offsetcount)
1.1.1.4 ! misho    6386: #elif defined COMPILE_PCRE32
        !          6387: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !          6388: pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
        !          6389:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
        !          6390:   int offsetcount)
1.1.1.2   misho    6391: #endif
1.1       misho    6392: {
                   6393: int rc, ocount, arg_offset_max;
                   6394: int newline;
                   6395: BOOL using_temporary_offsets = FALSE;
                   6396: BOOL anchored;
                   6397: BOOL startline;
                   6398: BOOL firstline;
1.1.1.2   misho    6399: BOOL utf;
                   6400: BOOL has_first_char = FALSE;
                   6401: BOOL has_req_char = FALSE;
                   6402: pcre_uchar first_char = 0;
                   6403: pcre_uchar first_char2 = 0;
                   6404: pcre_uchar req_char = 0;
                   6405: pcre_uchar req_char2 = 0;
1.1       misho    6406: match_data match_block;
                   6407: match_data *md = &match_block;
1.1.1.2   misho    6408: const pcre_uint8 *tables;
                   6409: const pcre_uint8 *start_bits = NULL;
                   6410: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
                   6411: PCRE_PUCHAR end_subject;
                   6412: PCRE_PUCHAR start_partial = NULL;
1.1.1.4 ! misho    6413: PCRE_PUCHAR match_partial;
1.1.1.2   misho    6414: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1       misho    6415: 
                   6416: const pcre_study_data *study;
1.1.1.2   misho    6417: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
1.1       misho    6418: 
1.1.1.3   misho    6419: #ifdef NO_RECURSE
                   6420: heapframe frame_zero;
                   6421: frame_zero.Xprevframe = NULL;            /* Marks the top level */
                   6422: frame_zero.Xnextframe = NULL;            /* None are allocated yet */
                   6423: md->match_frames_base = &frame_zero;
                   6424: #endif
                   6425: 
1.1.1.2   misho    6426: /* Check for the special magic call that measures the size of the stack used
1.1.1.3   misho    6427: per recursive call of match(). Without the funny casting for sizeof, a Windows
                   6428: compiler gave this error: "unary minus operator applied to unsigned type,
                   6429: result still unsigned". Hopefully the cast fixes that. */
1.1.1.2   misho    6430: 
                   6431: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
                   6432:     start_offset == -999)
                   6433: #ifdef NO_RECURSE
1.1.1.3   misho    6434:   return -((int)sizeof(heapframe));
1.1.1.2   misho    6435: #else
                   6436:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
                   6437: #endif
1.1       misho    6438: 
                   6439: /* Plausibility checks */
                   6440: 
                   6441: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.1.1.2   misho    6442: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
                   6443:   return PCRE_ERROR_NULL;
1.1       misho    6444: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.1.1.4 ! misho    6445: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.1       misho    6446: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   6447: 
1.1.1.2   misho    6448: /* Check that the first field in the block is the magic number. If it is not,
                   6449: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
                   6450: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
                   6451: means that the pattern is likely compiled with different endianness. */
                   6452: 
                   6453: if (re->magic_number != MAGIC_NUMBER)
                   6454:   return re->magic_number == REVERSED_MAGIC_NUMBER?
                   6455:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
                   6456: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
                   6457: 
1.1       misho    6458: /* These two settings are used in the code for checking a UTF-8 string that
                   6459: follows immediately afterwards. Other values in the md block are used only
                   6460: during "normal" pcre_exec() processing, not when the JIT support is in use,
                   6461: so they are set up later. */
                   6462: 
1.1.1.2   misho    6463: /* PCRE_UTF16 and PCRE_UTF32 have the same value as PCRE_UTF8. */
                   6464: utf = md->utf = (re->options & PCRE_UTF8) != 0;
1.1       misho    6465: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   6466:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
                   6467: 
                   6468: /* Check a UTF-8 string if required. Pass back the character offset and error
                   6469: code for an invalid string if a results vector is available. */
                   6470: 
1.1.1.2   misho    6471: #ifdef SUPPORT_UTF
                   6472: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1       misho    6473:   {
                   6474:   int erroroffset;
1.1.1.2   misho    6475:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
1.1       misho    6476:   if (errorcode != 0)
                   6477:     {
                   6478:     if (offsetcount >= 2)
                   6479:       {
                   6480:       offsets[0] = erroroffset;
                   6481:       offsets[1] = errorcode;
                   6482:       }
1.1.1.4 ! misho    6483: #if defined COMPILE_PCRE8
1.1       misho    6484:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
                   6485:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1.1.4 ! misho    6486: #elif defined COMPILE_PCRE16
        !          6487:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
        !          6488:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
        !          6489: #elif defined COMPILE_PCRE32
        !          6490:     return PCRE_ERROR_BADUTF32;
1.1.1.2   misho    6491: #endif
1.1       misho    6492:     }
1.1.1.4 ! misho    6493: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
1.1.1.2   misho    6494:   /* Check that a start_offset points to the start of a UTF character. */
1.1       misho    6495:   if (start_offset > 0 && start_offset < length &&
1.1.1.2   misho    6496:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1       misho    6497:     return PCRE_ERROR_BADUTF8_OFFSET;
1.1.1.4 ! misho    6498: #endif
1.1       misho    6499:   }
                   6500: #endif
                   6501: 
                   6502: /* If the pattern was successfully studied with JIT support, run the JIT
                   6503: executable instead of the rest of this function. Most options must be set at
                   6504: compile time for the JIT code to be usable. Fallback to the normal code path if
1.1.1.3   misho    6505: an unsupported flag is set. */
1.1       misho    6506: 
                   6507: #ifdef SUPPORT_JIT
                   6508: if (extra_data != NULL
1.1.1.3   misho    6509:     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
                   6510:                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
1.1       misho    6511:     && extra_data->executable_jit != NULL
1.1.1.4 ! misho    6512:     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
1.1.1.3   misho    6513:   {
1.1.1.4 ! misho    6514:   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
1.1.1.3   misho    6515:        start_offset, options, offsets, offsetcount);
                   6516: 
                   6517:   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
                   6518:   matching mode is not compiled. In this case we fall back to the interpreter. */
                   6519: 
1.1.1.4 ! misho    6520:   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
1.1.1.3   misho    6521:   }
1.1       misho    6522: #endif
                   6523: 
                   6524: /* Carry on with non-JIT matching. This information is for finding all the
                   6525: numbers associated with a given name, for condition testing. */
                   6526: 
1.1.1.2   misho    6527: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.1       misho    6528: md->name_count = re->name_count;
                   6529: md->name_entry_size = re->name_entry_size;
                   6530: 
                   6531: /* Fish out the optional data from the extra_data structure, first setting
                   6532: the default values. */
                   6533: 
                   6534: study = NULL;
                   6535: md->match_limit = MATCH_LIMIT;
                   6536: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6537: md->callout_data = NULL;
                   6538: 
                   6539: /* The table pointer is always in native byte order. */
                   6540: 
1.1.1.2   misho    6541: tables = re->tables;
1.1       misho    6542: 
1.1.1.4 ! misho    6543: /* The two limit values override the defaults, whatever their value. */
        !          6544: 
1.1       misho    6545: if (extra_data != NULL)
                   6546:   {
                   6547:   register unsigned int flags = extra_data->flags;
                   6548:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6549:     study = (const pcre_study_data *)extra_data->study_data;
                   6550:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6551:     md->match_limit = extra_data->match_limit;
                   6552:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6553:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6554:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6555:     md->callout_data = extra_data->callout_data;
                   6556:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6557:   }
                   6558: 
1.1.1.4 ! misho    6559: /* Limits in the regex override only if they are smaller. */
        !          6560: 
        !          6561: if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
        !          6562:   md->match_limit = re->limit_match;
        !          6563: 
        !          6564: if ((re->flags & PCRE_RLSET) != 0 &&
        !          6565:     re->limit_recursion < md->match_limit_recursion)
        !          6566:   md->match_limit_recursion = re->limit_recursion;
        !          6567: 
1.1       misho    6568: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6569: is a feature that makes it possible to save compiled regex and re-use them
                   6570: in other programs later. */
                   6571: 
1.1.1.2   misho    6572: if (tables == NULL) tables = PRIV(default_tables);
1.1       misho    6573: 
                   6574: /* Set up other data */
                   6575: 
                   6576: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6577: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6578: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6579: 
                   6580: /* The code starts after the real_pcre block and the capture name table. */
                   6581: 
1.1.1.2   misho    6582: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1       misho    6583:   re->name_count * re->name_entry_size;
                   6584: 
1.1.1.2   misho    6585: md->start_subject = (PCRE_PUCHAR)subject;
1.1       misho    6586: md->start_offset = start_offset;
                   6587: md->end_subject = md->start_subject + length;
                   6588: end_subject = md->end_subject;
                   6589: 
                   6590: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   6591: md->use_ucp = (re->options & PCRE_UCP) != 0;
                   6592: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.1.1.4 ! misho    6593: md->ignore_skip_arg = 0;
1.1       misho    6594: 
                   6595: /* Some options are unpacked into BOOL variables in the hope that testing
                   6596: them will be faster than individual option bits. */
                   6597: 
                   6598: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6599: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6600: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   6601: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
                   6602: 
                   6603: md->hitend = FALSE;
                   6604: md->mark = md->nomatch_mark = NULL;     /* In case never set */
                   6605: 
                   6606: md->recursive = NULL;                   /* No recursion at top level */
                   6607: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
                   6608: 
                   6609: md->lcc = tables + lcc_offset;
1.1.1.2   misho    6610: md->fcc = tables + fcc_offset;
1.1       misho    6611: md->ctypes = tables + ctypes_offset;
                   6612: 
                   6613: /* Handle different \R options. */
                   6614: 
                   6615: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6616:   {
                   6617:   case 0:
                   6618:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6619:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6620:   else
                   6621: #ifdef BSR_ANYCRLF
                   6622:   md->bsr_anycrlf = TRUE;
                   6623: #else
                   6624:   md->bsr_anycrlf = FALSE;
                   6625: #endif
                   6626:   break;
                   6627: 
                   6628:   case PCRE_BSR_ANYCRLF:
                   6629:   md->bsr_anycrlf = TRUE;
                   6630:   break;
                   6631: 
                   6632:   case PCRE_BSR_UNICODE:
                   6633:   md->bsr_anycrlf = FALSE;
                   6634:   break;
                   6635: 
                   6636:   default: return PCRE_ERROR_BADNEWLINE;
                   6637:   }
                   6638: 
                   6639: /* Handle different types of newline. The three bits give eight cases. If
                   6640: nothing is set at run time, whatever was used at compile time applies. */
                   6641: 
                   6642: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6643:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6644:   {
                   6645:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   6646:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6647:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   6648:   case PCRE_NEWLINE_CR+
                   6649:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   6650:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6651:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6652:   default: return PCRE_ERROR_BADNEWLINE;
                   6653:   }
                   6654: 
                   6655: if (newline == -2)
                   6656:   {
                   6657:   md->nltype = NLTYPE_ANYCRLF;
                   6658:   }
                   6659: else if (newline < 0)
                   6660:   {
                   6661:   md->nltype = NLTYPE_ANY;
                   6662:   }
                   6663: else
                   6664:   {
                   6665:   md->nltype = NLTYPE_FIXED;
                   6666:   if (newline > 255)
                   6667:     {
                   6668:     md->nllen = 2;
                   6669:     md->nl[0] = (newline >> 8) & 255;
                   6670:     md->nl[1] = newline & 255;
                   6671:     }
                   6672:   else
                   6673:     {
                   6674:     md->nllen = 1;
                   6675:     md->nl[0] = newline;
                   6676:     }
                   6677:   }
                   6678: 
                   6679: /* Partial matching was originally supported only for a restricted set of
                   6680: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6681: defined (though never set). So there's no harm in leaving this code. */
                   6682: 
                   6683: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6684:   return PCRE_ERROR_BADPARTIAL;
                   6685: 
                   6686: /* If the expression has got more back references than the offsets supplied can
                   6687: hold, we get a temporary chunk of working store to use during the matching.
                   6688: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6689: of 3. */
                   6690: 
                   6691: ocount = offsetcount - (offsetcount % 3);
                   6692: arg_offset_max = (2*ocount)/3;
                   6693: 
                   6694: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6695:   {
                   6696:   ocount = re->top_backref * 3 + 3;
1.1.1.2   misho    6697:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1       misho    6698:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6699:   using_temporary_offsets = TRUE;
                   6700:   DPRINTF(("Got memory to hold back references\n"));
                   6701:   }
                   6702: else md->offset_vector = offsets;
                   6703: md->offset_end = ocount;
                   6704: md->offset_max = (2*ocount)/3;
1.1.1.4 ! misho    6705: md->capture_last = 0;
1.1       misho    6706: 
                   6707: /* Reset the working variable associated with each extraction. These should
                   6708: never be used unless previously set, but they get saved and restored, and so we
                   6709: initialize them to avoid reading uninitialized locations. Also, unset the
                   6710: offsets for the matched string. This is really just for tidiness with callouts,
                   6711: in case they inspect these fields. */
                   6712: 
                   6713: if (md->offset_vector != NULL)
                   6714:   {
                   6715:   register int *iptr = md->offset_vector + ocount;
                   6716:   register int *iend = iptr - re->top_bracket;
                   6717:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
                   6718:   while (--iptr >= iend) *iptr = -1;
                   6719:   md->offset_vector[0] = md->offset_vector[1] = -1;
                   6720:   }
                   6721: 
1.1.1.2   misho    6722: /* Set up the first character to match, if available. The first_char value is
1.1       misho    6723: never set for an anchored regular expression, but the anchoring may be forced
                   6724: at run time, so we have to test for anchoring. The first char may be unset for
                   6725: an unanchored pattern, of course. If there's no first char and the pattern was
                   6726: studied, there may be a bitmap of possible first characters. */
                   6727: 
                   6728: if (!anchored)
                   6729:   {
                   6730:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6731:     {
1.1.1.2   misho    6732:     has_first_char = TRUE;
                   6733:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   6734:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   6735:       {
                   6736:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
                   6737: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6738:       if (utf && first_char > 127)
                   6739:         first_char2 = UCD_OTHERCASE(first_char);
                   6740: #endif
                   6741:       }
1.1       misho    6742:     }
                   6743:   else
                   6744:     if (!startline && study != NULL &&
                   6745:       (study->flags & PCRE_STUDY_MAPPED) != 0)
                   6746:         start_bits = study->start_bits;
                   6747:   }
                   6748: 
                   6749: /* For anchored or unanchored matches, there may be a "last known required
                   6750: character" set. */
                   6751: 
                   6752: if ((re->flags & PCRE_REQCHSET) != 0)
                   6753:   {
1.1.1.2   misho    6754:   has_req_char = TRUE;
                   6755:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   6756:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   6757:     {
                   6758:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
                   6759: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6760:     if (utf && req_char > 127)
                   6761:       req_char2 = UCD_OTHERCASE(req_char);
                   6762: #endif
                   6763:     }
1.1       misho    6764:   }
                   6765: 
                   6766: 
                   6767: /* ==========================================================================*/
                   6768: 
                   6769: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
                   6770: the loop runs just once. */
                   6771: 
                   6772: for(;;)
                   6773:   {
1.1.1.2   misho    6774:   PCRE_PUCHAR save_end_subject = end_subject;
                   6775:   PCRE_PUCHAR new_start_match;
1.1       misho    6776: 
                   6777:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6778:   line of a multiline string. That is, the match must be before or at the first
                   6779:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6780:   scanning at a newline. If the match fails at the newline, later code breaks
                   6781:   this loop. */
                   6782: 
                   6783:   if (firstline)
                   6784:     {
1.1.1.2   misho    6785:     PCRE_PUCHAR t = start_match;
                   6786: #ifdef SUPPORT_UTF
                   6787:     if (utf)
1.1       misho    6788:       {
                   6789:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6790:         {
                   6791:         t++;
1.1.1.2   misho    6792:         ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    6793:         }
                   6794:       }
                   6795:     else
                   6796: #endif
                   6797:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6798:     end_subject = t;
                   6799:     }
                   6800: 
                   6801:   /* There are some optimizations that avoid running the match if a known
                   6802:   starting point is not found, or if a known later character is not present.
                   6803:   However, there is an option that disables these, for testing and for ensuring
                   6804:   that all callouts do actually occur. The option can be set in the regex by
                   6805:   (*NO_START_OPT) or passed in match-time options. */
                   6806: 
                   6807:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   6808:     {
1.1.1.2   misho    6809:     /* Advance to a unique first char if there is one. */
1.1       misho    6810: 
1.1.1.2   misho    6811:     if (has_first_char)
1.1       misho    6812:       {
1.1.1.4 ! misho    6813:       pcre_uchar smc;
        !          6814: 
1.1.1.2   misho    6815:       if (first_char != first_char2)
                   6816:         while (start_match < end_subject &&
1.1.1.4 ! misho    6817:           (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
1.1       misho    6818:           start_match++;
                   6819:       else
1.1.1.4 ! misho    6820:         while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
1.1       misho    6821:           start_match++;
                   6822:       }
                   6823: 
                   6824:     /* Or to just after a linebreak for a multiline match */
                   6825: 
                   6826:     else if (startline)
                   6827:       {
                   6828:       if (start_match > md->start_subject + start_offset)
                   6829:         {
1.1.1.2   misho    6830: #ifdef SUPPORT_UTF
                   6831:         if (utf)
1.1       misho    6832:           {
                   6833:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6834:             {
                   6835:             start_match++;
1.1.1.2   misho    6836:             ACROSSCHAR(start_match < end_subject, *start_match,
                   6837:               start_match++);
1.1       misho    6838:             }
                   6839:           }
                   6840:         else
                   6841: #endif
                   6842:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6843:           start_match++;
                   6844: 
                   6845:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6846:         and we are now at a LF, advance the match position by one more character.
                   6847:         */
                   6848: 
                   6849:         if (start_match[-1] == CHAR_CR &&
                   6850:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6851:              start_match < end_subject &&
1.1.1.4 ! misho    6852:              RAWUCHARTEST(start_match) == CHAR_NL)
1.1       misho    6853:           start_match++;
                   6854:         }
                   6855:       }
                   6856: 
                   6857:     /* Or to a non-unique first byte after study */
                   6858: 
                   6859:     else if (start_bits != NULL)
                   6860:       {
                   6861:       while (start_match < end_subject)
                   6862:         {
1.1.1.4 ! misho    6863:         register pcre_uint32 c = RAWUCHARTEST(start_match);
1.1.1.2   misho    6864: #ifndef COMPILE_PCRE8
                   6865:         if (c > 255) c = 255;
                   6866: #endif
1.1       misho    6867:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   6868:           {
                   6869:           start_match++;
1.1.1.2   misho    6870: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
                   6871:           /* In non 8-bit mode, the iteration will stop for
                   6872:           characters > 255 at the beginning or not stop at all. */
                   6873:           if (utf)
                   6874:             ACROSSCHAR(start_match < end_subject, *start_match,
                   6875:               start_match++);
1.1       misho    6876: #endif
                   6877:           }
                   6878:         else break;
                   6879:         }
                   6880:       }
                   6881:     }   /* Starting optimizations */
                   6882: 
                   6883:   /* Restore fudged end_subject */
                   6884: 
                   6885:   end_subject = save_end_subject;
                   6886: 
                   6887:   /* The following two optimizations are disabled for partial matching or if
                   6888:   disabling is explicitly requested. */
                   6889: 
                   6890:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
                   6891:     {
                   6892:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6893:     a lower bound; no actual string of that length may actually match the
                   6894:     pattern. Although the value is, strictly, in characters, we treat it as
                   6895:     bytes to avoid spending too much time in this optimization. */
                   6896: 
                   6897:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6898:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6899:       {
                   6900:       rc = MATCH_NOMATCH;
                   6901:       break;
                   6902:       }
                   6903: 
1.1.1.2   misho    6904:     /* If req_char is set, we know that that character must appear in the
                   6905:     subject for the match to succeed. If the first character is set, req_char
1.1       misho    6906:     must be later in the subject; otherwise the test starts at the match point.
                   6907:     This optimization can save a huge amount of backtracking in patterns with
                   6908:     nested unlimited repeats that aren't going to match. Writing separate code
                   6909:     for cased/caseless versions makes it go faster, as does using an
                   6910:     autoincrement and backing off on a match.
                   6911: 
                   6912:     HOWEVER: when the subject string is very, very long, searching to its end
                   6913:     can take a long time, and give bad performance on quite ordinary patterns.
                   6914:     This showed up when somebody was matching something like /^\d+C/ on a
                   6915:     32-megabyte string... so we don't do this when the string is sufficiently
                   6916:     long. */
                   6917: 
1.1.1.2   misho    6918:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1       misho    6919:       {
1.1.1.2   misho    6920:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.1       misho    6921: 
                   6922:       /* We don't need to repeat the search if we haven't yet reached the
                   6923:       place we found it at last time. */
                   6924: 
1.1.1.2   misho    6925:       if (p > req_char_ptr)
1.1       misho    6926:         {
1.1.1.2   misho    6927:         if (req_char != req_char2)
1.1       misho    6928:           {
                   6929:           while (p < end_subject)
                   6930:             {
1.1.1.4 ! misho    6931:             register pcre_uint32 pp = RAWUCHARINCTEST(p);
1.1.1.2   misho    6932:             if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    6933:             }
                   6934:           }
                   6935:         else
                   6936:           {
                   6937:           while (p < end_subject)
                   6938:             {
1.1.1.4 ! misho    6939:             if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
1.1       misho    6940:             }
                   6941:           }
                   6942: 
                   6943:         /* If we can't find the required character, break the matching loop,
                   6944:         forcing a match failure. */
                   6945: 
                   6946:         if (p >= end_subject)
                   6947:           {
                   6948:           rc = MATCH_NOMATCH;
                   6949:           break;
                   6950:           }
                   6951: 
                   6952:         /* If we have found the required character, save the point where we
                   6953:         found it, so that we don't search again next time round the loop if
                   6954:         the start hasn't passed this character yet. */
                   6955: 
1.1.1.2   misho    6956:         req_char_ptr = p;
1.1       misho    6957:         }
                   6958:       }
                   6959:     }
                   6960: 
                   6961: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6962:   printf(">>>> Match against: ");
                   6963:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6964:   printf("\n");
                   6965: #endif
                   6966: 
                   6967:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6968:   first starting point for which a partial match was found. */
                   6969: 
                   6970:   md->start_match_ptr = start_match;
                   6971:   md->start_used_ptr = start_match;
                   6972:   md->match_call_count = 0;
                   6973:   md->match_function_type = 0;
                   6974:   md->end_offset_top = 0;
1.1.1.4 ! misho    6975:   md->skip_arg_count = 0;
1.1       misho    6976:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
1.1.1.4 ! misho    6977:   if (md->hitend && start_partial == NULL)
        !          6978:     {
        !          6979:     start_partial = md->start_used_ptr;
        !          6980:     match_partial = start_match;
        !          6981:     }
1.1       misho    6982: 
                   6983:   switch(rc)
                   6984:     {
                   6985:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6986:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
                   6987:     entirely. The only way we can do that is to re-do the match at the same
                   6988:     point, with a flag to force SKIP with an argument to be ignored. Just
                   6989:     treating this case as NOMATCH does not work because it does not check other
                   6990:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
                   6991: 
                   6992:     case MATCH_SKIP_ARG:
                   6993:     new_start_match = start_match;
1.1.1.4 ! misho    6994:     md->ignore_skip_arg = md->skip_arg_count;
1.1       misho    6995:     break;
                   6996: 
1.1.1.4 ! misho    6997:     /* SKIP passes back the next starting point explicitly, but if it is no
        !          6998:     greater than the start of the match we have just done, treat it as NOMATCH. */
1.1       misho    6999: 
                   7000:     case MATCH_SKIP:
1.1.1.4 ! misho    7001:     if (md->start_match_ptr > start_match)
1.1       misho    7002:       {
                   7003:       new_start_match = md->start_match_ptr;
                   7004:       break;
                   7005:       }
                   7006:     /* Fall through */
                   7007: 
                   7008:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
1.1.1.4 ! misho    7009:     exactly like PRUNE. Unset ignore SKIP-with-argument. */
1.1       misho    7010: 
                   7011:     case MATCH_NOMATCH:
                   7012:     case MATCH_PRUNE:
                   7013:     case MATCH_THEN:
1.1.1.4 ! misho    7014:     md->ignore_skip_arg = 0;
1.1       misho    7015:     new_start_match = start_match + 1;
1.1.1.2   misho    7016: #ifdef SUPPORT_UTF
                   7017:     if (utf)
                   7018:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
                   7019:         new_start_match++);
1.1       misho    7020: #endif
                   7021:     break;
                   7022: 
                   7023:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   7024: 
                   7025:     case MATCH_COMMIT:
                   7026:     rc = MATCH_NOMATCH;
                   7027:     goto ENDLOOP;
                   7028: 
                   7029:     /* Any other return is either a match, or some kind of error. */
                   7030: 
                   7031:     default:
                   7032:     goto ENDLOOP;
                   7033:     }
                   7034: 
                   7035:   /* Control reaches here for the various types of "no match at this point"
                   7036:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   7037: 
                   7038:   rc = MATCH_NOMATCH;
                   7039: 
                   7040:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   7041:   newline in the subject (though it may continue over the newline). Therefore,
                   7042:   if we have just failed to match, starting at a newline, do not continue. */
                   7043: 
                   7044:   if (firstline && IS_NEWLINE(start_match)) break;
                   7045: 
                   7046:   /* Advance to new matching position */
                   7047: 
                   7048:   start_match = new_start_match;
                   7049: 
                   7050:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   7051:   the subject. */
                   7052: 
                   7053:   if (anchored || start_match > end_subject) break;
                   7054: 
                   7055:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   7056:   not contain any explicit matches for \r or \n, and the newline option is CRLF
1.1.1.2   misho    7057:   or ANY or ANYCRLF, advance the match position by one more character. In
                   7058:   normal matching start_match will always be greater than the first position at
                   7059:   this stage, but a failed *SKIP can cause a return at the same point, which is
                   7060:   why the first test exists. */
1.1       misho    7061: 
1.1.1.2   misho    7062:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
                   7063:       start_match[-1] == CHAR_CR &&
1.1       misho    7064:       start_match < end_subject &&
                   7065:       *start_match == CHAR_NL &&
                   7066:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   7067:         (md->nltype == NLTYPE_ANY ||
                   7068:          md->nltype == NLTYPE_ANYCRLF ||
                   7069:          md->nllen == 2))
                   7070:     start_match++;
                   7071: 
                   7072:   md->mark = NULL;   /* Reset for start of next match attempt */
                   7073:   }                  /* End of for(;;) "bumpalong" loop */
                   7074: 
                   7075: /* ==========================================================================*/
                   7076: 
                   7077: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   7078: conditions is true:
                   7079: 
                   7080: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   7081: 
                   7082: (2) We are past the end of the subject;
                   7083: 
                   7084: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   7085:     this option requests that a match occur at or before the first newline in
                   7086:     the subject.
                   7087: 
                   7088: When we have a match and the offset vector is big enough to deal with any
                   7089: backreferences, captured substring offsets will already be set up. In the case
                   7090: where we had to get some local store to hold offsets for backreference
                   7091: processing, copy those that we can. In this case there need not be overflow if
                   7092: certain parts of the pattern were not used, even though there are more
                   7093: capturing parentheses than vector slots. */
                   7094: 
                   7095: ENDLOOP:
                   7096: 
                   7097: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
                   7098:   {
                   7099:   if (using_temporary_offsets)
                   7100:     {
                   7101:     if (arg_offset_max >= 4)
                   7102:       {
                   7103:       memcpy(offsets + 2, md->offset_vector + 2,
                   7104:         (arg_offset_max - 2) * sizeof(int));
                   7105:       DPRINTF(("Copied offsets from temporary memory\n"));
                   7106:       }
1.1.1.4 ! misho    7107:     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
1.1       misho    7108:     DPRINTF(("Freeing temporary memory\n"));
1.1.1.2   misho    7109:     (PUBL(free))(md->offset_vector);
1.1       misho    7110:     }
                   7111: 
                   7112:   /* Set the return code to the number of captured strings, or 0 if there were
                   7113:   too many to fit into the vector. */
                   7114: 
1.1.1.4 ! misho    7115:   rc = ((md->capture_last & OVFLBIT) != 0 &&
        !          7116:          md->end_offset_top >= arg_offset_max)?
1.1       misho    7117:     0 : md->end_offset_top/2;
                   7118: 
                   7119:   /* If there is space in the offset vector, set any unused pairs at the end of
                   7120:   the pattern to -1 for backwards compatibility. It is documented that this
                   7121:   happens. In earlier versions, the whole set of potential capturing offsets
                   7122:   was set to -1 each time round the loop, but this is handled differently now.
                   7123:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
                   7124:   those at the end that need unsetting here. We can't just unset them all at
                   7125:   the start of the whole thing because they may get set in one branch that is
                   7126:   not the final matching branch. */
                   7127: 
                   7128:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
                   7129:     {
                   7130:     register int *iptr, *iend;
                   7131:     int resetcount = 2 + re->top_bracket * 2;
1.1.1.3   misho    7132:     if (resetcount > offsetcount) resetcount = offsetcount;
1.1       misho    7133:     iptr = offsets + md->end_offset_top;
                   7134:     iend = offsets + resetcount;
                   7135:     while (iptr < iend) *iptr++ = -1;
                   7136:     }
                   7137: 
                   7138:   /* If there is space, set up the whole thing as substring 0. The value of
                   7139:   md->start_match_ptr might be modified if \K was encountered on the successful
                   7140:   matching path. */
                   7141: 
                   7142:   if (offsetcount < 2) rc = 0; else
                   7143:     {
                   7144:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   7145:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
                   7146:     }
                   7147: 
                   7148:   /* Return MARK data if requested */
                   7149: 
                   7150:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2   misho    7151:     *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1       misho    7152:   DPRINTF((">>>> returning %d\n", rc));
1.1.1.3   misho    7153: #ifdef NO_RECURSE
                   7154:   release_match_heapframes(&frame_zero);
                   7155: #endif
1.1       misho    7156:   return rc;
                   7157:   }
                   7158: 
                   7159: /* Control gets here if there has been an error, or if the overall match
                   7160: attempt has failed at all permitted starting positions. */
                   7161: 
                   7162: if (using_temporary_offsets)
                   7163:   {
                   7164:   DPRINTF(("Freeing temporary memory\n"));
1.1.1.2   misho    7165:   (PUBL(free))(md->offset_vector);
1.1       misho    7166:   }
                   7167: 
                   7168: /* For anything other than nomatch or partial match, just return the code. */
                   7169: 
                   7170: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
                   7171:   {
                   7172:   DPRINTF((">>>> error: returning %d\n", rc));
1.1.1.3   misho    7173: #ifdef NO_RECURSE
                   7174:   release_match_heapframes(&frame_zero);
                   7175: #endif
1.1       misho    7176:   return rc;
                   7177:   }
                   7178: 
                   7179: /* Handle partial matches - disable any mark data */
                   7180: 
                   7181: if (start_partial != NULL)
                   7182:   {
                   7183:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   7184:   md->mark = NULL;
                   7185:   if (offsetcount > 1)
                   7186:     {
1.1.1.2   misho    7187:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
                   7188:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.1.1.4 ! misho    7189:     if (offsetcount > 2)
        !          7190:       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
1.1       misho    7191:     }
                   7192:   rc = PCRE_ERROR_PARTIAL;
                   7193:   }
                   7194: 
                   7195: /* This is the classic nomatch case */
                   7196: 
                   7197: else
                   7198:   {
                   7199:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   7200:   rc = PCRE_ERROR_NOMATCH;
                   7201:   }
                   7202: 
                   7203: /* Return the MARK data if it has been requested. */
                   7204: 
                   7205: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2   misho    7206:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
1.1.1.3   misho    7207: #ifdef NO_RECURSE
                   7208:   release_match_heapframes(&frame_zero);
                   7209: #endif
1.1       misho    7210: return rc;
                   7211: }
                   7212: 
                   7213: /* End of pcre_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>