Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_exec.c, revision 1.1.1.2

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.1.1.2 ! misho       9:            Copyright (c) 1997-2012 University of Cambridge
1.1       misho      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: /* This module contains pcre_exec(), the externally visible function that does
                     41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     42: possible. There are also some static supporting functions. */
                     43: 
1.1.1.2 ! misho      44: #ifdef HAVE_CONFIG_H
1.1       misho      45: #include "config.h"
1.1.1.2 ! misho      46: #endif
1.1       misho      47: 
                     48: #define NLBLOCK md             /* Block containing newline information */
                     49: #define PSSTART start_subject  /* Field containing processed string start */
                     50: #define PSEND   end_subject    /* Field containing processed string end */
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: /* Undefine some potentially clashing cpp symbols */
                     55: 
                     56: #undef min
                     57: #undef max
                     58: 
1.1.1.2 ! misho      59: /* Values for setting in md->match_function_type to indicate two special types
        !            60: of call to match(). We do it this way to save on using another stack variable,
        !            61: as stack usage is to be discouraged. */
1.1       misho      62: 
1.1.1.2 ! misho      63: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
        !            64: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
1.1       misho      65: 
                     66: /* Non-error returns from the match() function. Error returns are externally
                     67: defined PCRE_ERROR_xxx codes, which are all negative. */
                     68: 
                     69: #define MATCH_MATCH        1
                     70: #define MATCH_NOMATCH      0
                     71: 
                     72: /* Special internal returns from the match() function. Make them sufficiently
                     73: negative to avoid the external error codes. */
                     74: 
                     75: #define MATCH_ACCEPT       (-999)
                     76: #define MATCH_COMMIT       (-998)
1.1.1.2 ! misho      77: #define MATCH_KETRPOS      (-997)
        !            78: #define MATCH_ONCE         (-996)
        !            79: #define MATCH_PRUNE        (-995)
        !            80: #define MATCH_SKIP         (-994)
        !            81: #define MATCH_SKIP_ARG     (-993)
        !            82: #define MATCH_THEN         (-992)
1.1       misho      83: 
                     84: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     85: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     86: because the offset vector is always a multiple of 3 long. */
                     87: 
                     88: #define REC_STACK_SAVE_MAX 30
                     89: 
                     90: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     91: 
                     92: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* Minimum repeat count for each entry */
                     93: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* Maximum for each entry; 0 => infinity. The six entries pair up as {0,inf} {1,inf} {0,1}, presumably the greedy and minimizing forms of *, + and ? - TODO confirm against the opcode order */
                     94: 
                     95: #ifdef PCRE_DEBUG
                     96: /*************************************************
                     97: *        Debugging function to print chars       *
                     98: *************************************************/
                     99: 
                    100: /* Print a sequence of chars in printable format, stopping at the end of the
                    101: subject if requested.
                    102: 
                    103: Arguments:
                    104:   p           points to characters
                    105:   length      number to print
                    106:   is_subject  TRUE if printing from within md->start_subject
                    107:   md          pointer to matching data block, if is_subject is TRUE
                    108: 
                    109: Returns:     nothing
                    110: */
                    111: 
                    112: static void
1.1.1.2 ! misho     113: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1       misho     114: {
1.1.1.2 ! misho     115: pcre_uint32 c;  /* Character currently being printed */
        !           116: BOOL utf = md->utf;  /* NOTE(review): md is dereferenced here even when is_subject is FALSE; utf is not named again in this function, presumably it is consumed by RAWUCHARINCTEST - confirm */
1.1       misho     117: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* Clamp to what is left of the subject */
                    118: while (length-- > 0)
1.1.1.2 ! misho     119:   if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);  /* Printable characters as-is, anything else in hex. NOTE(review): c may lie outside the range isprint() accepts if code units are wider than 8 bits - confirm */
1.1       misho     120: }
                    121: #endif
                    122: 
                    123: 
                    124: 
                    125: /*************************************************
                    126: *          Match a back-reference                *
                    127: *************************************************/
                    128: 
1.1.1.2 ! misho     129: /* Normally, if a back reference hasn't been set, the length that is passed is
        !           130: negative, so the match always fails. However, in JavaScript compatibility mode,
        !           131: the length passed is zero. Note that in caseless UTF-8 mode, the number of
        !           132: subject bytes matched may be different to the number of reference bytes.
1.1       misho     133: 
                    134: Arguments:
                    135:   offset      index into the offset vector
1.1.1.2 ! misho     136:   eptr        pointer into the subject
        !           137:   length      length of reference to be matched (number of bytes)
1.1       misho     138:   md          points to match data block
1.1.1.2 ! misho     139:   caseless    TRUE if caseless
1.1       misho     140: 
1.1.1.2 ! misho     141: Returns:      >= 0 the number of subject bytes matched
        !           142:               -1 no match
        !           143:               -2 partial match; always given if at end subject
1.1       misho     144: */
                    145: 
1.1.1.2 ! misho     146: static int
        !           147: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
        !           148:   BOOL caseless)
1.1       misho     149: {
1.1.1.2 ! misho     150: PCRE_PUCHAR eptr_start = eptr;  /* Kept so that the amount of subject consumed can be returned */
        !           151: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  /* Start of the referenced text within the subject */
        !           152: #ifdef SUPPORT_UTF
        !           153: BOOL utf = md->utf;
        !           154: #endif
1.1       misho     155: 
                    156: #ifdef PCRE_DEBUG
                    157: if (eptr >= md->end_subject)
                    158:   printf("matching subject <null>");
                    159: else
                    160:   {
                    161:   printf("matching subject ");
                    162:   pchars(eptr, length, TRUE, md);
                    163:   }
                    164: printf(" against backref ");
                    165: pchars(p, length, FALSE, md);
                    166: printf("\n");
                    167: #endif
                    168: 
1.1.1.2 ! misho     169: /* Always fail if reference not set (and not JavaScript compatible - in that
        !           170: case the length is passed as zero). */
1.1       misho     171: 
1.1.1.2 ! misho     172: if (length < 0) return -1;
1.1       misho     173: 
                    174: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    175: properly if Unicode properties are supported. Otherwise, we can check only
                    176: ASCII characters. */
                    177: 
1.1.1.2 ! misho     178: if (caseless)
1.1       misho     179:   {
1.1.1.2 ! misho     180: #ifdef SUPPORT_UTF
1.1       misho     181: #ifdef SUPPORT_UCP
1.1.1.2 ! misho     182:   if (utf)
1.1       misho     183:     {
1.1.1.2 ! misho     184:     /* Match characters up to the end of the reference. NOTE: the number of
        !           185:     data units matched may differ, because in UTF-8 there are some characters
        !           186:     whose upper and lower case versions have different numbers of bytes.
        !           187:     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
        !           188:     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
        !           189:     sequence of two of the latter. It is important, therefore, to check the
        !           190:     length along the reference, not along the subject (earlier code did this
        !           191:     wrong). */
        !           192: 
        !           193:     PCRE_PUCHAR endptr = p + length;
        !           194:     while (p < endptr)
        !           195:       {
        !           196:       pcre_uint32 c, d;
        !           197:       const ucd_record *ur;
        !           198:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.1       misho     199:       GETCHARINC(c, eptr);
                    200:       GETCHARINC(d, p);
1.1.1.2 ! misho     201:       ur = GET_UCD(d);
        !           202:       if (c != d && c != d + ur->other_case)  /* Neither identical nor the single other case of d: search d's caseless set for c */
        !           203:         {
        !           204:         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
        !           205:         for (;;)
        !           206:           {
        !           207:           if (c < *pp) return -1;  /* Gone past where c would be: no match (assumes each set is in ascending order and ends with a value above any character - TODO confirm) */
        !           208:           if (c == *pp++) break;  /* c is listed in the set, so this character matches */
        !           209:           }
        !           210:         }
1.1       misho     211:       }
                    212:     }
                    213:   else
                    214: #endif
                    215: #endif
                    216: 
                    217:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    218:   is no UCP support. */
1.1.1.2 ! misho     219:     {
        !           220:     while (length-- > 0)
        !           221:       {
        !           222:       pcre_uchar cc, cp;
        !           223:       if (eptr >= md->end_subject) return -2;   /* Partial match */
        !           224:       cc = RAWUCHARTEST(eptr);
        !           225:       cp = RAWUCHARTEST(p);
        !           226:       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;  /* Compare both units after mapping them through the md->lcc table */
        !           227:       p++;
        !           228:       eptr++;
        !           229:       }
        !           230:     }
1.1       misho     231:   }
                    232: 
                    233: /* In the caseful case, we can just compare the bytes, whether or not we
                    234: are in UTF-8 mode. */
                    235: 
                    236: else
1.1.1.2 ! misho     237:   {
        !           238:   while (length-- > 0)
        !           239:     {
        !           240:     if (eptr >= md->end_subject) return -2;   /* Partial match */
        !           241:     if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
        !           242:     }
        !           243:   }
1.1       misho     244: 
1.1.1.2 ! misho     245: return (int)(eptr - eptr_start);  /* Amount of subject consumed, which may differ from length in the caseless UTF case */
1.1       misho     246: }
                    247: 
                    248: 
                    249: 
                    250: /***************************************************************************
                    251: ****************************************************************************
                    252:                    RECURSION IN THE match() FUNCTION
                    253: 
                    254: The match() function is highly recursive, though not every recursive call
                    255: increases the recursive depth. Nevertheless, some regular expressions can cause
                    256: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    257: itself recursively. This uses the stack for saving everything that has to be
                    258: saved for a recursive call. On Unix, the stack can be large, and this works
                    259: fine.
                    260: 
                    261: It turns out that on some non-Unix-like systems there are problems with
                    262: programs that use a lot of stack. (This despite the fact that every last chip
                    263: has oodles of memory these days, and techniques for extending the stack have
                    264: been known for decades.) So....
                    265: 
                    266: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    267: calls by keeping local variables that need to be preserved in blocks of memory
                    268: obtained from malloc() instead of on the stack. Macros are used to
                    269: achieve this so that the actual code doesn't look very different to what it
                    270: always used to.
                    271: 
                    272: The original heap-recursive code used longjmp(). However, it seems that this
                    273: can be very slow on some operating systems. Following a suggestion from Stan
                    274: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    275: provide a unique number for each call to RMATCH. There is no way of generating
                    276: a sequence of numbers at compile time in C. I have given them names, to make
                    277: them stand out more clearly.
                    278: 
                    279: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    280: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    281: tests. Furthermore, not using longjmp() means that local dynamic variables
                    282: don't have indeterminate values; this has meant that the frame size can be
                    283: reduced because the result can be "passed back" by straight setting of the
                    284: variable instead of being passed in the frame.
                    285: ****************************************************************************
                    286: ***************************************************************************/
                    287: 
                    288: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    289: below must be updated in sync. In the NO_RECURSE build each number is stored in the calling frame's Xwhere field, and its name also forms the label (L_RM1, L_RM2, ...) placed after the call. */
                    290: 
                    291: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    292:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    293:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    294:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    295:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    296:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
1.1.1.2 ! misho     297:        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
1.1       misho     298: 
                    299: /* These versions of the macros use the stack, as normal. There are debugging
                    300: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    301: actually used in this definition. */
                    302: 
                    303: #ifndef NO_RECURSE
                    304: #define REGISTER register
                    305: 
                    306: #ifdef PCRE_DEBUG  /* Tracing variants: print the source line around every call and return */
1.1.1.2 ! misho     307: #define RMATCH(ra,rb,rc,rd,re,rw) \
1.1       misho     308:   { \
                    309:   printf("match() called in line %d\n", __LINE__); \
1.1.1.2 ! misho     310:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* Genuine recursive call: result arrives in rrc, mstart is passed through, depth grows by one */ \
1.1       misho     311:   printf("to line %d\n", __LINE__); \
                    312:   }
                    313: #define RRETURN(ra) \
                    314:   { \
1.1.1.2 ! misho     315:   printf("match() returned %d from line %d\n", ra, __LINE__); \
1.1       misho     316:   return ra; \
                    317:   }
                    318: #else  /* Production variants: the same call and return without tracing */
1.1.1.2 ! misho     319: #define RMATCH(ra,rb,rc,rd,re,rw) \
        !           320:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)  /* rw is accepted but not used */
1.1       misho     321: #define RRETURN(ra) return ra  /* Plain function return */
                    322: #endif
                    323: 
                    324: #else
                    325: 
                    326: 
                    327: /* These versions of the macros manage a private stack on the heap. Note that
                    328: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    329: argument of match(), which never changes. */
                    330: 
                    331: #define REGISTER
                    332: 
1.1.1.2 ! misho     333: #define RMATCH(ra,rb,rc,rd,re,rw)\
1.1       misho     334:   {\
1.1.1.2 ! misho     335:   heapframe *newframe = frame->Xnextframe; /* Reuse a frame already linked after this one, if there is one */\
        !           336:   if (newframe == NULL)\
        !           337:     {\
        !           338:     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
        !           339:     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
        !           340:     newframe->Xnextframe = NULL;\
        !           341:     frame->Xnextframe = newframe; /* Link it in so that a later RMATCH from this frame finds it */\
        !           342:     }\
        !           343:   frame->Xwhere = rw; /* Remember which call this is; presumably read at HEAP_RETURN to choose the L_<rw> label - confirm */\
1.1       misho     344:   newframe->Xeptr = ra;\
                    345:   newframe->Xecode = rb;\
                    346:   newframe->Xmstart = mstart;\
                    347:   newframe->Xoffset_top = rc;\
1.1.1.2 ! misho     348:   newframe->Xeptrb = re;\
1.1       misho     349:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    350:   newframe->Xprevframe = frame;\
                    351:   frame = newframe; /* The new frame becomes the current one */\
                    352:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    353:   goto HEAP_RECURSE;\
                    354:   L_##rw: /* Label placed directly after the simulated call */\
                    355:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    356:   }
                    357: 
                    358: #define RRETURN(ra)\
                    359:   {\
                    360:   heapframe *oldframe = frame;\
                    361:   frame = oldframe->Xprevframe; /* Step back to the calling frame; the old frame is not freed here */\
                    362:   if (frame != NULL)\
                    363:     {\
                    364:     rrc = ra; /* Hand the result to the caller through rrc */\
                    365:     goto HEAP_RETURN;\
                    366:     }\
                    367:   return ra; /* No calling frame: this is the outermost level, so really return */\
                    368:   }
                    369: 
                    370: 
                    371: /* Structure for remembering the local variables in a private frame */
                    372: 
                    373: typedef struct heapframe {
                    374:   struct heapframe *Xprevframe;  /* Calling frame; RRETURN steps back to it (NULL means outermost) */
1.1.1.2 ! misho     375:   struct heapframe *Xnextframe;  /* Frame linked after this one by RMATCH, reused on later calls */
1.1       misho     376: 
                    377:   /* Function arguments that may change */
                    378: 
1.1.1.2 ! misho     379:   PCRE_PUCHAR Xeptr;
        !           380:   const pcre_uchar *Xecode;
        !           381:   PCRE_PUCHAR Xmstart;
1.1       misho     382:   int Xoffset_top;
                    383:   eptrblock *Xeptrb;
                    384:   unsigned int Xrdepth;  /* RMATCH sets this to the caller's depth plus one */
                    385: 
                    386:   /* Function local variables */
                    387: 
1.1.1.2 ! misho     388:   PCRE_PUCHAR Xcallpat;
        !           389: #ifdef SUPPORT_UTF
        !           390:   PCRE_PUCHAR Xcharptr;
        !           391: #endif
        !           392:   PCRE_PUCHAR Xdata;
        !           393:   PCRE_PUCHAR Xnext;
        !           394:   PCRE_PUCHAR Xpp;
        !           395:   PCRE_PUCHAR Xprev;
        !           396:   PCRE_PUCHAR Xsaved_eptr;
1.1       misho     397: 
                    398:   recursion_info Xnew_recursive;
                    399: 
                    400:   BOOL Xcur_is_word;
                    401:   BOOL Xcondition;
                    402:   BOOL Xprev_is_word;
                    403: 
                    404: #ifdef SUPPORT_UCP
                    405:   int Xprop_type;
1.1.1.2 ! misho     406:   unsigned int Xprop_value;
1.1       misho     407:   int Xprop_fail_result;
                    408:   int Xoclength;
1.1.1.2 ! misho     409:   pcre_uchar Xocchars[6];
1.1       misho     410: #endif
                    411: 
                    412:   int Xcodelink;
                    413:   int Xctype;
                    414:   unsigned int Xfc;
                    415:   int Xfi;
                    416:   int Xlength;
                    417:   int Xmax;
                    418:   int Xmin;
                    419:   int Xnumber;
                    420:   int Xoffset;
                    421:   int Xop;
                    422:   int Xsave_capture_last;
                    423:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    424:   int Xstacksave[REC_STACK_SAVE_MAX];  /* In-frame room for saved offsets, sized by REC_STACK_SAVE_MAX */
                    425: 
                    426:   eptrblock Xnewptrb;
                    427: 
                    428:   /* Where to jump back to */
                    429: 
                    430:   int Xwhere;  /* One of the RMn numbers, stored by RMATCH just before it switches frames */
                    431: 
                    432: } heapframe;
                    433: 
                    434: #endif
                    435: 
                    436: 
                    437: /***************************************************************************
                    438: ***************************************************************************/
                    439: 
                    440: 
                    441: 
                    442: /*************************************************
                    443: *         Match from current position            *
                    444: *************************************************/
                    445: 
                    446: /* This function is called recursively in many circumstances. Whenever it
                    447: returns a negative (error) response, the outer incarnation must also return the
                    448: same response. */
                    449: 
                    450: /* These macros pack up tests that are used for partial matching, and which
1.1.1.2 ! misho     451: appear several times in the code. We set the "hit end" flag if the pointer is
1.1       misho     452: at the end of the subject and also past the start of the subject (i.e.
                    453: something has been matched). For hard partial matching, we then return
                    454: immediately. The second one is used when we already know we are past the end of
                    455: the subject. */
                    456: 
                    457: #define CHECK_PARTIAL()\
                    458:   if (md->partial != 0 && eptr >= md->end_subject && \
                    459:       eptr > md->start_used_ptr) \
                    460:     { \
                    461:     md->hitend = TRUE; /* Record that the end of the subject was reached */ \
1.1.1.2 ! misho     462:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* Hard partial matching: return immediately */ \
1.1       misho     463:     }
                    464: 
                    465: #define SCHECK_PARTIAL()\
                    466:   if (md->partial != 0 && eptr > md->start_used_ptr) /* No end-of-subject test: the caller already knows */ \
                    467:     { \
                    468:     md->hitend = TRUE; \
1.1.1.2 ! misho     469:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* Hard partial matching: return immediately */ \
1.1       misho     470:     }
                    471: 
                    472: 
                    473: /* Performance note: It might be tempting to extract commonly used fields from
1.1.1.2 ! misho     474: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1       misho     475: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    476: made performance worse.
                    477: 
                    478: Arguments:
                    479:    eptr        pointer to current character in subject
                    480:    ecode       pointer to current position in compiled code
                    481:    mstart      pointer to the current match start position (can be modified
                    482:                  by encountering \K)
                    483:    offset_top  current top pointer
                    484:    md          pointer to "static" info for the match
                    485:    eptrb       pointer to chain of blocks containing eptr at start of
                    486:                  brackets - for testing for empty matches
                    487:    rdepth      the recursion depth
                    488: 
                    489: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    490:                MATCH_NOMATCH if failed to match  )
                    491:                a negative MATCH_xxx value for PRUNE, SKIP, etc
                    492:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    493:                  (e.g. stopped by repeated call or recursion limit)
                    494: */
                    495: 
                    496: static int
1.1.1.2 ! misho     497: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
        !           498:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
        !           499:   unsigned int rdepth)
1.1       misho     500: {
                    501: /* These variables do not need to be preserved over recursion in this function,
                    502: so they can be ordinary variables in all cases. Mark some of them with
                    503: "register" because they are used a lot in loops. */
                    504: 
                    505: register int  rrc;         /* Returns from recursive calls */
                    506: register int  i;           /* Used for loops not involving calls to RMATCH() */
1.1.1.2 ! misho     507: register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
        !           508: register BOOL utf;         /* Local copy of UTF flag for speed */
1.1       misho     509: 
                    510: BOOL minimize, possessive; /* Quantifier options */
1.1.1.2 ! misho     511: BOOL caseless;
1.1       misho     512: int condcode;
                    513: 
                    514: /* When recursion is not being used, all "local" variables that have to be
1.1.1.2 ! misho     515: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
        !           516: frame on the stack here; subsequent instantiations are obtained from the heap
        !           517: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
        !           518: the top-level on the stack rather than malloc-ing them all gives a performance
        !           519: boost in many cases where there is not much "recursion". */
1.1       misho     520: 
                    521: #ifdef NO_RECURSE
1.1.1.2 ! misho     522: heapframe *frame = (heapframe *)md->match_frames_base;
1.1       misho     523: 
                    524: /* Copy in the original argument variables */
                    525: 
                    526: frame->Xeptr = eptr;
                    527: frame->Xecode = ecode;
                    528: frame->Xmstart = mstart;
                    529: frame->Xoffset_top = offset_top;
                    530: frame->Xeptrb = eptrb;
                    531: frame->Xrdepth = rdepth;
                    532: 
                    533: /* This is where control jumps back to to effect "recursion" */
                    534: 
                    535: HEAP_RECURSE:
                    536: 
                    537: /* Macros make the argument variables come from the current frame */
                    538: 
                    539: #define eptr               frame->Xeptr
                    540: #define ecode              frame->Xecode
                    541: #define mstart             frame->Xmstart
                    542: #define offset_top         frame->Xoffset_top
                    543: #define eptrb              frame->Xeptrb
                    544: #define rdepth             frame->Xrdepth
                    545: 
                    546: /* Ditto for the local variables */
                    547: 
1.1.1.2 ! misho     548: #ifdef SUPPORT_UTF
1.1       misho     549: #define charptr            frame->Xcharptr
                    550: #endif
                    551: #define callpat            frame->Xcallpat
                    552: #define codelink           frame->Xcodelink
                    553: #define data               frame->Xdata
                    554: #define next               frame->Xnext
                    555: #define pp                 frame->Xpp
                    556: #define prev               frame->Xprev
                    557: #define saved_eptr         frame->Xsaved_eptr
                    558: 
                    559: #define new_recursive      frame->Xnew_recursive
                    560: 
                    561: #define cur_is_word        frame->Xcur_is_word
                    562: #define condition          frame->Xcondition
                    563: #define prev_is_word       frame->Xprev_is_word
                    564: 
                    565: #ifdef SUPPORT_UCP
                    566: #define prop_type          frame->Xprop_type
                    567: #define prop_value         frame->Xprop_value
                    568: #define prop_fail_result   frame->Xprop_fail_result
                    569: #define oclength           frame->Xoclength
                    570: #define occhars            frame->Xocchars
                    571: #endif
                    572: 
                    573: #define ctype              frame->Xctype
                    574: #define fc                 frame->Xfc
                    575: #define fi                 frame->Xfi
                    576: #define length             frame->Xlength
                    577: #define max                frame->Xmax
                    578: #define min                frame->Xmin
                    579: #define number             frame->Xnumber
                    580: #define offset             frame->Xoffset
                    581: #define op                 frame->Xop
                    582: #define save_capture_last  frame->Xsave_capture_last
                    583: #define save_offset1       frame->Xsave_offset1
                    584: #define save_offset2       frame->Xsave_offset2
                    585: #define save_offset3       frame->Xsave_offset3
                    586: #define stacksave          frame->Xstacksave
                    587: 
                    588: #define newptrb            frame->Xnewptrb
                    589: 
                    590: /* When recursion is being used, local variables are allocated on the stack and
                    591: get preserved during recursion in the normal way. In this environment, fi and
                    592: i, and fc and c, can be the same variables. */
                    593: 
                    594: #else         /* NO_RECURSE not defined */
                    595: #define fi i
                    596: #define fc c
                    597: 
1.1.1.2 ! misho     598: /* Many of the following variables are used only in small blocks of the code.
        !           599: My normal style of coding would have declared them within each of those blocks.
        !           600: However, in order to accommodate the version of this code that uses an external
        !           601: "stack" implemented on the heap, it is easier to declare them all here, so the
        !           602: declarations can be cut out in a block. The only declarations within blocks
        !           603: below are for variables that do not have to be preserved over a recursive call
        !           604: to RMATCH(). */
        !           605: 
        !           606: #ifdef SUPPORT_UTF
        !           607: const pcre_uchar *charptr;
        !           608: #endif
        !           609: const pcre_uchar *callpat;
        !           610: const pcre_uchar *data;
        !           611: const pcre_uchar *next;
        !           612: PCRE_PUCHAR       pp;
        !           613: const pcre_uchar *prev;
        !           614: PCRE_PUCHAR       saved_eptr;
        !           615: 
        !           616: recursion_info new_recursive;
1.1       misho     617: 
1.1.1.2 ! misho     618: BOOL cur_is_word;
1.1       misho     619: BOOL condition;
                    620: BOOL prev_is_word;
                    621: 
                    622: #ifdef SUPPORT_UCP
                    623: int prop_type;
1.1.1.2 ! misho     624: unsigned int prop_value;
1.1       misho     625: int prop_fail_result;
                    626: int oclength;
1.1.1.2 ! misho     627: pcre_uchar occhars[6];
1.1       misho     628: #endif
                    629: 
                    630: int codelink;
                    631: int ctype;
                    632: int length;
                    633: int max;
                    634: int min;
1.1.1.2 ! misho     635: unsigned int number;
1.1       misho     636: int offset;
1.1.1.2 ! misho     637: pcre_uchar op;
1.1       misho     638: int save_capture_last;
                    639: int save_offset1, save_offset2, save_offset3;
                    640: int stacksave[REC_STACK_SAVE_MAX];
                    641: 
                    642: eptrblock newptrb;
1.1.1.2 ! misho     643: 
        !           644: /* There is a special fudge for calling match() in a way that causes it to
        !           645: measure the size of its basic stack frame when the stack is being used for
        !           646: recursion. The second argument (ecode) being NULL triggers this behaviour. It
        !           647: cannot normally ever be NULL. The return is the negated value of the frame
        !           648: size. */
        !           649: 
        !           650: if (ecode == NULL)
        !           651:   {
        !           652:   if (rdepth == 0)
        !           653:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
        !           654:   else
        !           655:     {
        !           656:     int len = (char *)&rdepth - (char *)eptr;
        !           657:     return (len > 0)? -len : len;
        !           658:     }
        !           659:   }
1.1       misho     660: #endif     /* NO_RECURSE */
                    661: 
1.1.1.2 ! misho     662: /* To save space on the stack and in the heap frame, I have doubled up on some
        !           663: of the local variables that are used only in localised parts of the code, but
        !           664: still need to be preserved over recursive calls of match(). These macros define
        !           665: the alternative names that are used. */
        !           666: 
        !           667: #define allow_zero    cur_is_word
        !           668: #define cbegroup      condition
        !           669: #define code_offset   codelink
        !           670: #define condassert    condition
        !           671: #define matched_once  prev_is_word
        !           672: #define foc           number
        !           673: #define save_mark     data
        !           674: 
1.1       misho     675: /* These statements are here to stop the compiler complaining about uninitialized
                    676: variables. */
                    677: 
                    678: #ifdef SUPPORT_UCP
                    679: prop_value = 0;
                    680: prop_fail_result = 0;
                    681: #endif
                    682: 
                    683: 
                    684: /* This label is used for tail recursion, which is used in a few cases even
                    685: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    686: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    687: original patch. */
                    688: 
                    689: TAIL_RECURSE:
                    690: 
                    691: /* OK, now we can get on with the real code of the function. Recursive calls
                    692: are specified by the macro RMATCH and RRETURN is used to return. When
                    693: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    694: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
                    695: defined). However, RMATCH isn't like a function call because it's quite a
                    696: complicated macro. It has to be used in one particular way. This shouldn't,
                    697: however, impact performance when true recursion is being used. */
                    698: 
1.1.1.2 ! misho     699: #ifdef SUPPORT_UTF
        !           700: utf = md->utf;       /* Local copy of the flag */
1.1       misho     701: #else
1.1.1.2 ! misho     702: utf = FALSE;
1.1       misho     703: #endif
                    704: 
                    705: /* First check that we haven't called match() too many times, or that we
                    706: haven't exceeded the recursive call limit. */
                    707: 
                    708: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    709: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    710: 
                    711: /* At the start of a group with an unlimited repeat that may match an empty
1.1.1.2 ! misho     712: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
        !           713: done this way to save having to use another function argument, which would take
        !           714: up space on the stack. See also MATCH_CONDASSERT below.
        !           715: 
        !           716: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
        !           717: such remembered pointers, to be checked when we hit the closing ket, in order
        !           718: to break infinite loops that match no characters. When match() is called in
        !           719: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
        !           720: NOT be used with tail recursion, because the memory block that is used is on
        !           721: the stack, so a new one may be required for each match(). */
1.1       misho     722: 
1.1.1.2 ! misho     723: if (md->match_function_type == MATCH_CBEGROUP)
1.1       misho     724:   {
                    725:   newptrb.epb_saved_eptr = eptr;
                    726:   newptrb.epb_prev = eptrb;
                    727:   eptrb = &newptrb;
1.1.1.2 ! misho     728:   md->match_function_type = 0;
1.1       misho     729:   }
                    730: 
                    731: /* Now start processing the opcodes. */
                    732: 
                    733: for (;;)
                    734:   {
                    735:   minimize = possessive = FALSE;
                    736:   op = *ecode;
                    737: 
                    738:   switch(op)
                    739:     {
                    740:     case OP_MARK:
1.1.1.2 ! misho     741:     md->nomatch_mark = ecode + 2;
        !           742:     md->mark = NULL;    /* In case previously set by assertion */
        !           743:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           744:       eptrb, RM55);
        !           745:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           746:          md->mark == NULL) md->mark = ecode + 2;
1.1       misho     747: 
                    748:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    749:     argument, and we must check whether that argument matches this MARK's
                    750:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    751:     variable). If it does match, we reset that variable to the current subject
                    752:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    753:     unaltered. */
                    754: 
1.1.1.2 ! misho     755:     else if (rrc == MATCH_SKIP_ARG &&
        !           756:         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
1.1       misho     757:       {
                    758:       md->start_match_ptr = eptr;
                    759:       RRETURN(MATCH_SKIP);
                    760:       }
                    761:     RRETURN(rrc);
                    762: 
                    763:     case OP_FAIL:
1.1.1.2 ! misho     764:     RRETURN(MATCH_NOMATCH);
1.1       misho     765: 
                    766:     /* COMMIT overrides PRUNE, SKIP, and THEN */
                    767: 
                    768:     case OP_COMMIT:
1.1.1.2 ! misho     769:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           770:       eptrb, RM52);
1.1       misho     771:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
                    772:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
                    773:         rrc != MATCH_THEN)
                    774:       RRETURN(rrc);
1.1.1.2 ! misho     775:     RRETURN(MATCH_COMMIT);
1.1       misho     776: 
                    777:     /* PRUNE overrides THEN */
                    778: 
                    779:     case OP_PRUNE:
1.1.1.2 ! misho     780:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           781:       eptrb, RM51);
1.1       misho     782:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1.1.2 ! misho     783:     RRETURN(MATCH_PRUNE);
1.1       misho     784: 
                    785:     case OP_PRUNE_ARG:
1.1.1.2 ! misho     786:     md->nomatch_mark = ecode + 2;
        !           787:     md->mark = NULL;    /* In case previously set by assertion */
        !           788:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           789:       eptrb, RM56);
        !           790:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           791:          md->mark == NULL) md->mark = ecode + 2;
1.1       misho     792:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    793:     RRETURN(MATCH_PRUNE);
                    794: 
                    795:     /* SKIP overrides PRUNE and THEN */
                    796: 
                    797:     case OP_SKIP:
1.1.1.2 ! misho     798:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           799:       eptrb, RM53);
1.1       misho     800:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    801:       RRETURN(rrc);
                    802:     md->start_match_ptr = eptr;   /* Pass back current position */
1.1.1.2 ! misho     803:     RRETURN(MATCH_SKIP);
        !           804: 
        !           805:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
        !           806:     nomatch_mark. There is a flag that disables this opcode when re-matching a
        !           807:     pattern that ended with a SKIP for which there was not a matching MARK. */
1.1       misho     808: 
                    809:     case OP_SKIP_ARG:
1.1.1.2 ! misho     810:     if (md->ignore_skip_arg)
        !           811:       {
        !           812:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
        !           813:       break;
        !           814:       }
        !           815:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           816:       eptrb, RM57);
1.1       misho     817:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    818:       RRETURN(rrc);
                    819: 
                    820:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    821:     returning the special MATCH_SKIP_ARG return code. This will either be
1.1.1.2 ! misho     822:     caught by a matching MARK, or get to the top, where it causes a rematch
        !           823:     with the md->ignore_skip_arg flag set. */
1.1       misho     824: 
                    825:     md->start_match_ptr = ecode + 2;
                    826:     RRETURN(MATCH_SKIP_ARG);
                    827: 
1.1.1.2 ! misho     828:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
        !           829:     the branch in which it occurs can be determined. Overload the start of
        !           830:     match pointer to do this. */
1.1       misho     831: 
                    832:     case OP_THEN:
1.1.1.2 ! misho     833:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           834:       eptrb, RM54);
1.1       misho     835:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho     836:     md->start_match_ptr = ecode;
        !           837:     RRETURN(MATCH_THEN);
1.1       misho     838: 
                    839:     case OP_THEN_ARG:
1.1.1.2 ! misho     840:     md->nomatch_mark = ecode + 2;
        !           841:     md->mark = NULL;    /* In case previously set by assertion */
        !           842:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
        !           843:       md, eptrb, RM58);
        !           844:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           845:          md->mark == NULL) md->mark = ecode + 2;
1.1       misho     846:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho     847:     md->start_match_ptr = ecode;
1.1       misho     848:     RRETURN(MATCH_THEN);
                    849: 
1.1.1.2 ! misho     850:     /* Handle an atomic group that does not contain any capturing parentheses.
        !           851:     This can be handled like an assertion. Prior to 8.13, all atomic groups
        !           852:     were handled this way. In 8.13, the code was changed as below for ONCE, so
        !           853:     that backups pass through the group and thereby reset captured values.
        !           854:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
        !           855:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
        !           856:     less stack intensive way.
        !           857: 
        !           858:     Check the alternative branches in turn - the matching won't pass the KET
        !           859:     for this kind of subpattern. If any one branch matches, we carry on as at
        !           860:     the end of a normal bracket, leaving the subject pointer, but resetting
        !           861:     the start-of-match value in case it was changed by \K. */
        !           862: 
        !           863:     case OP_ONCE_NC:
        !           864:     prev = ecode;
        !           865:     saved_eptr = eptr;
        !           866:     save_mark = md->mark;
        !           867:     do
        !           868:       {
        !           869:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
        !           870:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
        !           871:         {
        !           872:         mstart = md->start_match_ptr;
        !           873:         break;
        !           874:         }
        !           875:       if (rrc == MATCH_THEN)
        !           876:         {
        !           877:         next = ecode + GET(ecode,1);
        !           878:         if (md->start_match_ptr < next &&
        !           879:             (*ecode == OP_ALT || *next == OP_ALT))
        !           880:           rrc = MATCH_NOMATCH;
        !           881:         }
        !           882: 
        !           883:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           884:       ecode += GET(ecode,1);
        !           885:       md->mark = save_mark;
        !           886:       }
        !           887:     while (*ecode == OP_ALT);
        !           888: 
        !           889:     /* If hit the end of the group (which could be repeated), fail */
        !           890: 
        !           891:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
        !           892: 
        !           893:     /* Continue as from after the group, updating the offsets high water
        !           894:     mark, since extracts may have been taken. */
        !           895: 
        !           896:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
        !           897: 
        !           898:     offset_top = md->end_offset_top;
        !           899:     eptr = md->end_match_ptr;
        !           900: 
        !           901:     /* For a non-repeating ket, just continue at this level. This also
        !           902:     happens for a repeating ket if no characters were matched in the group.
        !           903:     This is the forcible breaking of infinite loops as implemented in Perl
        !           904:     5.005. */
        !           905: 
        !           906:     if (*ecode == OP_KET || eptr == saved_eptr)
        !           907:       {
        !           908:       ecode += 1+LINK_SIZE;
        !           909:       break;
        !           910:       }
        !           911: 
        !           912:     /* The repeating kets try the rest of the pattern or restart from the
        !           913:     preceding bracket, in the appropriate order. The second "call" of match()
        !           914:     uses tail recursion, to avoid using another stack frame. */
        !           915: 
        !           916:     if (*ecode == OP_KETRMIN)
        !           917:       {
        !           918:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
        !           919:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           920:       ecode = prev;
        !           921:       goto TAIL_RECURSE;
        !           922:       }
        !           923:     else  /* OP_KETRMAX */
        !           924:       {
        !           925:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
        !           926:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           927:       ecode += 1 + LINK_SIZE;
        !           928:       goto TAIL_RECURSE;
        !           929:       }
        !           930:     /* Control never gets here */
        !           931: 
        !           932:     /* Handle a capturing bracket, other than those that are possessive with an
        !           933:     unlimited repeat. If there is space in the offset vector, save the current
        !           934:     subject position in the working slot at the top of the vector. We mustn't
        !           935:     change the current values of the data slot, because they may be set from a
        !           936:     previous iteration of this group, and be referred to by a reference inside
        !           937:     the group. A failure to match might occur after the group has succeeded,
        !           938:     if something later on doesn't match. For this reason, we need to restore
        !           939:     the working value and also the values of the final offsets, in case they
        !           940:     were set by a previous iteration of the same bracket.
1.1       misho     941: 
                    942:     If there isn't enough space in the offset vector, treat this as if it were
                    943:     a non-capturing bracket. Don't worry about setting the flag for the error
                    944:     case here; that is handled in the code for KET. */
                    945: 
                    946:     case OP_CBRA:
                    947:     case OP_SCBRA:
                    948:     number = GET2(ecode, 1+LINK_SIZE);
                    949:     offset = number << 1;
                    950: 
                    951: #ifdef PCRE_DEBUG
                    952:     printf("start bracket %d\n", number);
                    953:     printf("subject=");
                    954:     pchars(eptr, 16, TRUE, md);
                    955:     printf("\n");
                    956: #endif
                    957: 
                    958:     if (offset < md->offset_max)
                    959:       {
                    960:       save_offset1 = md->offset_vector[offset];
                    961:       save_offset2 = md->offset_vector[offset+1];
                    962:       save_offset3 = md->offset_vector[md->offset_end - number];
                    963:       save_capture_last = md->capture_last;
1.1.1.2 ! misho     964:       save_mark = md->mark;
1.1       misho     965: 
                    966:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    967:       md->offset_vector[md->offset_end - number] =
                    968:         (int)(eptr - md->start_subject);
                    969: 
1.1.1.2 ! misho     970:       for (;;)
1.1       misho     971:         {
1.1.1.2 ! misho     972:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !           973:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           974:           eptrb, RM1);
        !           975:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
        !           976: 
        !           977:         /* If we backed up to a THEN, check whether it is within the current
        !           978:         branch by comparing the address of the THEN that is passed back with
        !           979:         the end of the branch. If it is within the current branch, and the
        !           980:         branch is one of two or more alternatives (it either starts or ends
        !           981:         with OP_ALT), we have reached the limit of THEN's action, so convert
        !           982:         the return code to NOMATCH, which will cause normal backtracking to
        !           983:         happen from now on. Otherwise, THEN is passed back to an outer
        !           984:         alternative. This implements Perl's treatment of parenthesized groups,
        !           985:         where a group not containing | does not affect the current alternative,
        !           986:         that is, (X) is NOT the same as (X|(*F)). */
        !           987: 
        !           988:         if (rrc == MATCH_THEN)
        !           989:           {
        !           990:           next = ecode + GET(ecode,1);
        !           991:           if (md->start_match_ptr < next &&
        !           992:               (*ecode == OP_ALT || *next == OP_ALT))
        !           993:             rrc = MATCH_NOMATCH;
        !           994:           }
        !           995: 
        !           996:         /* Anything other than NOMATCH is passed back. */
        !           997: 
        !           998:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho     999:         md->capture_last = save_capture_last;
                   1000:         ecode += GET(ecode, 1);
1.1.1.2 ! misho    1001:         md->mark = save_mark;
        !          1002:         if (*ecode != OP_ALT) break;
1.1       misho    1003:         }
                   1004: 
                   1005:       DPRINTF(("bracket %d failed\n", number));
                   1006:       md->offset_vector[offset] = save_offset1;
                   1007:       md->offset_vector[offset+1] = save_offset2;
                   1008:       md->offset_vector[md->offset_end - number] = save_offset3;
                   1009: 
1.1.1.2 ! misho    1010:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
        !          1011: 
        !          1012:       RRETURN(rrc);
1.1       misho    1013:       }
                   1014: 
                   1015:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1016:     as a non-capturing bracket. */
                   1017: 
                   1018:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1019:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1020: 
                   1021:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1022: 
                   1023:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1024:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1025: 
1.1.1.2 ! misho    1026:     /* Non-capturing or atomic group, except for possessive with unlimited
        !          1027:     repeat and ONCE group with no captures. Loop for all the alternatives.
        !          1028: 
        !          1029:     When we get to the final alternative within the brackets, we used to return
        !          1030:     the result of a recursive call to match() whatever happened so it was
        !          1031:     possible to reduce stack usage by turning this into a tail recursion,
        !          1032:     except in the case of a possibly empty group. However, now that there is
        !          1033:     the possibility of (*THEN) occurring in the final alternative, this
        !          1034:     optimization is no longer always possible.
        !          1035: 
        !          1036:     We can optimize if we know there are no (*THEN)s in the pattern; at present
        !          1037:     this is the best that can be done.
        !          1038: 
        !          1039:     MATCH_ONCE is returned when the end of an atomic group is successfully
        !          1040:     reached, but subsequent matching fails. It passes back up the tree (causing
        !          1041:     captured values to be reset) until the original atomic group level is
        !          1042:     reached. This is tested by comparing md->once_target with the start of the
        !          1043:     group. At this point, the return is converted into MATCH_NOMATCH so that
        !          1044:     previous backup points can be taken. */
1.1       misho    1045: 
1.1.1.2 ! misho    1046:     case OP_ONCE:
1.1       misho    1047:     case OP_BRA:
                   1048:     case OP_SBRA:
                   1049:     DPRINTF(("start non-capturing bracket\n"));
1.1.1.2 ! misho    1050: 
1.1       misho    1051:     for (;;)
                   1052:       {
1.1.1.2 ! misho    1053:       if (op >= OP_SBRA || op == OP_ONCE)
        !          1054:         md->match_function_type = MATCH_CBEGROUP;
        !          1055: 
        !          1056:       /* If this is not a possibly empty group, and there are no (*THEN)s in
        !          1057:       the pattern, and this is the final alternative, optimize as described
        !          1058:       above. */
        !          1059: 
        !          1060:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1.1       misho    1061:         {
1.1.1.2 ! misho    1062:         ecode += PRIV(OP_lengths)[*ecode];
        !          1063:         goto TAIL_RECURSE;
        !          1064:         }
        !          1065: 
        !          1066:       /* In all other cases, we have to make another call to match(). */
        !          1067: 
        !          1068:       save_mark = md->mark;
        !          1069:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
        !          1070:         RM2);
        !          1071: 
        !          1072:       /* See comment in the code for capturing groups above about handling
        !          1073:       THEN. */
        !          1074: 
        !          1075:       if (rrc == MATCH_THEN)
        !          1076:         {
        !          1077:         next = ecode + GET(ecode,1);
        !          1078:         if (md->start_match_ptr < next &&
        !          1079:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1080:           rrc = MATCH_NOMATCH;
        !          1081:         }
        !          1082: 
        !          1083:       if (rrc != MATCH_NOMATCH)
        !          1084:         {
        !          1085:         if (rrc == MATCH_ONCE)
1.1       misho    1086:           {
1.1.1.2 ! misho    1087:           const pcre_uchar *scode = ecode;
        !          1088:           if (*scode != OP_ONCE)           /* If not at start, find it */
        !          1089:             {
        !          1090:             while (*scode == OP_ALT) scode += GET(scode, 1);
        !          1091:             scode -= GET(scode, 1);
        !          1092:             }
        !          1093:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1.1       misho    1094:           }
1.1.1.2 ! misho    1095:         RRETURN(rrc);
        !          1096:         }
        !          1097:       ecode += GET(ecode, 1);
        !          1098:       md->mark = save_mark;
        !          1099:       if (*ecode != OP_ALT) break;
        !          1100:       }
1.1       misho    1101: 
1.1.1.2 ! misho    1102:     RRETURN(MATCH_NOMATCH);
1.1       misho    1103: 
1.1.1.2 ! misho    1104:     /* Handle possessive capturing brackets with an unlimited repeat. We come
        !          1105:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
        !          1106:     handled similarly to the normal case above. However, the matching is
        !          1107:     different. The end of these brackets will always be OP_KETRPOS, which
        !          1108:     returns MATCH_KETRPOS without going further in the pattern. By this means
        !          1109:     we can handle the group by iteration rather than recursion, thereby
        !          1110:     reducing the amount of stack needed. */
        !          1111: 
        !          1112:     case OP_CBRAPOS:
        !          1113:     case OP_SCBRAPOS:
        !          1114:     allow_zero = FALSE;
        !          1115: 
        !          1116:     POSSESSIVE_CAPTURE:
        !          1117:     number = GET2(ecode, 1+LINK_SIZE);
        !          1118:     offset = number << 1;
        !          1119: 
        !          1120: #ifdef PCRE_DEBUG
        !          1121:     printf("start possessive bracket %d\n", number);
        !          1122:     printf("subject=");
        !          1123:     pchars(eptr, 16, TRUE, md);
        !          1124:     printf("\n");
        !          1125: #endif
        !          1126: 
        !          1127:     if (offset < md->offset_max)
        !          1128:       {
        !          1129:       matched_once = FALSE;
        !          1130:       code_offset = (int)(ecode - md->start_code);
        !          1131: 
        !          1132:       save_offset1 = md->offset_vector[offset];
        !          1133:       save_offset2 = md->offset_vector[offset+1];
        !          1134:       save_offset3 = md->offset_vector[md->offset_end - number];
        !          1135:       save_capture_last = md->capture_last;
        !          1136: 
        !          1137:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
        !          1138: 
        !          1139:       /* Each time round the loop, save the current subject position for use
        !          1140:       when the group matches. For MATCH_MATCH, the group has matched, so we
        !          1141:       restart it with a new subject starting position, remembering that we had
        !          1142:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
        !          1143:       usual. If we haven't matched any alternatives in any iteration, check to
        !          1144:       see if a previous iteration matched. If so, the group has matched;
        !          1145:       continue from afterwards. Otherwise it has failed; restore the previous
        !          1146:       capture values before returning NOMATCH. */
        !          1147: 
        !          1148:       for (;;)
        !          1149:         {
        !          1150:         md->offset_vector[md->offset_end - number] =
        !          1151:           (int)(eptr - md->start_subject);
        !          1152:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !          1153:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !          1154:           eptrb, RM63);
        !          1155:         if (rrc == MATCH_KETRPOS)
        !          1156:           {
        !          1157:           offset_top = md->end_offset_top;
        !          1158:           eptr = md->end_match_ptr;
        !          1159:           ecode = md->start_code + code_offset;
        !          1160:           save_capture_last = md->capture_last;
        !          1161:           matched_once = TRUE;
        !          1162:           continue;
        !          1163:           }
        !          1164: 
        !          1165:         /* See comment in the code for capturing groups above about handling
        !          1166:         THEN. */
        !          1167: 
        !          1168:         if (rrc == MATCH_THEN)
        !          1169:           {
        !          1170:           next = ecode + GET(ecode,1);
        !          1171:           if (md->start_match_ptr < next &&
        !          1172:               (*ecode == OP_ALT || *next == OP_ALT))
        !          1173:             rrc = MATCH_NOMATCH;
        !          1174:           }
        !          1175: 
        !          1176:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1177:         md->capture_last = save_capture_last;
        !          1178:         ecode += GET(ecode, 1);
        !          1179:         if (*ecode != OP_ALT) break;
1.1       misho    1180:         }
                   1181: 
1.1.1.2 ! misho    1182:       if (!matched_once)
        !          1183:         {
        !          1184:         md->offset_vector[offset] = save_offset1;
        !          1185:         md->offset_vector[offset+1] = save_offset2;
        !          1186:         md->offset_vector[md->offset_end - number] = save_offset3;
        !          1187:         }
1.1       misho    1188: 
1.1.1.2 ! misho    1189:       if (allow_zero || matched_once)
        !          1190:         {
        !          1191:         ecode += 1 + LINK_SIZE;
        !          1192:         break;
        !          1193:         }
        !          1194: 
        !          1195:       RRETURN(MATCH_NOMATCH);
        !          1196:       }
        !          1197: 
        !          1198:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
        !          1199:     as a non-capturing bracket. */
        !          1200: 
        !          1201:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1202:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1203: 
        !          1204:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
        !          1205: 
        !          1206:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1207:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1208: 
        !          1209:     /* Non-capturing possessive bracket with unlimited repeat. We come here
        !          1210:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
        !          1211:     without the capturing complication. It is written out separately for speed
        !          1212:     and cleanliness. */
        !          1213: 
        !          1214:     case OP_BRAPOS:
        !          1215:     case OP_SBRAPOS:
        !          1216:     allow_zero = FALSE;
        !          1217: 
        !          1218:     POSSESSIVE_NON_CAPTURE:
        !          1219:     matched_once = FALSE;
        !          1220:     code_offset = (int)(ecode - md->start_code);
        !          1221: 
        !          1222:     for (;;)
        !          1223:       {
        !          1224:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !          1225:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !          1226:         eptrb, RM48);
        !          1227:       if (rrc == MATCH_KETRPOS)
        !          1228:         {
        !          1229:         offset_top = md->end_offset_top;
        !          1230:         eptr = md->end_match_ptr;
        !          1231:         ecode = md->start_code + code_offset;
        !          1232:         matched_once = TRUE;
        !          1233:         continue;
        !          1234:         }
        !          1235: 
        !          1236:       /* See comment in the code for capturing groups above about handling
        !          1237:       THEN. */
        !          1238: 
        !          1239:       if (rrc == MATCH_THEN)
        !          1240:         {
        !          1241:         next = ecode + GET(ecode,1);
        !          1242:         if (md->start_match_ptr < next &&
        !          1243:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1244:           rrc = MATCH_NOMATCH;
        !          1245:         }
        !          1246: 
        !          1247:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misho    1248:       ecode += GET(ecode, 1);
1.1.1.2 ! misho    1249:       if (*ecode != OP_ALT) break;
        !          1250:       }
        !          1251: 
        !          1252:     if (matched_once || allow_zero)
        !          1253:       {
        !          1254:       ecode += 1 + LINK_SIZE;
        !          1255:       break;
1.1       misho    1256:       }
1.1.1.2 ! misho    1257:     RRETURN(MATCH_NOMATCH);
        !          1258: 
1.1       misho    1259:     /* Control never reaches here. */
                   1260: 
                   1261:     /* Conditional group: compilation checked that there are no more than
                   1262:     two branches. If the condition is false, skipping the first branch takes us
                   1263:     past the end if there is only one branch, but that's OK because that is
1.1.1.2 ! misho    1264:     exactly what going to the ket would do. */
1.1       misho    1265: 
                   1266:     case OP_COND:
                   1267:     case OP_SCOND:
1.1.1.2 ! misho    1268:     codelink = GET(ecode, 1);
1.1       misho    1269: 
                   1270:     /* Because of the way auto-callout works during compile, a callout item is
                   1271:     inserted between OP_COND and an assertion condition. */
                   1272: 
                   1273:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                   1274:       {
1.1.1.2 ! misho    1275:       if (PUBL(callout) != NULL)
1.1       misho    1276:         {
1.1.1.2 ! misho    1277:         PUBL(callout_block) cb;
        !          1278:         cb.version          = 2;   /* Version 2 of the callout block */
1.1       misho    1279:         cb.callout_number   = ecode[LINK_SIZE+2];
                   1280:         cb.offset_vector    = md->offset_vector;
1.1.1.2 ! misho    1281: #if defined COMPILE_PCRE8
1.1       misho    1282:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho    1283: #elif defined COMPILE_PCRE16
        !          1284:         cb.subject          = (PCRE_SPTR16)md->start_subject;
        !          1285: #elif defined COMPILE_PCRE32
        !          1286:         cb.subject          = (PCRE_SPTR32)md->start_subject;
        !          1287: #endif
1.1       misho    1288:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1289:         cb.start_match      = (int)(mstart - md->start_subject);
                   1290:         cb.current_position = (int)(eptr - md->start_subject);
                   1291:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                   1292:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                   1293:         cb.capture_top      = offset_top/2;
                   1294:         cb.capture_last     = md->capture_last;
                   1295:         cb.callout_data     = md->callout_data;
1.1.1.2 ! misho    1296:         cb.mark             = md->nomatch_mark;
        !          1297:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1298:         if (rrc < 0) RRETURN(rrc);
                   1299:         }
1.1.1.2 ! misho    1300:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.1       misho    1301:       }
                   1302: 
                   1303:     condcode = ecode[LINK_SIZE+1];
                   1304: 
                   1305:     /* Now see what the actual condition is */
                   1306: 
                   1307:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
                   1308:       {
                   1309:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                   1310:         {
                   1311:         condition = FALSE;
                   1312:         ecode += GET(ecode, 1);
                   1313:         }
                   1314:       else
                   1315:         {
1.1.1.2 ! misho    1316:         unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
        !          1317:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1.1       misho    1318: 
                   1319:         /* If the test is for recursion into a specific subpattern, and it is
                   1320:         false, but the test was set up by name, scan the table to see if the
                   1321:         name refers to any other numbers, and test them. The condition is true
                   1322:         if any one is set. */
                   1323: 
1.1.1.2 ! misho    1324:         if (!condition && condcode == OP_NRREF)
1.1       misho    1325:           {
1.1.1.2 ! misho    1326:           pcre_uchar *slotA = md->name_table;
1.1       misho    1327:           for (i = 0; i < md->name_count; i++)
                   1328:             {
                   1329:             if (GET2(slotA, 0) == recno) break;
                   1330:             slotA += md->name_entry_size;
                   1331:             }
                   1332: 
                   1333:           /* Found a name for the number - there can be only one; duplicate
                   1334:           names for different numbers are allowed, but not vice versa. First
                   1335:           scan down for duplicates. */
                   1336: 
                   1337:           if (i < md->name_count)
                   1338:             {
1.1.1.2 ! misho    1339:             pcre_uchar *slotB = slotA;
1.1       misho    1340:             while (slotB > md->name_table)
                   1341:               {
                   1342:               slotB -= md->name_entry_size;
1.1.1.2 ! misho    1343:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1344:                 {
                   1345:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                   1346:                 if (condition) break;
                   1347:                 }
                   1348:               else break;
                   1349:               }
                   1350: 
                   1351:             /* Scan up for duplicates */
                   1352: 
                   1353:             if (!condition)
                   1354:               {
                   1355:               slotB = slotA;
                   1356:               for (i++; i < md->name_count; i++)
                   1357:                 {
                   1358:                 slotB += md->name_entry_size;
1.1.1.2 ! misho    1359:                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1360:                   {
                   1361:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   1362:                   if (condition) break;
                   1363:                   }
                   1364:                 else break;
                   1365:                 }
                   1366:               }
                   1367:             }
                   1368:           }
                   1369: 
                   1370:         /* Choose branch according to the condition */
                   1371: 
1.1.1.2 ! misho    1372:         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1373:         }
                   1374:       }
                   1375: 
                   1376:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
                   1377:       {
                   1378:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1379:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1380: 
                   1381:       /* If the numbered capture is unset, but the reference was by name,
                   1382:       scan the table to see if the name refers to any other numbers, and test
                   1383:       them. The condition is true if any one is set. This is tediously similar
                   1384:       to the code above, but not close enough to try to amalgamate. */
                   1385: 
                   1386:       if (!condition && condcode == OP_NCREF)
                   1387:         {
1.1.1.2 ! misho    1388:         unsigned int refno = offset >> 1;
        !          1389:         pcre_uchar *slotA = md->name_table;
1.1       misho    1390: 
                   1391:         for (i = 0; i < md->name_count; i++)
                   1392:           {
                   1393:           if (GET2(slotA, 0) == refno) break;
                   1394:           slotA += md->name_entry_size;
                   1395:           }
                   1396: 
                   1397:         /* Found a name for the number - there can be only one; duplicate names
                   1398:         for different numbers are allowed, but not vice versa. First scan down
                   1399:         for duplicates. */
                   1400: 
                   1401:         if (i < md->name_count)
                   1402:           {
1.1.1.2 ! misho    1403:           pcre_uchar *slotB = slotA;
1.1       misho    1404:           while (slotB > md->name_table)
                   1405:             {
                   1406:             slotB -= md->name_entry_size;
1.1.1.2 ! misho    1407:             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1408:               {
                   1409:               offset = GET2(slotB, 0) << 1;
                   1410:               condition = offset < offset_top &&
                   1411:                 md->offset_vector[offset] >= 0;
                   1412:               if (condition) break;
                   1413:               }
                   1414:             else break;
                   1415:             }
                   1416: 
                   1417:           /* Scan up for duplicates */
                   1418: 
                   1419:           if (!condition)
                   1420:             {
                   1421:             slotB = slotA;
                   1422:             for (i++; i < md->name_count; i++)
                   1423:               {
                   1424:               slotB += md->name_entry_size;
1.1.1.2 ! misho    1425:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1       misho    1426:                 {
                   1427:                 offset = GET2(slotB, 0) << 1;
                   1428:                 condition = offset < offset_top &&
                   1429:                   md->offset_vector[offset] >= 0;
                   1430:                 if (condition) break;
                   1431:                 }
                   1432:               else break;
                   1433:               }
                   1434:             }
                   1435:           }
                   1436:         }
                   1437: 
                   1438:       /* Choose branch according to the condition */
                   1439: 
1.1.1.2 ! misho    1440:       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misho    1441:       }
                   1442: 
                   1443:     else if (condcode == OP_DEF)     /* DEFINE - always false */
                   1444:       {
                   1445:       condition = FALSE;
                   1446:       ecode += GET(ecode, 1);
                   1447:       }
                   1448: 
                   1449:     /* The condition is an assertion. Call match() to evaluate it - setting
1.1.1.2 ! misho    1450:     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
        !          1451:     an assertion. */
1.1       misho    1452: 
                   1453:     else
                   1454:       {
1.1.1.2 ! misho    1455:       md->match_function_type = MATCH_CONDASSERT;
        !          1456:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1.1       misho    1457:       if (rrc == MATCH_MATCH)
                   1458:         {
1.1.1.2 ! misho    1459:         if (md->end_offset_top > offset_top)
        !          1460:           offset_top = md->end_offset_top;  /* Captures may have happened */
1.1       misho    1461:         condition = TRUE;
                   1462:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1463:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1464:         }
1.1.1.2 ! misho    1465: 
        !          1466:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
        !          1467:       assertion; it is therefore treated as NOMATCH. */
        !          1468: 
        !          1469:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1.1       misho    1470:         {
                   1471:         RRETURN(rrc);         /* Need braces because of following else */
                   1472:         }
                   1473:       else
                   1474:         {
                   1475:         condition = FALSE;
                   1476:         ecode += codelink;
                   1477:         }
                   1478:       }
                   1479: 
1.1.1.2 ! misho    1480:     /* We are now at the branch that is to be obeyed. As there is only one, can
        !          1481:     use tail recursion to avoid using another stack frame, except when there is
        !          1482:     unlimited repeat of a possibly empty group. In the latter case, a recursive
        !          1483:     call to match() is always required, unless the second alternative doesn't
        !          1484:     exist, in which case we can just plough on. Note that, for compatibility
        !          1485:     with Perl, the | in a conditional group is NOT treated as creating two
        !          1486:     alternatives. If a THEN is encountered in the branch, it propagates out to
        !          1487:     the enclosing alternative (unless nested in a deeper set of alternatives,
        !          1488:     of course). */
1.1       misho    1489: 
                   1490:     if (condition || *ecode == OP_ALT)
                   1491:       {
1.1.1.2 ! misho    1492:       if (op != OP_SCOND)
1.1       misho    1493:         {
1.1.1.2 ! misho    1494:         ecode += 1 + LINK_SIZE;
1.1       misho    1495:         goto TAIL_RECURSE;
                   1496:         }
1.1.1.2 ! misho    1497: 
        !          1498:       md->match_function_type = MATCH_CBEGROUP;
        !          1499:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
        !          1500:       RRETURN(rrc);
1.1       misho    1501:       }
1.1.1.2 ! misho    1502: 
        !          1503:      /* Condition false & no alternative; continue after the group. */
        !          1504: 
        !          1505:     else
1.1       misho    1506:       {
                   1507:       ecode += 1 + LINK_SIZE;
                   1508:       }
                   1509:     break;
                   1510: 
                   1511: 
                   1512:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1513:     to close any currently open capturing brackets. */
                   1514: 
                   1515:     case OP_CLOSE:
                   1516:     number = GET2(ecode, 1);
                   1517:     offset = number << 1;
                   1518: 
                   1519: #ifdef PCRE_DEBUG
                   1520:       printf("end bracket %d at *ACCEPT", number);
                   1521:       printf("\n");
                   1522: #endif
                   1523: 
                   1524:     md->capture_last = number;
                   1525:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1526:       {
                   1527:       md->offset_vector[offset] =
                   1528:         md->offset_vector[md->offset_end - number];
                   1529:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1530:       if (offset_top <= offset) offset_top = offset + 2;
                   1531:       }
1.1.1.2 ! misho    1532:     ecode += 1 + IMM2_SIZE;
1.1       misho    1533:     break;
                   1534: 
                   1535: 
1.1.1.2 ! misho    1536:     /* End of the pattern, either real or forced. */
1.1       misho    1537: 
                   1538:     case OP_END:
1.1.1.2 ! misho    1539:     case OP_ACCEPT:
        !          1540:     case OP_ASSERT_ACCEPT:
1.1       misho    1541: 
1.1.1.2 ! misho    1542:     /* If we have matched an empty string, fail if not in an assertion and not
        !          1543:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
        !          1544:     is set and we have matched at the start of the subject. In both cases,
        !          1545:     backtracking will then try other alternatives, if any. */
        !          1546: 
        !          1547:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
        !          1548:          md->recursive == NULL &&
        !          1549:          (md->notempty ||
        !          1550:            (md->notempty_atstart &&
        !          1551:              mstart == md->start_subject + md->start_offset)))
        !          1552:       RRETURN(MATCH_NOMATCH);
1.1       misho    1553: 
                   1554:     /* Otherwise, we have a match. */
                   1555: 
                   1556:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1557:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1558:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                   1559: 
                   1560:     /* For some reason, the macros don't work properly if an expression is
1.1.1.2 ! misho    1561:     given as the argument to RRETURN when the heap is in use. */
1.1       misho    1562: 
                   1563:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1.1.1.2 ! misho    1564:     RRETURN(rrc);
1.1       misho    1565: 
                   1566:     /* Assertion brackets. Check the alternative branches in turn - the
                   1567:     matching won't pass the KET for an assertion. If any one branch matches,
                   1568:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1569:     start of each branch to move the current point backwards, so the code at
1.1.1.2 ! misho    1570:     this level is identical to the lookahead case. When the assertion is part
        !          1571:     of a condition, we want to return immediately afterwards. The caller of
        !          1572:     this incarnation of the match() function will have set MATCH_CONDASSERT in
        !          1573:     md->match_function_type, and one of these opcodes will be the first opcode
        !          1574:     that is processed. We use a local variable that is preserved over calls to
        !          1575:     match() to remember this case. */
1.1       misho    1576: 
                   1577:     case OP_ASSERT:
                   1578:     case OP_ASSERTBACK:
1.1.1.2 ! misho    1579:     save_mark = md->mark;
        !          1580:     if (md->match_function_type == MATCH_CONDASSERT)
        !          1581:       {
        !          1582:       condassert = TRUE;
        !          1583:       md->match_function_type = 0;
        !          1584:       }
        !          1585:     else condassert = FALSE;
        !          1586: 
1.1       misho    1587:     do
                   1588:       {
1.1.1.2 ! misho    1589:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1.1       misho    1590:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1591:         {
                   1592:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1593:         break;
                   1594:         }
1.1.1.2 ! misho    1595:       md->mark = save_mark;
        !          1596: 
        !          1597:       /* A COMMIT failure must fail the entire assertion, without trying any
        !          1598:       subsequent branches. */
        !          1599: 
        !          1600:       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
        !          1601: 
        !          1602:       /* PCRE does not allow THEN to escape beyond an assertion; it
        !          1603:       is treated as NOMATCH. */
        !          1604: 
        !          1605:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1       misho    1606:       ecode += GET(ecode, 1);
                   1607:       }
                   1608:     while (*ecode == OP_ALT);
1.1.1.2 ! misho    1609: 
        !          1610:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1.1       misho    1611: 
                   1612:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1613: 
1.1.1.2 ! misho    1614:     if (condassert) RRETURN(MATCH_MATCH);
1.1       misho    1615: 
                   1616:     /* Continue from after the assertion, updating the offsets high water
                   1617:     mark, since extracts may have been taken during the assertion. */
                   1618: 
                   1619:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1620:     ecode += 1 + LINK_SIZE;
                   1621:     offset_top = md->end_offset_top;
                   1622:     continue;
                   1623: 
                   1624:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1625:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1626:     branches. */
                   1627: 
                   1628:     case OP_ASSERT_NOT:
                   1629:     case OP_ASSERTBACK_NOT:
1.1.1.2 ! misho    1630:     save_mark = md->mark;
        !          1631:     if (md->match_function_type == MATCH_CONDASSERT)
        !          1632:       {
        !          1633:       condassert = TRUE;
        !          1634:       md->match_function_type = 0;
        !          1635:       }
        !          1636:     else condassert = FALSE;
        !          1637: 
1.1       misho    1638:     do
                   1639:       {
1.1.1.2 ! misho    1640:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
        !          1641:       md->mark = save_mark;
        !          1642:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1.1       misho    1643:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1644:         {
                   1645:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1646:         break;
                   1647:         }
1.1.1.2 ! misho    1648: 
        !          1649:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
        !          1650:       as NOMATCH. */
        !          1651: 
        !          1652:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1       misho    1653:       ecode += GET(ecode,1);
                   1654:       }
                   1655:     while (*ecode == OP_ALT);
                   1656: 
1.1.1.2 ! misho    1657:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1.1       misho    1658: 
                   1659:     ecode += 1 + LINK_SIZE;
                   1660:     continue;
                   1661: 
                   1662:     /* Move the subject pointer back. This occurs only at the start of
                   1663:     each branch of a lookbehind assertion. If we are too close to the start to
                   1664:     move back, this match function fails. When working with UTF-8 we move
                   1665:     back a number of characters, not bytes. */
                   1666: 
                   1667:     case OP_REVERSE:
1.1.1.2 ! misho    1668: #ifdef SUPPORT_UTF
        !          1669:     if (utf)
1.1       misho    1670:       {
                   1671:       i = GET(ecode, 1);
                   1672:       while (i-- > 0)
                   1673:         {
                   1674:         eptr--;
1.1.1.2 ! misho    1675:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misho    1676:         BACKCHAR(eptr);
                   1677:         }
                   1678:       }
                   1679:     else
                   1680: #endif
                   1681: 
                   1682:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1683: 
                   1684:       {
                   1685:       eptr -= GET(ecode, 1);
1.1.1.2 ! misho    1686:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misho    1687:       }
                   1688: 
                   1689:     /* Save the earliest consulted character, then skip to next op code */
                   1690: 
                   1691:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
                   1692:     ecode += 1 + LINK_SIZE;
                   1693:     break;
                   1694: 
                   1695:     /* The callout item calls an external function, if one is provided, passing
                   1696:     details of the match so far. This is mainly for debugging, though the
                   1697:     function is able to force a failure. */
                   1698: 
                   1699:     case OP_CALLOUT:
1.1.1.2 ! misho    1700:     if (PUBL(callout) != NULL)
1.1       misho    1701:       {
1.1.1.2 ! misho    1702:       PUBL(callout_block) cb;
        !          1703:       cb.version          = 2;   /* Version 2 of the callout block */
1.1       misho    1704:       cb.callout_number   = ecode[1];
                   1705:       cb.offset_vector    = md->offset_vector;
1.1.1.2 ! misho    1706: #if defined COMPILE_PCRE8
1.1       misho    1707:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho    1708: #elif defined COMPILE_PCRE16
        !          1709:       cb.subject          = (PCRE_SPTR16)md->start_subject;
        !          1710: #elif defined COMPILE_PCRE32
        !          1711:       cb.subject          = (PCRE_SPTR32)md->start_subject;
        !          1712: #endif
1.1       misho    1713:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1714:       cb.start_match      = (int)(mstart - md->start_subject);
                   1715:       cb.current_position = (int)(eptr - md->start_subject);
                   1716:       cb.pattern_position = GET(ecode, 2);
                   1717:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1718:       cb.capture_top      = offset_top/2;
                   1719:       cb.capture_last     = md->capture_last;
                   1720:       cb.callout_data     = md->callout_data;
1.1.1.2 ! misho    1721:       cb.mark             = md->nomatch_mark;
        !          1722:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misho    1723:       if (rrc < 0) RRETURN(rrc);
                   1724:       }
                   1725:     ecode += 2 + 2*LINK_SIZE;
                   1726:     break;
                   1727: 
                   1728:     /* Recursion either matches the current regex, or some subexpression. The
                   1729:     offset data is the offset to the starting bracket from the start of the
                   1730:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1731: 
1.1.1.2 ! misho    1732:     The state of the capturing groups is preserved over recursion, and
        !          1733:     re-instated afterwards. We don't know how many are started and not yet
        !          1734:     finished (offset_top records the completed total) so we just have to save
        !          1735:     all the potential data. There may be up to 65535 such values, which is too
        !          1736:     large to put on the stack, but using malloc for small numbers seems
        !          1737:     expensive. As a compromise, the stack is used when there are no more than
        !          1738:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1.1       misho    1739: 
                   1740:     There are also other values that have to be saved. We use a chained
                   1741:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1.1.1.2 ! misho    1742:     for the original version of this logic. It has, however, been hacked around
        !          1743:     a lot, so he is not to blame for the current way it works. */
1.1       misho    1744: 
                   1745:     case OP_RECURSE:
                   1746:       {
1.1.1.2 ! misho    1747:       recursion_info *ri;
        !          1748:       unsigned int recno;
        !          1749: 
1.1       misho    1750:       callpat = md->start_code + GET(ecode, 1);
1.1.1.2 ! misho    1751:       recno = (callpat == md->start_code)? 0 :
1.1       misho    1752:         GET2(callpat, 1 + LINK_SIZE);
                   1753: 
1.1.1.2 ! misho    1754:       /* Check for repeating a recursion without advancing the subject pointer.
        !          1755:       This should catch convoluted mutual recursions. (Some simple cases are
        !          1756:       caught at compile time.) */
        !          1757: 
        !          1758:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
        !          1759:         if (recno == ri->group_num && eptr == ri->subject_position)
        !          1760:           RRETURN(PCRE_ERROR_RECURSELOOP);
        !          1761: 
1.1       misho    1762:       /* Add to "recursing stack" */
                   1763: 
1.1.1.2 ! misho    1764:       new_recursive.group_num = recno;
        !          1765:       new_recursive.subject_position = eptr;
1.1       misho    1766:       new_recursive.prevrec = md->recursive;
                   1767:       md->recursive = &new_recursive;
                   1768: 
1.1.1.2 ! misho    1769:       /* Where to continue from afterwards */
1.1       misho    1770: 
                   1771:       ecode += 1 + LINK_SIZE;
                   1772: 
1.1.1.2 ! misho    1773:       /* Now save the offset data */
1.1       misho    1774: 
                   1775:       new_recursive.saved_max = md->offset_end;
                   1776:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1777:         new_recursive.offset_save = stacksave;
                   1778:       else
                   1779:         {
                   1780:         new_recursive.offset_save =
1.1.1.2 ! misho    1781:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1       misho    1782:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1783:         }
                   1784:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1785:             new_recursive.saved_max * sizeof(int));
                   1786: 
1.1.1.2 ! misho    1787:       /* OK, now we can do the recursion. After processing each alternative,
        !          1788:       restore the offset data. If there were nested recursions, md->recursive
        !          1789:       might be changed, so reset it before looping. */
1.1       misho    1790: 
                   1791:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1.1.1.2 ! misho    1792:       cbegroup = (*callpat >= OP_SBRA);
1.1       misho    1793:       do
                   1794:         {
1.1.1.2 ! misho    1795:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
        !          1796:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
        !          1797:           md, eptrb, RM6);
        !          1798:         memcpy(md->offset_vector, new_recursive.offset_save,
        !          1799:             new_recursive.saved_max * sizeof(int));
        !          1800:         md->recursive = new_recursive.prevrec;
1.1       misho    1801:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1802:           {
                   1803:           DPRINTF(("Recursion matched\n"));
                   1804:           if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho    1805:             (PUBL(free))(new_recursive.offset_save);
        !          1806: 
        !          1807:           /* Set where we got to in the subject, and reset the start in case
        !          1808:           it was changed by \K. This *is* propagated back out of a recursion,
        !          1809:           for Perl compatibility. */
        !          1810: 
        !          1811:           eptr = md->end_match_ptr;
        !          1812:           mstart = md->start_match_ptr;
        !          1813:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1.1       misho    1814:           }
1.1.1.2 ! misho    1815: 
        !          1816:         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
        !          1817:         is treated as NOMATCH. */
        !          1818: 
        !          1819:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
        !          1820:                  rrc != MATCH_COMMIT)
1.1       misho    1821:           {
                   1822:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1823:           if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho    1824:             (PUBL(free))(new_recursive.offset_save);
1.1       misho    1825:           RRETURN(rrc);
                   1826:           }
                   1827: 
                   1828:         md->recursive = &new_recursive;
                   1829:         callpat += GET(callpat, 1);
                   1830:         }
                   1831:       while (*callpat == OP_ALT);
                   1832: 
                   1833:       DPRINTF(("Recursion didn't match\n"));
                   1834:       md->recursive = new_recursive.prevrec;
                   1835:       if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho    1836:         (PUBL(free))(new_recursive.offset_save);
        !          1837:       RRETURN(MATCH_NOMATCH);
1.1       misho    1838:       }
                   1839: 
1.1.1.2 ! misho    1840:     RECURSION_MATCHED:
        !          1841:     break;
1.1       misho    1842: 
                   1843:     /* An alternation is the end of a branch; scan along to find the end of the
                   1844:     bracketed group and go to there. */
                   1845: 
                   1846:     case OP_ALT:
                   1847:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1848:     break;
                   1849: 
                   1850:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1851:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1852:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1853:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1854:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1855: 
                   1856:     case OP_BRAZERO:
1.1.1.2 ! misho    1857:     next = ecode + 1;
        !          1858:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
        !          1859:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1860:     do next += GET(next, 1); while (*next == OP_ALT);
        !          1861:     ecode = next + 1 + LINK_SIZE;
1.1       misho    1862:     break;
                   1863: 
                   1864:     case OP_BRAMINZERO:
1.1.1.2 ! misho    1865:     next = ecode + 1;
        !          1866:     do next += GET(next, 1); while (*next == OP_ALT);
        !          1867:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
        !          1868:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1869:     ecode++;
1.1       misho    1870:     break;
                   1871: 
                   1872:     case OP_SKIPZERO:
1.1.1.2 ! misho    1873:     next = ecode+1;
        !          1874:     do next += GET(next,1); while (*next == OP_ALT);
        !          1875:     ecode = next + 1 + LINK_SIZE;
1.1       misho    1876:     break;
                   1877: 
1.1.1.2 ! misho    1878:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
        !          1879:     here; just jump to the group, with allow_zero set TRUE. */
        !          1880: 
        !          1881:     case OP_BRAPOSZERO:
        !          1882:     op = *(++ecode);
        !          1883:     allow_zero = TRUE;
        !          1884:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
        !          1885:       goto POSSESSIVE_NON_CAPTURE;
        !          1886: 
1.1       misho    1887:     /* End of a group, repeated or non-repeating. */
                   1888: 
                   1889:     case OP_KET:
                   1890:     case OP_KETRMIN:
                   1891:     case OP_KETRMAX:
1.1.1.2 ! misho    1892:     case OP_KETRPOS:
1.1       misho    1893:     prev = ecode - GET(ecode, 1);
                   1894: 
                   1895:     /* If this was a group that remembered the subject start, in order to break
                   1896:     infinite repeats of empty string matches, retrieve the subject start from
                   1897:     the chain. Otherwise, set it NULL. */
                   1898: 
1.1.1.2 ! misho    1899:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1.1       misho    1900:       {
                   1901:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1902:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1903:       }
                   1904:     else saved_eptr = NULL;
                   1905: 
1.1.1.2 ! misho    1906:     /* If we are at the end of an assertion group or a non-capturing atomic
        !          1907:     group, stop matching and return MATCH_MATCH, but record the current high
        !          1908:     water mark for use by positive assertions. We also need to record the match
        !          1909:     start in case it was changed by \K. */
        !          1910: 
        !          1911:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
        !          1912:          *prev == OP_ONCE_NC)
1.1       misho    1913:       {
1.1.1.2 ! misho    1914:       md->end_match_ptr = eptr;      /* For ONCE_NC */
1.1       misho    1915:       md->end_offset_top = offset_top;
                   1916:       md->start_match_ptr = mstart;
1.1.1.2 ! misho    1917:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1.1       misho    1918:       }
                   1919: 
                   1920:     /* For capturing groups we have to check the group number back at the start
                   1921:     and if necessary complete handling an extraction by setting the offsets and
1.1.1.2 ! misho    1922:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
        !          1923:     into group 0, so it won't be picked up here. Instead, we catch it when the
        !          1924:     OP_END is reached. Other recursion is handled here. We just have to record
        !          1925:     the current subject position and start match pointer and give a MATCH
        !          1926:     return. */
1.1       misho    1927: 
1.1.1.2 ! misho    1928:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
        !          1929:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1.1       misho    1930:       {
                   1931:       number = GET2(prev, 1+LINK_SIZE);
                   1932:       offset = number << 1;
                   1933: 
                   1934: #ifdef PCRE_DEBUG
                   1935:       printf("end bracket %d", number);
                   1936:       printf("\n");
                   1937: #endif
                   1938: 
1.1.1.2 ! misho    1939:       /* Handle a recursively called group. */
        !          1940: 
        !          1941:       if (md->recursive != NULL && md->recursive->group_num == number)
        !          1942:         {
        !          1943:         md->end_match_ptr = eptr;
        !          1944:         md->start_match_ptr = mstart;
        !          1945:         RRETURN(MATCH_MATCH);
        !          1946:         }
        !          1947: 
        !          1948:       /* Deal with capturing */
        !          1949: 
1.1       misho    1950:       md->capture_last = number;
                   1951:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1952:         {
1.1.1.2 ! misho    1953:         /* If offset is greater than offset_top, it means that we are
        !          1954:         "skipping" a capturing group, and that group's offsets must be marked
        !          1955:         unset. In earlier versions of PCRE, all the offsets were unset at the
        !          1956:         start of matching, but this doesn't work because atomic groups and
        !          1957:         assertions can cause a value to be set that should later be unset.
        !          1958:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
        !          1959:         part of the atomic group, but this is not on the final matching path,
        !          1960:         so must be unset when 2 is set. (If there is no group 2, there is no
        !          1961:         problem, because offset_top will then be 2, indicating no capture.) */
        !          1962: 
        !          1963:         if (offset > offset_top)
        !          1964:           {
        !          1965:           register int *iptr = md->offset_vector + offset_top;
        !          1966:           register int *iend = md->offset_vector + offset;
        !          1967:           while (iptr < iend) *iptr++ = -1;
        !          1968:           }
        !          1969: 
        !          1970:         /* Now make the extraction */
        !          1971: 
1.1       misho    1972:         md->offset_vector[offset] =
                   1973:           md->offset_vector[md->offset_end - number];
                   1974:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1975:         if (offset_top <= offset) offset_top = offset + 2;
                   1976:         }
                   1977:       }
                   1978: 
1.1.1.2 ! misho    1979:     /* For an ordinary non-repeating ket, just continue at this level. This
        !          1980:     also happens for a repeating ket if no characters were matched in the
        !          1981:     group. This is the forcible breaking of infinite loops as implemented in
        !          1982:     Perl 5.005. For a non-repeating atomic group that includes captures,
        !          1983:     establish a backup point by processing the rest of the pattern at a lower
        !          1984:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
        !          1985:     original OP_ONCE level, thereby bypassing intermediate backup points, but
        !          1986:     resetting any captures that happened along the way. */
1.1       misho    1987: 
                   1988:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1989:       {
1.1.1.2 ! misho    1990:       if (*prev == OP_ONCE)
        !          1991:         {
        !          1992:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
        !          1993:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1994:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
        !          1995:         RRETURN(MATCH_ONCE);
        !          1996:         }
        !          1997:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1.1       misho    1998:       break;
                   1999:       }
                   2000: 
1.1.1.2 ! misho    2001:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
        !          2002:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
        !          2003:     at a time from the outer level, thus saving stack. */
        !          2004: 
        !          2005:     if (*ecode == OP_KETRPOS)
        !          2006:       {
        !          2007:       md->end_match_ptr = eptr;
        !          2008:       md->end_offset_top = offset_top;
        !          2009:       RRETURN(MATCH_KETRPOS);
        !          2010:       }
1.1       misho    2011: 
1.1.1.2 ! misho    2012:     /* The normal repeating kets try the rest of the pattern or restart from
        !          2013:     the preceding bracket, in the appropriate order. In the second case, we can
        !          2014:     use tail recursion to avoid using another stack frame, unless we have
        !          2015:     an atomic group or an unlimited repeat of a group that can match an empty
        !          2016:     string. */
1.1       misho    2017: 
                   2018:     if (*ecode == OP_KETRMIN)
                   2019:       {
1.1.1.2 ! misho    2020:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1.1       misho    2021:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    2022:       if (*prev == OP_ONCE)
1.1       misho    2023:         {
1.1.1.2 ! misho    2024:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
        !          2025:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2026:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
        !          2027:         RRETURN(MATCH_ONCE);
        !          2028:         }
        !          2029:       if (*prev >= OP_SBRA)    /* Could match an empty string */
        !          2030:         {
        !          2031:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1.1       misho    2032:         RRETURN(rrc);
                   2033:         }
                   2034:       ecode = prev;
                   2035:       goto TAIL_RECURSE;
                   2036:       }
                   2037:     else  /* OP_KETRMAX */
                   2038:       {
1.1.1.2 ! misho    2039:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
        !          2040:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1.1       misho    2041:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    2042:       if (*prev == OP_ONCE)
        !          2043:         {
        !          2044:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
        !          2045:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2046:         md->once_target = prev;
        !          2047:         RRETURN(MATCH_ONCE);
        !          2048:         }
1.1       misho    2049:       ecode += 1 + LINK_SIZE;
                   2050:       goto TAIL_RECURSE;
                   2051:       }
                   2052:     /* Control never gets here */
                   2053: 
1.1.1.2 ! misho    2054:     /* Not multiline mode: start of subject assertion, unless notbol. */
1.1       misho    2055: 
                   2056:     case OP_CIRC:
1.1.1.2 ! misho    2057:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misho    2058: 
                   2059:     /* Start of subject assertion */
                   2060: 
                   2061:     case OP_SOD:
1.1.1.2 ! misho    2062:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
        !          2063:     ecode++;
        !          2064:     break;
        !          2065: 
        !          2066:     /* Multiline mode: start of subject unless notbol, or after any newline. */
        !          2067: 
        !          2068:     case OP_CIRCM:
        !          2069:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
        !          2070:     if (eptr != md->start_subject &&
        !          2071:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
        !          2072:       RRETURN(MATCH_NOMATCH);
1.1       misho    2073:     ecode++;
                   2074:     break;
                   2075: 
                   2076:     /* Start of match assertion */
                   2077: 
                   2078:     case OP_SOM:
1.1.1.2 ! misho    2079:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1.1       misho    2080:     ecode++;
                   2081:     break;
                   2082: 
                   2083:     /* Reset the start of match point */
                   2084: 
                   2085:     case OP_SET_SOM:
                   2086:     mstart = eptr;
                   2087:     ecode++;
                   2088:     break;
                   2089: 
1.1.1.2 ! misho    2090:     /* Multiline mode: assert before any newline, or before end of subject
        !          2091:     unless noteol is set. */
1.1       misho    2092: 
1.1.1.2 ! misho    2093:     case OP_DOLLM:
        !          2094:     if (eptr < md->end_subject)
1.1       misho    2095:       {
1.1.1.2 ! misho    2096:       if (!IS_NEWLINE(eptr))
1.1       misho    2097:         {
1.1.1.2 ! misho    2098:         if (md->partial != 0 &&
        !          2099:             eptr + 1 >= md->end_subject &&
        !          2100:             NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2101:             NLBLOCK->nllen == 2 &&
        !          2102:             RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
        !          2103:           {
        !          2104:           md->hitend = TRUE;
        !          2105:           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2106:           }
        !          2107:         RRETURN(MATCH_NOMATCH);
1.1       misho    2108:         }
                   2109:       }
1.1.1.2 ! misho    2110:     else
1.1       misho    2111:       {
1.1.1.2 ! misho    2112:       if (md->noteol) RRETURN(MATCH_NOMATCH);
        !          2113:       SCHECK_PARTIAL();
1.1       misho    2114:       }
1.1.1.2 ! misho    2115:     ecode++;
        !          2116:     break;
        !          2117: 
        !          2118:     /* Not multiline mode: assert before a terminating newline or before end of
        !          2119:     subject unless noteol is set. */
        !          2120: 
        !          2121:     case OP_DOLL:
        !          2122:     if (md->noteol) RRETURN(MATCH_NOMATCH);
        !          2123:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
1.1       misho    2124: 
                   2125:     /* ... else fall through for endonly */
                   2126: 
                   2127:     /* End of subject assertion (\z) */
                   2128: 
                   2129:     case OP_EOD:
1.1.1.2 ! misho    2130:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1.1       misho    2131:     SCHECK_PARTIAL();
                   2132:     ecode++;
                   2133:     break;
                   2134: 
                   2135:     /* End of subject or ending \n assertion (\Z) */
                   2136: 
                   2137:     case OP_EODN:
                   2138:     ASSERT_NL_OR_EOS:
                   2139:     if (eptr < md->end_subject &&
                   2140:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.1.1.2 ! misho    2141:       {
        !          2142:       if (md->partial != 0 &&
        !          2143:           eptr + 1 >= md->end_subject &&
        !          2144:           NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2145:           NLBLOCK->nllen == 2 &&
        !          2146:           RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
        !          2147:         {
        !          2148:         md->hitend = TRUE;
        !          2149:         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2150:         }
        !          2151:       RRETURN(MATCH_NOMATCH);
        !          2152:       }
1.1       misho    2153: 
                   2154:     /* Either at end of string or \n before end. */
                   2155: 
                   2156:     SCHECK_PARTIAL();
                   2157:     ecode++;
                   2158:     break;
                   2159: 
                   2160:     /* Word boundary assertions */
                   2161: 
                   2162:     case OP_NOT_WORD_BOUNDARY:
                   2163:     case OP_WORD_BOUNDARY:
                   2164:       {
                   2165: 
                   2166:       /* Find out if the previous and current characters are "word" characters.
                   2167:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   2168:       be "non-word" characters. Remember the earliest consulted character for
                   2169:       partial matching. */
                   2170: 
1.1.1.2 ! misho    2171: #ifdef SUPPORT_UTF
        !          2172:       if (utf)
1.1       misho    2173:         {
                   2174:         /* Get status of previous character */
                   2175: 
                   2176:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2177:           {
1.1.1.2 ! misho    2178:           PCRE_PUCHAR lastptr = eptr - 1;
        !          2179:           BACKCHAR(lastptr);
1.1       misho    2180:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
                   2181:           GETCHAR(c, lastptr);
                   2182: #ifdef SUPPORT_UCP
                   2183:           if (md->use_ucp)
                   2184:             {
                   2185:             if (c == '_') prev_is_word = TRUE; else
                   2186:               {
                   2187:               int cat = UCD_CATEGORY(c);
                   2188:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2189:               }
                   2190:             }
                   2191:           else
                   2192: #endif
                   2193:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2194:           }
                   2195: 
                   2196:         /* Get status of next character */
                   2197: 
                   2198:         if (eptr >= md->end_subject)
                   2199:           {
                   2200:           SCHECK_PARTIAL();
                   2201:           cur_is_word = FALSE;
                   2202:           }
                   2203:         else
                   2204:           {
                   2205:           GETCHAR(c, eptr);
                   2206: #ifdef SUPPORT_UCP
                   2207:           if (md->use_ucp)
                   2208:             {
                   2209:             if (c == '_') cur_is_word = TRUE; else
                   2210:               {
                   2211:               int cat = UCD_CATEGORY(c);
                   2212:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2213:               }
                   2214:             }
                   2215:           else
                   2216: #endif
                   2217:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2218:           }
                   2219:         }
                   2220:       else
                   2221: #endif
                   2222: 
                   2223:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2224:       consistency with the behaviour of \w we do use it in this case. */
                   2225: 
                   2226:         {
                   2227:         /* Get status of previous character */
                   2228: 
                   2229:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2230:           {
                   2231:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2232: #ifdef SUPPORT_UCP
                   2233:           if (md->use_ucp)
                   2234:             {
                   2235:             c = eptr[-1];
                   2236:             if (c == '_') prev_is_word = TRUE; else
                   2237:               {
                   2238:               int cat = UCD_CATEGORY(c);
                   2239:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2240:               }
                   2241:             }
                   2242:           else
                   2243: #endif
1.1.1.2 ! misho    2244:           prev_is_word = MAX_255(eptr[-1])
        !          2245:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.1       misho    2246:           }
                   2247: 
                   2248:         /* Get status of next character */
                   2249: 
                   2250:         if (eptr >= md->end_subject)
                   2251:           {
                   2252:           SCHECK_PARTIAL();
                   2253:           cur_is_word = FALSE;
                   2254:           }
                   2255:         else
                   2256: #ifdef SUPPORT_UCP
                   2257:         if (md->use_ucp)
                   2258:           {
                   2259:           c = *eptr;
                   2260:           if (c == '_') cur_is_word = TRUE; else
                   2261:             {
                   2262:             int cat = UCD_CATEGORY(c);
                   2263:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2264:             }
                   2265:           }
                   2266:         else
                   2267: #endif
1.1.1.2 ! misho    2268:         cur_is_word = MAX_255(*eptr)
        !          2269:           && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misho    2270:         }
                   2271: 
                   2272:       /* Now see if the situation is what we want */
                   2273: 
                   2274:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2275:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.1.1.2 ! misho    2276:         RRETURN(MATCH_NOMATCH);
1.1       misho    2277:       }
                   2278:     break;
                   2279: 
1.1.1.2 ! misho    2280:     /* Match any single character type except newline; have to take care with
        !          2281:     CRLF newlines and partial matching. */
1.1       misho    2282: 
                   2283:     case OP_ANY:
1.1.1.2 ! misho    2284:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
        !          2285:     if (md->partial != 0 &&
        !          2286:         eptr + 1 >= md->end_subject &&
        !          2287:         NLBLOCK->nltype == NLTYPE_FIXED &&
        !          2288:         NLBLOCK->nllen == 2 &&
        !          2289:         RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
        !          2290:       {
        !          2291:       md->hitend = TRUE;
        !          2292:       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          2293:       }
        !          2294: 
1.1       misho    2295:     /* Fall through */
                   2296: 
1.1.1.2 ! misho    2297:     /* Match any single character whatsoever. */
        !          2298: 
1.1       misho    2299:     case OP_ALLANY:
1.1.1.2 ! misho    2300:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
        !          2301:       {                            /* not be updated before SCHECK_PARTIAL. */
1.1       misho    2302:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2303:       RRETURN(MATCH_NOMATCH);
1.1       misho    2304:       }
1.1.1.2 ! misho    2305:     eptr++;
        !          2306: #ifdef SUPPORT_UTF
        !          2307:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
        !          2308: #endif
1.1       misho    2309:     ecode++;
                   2310:     break;
                   2311: 
                   2312:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2313:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2314: 
                   2315:     case OP_ANYBYTE:
1.1.1.2 ! misho    2316:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
        !          2317:       {                            /* not be updated before SCHECK_PARTIAL. */
1.1       misho    2318:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2319:       RRETURN(MATCH_NOMATCH);
1.1       misho    2320:       }
1.1.1.2 ! misho    2321:     eptr++;
1.1       misho    2322:     ecode++;
                   2323:     break;
                   2324: 
                   2325:     case OP_NOT_DIGIT:
                   2326:     if (eptr >= md->end_subject)
                   2327:       {
                   2328:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2329:       RRETURN(MATCH_NOMATCH);
1.1       misho    2330:       }
                   2331:     GETCHARINCTEST(c, eptr);
                   2332:     if (
1.1.1.2 ! misho    2333: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2334:        c < 256 &&
                   2335: #endif
                   2336:        (md->ctypes[c] & ctype_digit) != 0
                   2337:        )
1.1.1.2 ! misho    2338:       RRETURN(MATCH_NOMATCH);
1.1       misho    2339:     ecode++;
                   2340:     break;
                   2341: 
                   2342:     case OP_DIGIT:
                   2343:     if (eptr >= md->end_subject)
                   2344:       {
                   2345:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2346:       RRETURN(MATCH_NOMATCH);
1.1       misho    2347:       }
                   2348:     GETCHARINCTEST(c, eptr);
                   2349:     if (
1.1.1.2 ! misho    2350: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2351:        c > 255 ||
1.1       misho    2352: #endif
                   2353:        (md->ctypes[c] & ctype_digit) == 0
                   2354:        )
1.1.1.2 ! misho    2355:       RRETURN(MATCH_NOMATCH);
1.1       misho    2356:     ecode++;
                   2357:     break;
                   2358: 
                   2359:     case OP_NOT_WHITESPACE:
                   2360:     if (eptr >= md->end_subject)
                   2361:       {
                   2362:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2363:       RRETURN(MATCH_NOMATCH);
1.1       misho    2364:       }
                   2365:     GETCHARINCTEST(c, eptr);
                   2366:     if (
1.1.1.2 ! misho    2367: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2368:        c < 256 &&
                   2369: #endif
                   2370:        (md->ctypes[c] & ctype_space) != 0
                   2371:        )
1.1.1.2 ! misho    2372:       RRETURN(MATCH_NOMATCH);
1.1       misho    2373:     ecode++;
                   2374:     break;
                   2375: 
                   2376:     case OP_WHITESPACE:
                   2377:     if (eptr >= md->end_subject)
                   2378:       {
                   2379:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2380:       RRETURN(MATCH_NOMATCH);
1.1       misho    2381:       }
                   2382:     GETCHARINCTEST(c, eptr);
                   2383:     if (
1.1.1.2 ! misho    2384: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2385:        c > 255 ||
1.1       misho    2386: #endif
                   2387:        (md->ctypes[c] & ctype_space) == 0
                   2388:        )
1.1.1.2 ! misho    2389:       RRETURN(MATCH_NOMATCH);
1.1       misho    2390:     ecode++;
                   2391:     break;
                   2392: 
                   2393:     case OP_NOT_WORDCHAR:
                   2394:     if (eptr >= md->end_subject)
                   2395:       {
                   2396:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2397:       RRETURN(MATCH_NOMATCH);
1.1       misho    2398:       }
                   2399:     GETCHARINCTEST(c, eptr);
                   2400:     if (
1.1.1.2 ! misho    2401: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misho    2402:        c < 256 &&
                   2403: #endif
                   2404:        (md->ctypes[c] & ctype_word) != 0
                   2405:        )
1.1.1.2 ! misho    2406:       RRETURN(MATCH_NOMATCH);
1.1       misho    2407:     ecode++;
                   2408:     break;
                   2409: 
                   2410:     case OP_WORDCHAR:
                   2411:     if (eptr >= md->end_subject)
                   2412:       {
                   2413:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2414:       RRETURN(MATCH_NOMATCH);
1.1       misho    2415:       }
                   2416:     GETCHARINCTEST(c, eptr);
                   2417:     if (
1.1.1.2 ! misho    2418: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2419:        c > 255 ||
1.1       misho    2420: #endif
                   2421:        (md->ctypes[c] & ctype_word) == 0
                   2422:        )
1.1.1.2 ! misho    2423:       RRETURN(MATCH_NOMATCH);
1.1       misho    2424:     ecode++;
                   2425:     break;
                   2426: 
                   2427:     case OP_ANYNL:
                   2428:     if (eptr >= md->end_subject)
                   2429:       {
                   2430:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2431:       RRETURN(MATCH_NOMATCH);
1.1       misho    2432:       }
                   2433:     GETCHARINCTEST(c, eptr);
                   2434:     switch(c)
                   2435:       {
1.1.1.2 ! misho    2436:       default: RRETURN(MATCH_NOMATCH);
        !          2437: 
        !          2438:       case CHAR_CR:
        !          2439:       if (eptr >= md->end_subject)
        !          2440:         {
        !          2441:         SCHECK_PARTIAL();
        !          2442:         }
        !          2443:       else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
1.1       misho    2444:       break;
                   2445: 
1.1.1.2 ! misho    2446:       case CHAR_LF:
1.1       misho    2447:       break;
                   2448: 
1.1.1.2 ! misho    2449:       case CHAR_VT:
        !          2450:       case CHAR_FF:
        !          2451:       case CHAR_NEL:
        !          2452: #ifndef EBCDIC
1.1       misho    2453:       case 0x2028:
                   2454:       case 0x2029:
1.1.1.2 ! misho    2455: #endif  /* Not EBCDIC */
        !          2456:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misho    2457:       break;
                   2458:       }
                   2459:     ecode++;
                   2460:     break;
                   2461: 
                   2462:     case OP_NOT_HSPACE:
                   2463:     if (eptr >= md->end_subject)
                   2464:       {
                   2465:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2466:       RRETURN(MATCH_NOMATCH);
1.1       misho    2467:       }
                   2468:     GETCHARINCTEST(c, eptr);
                   2469:     switch(c)
                   2470:       {
1.1.1.2 ! misho    2471:       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misho    2472:       default: break;
                   2473:       }
                   2474:     ecode++;
                   2475:     break;
                   2476: 
                   2477:     case OP_HSPACE:
                   2478:     if (eptr >= md->end_subject)
                   2479:       {
                   2480:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2481:       RRETURN(MATCH_NOMATCH);
1.1       misho    2482:       }
                   2483:     GETCHARINCTEST(c, eptr);
                   2484:     switch(c)
                   2485:       {
1.1.1.2 ! misho    2486:       HSPACE_CASES: break;  /* Byte and multibyte cases */
        !          2487:       default: RRETURN(MATCH_NOMATCH);
1.1       misho    2488:       }
                   2489:     ecode++;
                   2490:     break;
                   2491: 
                   2492:     case OP_NOT_VSPACE:
                   2493:     if (eptr >= md->end_subject)
                   2494:       {
                   2495:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2496:       RRETURN(MATCH_NOMATCH);
1.1       misho    2497:       }
                   2498:     GETCHARINCTEST(c, eptr);
                   2499:     switch(c)
                   2500:       {
1.1.1.2 ! misho    2501:       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    2502:       default: break;
                   2503:       }
                   2504:     ecode++;
                   2505:     break;
                   2506: 
                   2507:     case OP_VSPACE:
                   2508:     if (eptr >= md->end_subject)
                   2509:       {
                   2510:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2511:       RRETURN(MATCH_NOMATCH);
1.1       misho    2512:       }
                   2513:     GETCHARINCTEST(c, eptr);
                   2514:     switch(c)
                   2515:       {
1.1.1.2 ! misho    2516:       VSPACE_CASES: break;
        !          2517:       default: RRETURN(MATCH_NOMATCH);
1.1       misho    2518:       }
                   2519:     ecode++;
                   2520:     break;
                   2521: 
                   2522: #ifdef SUPPORT_UCP
                   2523:     /* Check the next character by Unicode property. We will get here only
                   2524:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2525: 
                   2526:     case OP_PROP:
                   2527:     case OP_NOTPROP:
                   2528:     if (eptr >= md->end_subject)
                   2529:       {
                   2530:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2531:       RRETURN(MATCH_NOMATCH);
1.1       misho    2532:       }
                   2533:     GETCHARINCTEST(c, eptr);
                   2534:       {
1.1.1.2 ! misho    2535:       const pcre_uint32 *cp;
1.1       misho    2536:       const ucd_record *prop = GET_UCD(c);
                   2537: 
                   2538:       switch(ecode[1])
                   2539:         {
                   2540:         case PT_ANY:
1.1.1.2 ! misho    2541:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1.1       misho    2542:         break;
                   2543: 
                   2544:         case PT_LAMP:
                   2545:         if ((prop->chartype == ucp_Lu ||
                   2546:              prop->chartype == ucp_Ll ||
                   2547:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.1.1.2 ! misho    2548:           RRETURN(MATCH_NOMATCH);
1.1       misho    2549:         break;
                   2550: 
                   2551:         case PT_GC:
1.1.1.2 ! misho    2552:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
        !          2553:           RRETURN(MATCH_NOMATCH);
1.1       misho    2554:         break;
                   2555: 
                   2556:         case PT_PC:
                   2557:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.1.1.2 ! misho    2558:           RRETURN(MATCH_NOMATCH);
1.1       misho    2559:         break;
                   2560: 
                   2561:         case PT_SC:
                   2562:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.1.1.2 ! misho    2563:           RRETURN(MATCH_NOMATCH);
1.1       misho    2564:         break;
                   2565: 
                   2566:         /* These are specials */
                   2567: 
                   2568:         case PT_ALNUM:
1.1.1.2 ! misho    2569:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
        !          2570:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
        !          2571:           RRETURN(MATCH_NOMATCH);
1.1       misho    2572:         break;
                   2573: 
                   2574:         case PT_SPACE:    /* Perl space */
1.1.1.2 ! misho    2575:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2576:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2577:                == (op == OP_NOTPROP))
1.1.1.2 ! misho    2578:           RRETURN(MATCH_NOMATCH);
1.1       misho    2579:         break;
                   2580: 
                   2581:         case PT_PXSPACE:  /* POSIX space */
1.1.1.2 ! misho    2582:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1       misho    2583:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2584:              c == CHAR_FF || c == CHAR_CR)
                   2585:                == (op == OP_NOTPROP))
1.1.1.2 ! misho    2586:           RRETURN(MATCH_NOMATCH);
1.1       misho    2587:         break;
                   2588: 
                   2589:         case PT_WORD:
1.1.1.2 ! misho    2590:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
        !          2591:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1       misho    2592:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
1.1.1.2 ! misho    2593:           RRETURN(MATCH_NOMATCH);
        !          2594:         break;
        !          2595: 
        !          2596:         case PT_CLIST:
        !          2597:         cp = PRIV(ucd_caseless_sets) + ecode[2];
        !          2598:         for (;;)
        !          2599:           {
        !          2600:           if (c < *cp)
        !          2601:             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
        !          2602:           if (c == *cp++)
        !          2603:             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
        !          2604:           }
1.1       misho    2605:         break;
                   2606: 
                   2607:         /* This should never occur */
                   2608: 
                   2609:         default:
                   2610:         RRETURN(PCRE_ERROR_INTERNAL);
                   2611:         }
                   2612: 
                   2613:       ecode += 3;
                   2614:       }
                   2615:     break;
                   2616: 
                   2617:     /* Match an extended Unicode sequence. We will get here only if the support
                   2618:     is in the binary; otherwise a compile-time error occurs. */
                   2619: 
                   2620:     case OP_EXTUNI:
                   2621:     if (eptr >= md->end_subject)
                   2622:       {
                   2623:       SCHECK_PARTIAL();
1.1.1.2 ! misho    2624:       RRETURN(MATCH_NOMATCH);
1.1       misho    2625:       }
1.1.1.2 ! misho    2626:     else
1.1       misho    2627:       {
1.1.1.2 ! misho    2628:       int lgb, rgb;
        !          2629:       GETCHARINCTEST(c, eptr);
        !          2630:       lgb = UCD_GRAPHBREAK(c);
1.1       misho    2631:       while (eptr < md->end_subject)
                   2632:         {
                   2633:         int len = 1;
1.1.1.2 ! misho    2634:         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          2635:         rgb = UCD_GRAPHBREAK(c);
        !          2636:         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          2637:         lgb = rgb;
1.1       misho    2638:         eptr += len;
                   2639:         }
                   2640:       }
1.1.1.2 ! misho    2641:     CHECK_PARTIAL();
1.1       misho    2642:     ecode++;
                   2643:     break;
1.1.1.2 ! misho    2644: #endif  /* SUPPORT_UCP */
1.1       misho    2645: 
                   2646: 
                   2647:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2648:     item to see if there is repeat information following. The code is similar
                   2649:     to that for character classes, but repeated for efficiency. Then obey
                   2650:     similar code to character type repeats - written out again for speed.
                   2651:     However, if the referenced string is the empty string, always treat
                   2652:     it as matched, any number of times (otherwise there could be infinite
                   2653:     loops). */
                   2654: 
                   2655:     case OP_REF:
1.1.1.2 ! misho    2656:     case OP_REFI:
        !          2657:     caseless = op == OP_REFI;
        !          2658:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
        !          2659:     ecode += 1 + IMM2_SIZE;
1.1       misho    2660: 
1.1.1.2 ! misho    2661:     /* If the reference is unset, there are two possibilities:
1.1       misho    2662: 
1.1.1.2 ! misho    2663:     (a) In the default, Perl-compatible state, set the length negative;
        !          2664:     this ensures that every attempt at a match fails. We can't just fail
        !          2665:     here, because of the possibility of quantifiers with zero minima.
1.1       misho    2666: 
1.1.1.2 ! misho    2667:     (b) If the JavaScript compatibility flag is set, set the length to zero
        !          2668:     so that the back reference matches an empty string.
1.1       misho    2669: 
1.1.1.2 ! misho    2670:     Otherwise, set the length to the length of what was matched by the
        !          2671:     referenced subpattern. */
1.1       misho    2672: 
1.1.1.2 ! misho    2673:     if (offset >= offset_top || md->offset_vector[offset] < 0)
        !          2674:       length = (md->jscript_compat)? 0 : -1;
        !          2675:     else
        !          2676:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
1.1       misho    2677: 
1.1.1.2 ! misho    2678:     /* Set up for repetition, or handle the non-repeated case */
1.1       misho    2679: 
1.1.1.2 ! misho    2680:     switch (*ecode)
        !          2681:       {
        !          2682:       case OP_CRSTAR:
        !          2683:       case OP_CRMINSTAR:
        !          2684:       case OP_CRPLUS:
        !          2685:       case OP_CRMINPLUS:
        !          2686:       case OP_CRQUERY:
        !          2687:       case OP_CRMINQUERY:
        !          2688:       c = *ecode++ - OP_CRSTAR;
        !          2689:       minimize = (c & 1) != 0;
        !          2690:       min = rep_min[c];                 /* Pick up values from tables; */
        !          2691:       max = rep_max[c];                 /* zero for max => infinity */
        !          2692:       if (max == 0) max = INT_MAX;
        !          2693:       break;
1.1       misho    2694: 
1.1.1.2 ! misho    2695:       case OP_CRRANGE:
        !          2696:       case OP_CRMINRANGE:
        !          2697:       minimize = (*ecode == OP_CRMINRANGE);
        !          2698:       min = GET2(ecode, 1);
        !          2699:       max = GET2(ecode, 1 + IMM2_SIZE);
        !          2700:       if (max == 0) max = INT_MAX;
        !          2701:       ecode += 1 + 2 * IMM2_SIZE;
        !          2702:       break;
1.1       misho    2703: 
1.1.1.2 ! misho    2704:       default:               /* No repeat follows */
        !          2705:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
        !          2706:         {
        !          2707:         if (length == -2) eptr = md->end_subject;   /* Partial match */
        !          2708:         CHECK_PARTIAL();
        !          2709:         RRETURN(MATCH_NOMATCH);
1.1       misho    2710:         }
1.1.1.2 ! misho    2711:       eptr += length;
        !          2712:       continue;              /* With the main loop */
        !          2713:       }
1.1       misho    2714: 
1.1.1.2 ! misho    2715:     /* Handle repeated back references. If the length of the reference is
        !          2716:     zero, just continue with the main loop. If the length is negative, it
        !          2717:     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
        !          2718:     zero, we can continue at the same level without recursion. For any other
        !          2719:     minimum, carrying on will result in NOMATCH. */
1.1       misho    2720: 
1.1.1.2 ! misho    2721:     if (length == 0) continue;
        !          2722:     if (length < 0 && min == 0) continue;
1.1       misho    2723: 
1.1.1.2 ! misho    2724:     /* First, ensure the minimum number of matches are present. We get back
        !          2725:     the length of the reference string explicitly rather than passing the
        !          2726:     address of eptr, so that eptr can be a register variable. */
1.1       misho    2727: 
1.1.1.2 ! misho    2728:     for (i = 1; i <= min; i++)
        !          2729:       {
        !          2730:       int slength;
        !          2731:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misho    2732:         {
1.1.1.2 ! misho    2733:         if (slength == -2) eptr = md->end_subject;   /* Partial match */
        !          2734:         CHECK_PARTIAL();
        !          2735:         RRETURN(MATCH_NOMATCH);
1.1       misho    2736:         }
1.1.1.2 ! misho    2737:       eptr += slength;
        !          2738:       }
1.1       misho    2739: 
1.1.1.2 ! misho    2740:     /* If min = max, continue at the same level without recursion.
        !          2741:     They are not both allowed to be zero. */
1.1       misho    2742: 
1.1.1.2 ! misho    2743:     if (min == max) continue;
1.1       misho    2744: 
1.1.1.2 ! misho    2745:     /* If minimizing, keep trying and advancing the pointer */
1.1       misho    2746: 
1.1.1.2 ! misho    2747:     if (minimize)
        !          2748:       {
        !          2749:       for (fi = min;; fi++)
1.1       misho    2750:         {
1.1.1.2 ! misho    2751:         int slength;
        !          2752:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
        !          2753:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2754:         if (fi >= max) RRETURN(MATCH_NOMATCH);
        !          2755:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misho    2756:           {
1.1.1.2 ! misho    2757:           if (slength == -2) eptr = md->end_subject;   /* Partial match */
        !          2758:           CHECK_PARTIAL();
        !          2759:           RRETURN(MATCH_NOMATCH);
1.1       misho    2760:           }
1.1.1.2 ! misho    2761:         eptr += slength;
1.1       misho    2762:         }
1.1.1.2 ! misho    2763:       /* Control never gets here */
        !          2764:       }
1.1       misho    2765: 
1.1.1.2 ! misho    2766:     /* If maximizing, find the longest string and work backwards */
1.1       misho    2767: 
1.1.1.2 ! misho    2768:     else
        !          2769:       {
        !          2770:       pp = eptr;
        !          2771:       for (i = min; i < max; i++)
1.1       misho    2772:         {
1.1.1.2 ! misho    2773:         int slength;
        !          2774:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misho    2775:           {
1.1.1.2 ! misho    2776:           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
        !          2777:           the soft partial matching case. */
        !          2778: 
        !          2779:           if (slength == -2 && md->partial != 0 &&
        !          2780:               md->end_subject > md->start_used_ptr)
1.1       misho    2781:             {
1.1.1.2 ! misho    2782:             md->hitend = TRUE;
        !          2783:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1       misho    2784:             }
1.1.1.2 ! misho    2785:           break;
1.1       misho    2786:           }
1.1.1.2 ! misho    2787:         eptr += slength;
        !          2788:         }
        !          2789: 
        !          2790:       while (eptr >= pp)
        !          2791:         {
        !          2792:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
        !          2793:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2794:         eptr -= length;
1.1       misho    2795:         }
1.1.1.2 ! misho    2796:       RRETURN(MATCH_NOMATCH);
1.1       misho    2797:       }
                   2798:     /* Control never gets here */
                   2799: 
                   2800:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2801:     used when all the characters in the class have values in the range 0-255,
                   2802:     and either the matching is caseful, or the characters are in the range
                   2803:     0-127 when UTF-8 processing is enabled. The only difference between
                   2804:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2805:     encountered.
                   2806: 
                   2807:     First, look past the end of the item to see if there is repeat information
                   2808:     following. Then obey similar code to character type repeats - written out
                   2809:     again for speed. */
                   2810: 
                   2811:     case OP_NCLASS:
                   2812:     case OP_CLASS:
                   2813:       {
1.1.1.2 ! misho    2814:       /* The data variable is saved across frames, so the byte map needs to
        !          2815:       be stored there. */
        !          2816: #define BYTE_MAP ((pcre_uint8 *)data)
1.1       misho    2817:       data = ecode + 1;                /* Save for matching */
1.1.1.2 ! misho    2818:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1       misho    2819: 
                   2820:       switch (*ecode)
                   2821:         {
                   2822:         case OP_CRSTAR:
                   2823:         case OP_CRMINSTAR:
                   2824:         case OP_CRPLUS:
                   2825:         case OP_CRMINPLUS:
                   2826:         case OP_CRQUERY:
                   2827:         case OP_CRMINQUERY:
                   2828:         c = *ecode++ - OP_CRSTAR;
                   2829:         minimize = (c & 1) != 0;
                   2830:         min = rep_min[c];                 /* Pick up values from tables; */
                   2831:         max = rep_max[c];                 /* zero for max => infinity */
                   2832:         if (max == 0) max = INT_MAX;
                   2833:         break;
                   2834: 
                   2835:         case OP_CRRANGE:
                   2836:         case OP_CRMINRANGE:
                   2837:         minimize = (*ecode == OP_CRMINRANGE);
                   2838:         min = GET2(ecode, 1);
1.1.1.2 ! misho    2839:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    2840:         if (max == 0) max = INT_MAX;
1.1.1.2 ! misho    2841:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    2842:         break;
                   2843: 
                   2844:         default:               /* No repeat follows */
                   2845:         min = max = 1;
                   2846:         break;
                   2847:         }
                   2848: 
                   2849:       /* First, ensure the minimum number of matches are present. */
                   2850: 
1.1.1.2 ! misho    2851: #ifdef SUPPORT_UTF
        !          2852:       if (utf)
1.1       misho    2853:         {
                   2854:         for (i = 1; i <= min; i++)
                   2855:           {
                   2856:           if (eptr >= md->end_subject)
                   2857:             {
                   2858:             SCHECK_PARTIAL();
1.1.1.2 ! misho    2859:             RRETURN(MATCH_NOMATCH);
1.1       misho    2860:             }
                   2861:           GETCHARINC(c, eptr);
                   2862:           if (c > 255)
                   2863:             {
1.1.1.2 ! misho    2864:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misho    2865:             }
                   2866:           else
1.1.1.2 ! misho    2867:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2868:           }
                   2869:         }
                   2870:       else
                   2871: #endif
1.1.1.2 ! misho    2872:       /* Not UTF mode */
1.1       misho    2873:         {
                   2874:         for (i = 1; i <= min; i++)
                   2875:           {
                   2876:           if (eptr >= md->end_subject)
                   2877:             {
                   2878:             SCHECK_PARTIAL();
1.1.1.2 ! misho    2879:             RRETURN(MATCH_NOMATCH);
1.1       misho    2880:             }
                   2881:           c = *eptr++;
1.1.1.2 ! misho    2882: #ifndef COMPILE_PCRE8
        !          2883:           if (c > 255)
        !          2884:             {
        !          2885:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
        !          2886:             }
        !          2887:           else
        !          2888: #endif
        !          2889:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2890:           }
                   2891:         }
                   2892: 
                   2893:       /* If max == min we can continue with the main loop without the
                   2894:       need to recurse. */
                   2895: 
                   2896:       if (min == max) continue;
                   2897: 
                   2898:       /* If minimizing, keep testing the rest of the expression and advancing
                   2899:       the pointer while it matches the class. */
                   2900: 
                   2901:       if (minimize)
                   2902:         {
1.1.1.2 ! misho    2903: #ifdef SUPPORT_UTF
        !          2904:         if (utf)
1.1       misho    2905:           {
                   2906:           for (fi = min;; fi++)
                   2907:             {
1.1.1.2 ! misho    2908:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
1.1       misho    2909:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    2910:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    2911:             if (eptr >= md->end_subject)
                   2912:               {
                   2913:               SCHECK_PARTIAL();
1.1.1.2 ! misho    2914:               RRETURN(MATCH_NOMATCH);
1.1       misho    2915:               }
                   2916:             GETCHARINC(c, eptr);
                   2917:             if (c > 255)
                   2918:               {
1.1.1.2 ! misho    2919:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misho    2920:               }
                   2921:             else
1.1.1.2 ! misho    2922:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2923:             }
                   2924:           }
                   2925:         else
                   2926: #endif
1.1.1.2 ! misho    2927:         /* Not UTF mode */
1.1       misho    2928:           {
                   2929:           for (fi = min;; fi++)
                   2930:             {
1.1.1.2 ! misho    2931:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
1.1       misho    2932:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    2933:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    2934:             if (eptr >= md->end_subject)
                   2935:               {
                   2936:               SCHECK_PARTIAL();
1.1.1.2 ! misho    2937:               RRETURN(MATCH_NOMATCH);
1.1       misho    2938:               }
                   2939:             c = *eptr++;
1.1.1.2 ! misho    2940: #ifndef COMPILE_PCRE8
        !          2941:             if (c > 255)
        !          2942:               {
        !          2943:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
        !          2944:               }
        !          2945:             else
        !          2946: #endif
        !          2947:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    2948:             }
                   2949:           }
                   2950:         /* Control never gets here */
                   2951:         }
                   2952: 
                   2953:       /* If maximizing, find the longest possible run, then work backwards. */
                   2954: 
                   2955:       else
                   2956:         {
                   2957:         pp = eptr;
                   2958: 
1.1.1.2 ! misho    2959: #ifdef SUPPORT_UTF
        !          2960:         if (utf)
1.1       misho    2961:           {
                   2962:           for (i = min; i < max; i++)
                   2963:             {
                   2964:             int len = 1;
                   2965:             if (eptr >= md->end_subject)
                   2966:               {
                   2967:               SCHECK_PARTIAL();
                   2968:               break;
                   2969:               }
                   2970:             GETCHARLEN(c, eptr, len);
                   2971:             if (c > 255)
                   2972:               {
                   2973:               if (op == OP_CLASS) break;
                   2974:               }
                   2975:             else
1.1.1.2 ! misho    2976:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    2977:             eptr += len;
                   2978:             }
                   2979:           for (;;)
                   2980:             {
1.1.1.2 ! misho    2981:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
1.1       misho    2982:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2983:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2984:             BACKCHAR(eptr);
                   2985:             }
                   2986:           }
                   2987:         else
                   2988: #endif
1.1.1.2 ! misho    2989:           /* Not UTF mode */
1.1       misho    2990:           {
                   2991:           for (i = min; i < max; i++)
                   2992:             {
                   2993:             if (eptr >= md->end_subject)
                   2994:               {
                   2995:               SCHECK_PARTIAL();
                   2996:               break;
                   2997:               }
                   2998:             c = *eptr;
1.1.1.2 ! misho    2999: #ifndef COMPILE_PCRE8
        !          3000:             if (c > 255)
        !          3001:               {
        !          3002:               if (op == OP_CLASS) break;
        !          3003:               }
        !          3004:             else
        !          3005: #endif
        !          3006:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misho    3007:             eptr++;
                   3008:             }
                   3009:           while (eptr >= pp)
                   3010:             {
1.1.1.2 ! misho    3011:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
1.1       misho    3012:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3013:             eptr--;
                   3014:             }
                   3015:           }
                   3016: 
1.1.1.2 ! misho    3017:         RRETURN(MATCH_NOMATCH);
1.1       misho    3018:         }
1.1.1.2 ! misho    3019: #undef BYTE_MAP
1.1       misho    3020:       }
                   3021:     /* Control never gets here */
                   3022: 
                   3023: 
                   3024:     /* Match an extended character class. This opcode is encountered only
                   3025:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   3026:     mode, because Unicode properties are supported in non-UTF-8 mode. */
                   3027: 
1.1.1.2 ! misho    3028: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1       misho    3029:     case OP_XCLASS:
                   3030:       {
                   3031:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   3032:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   3033: 
                   3034:       switch (*ecode)
                   3035:         {
                   3036:         case OP_CRSTAR:
                   3037:         case OP_CRMINSTAR:
                   3038:         case OP_CRPLUS:
                   3039:         case OP_CRMINPLUS:
                   3040:         case OP_CRQUERY:
                   3041:         case OP_CRMINQUERY:
                   3042:         c = *ecode++ - OP_CRSTAR;
                   3043:         minimize = (c & 1) != 0;
                   3044:         min = rep_min[c];                 /* Pick up values from tables; */
                   3045:         max = rep_max[c];                 /* zero for max => infinity */
                   3046:         if (max == 0) max = INT_MAX;
                   3047:         break;
                   3048: 
                   3049:         case OP_CRRANGE:
                   3050:         case OP_CRMINRANGE:
                   3051:         minimize = (*ecode == OP_CRMINRANGE);
                   3052:         min = GET2(ecode, 1);
1.1.1.2 ! misho    3053:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misho    3054:         if (max == 0) max = INT_MAX;
1.1.1.2 ! misho    3055:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misho    3056:         break;
                   3057: 
                   3058:         default:               /* No repeat follows */
                   3059:         min = max = 1;
                   3060:         break;
                   3061:         }
                   3062: 
                   3063:       /* First, ensure the minimum number of matches are present. */
                   3064: 
                   3065:       for (i = 1; i <= min; i++)
                   3066:         {
                   3067:         if (eptr >= md->end_subject)
                   3068:           {
                   3069:           SCHECK_PARTIAL();
1.1.1.2 ! misho    3070:           RRETURN(MATCH_NOMATCH);
1.1       misho    3071:           }
                   3072:         GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    3073:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3074:         }
                   3075: 
                   3076:       /* If max == min we can continue with the main loop without the
                   3077:       need to recurse. */
                   3078: 
                   3079:       if (min == max) continue;
                   3080: 
                   3081:       /* If minimizing, keep testing the rest of the expression and advancing
                   3082:       the pointer while it matches the class. */
                   3083: 
                   3084:       if (minimize)
                   3085:         {
                   3086:         for (fi = min;; fi++)
                   3087:           {
1.1.1.2 ! misho    3088:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
1.1       misho    3089:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3090:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3091:           if (eptr >= md->end_subject)
                   3092:             {
                   3093:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3094:             RRETURN(MATCH_NOMATCH);
1.1       misho    3095:             }
                   3096:           GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    3097:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misho    3098:           }
                   3099:         /* Control never gets here */
                   3100:         }
                   3101: 
                   3102:       /* If maximizing, find the longest possible run, then work backwards. */
                   3103: 
                   3104:       else
                   3105:         {
                   3106:         pp = eptr;
                   3107:         for (i = min; i < max; i++)
                   3108:           {
                   3109:           int len = 1;
                   3110:           if (eptr >= md->end_subject)
                   3111:             {
                   3112:             SCHECK_PARTIAL();
                   3113:             break;
                   3114:             }
1.1.1.2 ! misho    3115: #ifdef SUPPORT_UTF
1.1       misho    3116:           GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    3117: #else
        !          3118:           c = *eptr;
        !          3119: #endif
        !          3120:           if (!PRIV(xclass)(c, data, utf)) break;
1.1       misho    3121:           eptr += len;
                   3122:           }
                   3123:         for(;;)
                   3124:           {
1.1.1.2 ! misho    3125:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
1.1       misho    3126:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3127:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.1.1.2 ! misho    3128: #ifdef SUPPORT_UTF
        !          3129:           if (utf) BACKCHAR(eptr);
        !          3130: #endif
1.1       misho    3131:           }
1.1.1.2 ! misho    3132:         RRETURN(MATCH_NOMATCH);
1.1       misho    3133:         }
                   3134: 
                   3135:       /* Control never gets here */
                   3136:       }
                   3137: #endif    /* End of XCLASS */
                   3138: 
                   3139:     /* Match a single character, casefully */
                   3140: 
                   3141:     case OP_CHAR:
1.1.1.2 ! misho    3142: #ifdef SUPPORT_UTF
        !          3143:     if (utf)
1.1       misho    3144:       {
                   3145:       length = 1;
                   3146:       ecode++;
                   3147:       GETCHARLEN(fc, ecode, length);
                   3148:       if (length > md->end_subject - eptr)
                   3149:         {
                   3150:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1.1.1.2 ! misho    3151:         RRETURN(MATCH_NOMATCH);
1.1       misho    3152:         }
1.1.1.2 ! misho    3153:       while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3154:       }
                   3155:     else
                   3156: #endif
1.1.1.2 ! misho    3157:     /* Not UTF mode */
1.1       misho    3158:       {
                   3159:       if (md->end_subject - eptr < 1)
                   3160:         {
                   3161:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1.1.1.2 ! misho    3162:         RRETURN(MATCH_NOMATCH);
1.1       misho    3163:         }
1.1.1.2 ! misho    3164:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misho    3165:       ecode += 2;
                   3166:       }
                   3167:     break;
                   3168: 
1.1.1.2 ! misho    3169:     /* Match a single character, caselessly. If we are at the end of the
        !          3170:     subject, give up immediately. */
        !          3171: 
        !          3172:     case OP_CHARI:
        !          3173:     if (eptr >= md->end_subject)
        !          3174:       {
        !          3175:       SCHECK_PARTIAL();
        !          3176:       RRETURN(MATCH_NOMATCH);
        !          3177:       }
1.1       misho    3178: 
1.1.1.2 ! misho    3179: #ifdef SUPPORT_UTF
        !          3180:     if (utf)
1.1       misho    3181:       {
                   3182:       length = 1;
                   3183:       ecode++;
                   3184:       GETCHARLEN(fc, ecode, length);
                   3185: 
                   3186:       /* If the pattern character's value is < 128, we have only one byte, and
1.1.1.2 ! misho    3187:       we know that its other case must also be one byte long, so we can use the
        !          3188:       fast lookup table. We know that there is at least one byte left in the
        !          3189:       subject. */
1.1       misho    3190: 
                   3191:       if (fc < 128)
                   3192:         {
1.1.1.2 ! misho    3193:         pcre_uchar cc = RAWUCHAR(eptr);
        !          3194:         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
        !          3195:         ecode++;
        !          3196:         eptr++;
1.1       misho    3197:         }
                   3198: 
1.1.1.2 ! misho    3199:       /* Otherwise we must pick up the subject character. Note that we cannot
        !          3200:       use the value of "length" to check for sufficient bytes left, because the
        !          3201:       other case of the character may have more or fewer bytes.  */
1.1       misho    3202: 
                   3203:       else
                   3204:         {
1.1.1.2 ! misho    3205:         pcre_uint32 dc;
1.1       misho    3206:         GETCHARINC(dc, eptr);
                   3207:         ecode += length;
                   3208: 
                   3209:         /* If we have Unicode property support, we can use it to test the other
                   3210:         case of the character, if there is one. */
                   3211: 
                   3212:         if (fc != dc)
                   3213:           {
                   3214: #ifdef SUPPORT_UCP
                   3215:           if (dc != UCD_OTHERCASE(fc))
                   3216: #endif
1.1.1.2 ! misho    3217:             RRETURN(MATCH_NOMATCH);
1.1       misho    3218:           }
                   3219:         }
                   3220:       }
                   3221:     else
1.1.1.2 ! misho    3222: #endif   /* SUPPORT_UTF */
1.1       misho    3223: 
1.1.1.2 ! misho    3224:     /* Not UTF mode */
1.1       misho    3225:       {
1.1.1.2 ! misho    3226:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
        !          3227:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
        !          3228:       eptr++;
1.1       misho    3229:       ecode += 2;
                   3230:       }
                   3231:     break;
                   3232: 
                   3233:     /* Match a single character repeatedly. */
                   3234: 
                   3235:     case OP_EXACT:
1.1.1.2 ! misho    3236:     case OP_EXACTI:
1.1       misho    3237:     min = max = GET2(ecode, 1);
1.1.1.2 ! misho    3238:     ecode += 1 + IMM2_SIZE;
1.1       misho    3239:     goto REPEATCHAR;
                   3240: 
                   3241:     case OP_POSUPTO:
1.1.1.2 ! misho    3242:     case OP_POSUPTOI:
1.1       misho    3243:     possessive = TRUE;
                   3244:     /* Fall through */
                   3245: 
                   3246:     case OP_UPTO:
1.1.1.2 ! misho    3247:     case OP_UPTOI:
1.1       misho    3248:     case OP_MINUPTO:
1.1.1.2 ! misho    3249:     case OP_MINUPTOI:
1.1       misho    3250:     min = 0;
                   3251:     max = GET2(ecode, 1);
1.1.1.2 ! misho    3252:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
        !          3253:     ecode += 1 + IMM2_SIZE;
1.1       misho    3254:     goto REPEATCHAR;
                   3255: 
                   3256:     case OP_POSSTAR:
1.1.1.2 ! misho    3257:     case OP_POSSTARI:
1.1       misho    3258:     possessive = TRUE;
                   3259:     min = 0;
                   3260:     max = INT_MAX;
                   3261:     ecode++;
                   3262:     goto REPEATCHAR;
                   3263: 
                   3264:     case OP_POSPLUS:
1.1.1.2 ! misho    3265:     case OP_POSPLUSI:
1.1       misho    3266:     possessive = TRUE;
                   3267:     min = 1;
                   3268:     max = INT_MAX;
                   3269:     ecode++;
                   3270:     goto REPEATCHAR;
                   3271: 
                   3272:     case OP_POSQUERY:
1.1.1.2 ! misho    3273:     case OP_POSQUERYI:
1.1       misho    3274:     possessive = TRUE;
                   3275:     min = 0;
                   3276:     max = 1;
                   3277:     ecode++;
                   3278:     goto REPEATCHAR;
                   3279: 
                   3280:     case OP_STAR:
1.1.1.2 ! misho    3281:     case OP_STARI:
1.1       misho    3282:     case OP_MINSTAR:
1.1.1.2 ! misho    3283:     case OP_MINSTARI:
1.1       misho    3284:     case OP_PLUS:
1.1.1.2 ! misho    3285:     case OP_PLUSI:
1.1       misho    3286:     case OP_MINPLUS:
1.1.1.2 ! misho    3287:     case OP_MINPLUSI:
1.1       misho    3288:     case OP_QUERY:
1.1.1.2 ! misho    3289:     case OP_QUERYI:
1.1       misho    3290:     case OP_MINQUERY:
1.1.1.2 ! misho    3291:     case OP_MINQUERYI:
        !          3292:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
1.1       misho    3293:     minimize = (c & 1) != 0;
                   3294:     min = rep_min[c];                 /* Pick up values from tables; */
                   3295:     max = rep_max[c];                 /* zero for max => infinity */
                   3296:     if (max == 0) max = INT_MAX;
                   3297: 
                   3298:     /* Common code for all repeated single-character matches. */
                   3299: 
                   3300:     REPEATCHAR:
1.1.1.2 ! misho    3301: #ifdef SUPPORT_UTF
        !          3302:     if (utf)
1.1       misho    3303:       {
                   3304:       length = 1;
                   3305:       charptr = ecode;
                   3306:       GETCHARLEN(fc, ecode, length);
                   3307:       ecode += length;
                   3308: 
                   3309:       /* Handle multibyte character matching specially here. There is
                   3310:       support for caseless matching if UCP support is present. */
                   3311: 
                   3312:       if (length > 1)
                   3313:         {
                   3314: #ifdef SUPPORT_UCP
1.1.1.2 ! misho    3315:         pcre_uint32 othercase;
        !          3316:         if (op >= OP_STARI &&     /* Caseless */
1.1       misho    3317:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1.1.2 ! misho    3318:           oclength = PRIV(ord2utf)(othercase, occhars);
1.1       misho    3319:         else oclength = 0;
                   3320: #endif  /* SUPPORT_UCP */
                   3321: 
                   3322:         for (i = 1; i <= min; i++)
                   3323:           {
                   3324:           if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho    3325:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3326: #ifdef SUPPORT_UCP
                   3327:           else if (oclength > 0 &&
                   3328:                    eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho    3329:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3330: #endif  /* SUPPORT_UCP */
                   3331:           else
                   3332:             {
                   3333:             CHECK_PARTIAL();
1.1.1.2 ! misho    3334:             RRETURN(MATCH_NOMATCH);
1.1       misho    3335:             }
                   3336:           }
                   3337: 
                   3338:         if (min == max) continue;
                   3339: 
                   3340:         if (minimize)
                   3341:           {
                   3342:           for (fi = min;; fi++)
                   3343:             {
1.1.1.2 ! misho    3344:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
1.1       misho    3345:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3346:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3347:             if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho    3348:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3349: #ifdef SUPPORT_UCP
                   3350:             else if (oclength > 0 &&
                   3351:                      eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho    3352:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3353: #endif  /* SUPPORT_UCP */
                   3354:             else
                   3355:               {
                   3356:               CHECK_PARTIAL();
1.1.1.2 ! misho    3357:               RRETURN(MATCH_NOMATCH);
1.1       misho    3358:               }
                   3359:             }
                   3360:           /* Control never gets here */
                   3361:           }
                   3362: 
                   3363:         else  /* Maximize */
                   3364:           {
                   3365:           pp = eptr;
                   3366:           for (i = min; i < max; i++)
                   3367:             {
                   3368:             if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho    3369:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misho    3370: #ifdef SUPPORT_UCP
                   3371:             else if (oclength > 0 &&
                   3372:                      eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho    3373:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1       misho    3374: #endif  /* SUPPORT_UCP */
                   3375:             else
                   3376:               {
                   3377:               CHECK_PARTIAL();
                   3378:               break;
                   3379:               }
                   3380:             }
                   3381: 
                   3382:           if (possessive) continue;
                   3383: 
                   3384:           for(;;)
                   3385:             {
1.1.1.2 ! misho    3386:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
1.1       misho    3387:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3388:             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
1.1       misho    3389: #ifdef SUPPORT_UCP
                   3390:             eptr--;
                   3391:             BACKCHAR(eptr);
                   3392: #else   /* without SUPPORT_UCP */
                   3393:             eptr -= length;
                   3394: #endif  /* SUPPORT_UCP */
                   3395:             }
                   3396:           }
                   3397:         /* Control never gets here */
                   3398:         }
                   3399: 
                   3400:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3401:       obey the code as for non-UTF-8 characters below, though in this case the
                   3402:       value of fc will always be < 128. */
                   3403:       }
                   3404:     else
1.1.1.2 ! misho    3405: #endif  /* SUPPORT_UTF */
        !          3406:       /* When not in UTF-8 mode, load a single-byte character. */
        !          3407:       fc = *ecode++;
1.1       misho    3408: 
1.1.1.2 ! misho    3409:     /* The value of fc at this point is always one character, though we may
        !          3410:     or may not be in UTF mode. The code is duplicated for the caseless and
1.1       misho    3411:     caseful cases, for speed, since matching characters is likely to be quite
                   3412:     common. First, ensure the minimum number of matches are present. If min =
                   3413:     max, continue at the same level without recursing. Otherwise, if
                   3414:     minimizing, keep trying the rest of the expression and advancing one
                   3415:     matching character if failing, up to the maximum. Alternatively, if
                   3416:     maximizing, find the maximum number of characters and work backwards. */
                   3417: 
                   3418:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.2 ! misho    3419:       max, (char *)eptr));
1.1       misho    3420: 
1.1.1.2 ! misho    3421:     if (op >= OP_STARI)  /* Caseless */
1.1       misho    3422:       {
1.1.1.2 ! misho    3423: #ifdef COMPILE_PCRE8
        !          3424:       /* fc must be < 128 if UTF is enabled. */
        !          3425:       foc = md->fcc[fc];
        !          3426: #else
        !          3427: #ifdef SUPPORT_UTF
        !          3428: #ifdef SUPPORT_UCP
        !          3429:       if (utf && fc > 127)
        !          3430:         foc = UCD_OTHERCASE(fc);
        !          3431: #else
        !          3432:       if (utf && fc > 127)
        !          3433:         foc = fc;
        !          3434: #endif /* SUPPORT_UCP */
        !          3435:       else
        !          3436: #endif /* SUPPORT_UTF */
        !          3437:         foc = TABLE_GET(fc, md->fcc, fc);
        !          3438: #endif /* COMPILE_PCRE8 */
        !          3439: 
1.1       misho    3440:       for (i = 1; i <= min; i++)
                   3441:         {
1.1.1.2 ! misho    3442:         pcre_uchar cc;
        !          3443: 
1.1       misho    3444:         if (eptr >= md->end_subject)
                   3445:           {
                   3446:           SCHECK_PARTIAL();
1.1.1.2 ! misho    3447:           RRETURN(MATCH_NOMATCH);
1.1       misho    3448:           }
1.1.1.2 ! misho    3449:         cc = RAWUCHARTEST(eptr);
        !          3450:         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
        !          3451:         eptr++;
1.1       misho    3452:         }
                   3453:       if (min == max) continue;
                   3454:       if (minimize)
                   3455:         {
                   3456:         for (fi = min;; fi++)
                   3457:           {
1.1.1.2 ! misho    3458:           pcre_uchar cc;
        !          3459: 
        !          3460:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
1.1       misho    3461:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3462:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3463:           if (eptr >= md->end_subject)
                   3464:             {
                   3465:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3466:             RRETURN(MATCH_NOMATCH);
1.1       misho    3467:             }
1.1.1.2 ! misho    3468:           cc = RAWUCHARTEST(eptr);
        !          3469:           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
        !          3470:           eptr++;
1.1       misho    3471:           }
                   3472:         /* Control never gets here */
                   3473:         }
                   3474:       else  /* Maximize */
                   3475:         {
                   3476:         pp = eptr;
                   3477:         for (i = min; i < max; i++)
                   3478:           {
1.1.1.2 ! misho    3479:           pcre_uchar cc;
        !          3480: 
1.1       misho    3481:           if (eptr >= md->end_subject)
                   3482:             {
                   3483:             SCHECK_PARTIAL();
                   3484:             break;
                   3485:             }
1.1.1.2 ! misho    3486:           cc = RAWUCHARTEST(eptr);
        !          3487:           if (fc != cc && foc != cc) break;
1.1       misho    3488:           eptr++;
                   3489:           }
                   3490: 
                   3491:         if (possessive) continue;
                   3492: 
                   3493:         while (eptr >= pp)
                   3494:           {
1.1.1.2 ! misho    3495:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
1.1       misho    3496:           eptr--;
                   3497:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3498:           }
1.1.1.2 ! misho    3499:         RRETURN(MATCH_NOMATCH);
1.1       misho    3500:         }
                   3501:       /* Control never gets here */
                   3502:       }
                   3503: 
                   3504:     /* Caseful comparisons (includes all multi-byte characters) */
                   3505: 
                   3506:     else
                   3507:       {
                   3508:       for (i = 1; i <= min; i++)
                   3509:         {
                   3510:         if (eptr >= md->end_subject)
                   3511:           {
                   3512:           SCHECK_PARTIAL();
1.1.1.2 ! misho    3513:           RRETURN(MATCH_NOMATCH);
1.1       misho    3514:           }
1.1.1.2 ! misho    3515:         if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3516:         }
                   3517: 
                   3518:       if (min == max) continue;
                   3519: 
                   3520:       if (minimize)
                   3521:         {
                   3522:         for (fi = min;; fi++)
                   3523:           {
1.1.1.2 ! misho    3524:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
1.1       misho    3525:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3526:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3527:           if (eptr >= md->end_subject)
                   3528:             {
                   3529:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3530:             RRETURN(MATCH_NOMATCH);
1.1       misho    3531:             }
1.1.1.2 ! misho    3532:           if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misho    3533:           }
                   3534:         /* Control never gets here */
                   3535:         }
                   3536:       else  /* Maximize */
                   3537:         {
                   3538:         pp = eptr;
                   3539:         for (i = min; i < max; i++)
                   3540:           {
                   3541:           if (eptr >= md->end_subject)
                   3542:             {
                   3543:             SCHECK_PARTIAL();
                   3544:             break;
                   3545:             }
1.1.1.2 ! misho    3546:           if (fc != RAWUCHARTEST(eptr)) break;
1.1       misho    3547:           eptr++;
                   3548:           }
                   3549:         if (possessive) continue;
                   3550: 
                   3551:         while (eptr >= pp)
                   3552:           {
1.1.1.2 ! misho    3553:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
1.1       misho    3554:           eptr--;
                   3555:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3556:           }
1.1.1.2 ! misho    3557:         RRETURN(MATCH_NOMATCH);
1.1       misho    3558:         }
                   3559:       }
                   3560:     /* Control never gets here */
                   3561: 
                   3562:     /* Match a negated single character. In UTF mode both the pattern
                   3563:     character and the subject character can be multibyte. */
                   3564: 
                   3565:     case OP_NOT:
1.1.1.2 ! misho    3566:     case OP_NOTI:
1.1       misho    3567:     if (eptr >= md->end_subject)
                   3568:       {
                   3569:       SCHECK_PARTIAL();
1.1.1.2 ! misho    3570:       RRETURN(MATCH_NOMATCH);
1.1       misho    3571:       }
1.1.1.2 ! misho    3572: #ifdef SUPPORT_UTF
        !          3573:     if (utf)
1.1       misho    3574:       {
1.1.1.2 ! misho    3575:       register pcre_uint32 ch, och;
        !          3576: 
        !          3577:       ecode++;
        !          3578:       GETCHARINC(ch, ecode);
        !          3579:       GETCHARINC(c, eptr);
        !          3580: 
        !          3581:       if (op == OP_NOT)
        !          3582:         {
        !          3583:         if (ch == c) RRETURN(MATCH_NOMATCH);
        !          3584:         }
        !          3585:       else
        !          3586:         {
        !          3587: #ifdef SUPPORT_UCP
        !          3588:         if (ch > 127)
        !          3589:           och = UCD_OTHERCASE(ch);
        !          3590: #else
        !          3591:         if (ch > 127)
        !          3592:           och = ch;
        !          3593: #endif /* SUPPORT_UCP */
        !          3594:         else
        !          3595:           och = TABLE_GET(ch, md->fcc, ch);
        !          3596:         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
        !          3597:         }
1.1       misho    3598:       }
                   3599:     else
1.1.1.2 ! misho    3600: #endif
1.1       misho    3601:       {
1.1.1.2 ! misho    3602:       register pcre_uint32 ch = ecode[1];
        !          3603:       c = *eptr++;
        !          3604:       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
        !          3605:         RRETURN(MATCH_NOMATCH);
        !          3606:       ecode += 2;
1.1       misho    3607:       }
                   3608:     break;
                   3609: 
                   3610:     /* Match a negated single character repeatedly. This is almost a
                   3611:     repeat of the code for a repeated single character, but I haven't found a
                   3612:     nice way of commoning these up that doesn't require a test of the
                   3613:     positive/negative option for each character match. Maybe that wouldn't add
                   3614:     very much to the time taken, but character matching *is* what this is all
                   3615:     about... */
                   3616: 
                   3617:     case OP_NOTEXACT:
1.1.1.2 ! misho    3618:     case OP_NOTEXACTI:
1.1       misho    3619:     min = max = GET2(ecode, 1);
1.1.1.2 ! misho    3620:     ecode += 1 + IMM2_SIZE;
1.1       misho    3621:     goto REPEATNOTCHAR;
                   3622: 
                   3623:     case OP_NOTUPTO:
1.1.1.2 ! misho    3624:     case OP_NOTUPTOI:
1.1       misho    3625:     case OP_NOTMINUPTO:
1.1.1.2 ! misho    3626:     case OP_NOTMINUPTOI:
1.1       misho    3627:     min = 0;
                   3628:     max = GET2(ecode, 1);
1.1.1.2 ! misho    3629:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
        !          3630:     ecode += 1 + IMM2_SIZE;
1.1       misho    3631:     goto REPEATNOTCHAR;
                   3632: 
                   3633:     case OP_NOTPOSSTAR:
1.1.1.2 ! misho    3634:     case OP_NOTPOSSTARI:
1.1       misho    3635:     possessive = TRUE;
                   3636:     min = 0;
                   3637:     max = INT_MAX;
                   3638:     ecode++;
                   3639:     goto REPEATNOTCHAR;
                   3640: 
                   3641:     case OP_NOTPOSPLUS:
1.1.1.2 ! misho    3642:     case OP_NOTPOSPLUSI:
1.1       misho    3643:     possessive = TRUE;
                   3644:     min = 1;
                   3645:     max = INT_MAX;
                   3646:     ecode++;
                   3647:     goto REPEATNOTCHAR;
                   3648: 
                   3649:     case OP_NOTPOSQUERY:
1.1.1.2 ! misho    3650:     case OP_NOTPOSQUERYI:
1.1       misho    3651:     possessive = TRUE;
                   3652:     min = 0;
                   3653:     max = 1;
                   3654:     ecode++;
                   3655:     goto REPEATNOTCHAR;
                   3656: 
                   3657:     case OP_NOTPOSUPTO:
1.1.1.2 ! misho    3658:     case OP_NOTPOSUPTOI:
1.1       misho    3659:     possessive = TRUE;
                   3660:     min = 0;
                   3661:     max = GET2(ecode, 1);
1.1.1.2 ! misho    3662:     ecode += 1 + IMM2_SIZE;
1.1       misho    3663:     goto REPEATNOTCHAR;
                   3664: 
                   3665:     case OP_NOTSTAR:
1.1.1.2 ! misho    3666:     case OP_NOTSTARI:
1.1       misho    3667:     case OP_NOTMINSTAR:
1.1.1.2 ! misho    3668:     case OP_NOTMINSTARI:
1.1       misho    3669:     case OP_NOTPLUS:
1.1.1.2 ! misho    3670:     case OP_NOTPLUSI:
1.1       misho    3671:     case OP_NOTMINPLUS:
1.1.1.2 ! misho    3672:     case OP_NOTMINPLUSI:
1.1       misho    3673:     case OP_NOTQUERY:
1.1.1.2 ! misho    3674:     case OP_NOTQUERYI:
1.1       misho    3675:     case OP_NOTMINQUERY:
1.1.1.2 ! misho    3676:     case OP_NOTMINQUERYI:
        !          3677:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1.1       misho    3678:     minimize = (c & 1) != 0;
                   3679:     min = rep_min[c];                 /* Pick up values from tables; */
                   3680:     max = rep_max[c];                 /* zero for max => infinity */
                   3681:     if (max == 0) max = INT_MAX;
                   3682: 
                   3683:     /* Common code for all repeated negated single-character matches. */
                   3684: 
                   3685:     REPEATNOTCHAR:
1.1.1.2 ! misho    3686:     GETCHARINCTEST(fc, ecode);
1.1       misho    3687: 
                   3688:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3689:     since matching characters is likely to be quite common. First, ensure the
                   3690:     minimum number of matches are present. If min = max, continue at the same
                   3691:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3692:     the expression and advancing one matching character if failing, up to the
                   3693:     maximum. Alternatively, if maximizing, find the maximum number of
                   3694:     characters and work backwards. */
                   3695: 
                   3696:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.2 ! misho    3697:       max, (char *)eptr));
1.1       misho    3698: 
1.1.1.2 ! misho    3699:     if (op >= OP_NOTSTARI)     /* Caseless */
1.1       misho    3700:       {
1.1.1.2 ! misho    3701: #ifdef SUPPORT_UTF
        !          3702: #ifdef SUPPORT_UCP
        !          3703:       if (utf && fc > 127)
        !          3704:         foc = UCD_OTHERCASE(fc);
        !          3705: #else
        !          3706:       if (utf && fc > 127)
        !          3707:         foc = fc;
        !          3708: #endif /* SUPPORT_UCP */
        !          3709:       else
        !          3710: #endif /* SUPPORT_UTF */
        !          3711:         foc = TABLE_GET(fc, md->fcc, fc);
1.1       misho    3712: 
1.1.1.2 ! misho    3713: #ifdef SUPPORT_UTF
        !          3714:       if (utf)
1.1       misho    3715:         {
1.1.1.2 ! misho    3716:         register pcre_uint32 d;
1.1       misho    3717:         for (i = 1; i <= min; i++)
                   3718:           {
                   3719:           if (eptr >= md->end_subject)
                   3720:             {
                   3721:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3722:             RRETURN(MATCH_NOMATCH);
1.1       misho    3723:             }
                   3724:           GETCHARINC(d, eptr);
1.1.1.2 ! misho    3725:           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3726:           }
                   3727:         }
                   3728:       else
                   3729: #endif
1.1.1.2 ! misho    3730:       /* Not UTF mode */
1.1       misho    3731:         {
                   3732:         for (i = 1; i <= min; i++)
                   3733:           {
                   3734:           if (eptr >= md->end_subject)
                   3735:             {
                   3736:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3737:             RRETURN(MATCH_NOMATCH);
1.1       misho    3738:             }
1.1.1.2 ! misho    3739:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
        !          3740:           eptr++;
1.1       misho    3741:           }
                   3742:         }
                   3743: 
                   3744:       if (min == max) continue;
                   3745: 
                   3746:       if (minimize)
                   3747:         {
1.1.1.2 ! misho    3748: #ifdef SUPPORT_UTF
        !          3749:         if (utf)
1.1       misho    3750:           {
1.1.1.2 ! misho    3751:           register pcre_uint32 d;
1.1       misho    3752:           for (fi = min;; fi++)
                   3753:             {
1.1.1.2 ! misho    3754:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
1.1       misho    3755:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3756:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3757:             if (eptr >= md->end_subject)
                   3758:               {
                   3759:               SCHECK_PARTIAL();
1.1.1.2 ! misho    3760:               RRETURN(MATCH_NOMATCH);
1.1       misho    3761:               }
                   3762:             GETCHARINC(d, eptr);
1.1.1.2 ! misho    3763:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3764:             }
                   3765:           }
                   3766:         else
                   3767: #endif
1.1.1.2 ! misho    3768:         /* Not UTF mode */
1.1       misho    3769:           {
                   3770:           for (fi = min;; fi++)
                   3771:             {
1.1.1.2 ! misho    3772:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
1.1       misho    3773:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3774:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3775:             if (eptr >= md->end_subject)
                   3776:               {
                   3777:               SCHECK_PARTIAL();
1.1.1.2 ! misho    3778:               RRETURN(MATCH_NOMATCH);
1.1       misho    3779:               }
1.1.1.2 ! misho    3780:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
        !          3781:             eptr++;
1.1       misho    3782:             }
                   3783:           }
                   3784:         /* Control never gets here */
                   3785:         }
                   3786: 
                   3787:       /* Maximize case */
                   3788: 
                   3789:       else
                   3790:         {
                   3791:         pp = eptr;
                   3792: 
1.1.1.2 ! misho    3793: #ifdef SUPPORT_UTF
        !          3794:         if (utf)
1.1       misho    3795:           {
1.1.1.2 ! misho    3796:           register pcre_uint32 d;
1.1       misho    3797:           for (i = min; i < max; i++)
                   3798:             {
                   3799:             int len = 1;
                   3800:             if (eptr >= md->end_subject)
                   3801:               {
                   3802:               SCHECK_PARTIAL();
                   3803:               break;
                   3804:               }
                   3805:             GETCHARLEN(d, eptr, len);
1.1.1.2 ! misho    3806:             if (fc == d || (unsigned int)foc == d) break;
1.1       misho    3807:             eptr += len;
                   3808:             }
1.1.1.2 ! misho    3809:           if (possessive) continue;
        !          3810:           for(;;)
1.1       misho    3811:             {
1.1.1.2 ! misho    3812:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
1.1       misho    3813:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3814:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3815:             BACKCHAR(eptr);
                   3816:             }
                   3817:           }
                   3818:         else
                   3819: #endif
1.1.1.2 ! misho    3820:         /* Not UTF mode */
1.1       misho    3821:           {
                   3822:           for (i = min; i < max; i++)
                   3823:             {
                   3824:             if (eptr >= md->end_subject)
                   3825:               {
                   3826:               SCHECK_PARTIAL();
                   3827:               break;
                   3828:               }
1.1.1.2 ! misho    3829:             if (fc == *eptr || foc == *eptr) break;
1.1       misho    3830:             eptr++;
                   3831:             }
                   3832:           if (possessive) continue;
                   3833:           while (eptr >= pp)
                   3834:             {
1.1.1.2 ! misho    3835:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
1.1       misho    3836:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3837:             eptr--;
                   3838:             }
                   3839:           }
                   3840: 
1.1.1.2 ! misho    3841:         RRETURN(MATCH_NOMATCH);
1.1       misho    3842:         }
                   3843:       /* Control never gets here */
                   3844:       }
                   3845: 
                   3846:     /* Caseful comparisons */
                   3847: 
                   3848:     else
                   3849:       {
1.1.1.2 ! misho    3850: #ifdef SUPPORT_UTF
        !          3851:       if (utf)
1.1       misho    3852:         {
1.1.1.2 ! misho    3853:         register pcre_uint32 d;
1.1       misho    3854:         for (i = 1; i <= min; i++)
                   3855:           {
                   3856:           if (eptr >= md->end_subject)
                   3857:             {
                   3858:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3859:             RRETURN(MATCH_NOMATCH);
1.1       misho    3860:             }
                   3861:           GETCHARINC(d, eptr);
1.1.1.2 ! misho    3862:           if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3863:           }
                   3864:         }
                   3865:       else
                   3866: #endif
1.1.1.2 ! misho    3867:       /* Not UTF mode */
1.1       misho    3868:         {
                   3869:         for (i = 1; i <= min; i++)
                   3870:           {
                   3871:           if (eptr >= md->end_subject)
                   3872:             {
                   3873:             SCHECK_PARTIAL();
1.1.1.2 ! misho    3874:             RRETURN(MATCH_NOMATCH);
1.1       misho    3875:             }
1.1.1.2 ! misho    3876:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misho    3877:           }
                   3878:         }
                   3879: 
                   3880:       if (min == max) continue;
                   3881: 
                   3882:       if (minimize)
                   3883:         {
1.1.1.2 ! misho    3884: #ifdef SUPPORT_UTF
        !          3885:         if (utf)
1.1       misho    3886:           {
1.1.1.2 ! misho    3887:           register pcre_uint32 d;
1.1       misho    3888:           for (fi = min;; fi++)
                   3889:             {
1.1.1.2 ! misho    3890:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
1.1       misho    3891:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3892:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3893:             if (eptr >= md->end_subject)
                   3894:               {
                   3895:               SCHECK_PARTIAL();
1.1.1.2 ! misho    3896:               RRETURN(MATCH_NOMATCH);
1.1       misho    3897:               }
                   3898:             GETCHARINC(d, eptr);
1.1.1.2 ! misho    3899:             if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misho    3900:             }
                   3901:           }
                   3902:         else
                   3903: #endif
1.1.1.2 ! misho    3904:         /* Not UTF mode */
1.1       misho    3905:           {
                   3906:           for (fi = min;; fi++)
                   3907:             {
1.1.1.2 ! misho    3908:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
1.1       misho    3909:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    3910:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    3911:             if (eptr >= md->end_subject)
                   3912:               {
                   3913:               SCHECK_PARTIAL();
1.1.1.2 ! misho    3914:               RRETURN(MATCH_NOMATCH);
1.1       misho    3915:               }
1.1.1.2 ! misho    3916:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misho    3917:             }
                   3918:           }
                   3919:         /* Control never gets here */
                   3920:         }
                   3921: 
                   3922:       /* Maximize case */
                   3923: 
                   3924:       else
                   3925:         {
                   3926:         pp = eptr;
                   3927: 
1.1.1.2 ! misho    3928: #ifdef SUPPORT_UTF
        !          3929:         if (utf)
1.1       misho    3930:           {
1.1.1.2 ! misho    3931:           register pcre_uint32 d;
1.1       misho    3932:           for (i = min; i < max; i++)
                   3933:             {
                   3934:             int len = 1;
                   3935:             if (eptr >= md->end_subject)
                   3936:               {
                   3937:               SCHECK_PARTIAL();
                   3938:               break;
                   3939:               }
                   3940:             GETCHARLEN(d, eptr, len);
                   3941:             if (fc == d) break;
                   3942:             eptr += len;
                   3943:             }
                   3944:           if (possessive) continue;
                   3945:           for(;;)
                   3946:             {
1.1.1.2 ! misho    3947:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
1.1       misho    3948:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3949:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3950:             BACKCHAR(eptr);
                   3951:             }
                   3952:           }
                   3953:         else
                   3954: #endif
1.1.1.2 ! misho    3955:         /* Not UTF mode */
1.1       misho    3956:           {
                   3957:           for (i = min; i < max; i++)
                   3958:             {
                   3959:             if (eptr >= md->end_subject)
                   3960:               {
                   3961:               SCHECK_PARTIAL();
                   3962:               break;
                   3963:               }
                   3964:             if (fc == *eptr) break;
                   3965:             eptr++;
                   3966:             }
                   3967:           if (possessive) continue;
                   3968:           while (eptr >= pp)
                   3969:             {
1.1.1.2 ! misho    3970:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
1.1       misho    3971:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3972:             eptr--;
                   3973:             }
                   3974:           }
                   3975: 
1.1.1.2 ! misho    3976:         RRETURN(MATCH_NOMATCH);
1.1       misho    3977:         }
                   3978:       }
                   3979:     /* Control never gets here */
                   3980: 
                   3981:     /* Match a single character type repeatedly; several different opcodes
                   3982:     share code. This is very similar to the code for single characters, but we
                   3983:     repeat it in the interests of efficiency. */
                   3984: 
                   3985:     case OP_TYPEEXACT:
                   3986:     min = max = GET2(ecode, 1);
                   3987:     minimize = TRUE;
1.1.1.2 ! misho    3988:     ecode += 1 + IMM2_SIZE;
1.1       misho    3989:     goto REPEATTYPE;
                   3990: 
                   3991:     case OP_TYPEUPTO:
                   3992:     case OP_TYPEMINUPTO:
                   3993:     min = 0;
                   3994:     max = GET2(ecode, 1);
                   3995:     minimize = *ecode == OP_TYPEMINUPTO;
1.1.1.2 ! misho    3996:     ecode += 1 + IMM2_SIZE;
1.1       misho    3997:     goto REPEATTYPE;
                   3998: 
                   3999:     case OP_TYPEPOSSTAR:
                   4000:     possessive = TRUE;
                   4001:     min = 0;
                   4002:     max = INT_MAX;
                   4003:     ecode++;
                   4004:     goto REPEATTYPE;
                   4005: 
                   4006:     case OP_TYPEPOSPLUS:
                   4007:     possessive = TRUE;
                   4008:     min = 1;
                   4009:     max = INT_MAX;
                   4010:     ecode++;
                   4011:     goto REPEATTYPE;
                   4012: 
                   4013:     case OP_TYPEPOSQUERY:
                   4014:     possessive = TRUE;
                   4015:     min = 0;
                   4016:     max = 1;
                   4017:     ecode++;
                   4018:     goto REPEATTYPE;
                   4019: 
                   4020:     case OP_TYPEPOSUPTO:
                   4021:     possessive = TRUE;
                   4022:     min = 0;
                   4023:     max = GET2(ecode, 1);
1.1.1.2 ! misho    4024:     ecode += 1 + IMM2_SIZE;
1.1       misho    4025:     goto REPEATTYPE;
                   4026: 
                   4027:     case OP_TYPESTAR:
                   4028:     case OP_TYPEMINSTAR:
                   4029:     case OP_TYPEPLUS:
                   4030:     case OP_TYPEMINPLUS:
                   4031:     case OP_TYPEQUERY:
                   4032:     case OP_TYPEMINQUERY:
                   4033:     c = *ecode++ - OP_TYPESTAR;
                   4034:     minimize = (c & 1) != 0;
                   4035:     min = rep_min[c];                 /* Pick up values from tables; */
                   4036:     max = rep_max[c];                 /* zero for max => infinity */
                   4037:     if (max == 0) max = INT_MAX;
                   4038: 
                   4039:     /* Common code for all repeated single character type matches. Note that
                   4040:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   4041:     character types, the valid characters are all one-byte long. */
                   4042: 
                   4043:     REPEATTYPE:
                   4044:     ctype = *ecode++;      /* Code for the character type */
                   4045: 
                   4046: #ifdef SUPPORT_UCP
                   4047:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   4048:       {
                   4049:       prop_fail_result = ctype == OP_NOTPROP;
                   4050:       prop_type = *ecode++;
                   4051:       prop_value = *ecode++;
                   4052:       }
                   4053:     else prop_type = -1;
                   4054: #endif
                   4055: 
                   4056:     /* First, ensure the minimum number of matches are present. Use inline
                   4057:     code for maximizing the speed, and do the type test once at the start
                   4058:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
                   4059:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   4060:     and single-bytes. */
                   4061: 
                   4062:     if (min > 0)
                   4063:       {
                   4064: #ifdef SUPPORT_UCP
                   4065:       if (prop_type >= 0)
                   4066:         {
                   4067:         switch(prop_type)
                   4068:           {
                   4069:           case PT_ANY:
1.1.1.2 ! misho    4070:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misho    4071:           for (i = 1; i <= min; i++)
                   4072:             {
                   4073:             if (eptr >= md->end_subject)
                   4074:               {
                   4075:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4076:               RRETURN(MATCH_NOMATCH);
1.1       misho    4077:               }
                   4078:             GETCHARINCTEST(c, eptr);
                   4079:             }
                   4080:           break;
                   4081: 
                   4082:           case PT_LAMP:
                   4083:           for (i = 1; i <= min; i++)
                   4084:             {
1.1.1.2 ! misho    4085:             int chartype;
1.1       misho    4086:             if (eptr >= md->end_subject)
                   4087:               {
                   4088:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4089:               RRETURN(MATCH_NOMATCH);
1.1       misho    4090:               }
                   4091:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4092:             chartype = UCD_CHARTYPE(c);
        !          4093:             if ((chartype == ucp_Lu ||
        !          4094:                  chartype == ucp_Ll ||
        !          4095:                  chartype == ucp_Lt) == prop_fail_result)
        !          4096:               RRETURN(MATCH_NOMATCH);
1.1       misho    4097:             }
                   4098:           break;
                   4099: 
                   4100:           case PT_GC:
                   4101:           for (i = 1; i <= min; i++)
                   4102:             {
                   4103:             if (eptr >= md->end_subject)
                   4104:               {
                   4105:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4106:               RRETURN(MATCH_NOMATCH);
1.1       misho    4107:               }
                   4108:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4109:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
        !          4110:               RRETURN(MATCH_NOMATCH);
1.1       misho    4111:             }
                   4112:           break;
                   4113: 
                   4114:           case PT_PC:
                   4115:           for (i = 1; i <= min; i++)
                   4116:             {
                   4117:             if (eptr >= md->end_subject)
                   4118:               {
                   4119:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4120:               RRETURN(MATCH_NOMATCH);
1.1       misho    4121:               }
                   4122:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4123:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
        !          4124:               RRETURN(MATCH_NOMATCH);
1.1       misho    4125:             }
                   4126:           break;
                   4127: 
                   4128:           case PT_SC:
                   4129:           for (i = 1; i <= min; i++)
                   4130:             {
                   4131:             if (eptr >= md->end_subject)
                   4132:               {
                   4133:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4134:               RRETURN(MATCH_NOMATCH);
1.1       misho    4135:               }
                   4136:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4137:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
        !          4138:               RRETURN(MATCH_NOMATCH);
1.1       misho    4139:             }
                   4140:           break;
                   4141: 
                   4142:           case PT_ALNUM:
                   4143:           for (i = 1; i <= min; i++)
                   4144:             {
1.1.1.2 ! misho    4145:             int category;
1.1       misho    4146:             if (eptr >= md->end_subject)
                   4147:               {
                   4148:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4149:               RRETURN(MATCH_NOMATCH);
1.1       misho    4150:               }
                   4151:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4152:             category = UCD_CATEGORY(c);
        !          4153:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
        !          4154:               RRETURN(MATCH_NOMATCH);
1.1       misho    4155:             }
                   4156:           break;
                   4157: 
                   4158:           case PT_SPACE:    /* Perl space */
                   4159:           for (i = 1; i <= min; i++)
                   4160:             {
                   4161:             if (eptr >= md->end_subject)
                   4162:               {
                   4163:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4164:               RRETURN(MATCH_NOMATCH);
1.1       misho    4165:               }
                   4166:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4167:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    4168:                  c == CHAR_FF || c == CHAR_CR)
                   4169:                    == prop_fail_result)
1.1.1.2 ! misho    4170:               RRETURN(MATCH_NOMATCH);
1.1       misho    4171:             }
                   4172:           break;
                   4173: 
                   4174:           case PT_PXSPACE:  /* POSIX space */
                   4175:           for (i = 1; i <= min; i++)
                   4176:             {
                   4177:             if (eptr >= md->end_subject)
                   4178:               {
                   4179:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4180:               RRETURN(MATCH_NOMATCH);
1.1       misho    4181:               }
                   4182:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4183:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    4184:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4185:                    == prop_fail_result)
1.1.1.2 ! misho    4186:               RRETURN(MATCH_NOMATCH);
1.1       misho    4187:             }
                   4188:           break;
                   4189: 
                   4190:           case PT_WORD:
                   4191:           for (i = 1; i <= min; i++)
                   4192:             {
1.1.1.2 ! misho    4193:             int category;
1.1       misho    4194:             if (eptr >= md->end_subject)
                   4195:               {
                   4196:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4197:               RRETURN(MATCH_NOMATCH);
1.1       misho    4198:               }
                   4199:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4200:             category = UCD_CATEGORY(c);
        !          4201:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
1.1       misho    4202:                    == prop_fail_result)
1.1.1.2 ! misho    4203:               RRETURN(MATCH_NOMATCH);
        !          4204:             }
        !          4205:           break;
        !          4206: 
        !          4207:           case PT_CLIST:
        !          4208:           for (i = 1; i <= min; i++)
        !          4209:             {
        !          4210:             const pcre_uint32 *cp;
        !          4211:             if (eptr >= md->end_subject)
        !          4212:               {
        !          4213:               SCHECK_PARTIAL();
        !          4214:               RRETURN(MATCH_NOMATCH);
        !          4215:               }
        !          4216:             GETCHARINCTEST(c, eptr);
        !          4217:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          4218:             for (;;)
        !          4219:               {
        !          4220:               if (c < *cp)
        !          4221:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
        !          4222:               if (c == *cp++)
        !          4223:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
        !          4224:               }
1.1       misho    4225:             }
                   4226:           break;
                   4227: 
                   4228:           /* This should not occur */
                   4229: 
                   4230:           default:
                   4231:           RRETURN(PCRE_ERROR_INTERNAL);
                   4232:           }
                   4233:         }
                   4234: 
                   4235:       /* Match extended Unicode sequences. We will get here only if the
                   4236:       support is in the binary; otherwise a compile-time error occurs. */
                   4237: 
                   4238:       else if (ctype == OP_EXTUNI)
                   4239:         {
                   4240:         for (i = 1; i <= min; i++)
                   4241:           {
                   4242:           if (eptr >= md->end_subject)
                   4243:             {
                   4244:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4245:             RRETURN(MATCH_NOMATCH);
1.1       misho    4246:             }
1.1.1.2 ! misho    4247:           else
1.1       misho    4248:             {
1.1.1.2 ! misho    4249:             int lgb, rgb;
        !          4250:             GETCHARINCTEST(c, eptr);
        !          4251:             lgb = UCD_GRAPHBREAK(c);
        !          4252:            while (eptr < md->end_subject)
        !          4253:               {
        !          4254:               int len = 1;
        !          4255:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          4256:               rgb = UCD_GRAPHBREAK(c);
        !          4257:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          4258:               lgb = rgb;
        !          4259:               eptr += len;
        !          4260:               }
1.1       misho    4261:             }
1.1.1.2 ! misho    4262:           CHECK_PARTIAL();
1.1       misho    4263:           }
                   4264:         }
                   4265: 
                   4266:       else
                   4267: #endif     /* SUPPORT_UCP */
                   4268: 
                   4269: /* Handle all other cases when the coding is UTF-8 */
                   4270: 
1.1.1.2 ! misho    4271: #ifdef SUPPORT_UTF
        !          4272:       if (utf) switch(ctype)
1.1       misho    4273:         {
                   4274:         case OP_ANY:
                   4275:         for (i = 1; i <= min; i++)
                   4276:           {
                   4277:           if (eptr >= md->end_subject)
                   4278:             {
                   4279:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4280:             RRETURN(MATCH_NOMATCH);
        !          4281:             }
        !          4282:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
        !          4283:           if (md->partial != 0 &&
        !          4284:               eptr + 1 >= md->end_subject &&
        !          4285:               NLBLOCK->nltype == NLTYPE_FIXED &&
        !          4286:               NLBLOCK->nllen == 2 &&
        !          4287:               RAWUCHAR(eptr) == NLBLOCK->nl[0])
        !          4288:             {
        !          4289:             md->hitend = TRUE;
        !          4290:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1       misho    4291:             }
                   4292:           eptr++;
1.1.1.2 ! misho    4293:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4294:           }
                   4295:         break;
                   4296: 
                   4297:         case OP_ALLANY:
                   4298:         for (i = 1; i <= min; i++)
                   4299:           {
                   4300:           if (eptr >= md->end_subject)
                   4301:             {
                   4302:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4303:             RRETURN(MATCH_NOMATCH);
1.1       misho    4304:             }
                   4305:           eptr++;
1.1.1.2 ! misho    4306:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4307:           }
                   4308:         break;
                   4309: 
                   4310:         case OP_ANYBYTE:
1.1.1.2 ! misho    4311:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
1.1       misho    4312:         eptr += min;
                   4313:         break;
                   4314: 
                   4315:         case OP_ANYNL:
                   4316:         for (i = 1; i <= min; i++)
                   4317:           {
                   4318:           if (eptr >= md->end_subject)
                   4319:             {
                   4320:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4321:             RRETURN(MATCH_NOMATCH);
1.1       misho    4322:             }
                   4323:           GETCHARINC(c, eptr);
                   4324:           switch(c)
                   4325:             {
1.1.1.2 ! misho    4326:             default: RRETURN(MATCH_NOMATCH);
        !          4327: 
        !          4328:             case CHAR_CR:
        !          4329:             if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    4330:             break;
                   4331: 
1.1.1.2 ! misho    4332:             case CHAR_LF:
1.1       misho    4333:             break;
                   4334: 
1.1.1.2 ! misho    4335:             case CHAR_VT:
        !          4336:             case CHAR_FF:
        !          4337:             case CHAR_NEL:
        !          4338: #ifndef EBCDIC
1.1       misho    4339:             case 0x2028:
                   4340:             case 0x2029:
1.1.1.2 ! misho    4341: #endif  /* Not EBCDIC */
        !          4342:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misho    4343:             break;
                   4344:             }
                   4345:           }
                   4346:         break;
                   4347: 
                   4348:         case OP_NOT_HSPACE:
                   4349:         for (i = 1; i <= min; i++)
                   4350:           {
                   4351:           if (eptr >= md->end_subject)
                   4352:             {
                   4353:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4354:             RRETURN(MATCH_NOMATCH);
1.1       misho    4355:             }
                   4356:           GETCHARINC(c, eptr);
                   4357:           switch(c)
                   4358:             {
1.1.1.2 ! misho    4359:             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misho    4360:             default: break;
                   4361:             }
                   4362:           }
                   4363:         break;
                   4364: 
                   4365:         case OP_HSPACE:
                   4366:         for (i = 1; i <= min; i++)
                   4367:           {
                   4368:           if (eptr >= md->end_subject)
                   4369:             {
                   4370:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4371:             RRETURN(MATCH_NOMATCH);
1.1       misho    4372:             }
                   4373:           GETCHARINC(c, eptr);
                   4374:           switch(c)
                   4375:             {
1.1.1.2 ! misho    4376:             HSPACE_CASES: break;  /* Byte and multibyte cases */
        !          4377:             default: RRETURN(MATCH_NOMATCH);
1.1       misho    4378:             }
                   4379:           }
                   4380:         break;
                   4381: 
                   4382:         case OP_NOT_VSPACE:
                   4383:         for (i = 1; i <= min; i++)
                   4384:           {
                   4385:           if (eptr >= md->end_subject)
                   4386:             {
                   4387:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4388:             RRETURN(MATCH_NOMATCH);
1.1       misho    4389:             }
                   4390:           GETCHARINC(c, eptr);
                   4391:           switch(c)
                   4392:             {
1.1.1.2 ! misho    4393:             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    4394:             default: break;
                   4395:             }
                   4396:           }
                   4397:         break;
                   4398: 
                   4399:         case OP_VSPACE:
                   4400:         for (i = 1; i <= min; i++)
                   4401:           {
                   4402:           if (eptr >= md->end_subject)
                   4403:             {
                   4404:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4405:             RRETURN(MATCH_NOMATCH);
1.1       misho    4406:             }
                   4407:           GETCHARINC(c, eptr);
                   4408:           switch(c)
                   4409:             {
1.1.1.2 ! misho    4410:             VSPACE_CASES: break;
        !          4411:             default: RRETURN(MATCH_NOMATCH);
1.1       misho    4412:             }
                   4413:           }
                   4414:         break;
                   4415: 
                   4416:         case OP_NOT_DIGIT:
                   4417:         for (i = 1; i <= min; i++)
                   4418:           {
                   4419:           if (eptr >= md->end_subject)
                   4420:             {
                   4421:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4422:             RRETURN(MATCH_NOMATCH);
1.1       misho    4423:             }
                   4424:           GETCHARINC(c, eptr);
                   4425:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.1.1.2 ! misho    4426:             RRETURN(MATCH_NOMATCH);
1.1       misho    4427:           }
                   4428:         break;
                   4429: 
                   4430:         case OP_DIGIT:
                   4431:         for (i = 1; i <= min; i++)
                   4432:           {
1.1.1.2 ! misho    4433:           pcre_uchar cc;
        !          4434: 
1.1       misho    4435:           if (eptr >= md->end_subject)
                   4436:             {
                   4437:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4438:             RRETURN(MATCH_NOMATCH);
1.1       misho    4439:             }
1.1.1.2 ! misho    4440:           cc = RAWUCHAR(eptr);
        !          4441:           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
        !          4442:             RRETURN(MATCH_NOMATCH);
        !          4443:           eptr++;
1.1       misho    4444:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4445:           }
                   4446:         break;
                   4447: 
                   4448:         case OP_NOT_WHITESPACE:
                   4449:         for (i = 1; i <= min; i++)
                   4450:           {
1.1.1.2 ! misho    4451:           pcre_uchar cc;
        !          4452: 
1.1       misho    4453:           if (eptr >= md->end_subject)
                   4454:             {
                   4455:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4456:             RRETURN(MATCH_NOMATCH);
1.1       misho    4457:             }
1.1.1.2 ! misho    4458:           cc = RAWUCHAR(eptr);
        !          4459:           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
        !          4460:             RRETURN(MATCH_NOMATCH);
        !          4461:           eptr++;
        !          4462:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4463:           }
                   4464:         break;
                   4465: 
                   4466:         case OP_WHITESPACE:
                   4467:         for (i = 1; i <= min; i++)
                   4468:           {
1.1.1.2 ! misho    4469:           pcre_uchar cc;
        !          4470: 
1.1       misho    4471:           if (eptr >= md->end_subject)
                   4472:             {
                   4473:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4474:             RRETURN(MATCH_NOMATCH);
1.1       misho    4475:             }
1.1.1.2 ! misho    4476:           cc = RAWUCHAR(eptr);
        !          4477:           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
        !          4478:             RRETURN(MATCH_NOMATCH);
        !          4479:           eptr++;
1.1       misho    4480:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4481:           }
                   4482:         break;
                   4483: 
                   4484:         case OP_NOT_WORDCHAR:
                   4485:         for (i = 1; i <= min; i++)
                   4486:           {
1.1.1.2 ! misho    4487:           pcre_uchar cc;
        !          4488: 
1.1       misho    4489:           if (eptr >= md->end_subject)
                   4490:             {
                   4491:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4492:             RRETURN(MATCH_NOMATCH);
1.1       misho    4493:             }
1.1.1.2 ! misho    4494:           cc = RAWUCHAR(eptr);
        !          4495:           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
        !          4496:             RRETURN(MATCH_NOMATCH);
        !          4497:           eptr++;
        !          4498:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    4499:           }
                   4500:         break;
                   4501: 
                   4502:         case OP_WORDCHAR:
                   4503:         for (i = 1; i <= min; i++)
                   4504:           {
1.1.1.2 ! misho    4505:           pcre_uchar cc;
        !          4506: 
1.1       misho    4507:           if (eptr >= md->end_subject)
                   4508:             {
                   4509:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4510:             RRETURN(MATCH_NOMATCH);
1.1       misho    4511:             }
1.1.1.2 ! misho    4512:           cc = RAWUCHAR(eptr);
        !          4513:           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
        !          4514:             RRETURN(MATCH_NOMATCH);
        !          4515:           eptr++;
1.1       misho    4516:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4517:           }
                   4518:         break;
                   4519: 
                   4520:         default:
                   4521:         RRETURN(PCRE_ERROR_INTERNAL);
                   4522:         }  /* End switch(ctype) */
                   4523: 
                   4524:       else
1.1.1.2 ! misho    4525: #endif     /* SUPPORT_UTF */
1.1       misho    4526: 
                   4527:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   4528:       than OP_PROP and OP_NOTPROP. */
                   4529: 
                   4530:       switch(ctype)
                   4531:         {
                   4532:         case OP_ANY:
                   4533:         for (i = 1; i <= min; i++)
                   4534:           {
                   4535:           if (eptr >= md->end_subject)
                   4536:             {
                   4537:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4538:             RRETURN(MATCH_NOMATCH);
        !          4539:             }
        !          4540:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
        !          4541:           if (md->partial != 0 &&
        !          4542:               eptr + 1 >= md->end_subject &&
        !          4543:               NLBLOCK->nltype == NLTYPE_FIXED &&
        !          4544:               NLBLOCK->nllen == 2 &&
        !          4545:               *eptr == NLBLOCK->nl[0])
        !          4546:             {
        !          4547:             md->hitend = TRUE;
        !          4548:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1       misho    4549:             }
                   4550:           eptr++;
                   4551:           }
                   4552:         break;
                   4553: 
                   4554:         case OP_ALLANY:
                   4555:         if (eptr > md->end_subject - min)
                   4556:           {
                   4557:           SCHECK_PARTIAL();
1.1.1.2 ! misho    4558:           RRETURN(MATCH_NOMATCH);
1.1       misho    4559:           }
                   4560:         eptr += min;
                   4561:         break;
                   4562: 
                   4563:         case OP_ANYBYTE:
                   4564:         if (eptr > md->end_subject - min)
                   4565:           {
                   4566:           SCHECK_PARTIAL();
1.1.1.2 ! misho    4567:           RRETURN(MATCH_NOMATCH);
1.1       misho    4568:           }
                   4569:         eptr += min;
                   4570:         break;
                   4571: 
                   4572:         case OP_ANYNL:
                   4573:         for (i = 1; i <= min; i++)
                   4574:           {
                   4575:           if (eptr >= md->end_subject)
                   4576:             {
                   4577:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4578:             RRETURN(MATCH_NOMATCH);
1.1       misho    4579:             }
                   4580:           switch(*eptr++)
                   4581:             {
1.1.1.2 ! misho    4582:             default: RRETURN(MATCH_NOMATCH);
        !          4583: 
        !          4584:             case CHAR_CR:
        !          4585:             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misho    4586:             break;
1.1.1.2 ! misho    4587: 
        !          4588:             case CHAR_LF:
1.1       misho    4589:             break;
                   4590: 
1.1.1.2 ! misho    4591:             case CHAR_VT:
        !          4592:             case CHAR_FF:
        !          4593:             case CHAR_NEL:
        !          4594: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4595:             case 0x2028:
        !          4596:             case 0x2029:
        !          4597: #endif
        !          4598:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misho    4599:             break;
                   4600:             }
                   4601:           }
                   4602:         break;
                   4603: 
                   4604:         case OP_NOT_HSPACE:
                   4605:         for (i = 1; i <= min; i++)
                   4606:           {
                   4607:           if (eptr >= md->end_subject)
                   4608:             {
                   4609:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4610:             RRETURN(MATCH_NOMATCH);
1.1       misho    4611:             }
                   4612:           switch(*eptr++)
                   4613:             {
                   4614:             default: break;
1.1.1.2 ! misho    4615:             HSPACE_BYTE_CASES:
        !          4616: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4617:             HSPACE_MULTIBYTE_CASES:
        !          4618: #endif
        !          4619:             RRETURN(MATCH_NOMATCH);
1.1       misho    4620:             }
                   4621:           }
                   4622:         break;
                   4623: 
                   4624:         case OP_HSPACE:
                   4625:         for (i = 1; i <= min; i++)
                   4626:           {
                   4627:           if (eptr >= md->end_subject)
                   4628:             {
                   4629:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4630:             RRETURN(MATCH_NOMATCH);
1.1       misho    4631:             }
                   4632:           switch(*eptr++)
                   4633:             {
1.1.1.2 ! misho    4634:             default: RRETURN(MATCH_NOMATCH);
        !          4635:             HSPACE_BYTE_CASES:
        !          4636: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4637:             HSPACE_MULTIBYTE_CASES:
        !          4638: #endif
1.1       misho    4639:             break;
                   4640:             }
                   4641:           }
                   4642:         break;
                   4643: 
                   4644:         case OP_NOT_VSPACE:
                   4645:         for (i = 1; i <= min; i++)
                   4646:           {
                   4647:           if (eptr >= md->end_subject)
                   4648:             {
                   4649:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4650:             RRETURN(MATCH_NOMATCH);
1.1       misho    4651:             }
                   4652:           switch(*eptr++)
                   4653:             {
1.1.1.2 ! misho    4654:             VSPACE_BYTE_CASES:
        !          4655: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4656:             VSPACE_MULTIBYTE_CASES:
        !          4657: #endif
        !          4658:             RRETURN(MATCH_NOMATCH);
1.1       misho    4659:             default: break;
                   4660:             }
                   4661:           }
                   4662:         break;
                   4663: 
                   4664:         case OP_VSPACE:
                   4665:         for (i = 1; i <= min; i++)
                   4666:           {
                   4667:           if (eptr >= md->end_subject)
                   4668:             {
                   4669:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4670:             RRETURN(MATCH_NOMATCH);
1.1       misho    4671:             }
                   4672:           switch(*eptr++)
                   4673:             {
1.1.1.2 ! misho    4674:             default: RRETURN(MATCH_NOMATCH);
        !          4675:             VSPACE_BYTE_CASES:
        !          4676: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          4677:             VSPACE_MULTIBYTE_CASES:
        !          4678: #endif
1.1       misho    4679:             break;
                   4680:             }
                   4681:           }
                   4682:         break;
                   4683: 
                   4684:         case OP_NOT_DIGIT:
                   4685:         for (i = 1; i <= min; i++)
                   4686:           {
                   4687:           if (eptr >= md->end_subject)
                   4688:             {
                   4689:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4690:             RRETURN(MATCH_NOMATCH);
1.1       misho    4691:             }
1.1.1.2 ! misho    4692:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
        !          4693:             RRETURN(MATCH_NOMATCH);
        !          4694:           eptr++;
1.1       misho    4695:           }
                   4696:         break;
                   4697: 
                   4698:         case OP_DIGIT:
                   4699:         for (i = 1; i <= min; i++)
                   4700:           {
                   4701:           if (eptr >= md->end_subject)
                   4702:             {
                   4703:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4704:             RRETURN(MATCH_NOMATCH);
1.1       misho    4705:             }
1.1.1.2 ! misho    4706:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
        !          4707:             RRETURN(MATCH_NOMATCH);
        !          4708:           eptr++;
1.1       misho    4709:           }
                   4710:         break;
                   4711: 
                   4712:         case OP_NOT_WHITESPACE:
                   4713:         for (i = 1; i <= min; i++)
                   4714:           {
                   4715:           if (eptr >= md->end_subject)
                   4716:             {
                   4717:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4718:             RRETURN(MATCH_NOMATCH);
1.1       misho    4719:             }
1.1.1.2 ! misho    4720:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
        !          4721:             RRETURN(MATCH_NOMATCH);
        !          4722:           eptr++;
1.1       misho    4723:           }
                   4724:         break;
                   4725: 
                   4726:         case OP_WHITESPACE:
                   4727:         for (i = 1; i <= min; i++)
                   4728:           {
                   4729:           if (eptr >= md->end_subject)
                   4730:             {
                   4731:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4732:             RRETURN(MATCH_NOMATCH);
1.1       misho    4733:             }
1.1.1.2 ! misho    4734:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
        !          4735:             RRETURN(MATCH_NOMATCH);
        !          4736:           eptr++;
1.1       misho    4737:           }
                   4738:         break;
                   4739: 
                   4740:         case OP_NOT_WORDCHAR:
                   4741:         for (i = 1; i <= min; i++)
                   4742:           {
                   4743:           if (eptr >= md->end_subject)
                   4744:             {
                   4745:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4746:             RRETURN(MATCH_NOMATCH);
1.1       misho    4747:             }
1.1.1.2 ! misho    4748:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
        !          4749:             RRETURN(MATCH_NOMATCH);
        !          4750:           eptr++;
1.1       misho    4751:           }
                   4752:         break;
                   4753: 
                   4754:         case OP_WORDCHAR:
                   4755:         for (i = 1; i <= min; i++)
                   4756:           {
                   4757:           if (eptr >= md->end_subject)
                   4758:             {
                   4759:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4760:             RRETURN(MATCH_NOMATCH);
1.1       misho    4761:             }
1.1.1.2 ! misho    4762:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
        !          4763:             RRETURN(MATCH_NOMATCH);
        !          4764:           eptr++;
1.1       misho    4765:           }
                   4766:         break;
                   4767: 
                   4768:         default:
                   4769:         RRETURN(PCRE_ERROR_INTERNAL);
                   4770:         }
                   4771:       }
                   4772: 
                   4773:     /* If min = max, continue at the same level without recursing */
                   4774: 
                   4775:     if (min == max) continue;
                   4776: 
                   4777:     /* If minimizing, we have to test the rest of the pattern before each
                   4778:     subsequent match. Again, separate the UTF case for speed, and also
                   4779:     separate the UCP cases. */
                   4780: 
                   4781:     if (minimize)
                   4782:       {
                   4783: #ifdef SUPPORT_UCP
                   4784:       if (prop_type >= 0)
                   4785:         {
                   4786:         switch(prop_type)
                   4787:           {
                   4788:           case PT_ANY:
                   4789:           for (fi = min;; fi++)
                   4790:             {
1.1.1.2 ! misho    4791:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
1.1       misho    4792:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4793:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4794:             if (eptr >= md->end_subject)
                   4795:               {
                   4796:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4797:               RRETURN(MATCH_NOMATCH);
1.1       misho    4798:               }
                   4799:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4800:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misho    4801:             }
                   4802:           /* Control never gets here */
                   4803: 
                   4804:           case PT_LAMP:
                   4805:           for (fi = min;; fi++)
                   4806:             {
1.1.1.2 ! misho    4807:             int chartype;
        !          4808:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
1.1       misho    4809:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4810:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4811:             if (eptr >= md->end_subject)
                   4812:               {
                   4813:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4814:               RRETURN(MATCH_NOMATCH);
1.1       misho    4815:               }
                   4816:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4817:             chartype = UCD_CHARTYPE(c);
        !          4818:             if ((chartype == ucp_Lu ||
        !          4819:                  chartype == ucp_Ll ||
        !          4820:                  chartype == ucp_Lt) == prop_fail_result)
        !          4821:               RRETURN(MATCH_NOMATCH);
1.1       misho    4822:             }
                   4823:           /* Control never gets here */
                   4824: 
                   4825:           case PT_GC:
                   4826:           for (fi = min;; fi++)
                   4827:             {
1.1.1.2 ! misho    4828:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
1.1       misho    4829:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4830:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4831:             if (eptr >= md->end_subject)
                   4832:               {
                   4833:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4834:               RRETURN(MATCH_NOMATCH);
1.1       misho    4835:               }
                   4836:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4837:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
        !          4838:               RRETURN(MATCH_NOMATCH);
1.1       misho    4839:             }
                   4840:           /* Control never gets here */
                   4841: 
                   4842:           case PT_PC:
                   4843:           for (fi = min;; fi++)
                   4844:             {
1.1.1.2 ! misho    4845:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
1.1       misho    4846:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4847:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4848:             if (eptr >= md->end_subject)
                   4849:               {
                   4850:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4851:               RRETURN(MATCH_NOMATCH);
1.1       misho    4852:               }
                   4853:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4854:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
        !          4855:               RRETURN(MATCH_NOMATCH);
1.1       misho    4856:             }
                   4857:           /* Control never gets here */
                   4858: 
                   4859:           case PT_SC:
                   4860:           for (fi = min;; fi++)
                   4861:             {
1.1.1.2 ! misho    4862:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
1.1       misho    4863:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4864:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4865:             if (eptr >= md->end_subject)
                   4866:               {
                   4867:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4868:               RRETURN(MATCH_NOMATCH);
1.1       misho    4869:               }
                   4870:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4871:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
        !          4872:               RRETURN(MATCH_NOMATCH);
1.1       misho    4873:             }
                   4874:           /* Control never gets here */
                   4875: 
                   4876:           case PT_ALNUM:
                   4877:           for (fi = min;; fi++)
                   4878:             {
1.1.1.2 ! misho    4879:             int category;
        !          4880:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
1.1       misho    4881:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4882:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4883:             if (eptr >= md->end_subject)
                   4884:               {
                   4885:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4886:               RRETURN(MATCH_NOMATCH);
1.1       misho    4887:               }
                   4888:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4889:             category = UCD_CATEGORY(c);
        !          4890:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
        !          4891:               RRETURN(MATCH_NOMATCH);
1.1       misho    4892:             }
                   4893:           /* Control never gets here */
                   4894: 
                   4895:           case PT_SPACE:    /* Perl space */
                   4896:           for (fi = min;; fi++)
                   4897:             {
1.1.1.2 ! misho    4898:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
1.1       misho    4899:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4900:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4901:             if (eptr >= md->end_subject)
                   4902:               {
                   4903:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4904:               RRETURN(MATCH_NOMATCH);
1.1       misho    4905:               }
                   4906:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4907:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    4908:                  c == CHAR_FF || c == CHAR_CR)
                   4909:                    == prop_fail_result)
1.1.1.2 ! misho    4910:               RRETURN(MATCH_NOMATCH);
1.1       misho    4911:             }
                   4912:           /* Control never gets here */
                   4913: 
                   4914:           case PT_PXSPACE:  /* POSIX space */
                   4915:           for (fi = min;; fi++)
                   4916:             {
1.1.1.2 ! misho    4917:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
1.1       misho    4918:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4919:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4920:             if (eptr >= md->end_subject)
                   4921:               {
                   4922:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4923:               RRETURN(MATCH_NOMATCH);
1.1       misho    4924:               }
                   4925:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4926:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    4927:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4928:                    == prop_fail_result)
1.1.1.2 ! misho    4929:               RRETURN(MATCH_NOMATCH);
1.1       misho    4930:             }
                   4931:           /* Control never gets here */
                   4932: 
                   4933:           case PT_WORD:
                   4934:           for (fi = min;; fi++)
                   4935:             {
1.1.1.2 ! misho    4936:             int category;
        !          4937:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
1.1       misho    4938:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4939:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4940:             if (eptr >= md->end_subject)
                   4941:               {
                   4942:               SCHECK_PARTIAL();
1.1.1.2 ! misho    4943:               RRETURN(MATCH_NOMATCH);
1.1       misho    4944:               }
                   4945:             GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho    4946:             category = UCD_CATEGORY(c);
        !          4947:             if ((category == ucp_L ||
        !          4948:                  category == ucp_N ||
1.1       misho    4949:                  c == CHAR_UNDERSCORE)
                   4950:                    == prop_fail_result)
1.1.1.2 ! misho    4951:               RRETURN(MATCH_NOMATCH);
1.1       misho    4952:             }
                   4953:           /* Control never gets here */
                   4954: 
1.1.1.2 ! misho    4955:           case PT_CLIST:
        !          4956:           for (fi = min;; fi++)
        !          4957:             {
        !          4958:             const pcre_uint32 *cp;
        !          4959:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
        !          4960:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          4961:             if (fi >= max) RRETURN(MATCH_NOMATCH);
        !          4962:             if (eptr >= md->end_subject)
        !          4963:               {
        !          4964:               SCHECK_PARTIAL();
        !          4965:               RRETURN(MATCH_NOMATCH);
        !          4966:               }
        !          4967:             GETCHARINCTEST(c, eptr);
        !          4968:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          4969:             for (;;)
        !          4970:               {
        !          4971:               if (c < *cp)
        !          4972:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
        !          4973:               if (c == *cp++)
        !          4974:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
        !          4975:               }
        !          4976:             }
        !          4977:           /* Control never gets here */
1.1       misho    4978: 
1.1.1.2 ! misho    4979:           /* This should never occur */
1.1       misho    4980:           default:
                   4981:           RRETURN(PCRE_ERROR_INTERNAL);
                   4982:           }
                   4983:         }
                   4984: 
                   4985:       /* Match extended Unicode sequences. We will get here only if the
                   4986:       support is in the binary; otherwise a compile-time error occurs. */
                   4987: 
                   4988:       else if (ctype == OP_EXTUNI)
                   4989:         {
                   4990:         for (fi = min;; fi++)
                   4991:           {
1.1.1.2 ! misho    4992:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
1.1       misho    4993:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    4994:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    4995:           if (eptr >= md->end_subject)
                   4996:             {
                   4997:             SCHECK_PARTIAL();
1.1.1.2 ! misho    4998:             RRETURN(MATCH_NOMATCH);
1.1       misho    4999:             }
1.1.1.2 ! misho    5000:           else
1.1       misho    5001:             {
1.1.1.2 ! misho    5002:             int lgb, rgb;
        !          5003:             GETCHARINCTEST(c, eptr);
        !          5004:             lgb = UCD_GRAPHBREAK(c);
        !          5005:             while (eptr < md->end_subject)
        !          5006:               {
        !          5007:               int len = 1;
        !          5008:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5009:               rgb = UCD_GRAPHBREAK(c);
        !          5010:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          5011:               lgb = rgb;
        !          5012:               eptr += len;
        !          5013:               }
1.1       misho    5014:             }
1.1.1.2 ! misho    5015:           CHECK_PARTIAL();
1.1       misho    5016:           }
                   5017:         }
                   5018:       else
                   5019: #endif     /* SUPPORT_UCP */
                   5020: 
1.1.1.2 ! misho    5021: #ifdef SUPPORT_UTF
        !          5022:       if (utf)
1.1       misho    5023:         {
                   5024:         for (fi = min;; fi++)
                   5025:           {
1.1.1.2 ! misho    5026:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
1.1       misho    5027:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    5028:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    5029:           if (eptr >= md->end_subject)
                   5030:             {
                   5031:             SCHECK_PARTIAL();
1.1.1.2 ! misho    5032:             RRETURN(MATCH_NOMATCH);
1.1       misho    5033:             }
                   5034:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.1.1.2 ! misho    5035:             RRETURN(MATCH_NOMATCH);
1.1       misho    5036:           GETCHARINC(c, eptr);
                   5037:           switch(ctype)
                   5038:             {
1.1.1.2 ! misho    5039:             case OP_ANY:               /* This is the non-NL case */
        !          5040:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5041:                 eptr >= md->end_subject &&
        !          5042:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5043:                 NLBLOCK->nllen == 2 &&
        !          5044:                 c == NLBLOCK->nl[0])
        !          5045:               {
        !          5046:               md->hitend = TRUE;
        !          5047:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5048:               }
        !          5049:             break;
        !          5050: 
1.1       misho    5051:             case OP_ALLANY:
                   5052:             case OP_ANYBYTE:
                   5053:             break;
                   5054: 
                   5055:             case OP_ANYNL:
                   5056:             switch(c)
                   5057:               {
1.1.1.2 ! misho    5058:               default: RRETURN(MATCH_NOMATCH);
        !          5059:               case CHAR_CR:
        !          5060:               if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    5061:               break;
1.1.1.2 ! misho    5062: 
        !          5063:               case CHAR_LF:
1.1       misho    5064:               break;
                   5065: 
1.1.1.2 ! misho    5066:               case CHAR_VT:
        !          5067:               case CHAR_FF:
        !          5068:               case CHAR_NEL:
        !          5069: #ifndef EBCDIC
1.1       misho    5070:               case 0x2028:
                   5071:               case 0x2029:
1.1.1.2 ! misho    5072: #endif  /* Not EBCDIC */
        !          5073:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misho    5074:               break;
                   5075:               }
                   5076:             break;
                   5077: 
                   5078:             case OP_NOT_HSPACE:
                   5079:             switch(c)
                   5080:               {
1.1.1.2 ! misho    5081:               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    5082:               default: break;
                   5083:               }
                   5084:             break;
                   5085: 
                   5086:             case OP_HSPACE:
                   5087:             switch(c)
                   5088:               {
1.1.1.2 ! misho    5089:               HSPACE_CASES: break;
        !          5090:               default: RRETURN(MATCH_NOMATCH);
1.1       misho    5091:               }
                   5092:             break;
                   5093: 
                   5094:             case OP_NOT_VSPACE:
                   5095:             switch(c)
                   5096:               {
1.1.1.2 ! misho    5097:               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misho    5098:               default: break;
                   5099:               }
                   5100:             break;
                   5101: 
                   5102:             case OP_VSPACE:
                   5103:             switch(c)
                   5104:               {
1.1.1.2 ! misho    5105:               VSPACE_CASES: break;
        !          5106:               default: RRETURN(MATCH_NOMATCH);
1.1       misho    5107:               }
                   5108:             break;
                   5109: 
                   5110:             case OP_NOT_DIGIT:
                   5111:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.1.1.2 ! misho    5112:               RRETURN(MATCH_NOMATCH);
1.1       misho    5113:             break;
                   5114: 
                   5115:             case OP_DIGIT:
                   5116:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.1.1.2 ! misho    5117:               RRETURN(MATCH_NOMATCH);
1.1       misho    5118:             break;
                   5119: 
                   5120:             case OP_NOT_WHITESPACE:
                   5121:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.1.1.2 ! misho    5122:               RRETURN(MATCH_NOMATCH);
1.1       misho    5123:             break;
                   5124: 
                   5125:             case OP_WHITESPACE:
1.1.1.2 ! misho    5126:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
        !          5127:               RRETURN(MATCH_NOMATCH);
1.1       misho    5128:             break;
                   5129: 
                   5130:             case OP_NOT_WORDCHAR:
                   5131:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.1.1.2 ! misho    5132:               RRETURN(MATCH_NOMATCH);
1.1       misho    5133:             break;
                   5134: 
                   5135:             case OP_WORDCHAR:
                   5136:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.1.1.2 ! misho    5137:               RRETURN(MATCH_NOMATCH);
1.1       misho    5138:             break;
                   5139: 
                   5140:             default:
                   5141:             RRETURN(PCRE_ERROR_INTERNAL);
                   5142:             }
                   5143:           }
                   5144:         }
                   5145:       else
                   5146: #endif
1.1.1.2 ! misho    5147:       /* Not UTF mode */
1.1       misho    5148:         {
                   5149:         for (fi = min;; fi++)
                   5150:           {
1.1.1.2 ! misho    5151:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
1.1       misho    5152:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    5153:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1       misho    5154:           if (eptr >= md->end_subject)
                   5155:             {
                   5156:             SCHECK_PARTIAL();
1.1.1.2 ! misho    5157:             RRETURN(MATCH_NOMATCH);
1.1       misho    5158:             }
                   5159:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.1.1.2 ! misho    5160:             RRETURN(MATCH_NOMATCH);
1.1       misho    5161:           c = *eptr++;
                   5162:           switch(ctype)
                   5163:             {
1.1.1.2 ! misho    5164:             case OP_ANY:               /* This is the non-NL case */
        !          5165:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5166:                 eptr >= md->end_subject &&
        !          5167:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5168:                 NLBLOCK->nllen == 2 &&
        !          5169:                 c == NLBLOCK->nl[0])
        !          5170:               {
        !          5171:               md->hitend = TRUE;
        !          5172:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5173:               }
        !          5174:             break;
        !          5175: 
1.1       misho    5176:             case OP_ALLANY:
                   5177:             case OP_ANYBYTE:
                   5178:             break;
                   5179: 
                   5180:             case OP_ANYNL:
                   5181:             switch(c)
                   5182:               {
1.1.1.2 ! misho    5183:               default: RRETURN(MATCH_NOMATCH);
        !          5184:               case CHAR_CR:
        !          5185:               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misho    5186:               break;
                   5187: 
1.1.1.2 ! misho    5188:               case CHAR_LF:
1.1       misho    5189:               break;
                   5190: 
1.1.1.2 ! misho    5191:               case CHAR_VT:
        !          5192:               case CHAR_FF:
        !          5193:               case CHAR_NEL:
        !          5194: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5195:               case 0x2028:
        !          5196:               case 0x2029:
        !          5197: #endif
        !          5198:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misho    5199:               break;
                   5200:               }
                   5201:             break;
                   5202: 
                   5203:             case OP_NOT_HSPACE:
                   5204:             switch(c)
                   5205:               {
                   5206:               default: break;
1.1.1.2 ! misho    5207:               HSPACE_BYTE_CASES:
        !          5208: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5209:               HSPACE_MULTIBYTE_CASES:
        !          5210: #endif
        !          5211:               RRETURN(MATCH_NOMATCH);
1.1       misho    5212:               }
                   5213:             break;
                   5214: 
                   5215:             case OP_HSPACE:
                   5216:             switch(c)
                   5217:               {
1.1.1.2 ! misho    5218:               default: RRETURN(MATCH_NOMATCH);
        !          5219:               HSPACE_BYTE_CASES:
        !          5220: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5221:               HSPACE_MULTIBYTE_CASES:
        !          5222: #endif
1.1       misho    5223:               break;
                   5224:               }
                   5225:             break;
                   5226: 
                   5227:             case OP_NOT_VSPACE:
                   5228:             switch(c)
                   5229:               {
                   5230:               default: break;
1.1.1.2 ! misho    5231:               VSPACE_BYTE_CASES:
        !          5232: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5233:               VSPACE_MULTIBYTE_CASES:
        !          5234: #endif
        !          5235:               RRETURN(MATCH_NOMATCH);
1.1       misho    5236:               }
                   5237:             break;
                   5238: 
                   5239:             case OP_VSPACE:
                   5240:             switch(c)
                   5241:               {
1.1.1.2 ! misho    5242:               default: RRETURN(MATCH_NOMATCH);
        !          5243:               VSPACE_BYTE_CASES:
        !          5244: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5245:               VSPACE_MULTIBYTE_CASES:
        !          5246: #endif
1.1       misho    5247:               break;
                   5248:               }
                   5249:             break;
                   5250: 
                   5251:             case OP_NOT_DIGIT:
1.1.1.2 ! misho    5252:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5253:             break;
                   5254: 
                   5255:             case OP_DIGIT:
1.1.1.2 ! misho    5256:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5257:             break;
                   5258: 
                   5259:             case OP_NOT_WHITESPACE:
1.1.1.2 ! misho    5260:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5261:             break;
                   5262: 
                   5263:             case OP_WHITESPACE:
1.1.1.2 ! misho    5264:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5265:             break;
                   5266: 
                   5267:             case OP_NOT_WORDCHAR:
1.1.1.2 ! misho    5268:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5269:             break;
                   5270: 
                   5271:             case OP_WORDCHAR:
1.1.1.2 ! misho    5272:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1       misho    5273:             break;
                   5274: 
                   5275:             default:
                   5276:             RRETURN(PCRE_ERROR_INTERNAL);
                   5277:             }
                   5278:           }
                   5279:         }
                   5280:       /* Control never gets here */
                   5281:       }
                   5282: 
                   5283:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5284:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5285:     UTF-8 and UCP stuff separate. */
                   5286: 
                   5287:     else
                   5288:       {
                   5289:       pp = eptr;  /* Remember where we started */
                   5290: 
                   5291: #ifdef SUPPORT_UCP
                   5292:       if (prop_type >= 0)
                   5293:         {
                   5294:         switch(prop_type)
                   5295:           {
                   5296:           case PT_ANY:
                   5297:           for (i = min; i < max; i++)
                   5298:             {
                   5299:             int len = 1;
                   5300:             if (eptr >= md->end_subject)
                   5301:               {
                   5302:               SCHECK_PARTIAL();
                   5303:               break;
                   5304:               }
                   5305:             GETCHARLENTEST(c, eptr, len);
                   5306:             if (prop_fail_result) break;
                   5307:             eptr+= len;
                   5308:             }
                   5309:           break;
                   5310: 
                   5311:           case PT_LAMP:
                   5312:           for (i = min; i < max; i++)
                   5313:             {
1.1.1.2 ! misho    5314:             int chartype;
1.1       misho    5315:             int len = 1;
                   5316:             if (eptr >= md->end_subject)
                   5317:               {
                   5318:               SCHECK_PARTIAL();
                   5319:               break;
                   5320:               }
                   5321:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5322:             chartype = UCD_CHARTYPE(c);
        !          5323:             if ((chartype == ucp_Lu ||
        !          5324:                  chartype == ucp_Ll ||
        !          5325:                  chartype == ucp_Lt) == prop_fail_result)
1.1       misho    5326:               break;
                   5327:             eptr+= len;
                   5328:             }
                   5329:           break;
                   5330: 
                   5331:           case PT_GC:
                   5332:           for (i = min; i < max; i++)
                   5333:             {
                   5334:             int len = 1;
                   5335:             if (eptr >= md->end_subject)
                   5336:               {
                   5337:               SCHECK_PARTIAL();
                   5338:               break;
                   5339:               }
                   5340:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5341:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
1.1       misho    5342:             eptr+= len;
                   5343:             }
                   5344:           break;
                   5345: 
                   5346:           case PT_PC:
                   5347:           for (i = min; i < max; i++)
                   5348:             {
                   5349:             int len = 1;
                   5350:             if (eptr >= md->end_subject)
                   5351:               {
                   5352:               SCHECK_PARTIAL();
                   5353:               break;
                   5354:               }
                   5355:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5356:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
1.1       misho    5357:             eptr+= len;
                   5358:             }
                   5359:           break;
                   5360: 
                   5361:           case PT_SC:
                   5362:           for (i = min; i < max; i++)
                   5363:             {
                   5364:             int len = 1;
                   5365:             if (eptr >= md->end_subject)
                   5366:               {
                   5367:               SCHECK_PARTIAL();
                   5368:               break;
                   5369:               }
                   5370:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5371:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
1.1       misho    5372:             eptr+= len;
                   5373:             }
                   5374:           break;
                   5375: 
                   5376:           case PT_ALNUM:
                   5377:           for (i = min; i < max; i++)
                   5378:             {
1.1.1.2 ! misho    5379:             int category;
1.1       misho    5380:             int len = 1;
                   5381:             if (eptr >= md->end_subject)
                   5382:               {
                   5383:               SCHECK_PARTIAL();
                   5384:               break;
                   5385:               }
                   5386:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5387:             category = UCD_CATEGORY(c);
        !          5388:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
1.1       misho    5389:               break;
                   5390:             eptr+= len;
                   5391:             }
                   5392:           break;
                   5393: 
                   5394:           case PT_SPACE:    /* Perl space */
                   5395:           for (i = min; i < max; i++)
                   5396:             {
                   5397:             int len = 1;
                   5398:             if (eptr >= md->end_subject)
                   5399:               {
                   5400:               SCHECK_PARTIAL();
                   5401:               break;
                   5402:               }
                   5403:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5404:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    5405:                  c == CHAR_FF || c == CHAR_CR)
                   5406:                  == prop_fail_result)
                   5407:               break;
                   5408:             eptr+= len;
                   5409:             }
                   5410:           break;
                   5411: 
                   5412:           case PT_PXSPACE:  /* POSIX space */
                   5413:           for (i = min; i < max; i++)
                   5414:             {
                   5415:             int len = 1;
                   5416:             if (eptr >= md->end_subject)
                   5417:               {
                   5418:               SCHECK_PARTIAL();
                   5419:               break;
                   5420:               }
                   5421:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5422:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1       misho    5423:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5424:                  == prop_fail_result)
                   5425:               break;
                   5426:             eptr+= len;
                   5427:             }
                   5428:           break;
                   5429: 
                   5430:           case PT_WORD:
                   5431:           for (i = min; i < max; i++)
                   5432:             {
1.1.1.2 ! misho    5433:             int category;
1.1       misho    5434:             int len = 1;
                   5435:             if (eptr >= md->end_subject)
                   5436:               {
                   5437:               SCHECK_PARTIAL();
                   5438:               break;
                   5439:               }
                   5440:             GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho    5441:             category = UCD_CATEGORY(c);
        !          5442:             if ((category == ucp_L || category == ucp_N ||
1.1       misho    5443:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5444:               break;
                   5445:             eptr+= len;
                   5446:             }
                   5447:           break;
                   5448: 
1.1.1.2 ! misho    5449:           case PT_CLIST:
        !          5450:           for (i = min; i < max; i++)
        !          5451:             {
        !          5452:             const pcre_uint32 *cp;
        !          5453:             int len = 1;
        !          5454:             if (eptr >= md->end_subject)
        !          5455:               {
        !          5456:               SCHECK_PARTIAL();
        !          5457:               break;
        !          5458:               }
        !          5459:             GETCHARLENTEST(c, eptr, len);
        !          5460:             cp = PRIV(ucd_caseless_sets) + prop_value;
        !          5461:             for (;;)
        !          5462:               {
        !          5463:               if (c < *cp)
        !          5464:                 { if (prop_fail_result) break; else goto GOT_MAX; }
        !          5465:               if (c == *cp++)
        !          5466:                 { if (prop_fail_result) goto GOT_MAX; else break; }
        !          5467:               }
        !          5468:             eptr += len;
        !          5469:             }
        !          5470:           GOT_MAX:
        !          5471:           break;
        !          5472: 
1.1       misho    5473:           default:
                   5474:           RRETURN(PCRE_ERROR_INTERNAL);
                   5475:           }
                   5476: 
                   5477:         /* eptr is now past the end of the maximum run */
                   5478: 
                   5479:         if (possessive) continue;
                   5480:         for(;;)
                   5481:           {
1.1.1.2 ! misho    5482:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
1.1       misho    5483:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5484:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.1.1.2 ! misho    5485:           if (utf) BACKCHAR(eptr);
1.1       misho    5486:           }
                   5487:         }
                   5488: 
                   5489:       /* Match extended Unicode sequences. We will get here only if the
                   5490:       support is in the binary; otherwise a compile-time error occurs. */
                   5491: 
                   5492:       else if (ctype == OP_EXTUNI)
                   5493:         {
                   5494:         for (i = min; i < max; i++)
                   5495:           {
                   5496:           if (eptr >= md->end_subject)
                   5497:             {
                   5498:             SCHECK_PARTIAL();
                   5499:             break;
                   5500:             }
1.1.1.2 ! misho    5501:           else
1.1       misho    5502:             {
1.1.1.2 ! misho    5503:             int lgb, rgb;
        !          5504:             GETCHARINCTEST(c, eptr);
        !          5505:             lgb = UCD_GRAPHBREAK(c);
        !          5506:             while (eptr < md->end_subject)
1.1       misho    5507:               {
1.1.1.2 ! misho    5508:               int len = 1;
        !          5509:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5510:               rgb = UCD_GRAPHBREAK(c);
        !          5511:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
        !          5512:               lgb = rgb;
        !          5513:               eptr += len;
1.1       misho    5514:               }
                   5515:             }
1.1.1.2 ! misho    5516:           CHECK_PARTIAL();
1.1       misho    5517:           }
                   5518: 
                   5519:         /* eptr is now past the end of the maximum run */
                   5520: 
                   5521:         if (possessive) continue;
                   5522: 
                   5523:         for(;;)
                   5524:           {
1.1.1.2 ! misho    5525:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
1.1       misho    5526:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5527:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5528:           for (;;)                        /* Move back over one extended */
                   5529:             {
1.1.1.2 ! misho    5530:             if (!utf) c = *eptr; else
1.1       misho    5531:               {
                   5532:               BACKCHAR(eptr);
1.1.1.2 ! misho    5533:               GETCHAR(c, eptr);
1.1       misho    5534:               }
1.1.1.2 ! misho    5535:             if (UCD_CATEGORY(c) != ucp_M) break;
1.1       misho    5536:             eptr--;
                   5537:             }
                   5538:           }
                   5539:         }
                   5540: 
                   5541:       else
                   5542: #endif   /* SUPPORT_UCP */
                   5543: 
1.1.1.2 ! misho    5544: #ifdef SUPPORT_UTF
        !          5545:       if (utf)
1.1       misho    5546:         {
                   5547:         switch(ctype)
                   5548:           {
                   5549:           case OP_ANY:
                   5550:           if (max < INT_MAX)
                   5551:             {
                   5552:             for (i = min; i < max; i++)
                   5553:               {
                   5554:               if (eptr >= md->end_subject)
                   5555:                 {
                   5556:                 SCHECK_PARTIAL();
                   5557:                 break;
                   5558:                 }
                   5559:               if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho    5560:               if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5561:                   eptr + 1 >= md->end_subject &&
        !          5562:                   NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5563:                   NLBLOCK->nllen == 2 &&
        !          5564:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
        !          5565:                 {
        !          5566:                 md->hitend = TRUE;
        !          5567:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5568:                 }
1.1       misho    5569:               eptr++;
1.1.1.2 ! misho    5570:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5571:               }
                   5572:             }
                   5573: 
                   5574:           /* Handle unlimited UTF-8 repeat */
                   5575: 
                   5576:           else
                   5577:             {
                   5578:             for (i = min; i < max; i++)
                   5579:               {
                   5580:               if (eptr >= md->end_subject)
                   5581:                 {
                   5582:                 SCHECK_PARTIAL();
                   5583:                 break;
                   5584:                 }
                   5585:               if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho    5586:               if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5587:                   eptr + 1 >= md->end_subject &&
        !          5588:                   NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5589:                   NLBLOCK->nllen == 2 &&
        !          5590:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
        !          5591:                 {
        !          5592:                 md->hitend = TRUE;
        !          5593:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5594:                 }
1.1       misho    5595:               eptr++;
1.1.1.2 ! misho    5596:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5597:               }
                   5598:             }
                   5599:           break;
                   5600: 
                   5601:           case OP_ALLANY:
                   5602:           if (max < INT_MAX)
                   5603:             {
                   5604:             for (i = min; i < max; i++)
                   5605:               {
                   5606:               if (eptr >= md->end_subject)
                   5607:                 {
                   5608:                 SCHECK_PARTIAL();
                   5609:                 break;
                   5610:                 }
                   5611:               eptr++;
1.1.1.2 ! misho    5612:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misho    5613:               }
                   5614:             }
1.1.1.2 ! misho    5615:           else
        !          5616:             {
        !          5617:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
        !          5618:             SCHECK_PARTIAL();
        !          5619:             }
1.1       misho    5620:           break;
                   5621: 
                   5622:           /* The byte case is the same as non-UTF8 */
                   5623: 
                   5624:           case OP_ANYBYTE:
                   5625:           c = max - min;
                   5626:           if (c > (unsigned int)(md->end_subject - eptr))
                   5627:             {
                   5628:             eptr = md->end_subject;
                   5629:             SCHECK_PARTIAL();
                   5630:             }
                   5631:           else eptr += c;
                   5632:           break;
                   5633: 
                   5634:           case OP_ANYNL:
                   5635:           for (i = min; i < max; i++)
                   5636:             {
                   5637:             int len = 1;
                   5638:             if (eptr >= md->end_subject)
                   5639:               {
                   5640:               SCHECK_PARTIAL();
                   5641:               break;
                   5642:               }
                   5643:             GETCHARLEN(c, eptr, len);
1.1.1.2 ! misho    5644:             if (c == CHAR_CR)
1.1       misho    5645:               {
                   5646:               if (++eptr >= md->end_subject) break;
1.1.1.2 ! misho    5647:               if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1       misho    5648:               }
                   5649:             else
                   5650:               {
1.1.1.2 ! misho    5651:               if (c != CHAR_LF &&
1.1       misho    5652:                   (md->bsr_anycrlf ||
1.1.1.2 ! misho    5653:                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
        !          5654: #ifndef EBCDIC
        !          5655:                     && c != 0x2028 && c != 0x2029
        !          5656: #endif  /* Not EBCDIC */
        !          5657:                     )))
1.1       misho    5658:                 break;
                   5659:               eptr += len;
                   5660:               }
                   5661:             }
                   5662:           break;
                   5663: 
                   5664:           case OP_NOT_HSPACE:
                   5665:           case OP_HSPACE:
                   5666:           for (i = min; i < max; i++)
                   5667:             {
                   5668:             BOOL gotspace;
                   5669:             int len = 1;
                   5670:             if (eptr >= md->end_subject)
                   5671:               {
                   5672:               SCHECK_PARTIAL();
                   5673:               break;
                   5674:               }
                   5675:             GETCHARLEN(c, eptr, len);
                   5676:             switch(c)
                   5677:               {
1.1.1.2 ! misho    5678:               HSPACE_CASES: gotspace = TRUE; break;
1.1       misho    5679:               default: gotspace = FALSE; break;
                   5680:               }
                   5681:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5682:             eptr += len;
                   5683:             }
                   5684:           break;
                   5685: 
                   5686:           case OP_NOT_VSPACE:
                   5687:           case OP_VSPACE:
                   5688:           for (i = min; i < max; i++)
                   5689:             {
                   5690:             BOOL gotspace;
                   5691:             int len = 1;
                   5692:             if (eptr >= md->end_subject)
                   5693:               {
                   5694:               SCHECK_PARTIAL();
                   5695:               break;
                   5696:               }
                   5697:             GETCHARLEN(c, eptr, len);
                   5698:             switch(c)
                   5699:               {
1.1.1.2 ! misho    5700:               VSPACE_CASES: gotspace = TRUE; break;
1.1       misho    5701:               default: gotspace = FALSE; break;
                   5702:               }
                   5703:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5704:             eptr += len;
                   5705:             }
                   5706:           break;
                   5707: 
                   5708:           case OP_NOT_DIGIT:
                   5709:           for (i = min; i < max; i++)
                   5710:             {
                   5711:             int len = 1;
                   5712:             if (eptr >= md->end_subject)
                   5713:               {
                   5714:               SCHECK_PARTIAL();
                   5715:               break;
                   5716:               }
                   5717:             GETCHARLEN(c, eptr, len);
                   5718:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5719:             eptr+= len;
                   5720:             }
                   5721:           break;
                   5722: 
                   5723:           case OP_DIGIT:
                   5724:           for (i = min; i < max; i++)
                   5725:             {
                   5726:             int len = 1;
                   5727:             if (eptr >= md->end_subject)
                   5728:               {
                   5729:               SCHECK_PARTIAL();
                   5730:               break;
                   5731:               }
                   5732:             GETCHARLEN(c, eptr, len);
                   5733:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5734:             eptr+= len;
                   5735:             }
                   5736:           break;
                   5737: 
                   5738:           case OP_NOT_WHITESPACE:
                   5739:           for (i = min; i < max; i++)
                   5740:             {
                   5741:             int len = 1;
                   5742:             if (eptr >= md->end_subject)
                   5743:               {
                   5744:               SCHECK_PARTIAL();
                   5745:               break;
                   5746:               }
                   5747:             GETCHARLEN(c, eptr, len);
                   5748:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5749:             eptr+= len;
                   5750:             }
                   5751:           break;
                   5752: 
                   5753:           case OP_WHITESPACE:
                   5754:           for (i = min; i < max; i++)
                   5755:             {
                   5756:             int len = 1;
                   5757:             if (eptr >= md->end_subject)
                   5758:               {
                   5759:               SCHECK_PARTIAL();
                   5760:               break;
                   5761:               }
                   5762:             GETCHARLEN(c, eptr, len);
                   5763:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5764:             eptr+= len;
                   5765:             }
                   5766:           break;
                   5767: 
                   5768:           case OP_NOT_WORDCHAR:
                   5769:           for (i = min; i < max; i++)
                   5770:             {
                   5771:             int len = 1;
                   5772:             if (eptr >= md->end_subject)
                   5773:               {
                   5774:               SCHECK_PARTIAL();
                   5775:               break;
                   5776:               }
                   5777:             GETCHARLEN(c, eptr, len);
                   5778:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5779:             eptr+= len;
                   5780:             }
                   5781:           break;
                   5782: 
                   5783:           case OP_WORDCHAR:
                   5784:           for (i = min; i < max; i++)
                   5785:             {
                   5786:             int len = 1;
                   5787:             if (eptr >= md->end_subject)
                   5788:               {
                   5789:               SCHECK_PARTIAL();
                   5790:               break;
                   5791:               }
                   5792:             GETCHARLEN(c, eptr, len);
                   5793:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5794:             eptr+= len;
                   5795:             }
                   5796:           break;
                   5797: 
                   5798:           default:
                   5799:           RRETURN(PCRE_ERROR_INTERNAL);
                   5800:           }
                   5801: 
1.1.1.2 ! misho    5802:         /* eptr is now past the end of the maximum run. If possessive, we are
        !          5803:         done (no backing up). Otherwise, match at this position; anything other
        !          5804:         than no match is immediately returned. For nomatch, back up one
        !          5805:         character, unless we are matching \R and the last thing matched was
        !          5806:         \r\n, in which case, back up two bytes. */
1.1       misho    5807: 
                   5808:         if (possessive) continue;
                   5809:         for(;;)
                   5810:           {
1.1.1.2 ! misho    5811:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
1.1       misho    5812:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5813:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5814:           BACKCHAR(eptr);
1.1.1.2 ! misho    5815:           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
        !          5816:               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
1.1       misho    5817:           }
                   5818:         }
                   5819:       else
1.1.1.2 ! misho    5820: #endif  /* SUPPORT_UTF */
        !          5821:       /* Not UTF mode */
1.1       misho    5822:         {
                   5823:         switch(ctype)
                   5824:           {
                   5825:           case OP_ANY:
                   5826:           for (i = min; i < max; i++)
                   5827:             {
                   5828:             if (eptr >= md->end_subject)
                   5829:               {
                   5830:               SCHECK_PARTIAL();
                   5831:               break;
                   5832:               }
                   5833:             if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho    5834:             if (md->partial != 0 &&    /* Take care with CRLF partial */
        !          5835:                 eptr + 1 >= md->end_subject &&
        !          5836:                 NLBLOCK->nltype == NLTYPE_FIXED &&
        !          5837:                 NLBLOCK->nllen == 2 &&
        !          5838:                 *eptr == NLBLOCK->nl[0])
        !          5839:               {
        !          5840:               md->hitend = TRUE;
        !          5841:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
        !          5842:               }
1.1       misho    5843:             eptr++;
                   5844:             }
                   5845:           break;
                   5846: 
                   5847:           case OP_ALLANY:
                   5848:           case OP_ANYBYTE:
                   5849:           c = max - min;
                   5850:           if (c > (unsigned int)(md->end_subject - eptr))
                   5851:             {
                   5852:             eptr = md->end_subject;
                   5853:             SCHECK_PARTIAL();
                   5854:             }
                   5855:           else eptr += c;
                   5856:           break;
                   5857: 
                   5858:           case OP_ANYNL:
                   5859:           for (i = min; i < max; i++)
                   5860:             {
                   5861:             if (eptr >= md->end_subject)
                   5862:               {
                   5863:               SCHECK_PARTIAL();
                   5864:               break;
                   5865:               }
                   5866:             c = *eptr;
1.1.1.2 ! misho    5867:             if (c == CHAR_CR)
1.1       misho    5868:               {
                   5869:               if (++eptr >= md->end_subject) break;
1.1.1.2 ! misho    5870:               if (*eptr == CHAR_LF) eptr++;
1.1       misho    5871:               }
                   5872:             else
                   5873:               {
1.1.1.2 ! misho    5874:               if (c != CHAR_LF && (md->bsr_anycrlf ||
        !          5875:                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
        !          5876: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5877:                  && c != 0x2028 && c != 0x2029
        !          5878: #endif
        !          5879:                  ))) break;
1.1       misho    5880:               eptr++;
                   5881:               }
                   5882:             }
                   5883:           break;
                   5884: 
                   5885:           case OP_NOT_HSPACE:
                   5886:           for (i = min; i < max; i++)
                   5887:             {
                   5888:             if (eptr >= md->end_subject)
                   5889:               {
                   5890:               SCHECK_PARTIAL();
                   5891:               break;
                   5892:               }
1.1.1.2 ! misho    5893:             switch(*eptr)
        !          5894:               {
        !          5895:               default: eptr++; break;
        !          5896:               HSPACE_BYTE_CASES:
        !          5897: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5898:               HSPACE_MULTIBYTE_CASES:
        !          5899: #endif
        !          5900:               goto ENDLOOP00;
        !          5901:               }
1.1       misho    5902:             }
1.1.1.2 ! misho    5903:           ENDLOOP00:
1.1       misho    5904:           break;
                   5905: 
                   5906:           case OP_HSPACE:
                   5907:           for (i = min; i < max; i++)
                   5908:             {
                   5909:             if (eptr >= md->end_subject)
                   5910:               {
                   5911:               SCHECK_PARTIAL();
                   5912:               break;
                   5913:               }
1.1.1.2 ! misho    5914:             switch(*eptr)
        !          5915:               {
        !          5916:               default: goto ENDLOOP01;
        !          5917:               HSPACE_BYTE_CASES:
        !          5918: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5919:               HSPACE_MULTIBYTE_CASES:
        !          5920: #endif
        !          5921:               eptr++; break;
        !          5922:               }
1.1       misho    5923:             }
1.1.1.2 ! misho    5924:           ENDLOOP01:
1.1       misho    5925:           break;
                   5926: 
                   5927:           case OP_NOT_VSPACE:
                   5928:           for (i = min; i < max; i++)
                   5929:             {
                   5930:             if (eptr >= md->end_subject)
                   5931:               {
                   5932:               SCHECK_PARTIAL();
                   5933:               break;
                   5934:               }
1.1.1.2 ! misho    5935:             switch(*eptr)
        !          5936:               {
        !          5937:               default: eptr++; break;
        !          5938:               VSPACE_BYTE_CASES:
        !          5939: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5940:               VSPACE_MULTIBYTE_CASES:
        !          5941: #endif
        !          5942:               goto ENDLOOP02;
        !          5943:               }
1.1       misho    5944:             }
1.1.1.2 ! misho    5945:           ENDLOOP02:
1.1       misho    5946:           break;
                   5947: 
                   5948:           case OP_VSPACE:
                   5949:           for (i = min; i < max; i++)
                   5950:             {
                   5951:             if (eptr >= md->end_subject)
                   5952:               {
                   5953:               SCHECK_PARTIAL();
                   5954:               break;
                   5955:               }
1.1.1.2 ! misho    5956:             switch(*eptr)
        !          5957:               {
        !          5958:               default: goto ENDLOOP03;
        !          5959:               VSPACE_BYTE_CASES:
        !          5960: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
        !          5961:               VSPACE_MULTIBYTE_CASES:
        !          5962: #endif
        !          5963:               eptr++; break;
        !          5964:               }
1.1       misho    5965:             }
1.1.1.2 ! misho    5966:           ENDLOOP03:
1.1       misho    5967:           break;
                   5968: 
                   5969:           case OP_NOT_DIGIT:
                   5970:           for (i = min; i < max; i++)
                   5971:             {
                   5972:             if (eptr >= md->end_subject)
                   5973:               {
                   5974:               SCHECK_PARTIAL();
                   5975:               break;
                   5976:               }
1.1.1.2 ! misho    5977:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misho    5978:             eptr++;
                   5979:             }
                   5980:           break;
                   5981: 
                   5982:           case OP_DIGIT:
                   5983:           for (i = min; i < max; i++)
                   5984:             {
                   5985:             if (eptr >= md->end_subject)
                   5986:               {
                   5987:               SCHECK_PARTIAL();
                   5988:               break;
                   5989:               }
1.1.1.2 ! misho    5990:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misho    5991:             eptr++;
                   5992:             }
                   5993:           break;
                   5994: 
                   5995:           case OP_NOT_WHITESPACE:
                   5996:           for (i = min; i < max; i++)
                   5997:             {
                   5998:             if (eptr >= md->end_subject)
                   5999:               {
                   6000:               SCHECK_PARTIAL();
                   6001:               break;
                   6002:               }
1.1.1.2 ! misho    6003:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misho    6004:             eptr++;
                   6005:             }
                   6006:           break;
                   6007: 
                   6008:           case OP_WHITESPACE:
                   6009:           for (i = min; i < max; i++)
                   6010:             {
                   6011:             if (eptr >= md->end_subject)
                   6012:               {
                   6013:               SCHECK_PARTIAL();
                   6014:               break;
                   6015:               }
1.1.1.2 ! misho    6016:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misho    6017:             eptr++;
                   6018:             }
                   6019:           break;
                   6020: 
                   6021:           case OP_NOT_WORDCHAR:
                   6022:           for (i = min; i < max; i++)
                   6023:             {
                   6024:             if (eptr >= md->end_subject)
                   6025:               {
                   6026:               SCHECK_PARTIAL();
                   6027:               break;
                   6028:               }
1.1.1.2 ! misho    6029:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misho    6030:             eptr++;
                   6031:             }
                   6032:           break;
                   6033: 
                   6034:           case OP_WORDCHAR:
                   6035:           for (i = min; i < max; i++)
                   6036:             {
                   6037:             if (eptr >= md->end_subject)
                   6038:               {
                   6039:               SCHECK_PARTIAL();
                   6040:               break;
                   6041:               }
1.1.1.2 ! misho    6042:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misho    6043:             eptr++;
                   6044:             }
                   6045:           break;
                   6046: 
                   6047:           default:
                   6048:           RRETURN(PCRE_ERROR_INTERNAL);
                   6049:           }
                   6050: 
1.1.1.2 ! misho    6051:         /* eptr is now past the end of the maximum run. If possessive, we are
        !          6052:         done (no backing up). Otherwise, match at this position; anything other
        !          6053:         than no match is immediately returned. For nomatch, back up one
        !          6054:         character (byte), unless we are matching \R and the last thing matched
        !          6055:         was \r\n, in which case, back up two bytes. */
1.1       misho    6056: 
                   6057:         if (possessive) continue;
                   6058:         while (eptr >= pp)
                   6059:           {
1.1.1.2 ! misho    6060:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
1.1       misho    6061:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho    6062:           eptr--;
        !          6063:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
        !          6064:               eptr[-1] == CHAR_CR) eptr--;
1.1       misho    6065:           }
                   6066:         }
                   6067: 
                   6068:       /* Get here if we can't make it match with any permitted repetitions */
                   6069: 
1.1.1.2 ! misho    6070:       RRETURN(MATCH_NOMATCH);
1.1       misho    6071:       }
                   6072:     /* Control never gets here */
                   6073: 
                   6074:     /* There's been some horrible disaster. Arrival here can only mean there is
                   6075:     something seriously wrong in the code above or the OP_xxx definitions. */
                   6076: 
                   6077:     default:
                   6078:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   6079:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   6080:     }
                   6081: 
                   6082:   /* Do not stick any code in here without much thought; it is assumed
                   6083:   that "continue" in the code above comes out to here to repeat the main
                   6084:   loop. */
                   6085: 
                   6086:   }             /* End of main loop */
                   6087: /* Control never reaches here */
                   6088: 
                   6089: 
                   6090: /* When compiling to use the heap rather than the stack for recursive calls to
                   6091: match(), the RRETURN() macro jumps here. The number that is saved in
                   6092: frame->Xwhere indicates which label we actually want to return to. */
                   6093: 
                   6094: #ifdef NO_RECURSE
                   6095: #define LBL(val) case val: goto L_RM##val;
                   6096: HEAP_RETURN:
                   6097: switch (frame->Xwhere)
                   6098:   {
                   6099:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   6100:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   6101:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   6102:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.1.1.2 ! misho    6103:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
        !          6104:   LBL(65) LBL(66)
        !          6105: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
        !          6106:   LBL(21)
        !          6107: #endif
        !          6108: #ifdef SUPPORT_UTF
        !          6109:   LBL(16) LBL(18) LBL(20)
        !          6110:   LBL(22) LBL(23) LBL(28) LBL(30)
1.1       misho    6111:   LBL(32) LBL(34) LBL(42) LBL(46)
                   6112: #ifdef SUPPORT_UCP
                   6113:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.1.1.2 ! misho    6114:   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
1.1       misho    6115: #endif  /* SUPPORT_UCP */
1.1.1.2 ! misho    6116: #endif  /* SUPPORT_UTF */
1.1       misho    6117:   default:
                   6118:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   6119:   return PCRE_ERROR_INTERNAL;
                   6120:   }
                   6121: #undef LBL
                   6122: #endif  /* NO_RECURSE */
                   6123: }
                   6124: 
                   6125: 
                   6126: /***************************************************************************
                   6127: ****************************************************************************
                   6128:                    RECURSION IN THE match() FUNCTION
                   6129: 
                   6130: Undefine all the macros that were defined above to handle this. */
                   6131: 
                   6132: #ifdef NO_RECURSE
                   6133: #undef eptr
                   6134: #undef ecode
                   6135: #undef mstart
                   6136: #undef offset_top
                   6137: #undef eptrb
                   6138: #undef flags
                   6139: 
                   6140: #undef callpat
                   6141: #undef charptr
                   6142: #undef data
                   6143: #undef next
                   6144: #undef pp
                   6145: #undef prev
                   6146: #undef saved_eptr
                   6147: 
                   6148: #undef new_recursive
                   6149: 
                   6150: #undef cur_is_word
                   6151: #undef condition
                   6152: #undef prev_is_word
                   6153: 
                   6154: #undef ctype
                   6155: #undef length
                   6156: #undef max
                   6157: #undef min
                   6158: #undef number
                   6159: #undef offset
                   6160: #undef op
                   6161: #undef save_capture_last
                   6162: #undef save_offset1
                   6163: #undef save_offset2
                   6164: #undef save_offset3
                   6165: #undef stacksave
                   6166: 
                   6167: #undef newptrb
                   6168: 
                   6169: #endif
                   6170: 
                   6171: /* These two are defined as macros in both cases */
                   6172: 
                   6173: #undef fc
                   6174: #undef fi
                   6175: 
                   6176: /***************************************************************************
                   6177: ***************************************************************************/
                   6178: 
                   6179: 
1.1.1.2 ! misho    6180: #ifdef NO_RECURSE
        !          6181: /*************************************************
        !          6182: *          Release allocated heap frames         *
        !          6183: *************************************************/
        !          6184: 
        !          6185: /* This function releases all the allocated frames. The base frame is on the
        !          6186: machine stack, and so must not be freed.
        !          6187: 
        !          6188: Argument: the address of the base frame
        !          6189: Returns:  nothing
        !          6190: */
        !          6191: 
        !          6192: static void
        !          6193: release_match_heapframes (heapframe *frame_base)
        !          6194: {  /* Walk the Xnextframe chain and free every frame after frame_base */
        !          6195: heapframe *nextframe = frame_base->Xnextframe;  /* frame_base itself is never freed */
        !          6196: while (nextframe != NULL)  /* A NULL link ends the chain */
        !          6197:   {
        !          6198:   heapframe *oldframe = nextframe;
        !          6199:   nextframe = nextframe->Xnextframe;  /* Read the link before the frame is freed */
        !          6200:   (PUBL(stack_free))(oldframe);
        !          6201:   }
        !          6202: }
        !          6203: #endif
        !          6204: 
1.1       misho    6205: 
                   6206: /*************************************************
                   6207: *         Execute a Regular Expression           *
                   6208: *************************************************/
                   6209: 
                   6210: /* This function applies a compiled re to a subject string and picks out
                   6211: portions of the string if it matches. Two elements in the vector are set for
                   6212: each substring: the offsets to the start and end of the substring.
                   6213: 
                   6214: Arguments:
                   6215:   argument_re     points to the compiled expression
                   6216:   extra_data      points to extra data or is NULL
                   6217:   subject         points to the subject string
                   6218:   length          length of subject string (may contain binary zeros)
                   6219:   start_offset    where to start in the subject string
                   6220:   options         option bits
                   6221:   offsets         points to a vector of ints to be filled in with offsets
                   6222:   offsetcount     the number of elements in the vector
                   6223: 
                   6224: Returns:          > 0 => success; value is the number of elements filled in
                   6225:                   = 0 => success, but offsets is not big enough
                   6226:                    -1 => failed to match
                   6227:                  < -1 => some kind of unexpected problem
                   6228: */
                   6229: 
1.1.1.2 ! misho    6230: #if defined COMPILE_PCRE8
1.1       misho    6231: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6232: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   6233:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   6234:   int offsetcount)
1.1.1.2 ! misho    6235: #elif defined COMPILE_PCRE16
        !          6236: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !          6237: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
        !          6238:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
        !          6239:   int offsetcount)
        !          6240: #elif defined COMPILE_PCRE32
        !          6241: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !          6242: pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
        !          6243:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
        !          6244:   int offsetcount)
        !          6245: #endif
1.1       misho    6246: {
1.1.1.2 ! misho    6247: int rc, ocount, arg_offset_max;
1.1       misho    6248: int newline;
                   6249: BOOL using_temporary_offsets = FALSE;
                   6250: BOOL anchored;
                   6251: BOOL startline;
                   6252: BOOL firstline;
1.1.1.2 ! misho    6253: BOOL utf;
        !          6254: BOOL has_first_char = FALSE;
        !          6255: BOOL has_req_char = FALSE;
        !          6256: pcre_uchar first_char = 0;
        !          6257: pcre_uchar first_char2 = 0;
        !          6258: pcre_uchar req_char = 0;
        !          6259: pcre_uchar req_char2 = 0;
1.1       misho    6260: match_data match_block;
                   6261: match_data *md = &match_block;
1.1.1.2 ! misho    6262: const pcre_uint8 *tables;
        !          6263: const pcre_uint8 *start_bits = NULL;
        !          6264: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
        !          6265: PCRE_PUCHAR end_subject;
        !          6266: PCRE_PUCHAR start_partial = NULL;
        !          6267: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1       misho    6268: 
                   6269: const pcre_study_data *study;
1.1.1.2 ! misho    6270: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
        !          6271: 
        !          6272: #ifdef NO_RECURSE
        !          6273: heapframe frame_zero;
        !          6274: frame_zero.Xprevframe = NULL;            /* Marks the top level */
        !          6275: frame_zero.Xnextframe = NULL;            /* None are allocated yet */
        !          6276: md->match_frames_base = &frame_zero;
        !          6277: #endif
        !          6278: 
        !          6279: /* Check for the special magic call that measures the size of the stack used
        !          6280: per recursive call of match(). Without the funny casting for sizeof, a Windows
        !          6281: compiler gave this error: "unary minus operator applied to unsigned type,
        !          6282: result still unsigned". Hopefully the cast fixes that. */
1.1       misho    6283: 
1.1.1.2 ! misho    6284: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
        !          6285:     start_offset == -999)
        !          6286: #ifdef NO_RECURSE
        !          6287:   return -((int)sizeof(heapframe));
        !          6288: #else
        !          6289:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
        !          6290: #endif
1.1       misho    6291: 
                   6292: /* Plausibility checks */
                   6293: 
                   6294: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.1.1.2 ! misho    6295: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
        !          6296:   return PCRE_ERROR_NULL;
1.1       misho    6297: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.1.1.2 ! misho    6298: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.1       misho    6299: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
                   6300: 
1.1.1.2 ! misho    6301: /* Check that the first field in the block is the magic number. If it is not,
        !          6302: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
        !          6303: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
        !          6304: means that the pattern is likely compiled with different endianness. */
        !          6305: 
        !          6306: if (re->magic_number != MAGIC_NUMBER)
        !          6307:   return re->magic_number == REVERSED_MAGIC_NUMBER?
        !          6308:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
        !          6309: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
        !          6310: 
        !          6311: /* These two settings are used in the code for checking a UTF-8 string that
        !          6312: follows immediately afterwards. Other values in the md block are used only
        !          6313: during "normal" pcre_exec() processing, not when the JIT support is in use,
        !          6314: so they are set up later. */
        !          6315: 
        !          6316: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
        !          6317: utf = md->utf = (re->options & PCRE_UTF8) != 0;
        !          6318: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
        !          6319:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
        !          6320: 
        !          6321: /* Check a UTF-8 string if required. Pass back the character offset and error
        !          6322: code for an invalid string if a results vector is available. */
        !          6323: 
        !          6324: #ifdef SUPPORT_UTF
        !          6325: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
        !          6326:   {
        !          6327:   int erroroffset;
        !          6328:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
        !          6329:   if (errorcode != 0)
        !          6330:     {
        !          6331:     if (offsetcount >= 2)
        !          6332:       {
        !          6333:       offsets[0] = erroroffset;
        !          6334:       offsets[1] = errorcode;
        !          6335:       }
        !          6336: #if defined COMPILE_PCRE8
        !          6337:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
        !          6338:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
        !          6339: #elif defined COMPILE_PCRE16
        !          6340:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
        !          6341:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
        !          6342: #elif defined COMPILE_PCRE32
        !          6343:     return PCRE_ERROR_BADUTF32;
        !          6344: #endif
        !          6345:     }
        !          6346: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
        !          6347:   /* Check that a start_offset points to the start of a UTF character. */
        !          6348:   if (start_offset > 0 && start_offset < length &&
        !          6349:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
        !          6350:     return PCRE_ERROR_BADUTF8_OFFSET;
        !          6351: #endif
        !          6352:   }
        !          6353: #endif
        !          6354: 
        !          6355: /* If the pattern was successfully studied with JIT support, run the JIT
        !          6356: executable instead of the rest of this function. Most options must be set at
        !          6357: compile time for the JIT code to be usable. Fallback to the normal code path if
        !          6358: an unsupported flag is set. */
        !          6359: 
        !          6360: #ifdef SUPPORT_JIT
        !          6361: if (extra_data != NULL
        !          6362:     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
        !          6363:                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
        !          6364:     && extra_data->executable_jit != NULL
        !          6365:     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
        !          6366:   {
        !          6367:   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
        !          6368:        start_offset, options, offsets, offsetcount);
        !          6369: 
        !          6370:   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
        !          6371:   matching mode is not compiled. If so, we fall back to the interpreter. */
        !          6372: 
        !          6373:   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
        !          6374:   }
        !          6375: #endif
        !          6376: 
        !          6377: /* Carry on with non-JIT matching. This information is for finding all the
        !          6378: numbers associated with a given name, for condition testing. */
1.1       misho    6379: 
1.1.1.2 ! misho    6380: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.1       misho    6381: md->name_count = re->name_count;
                   6382: md->name_entry_size = re->name_entry_size;
                   6383: 
                   6384: /* Fish out the optional data from the extra_data structure, first setting
                   6385: the default values. */
                   6386: 
                   6387: study = NULL;
                   6388: md->match_limit = MATCH_LIMIT;
                   6389: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6390: md->callout_data = NULL;
                   6391: 
                   6392: /* The table pointer is always in native byte order. */
                   6393: 
1.1.1.2 ! misho    6394: tables = re->tables;
1.1       misho    6395: 
                   6396: if (extra_data != NULL)
                   6397:   {
                   6398:   register unsigned int flags = extra_data->flags;
                   6399:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6400:     study = (const pcre_study_data *)extra_data->study_data;
                   6401:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6402:     md->match_limit = extra_data->match_limit;
                   6403:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6404:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6405:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6406:     md->callout_data = extra_data->callout_data;
                   6407:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6408:   }
                   6409: 
                   6410: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6411: is a feature that makes it possible to save compiled regex and re-use them
                   6412: in other programs later. */
                   6413: 
1.1.1.2 ! misho    6414: if (tables == NULL) tables = PRIV(default_tables);
1.1       misho    6415: 
                   6416: /* Set up other data */
                   6417: 
                   6418: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6419: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6420: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6421: 
                   6422: /* The code starts after the real_pcre block and the capture name table. */
                   6423: 
1.1.1.2 ! misho    6424: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1       misho    6425:   re->name_count * re->name_entry_size;
                   6426: 
1.1.1.2 ! misho    6427: md->start_subject = (PCRE_PUCHAR)subject;
1.1       misho    6428: md->start_offset = start_offset;
                   6429: md->end_subject = md->start_subject + length;
                   6430: end_subject = md->end_subject;
                   6431: 
                   6432: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   6433: md->use_ucp = (re->options & PCRE_UCP) != 0;
                   6434: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.1.1.2 ! misho    6435: md->ignore_skip_arg = FALSE;
        !          6436: 
        !          6437: /* Some options are unpacked into BOOL variables in the hope that testing
        !          6438: them will be faster than individual option bits. */
1.1       misho    6439: 
                   6440: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6441: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6442: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   6443: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
1.1.1.2 ! misho    6444: 
1.1       misho    6445: md->hitend = FALSE;
1.1.1.2 ! misho    6446: md->mark = md->nomatch_mark = NULL;     /* In case never set */
1.1       misho    6447: 
                   6448: md->recursive = NULL;                   /* No recursion at top level */
1.1.1.2 ! misho    6449: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
1.1       misho    6450: 
                   6451: md->lcc = tables + lcc_offset;
1.1.1.2 ! misho    6452: md->fcc = tables + fcc_offset;
1.1       misho    6453: md->ctypes = tables + ctypes_offset;
                   6454: 
                   6455: /* Handle different \R options. */
                   6456: 
                   6457: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6458:   {
                   6459:   case 0:
                   6460:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6461:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6462:   else
                   6463: #ifdef BSR_ANYCRLF
                   6464:   md->bsr_anycrlf = TRUE;
                   6465: #else
                   6466:   md->bsr_anycrlf = FALSE;
                   6467: #endif
                   6468:   break;
                   6469: 
                   6470:   case PCRE_BSR_ANYCRLF:
                   6471:   md->bsr_anycrlf = TRUE;
                   6472:   break;
                   6473: 
                   6474:   case PCRE_BSR_UNICODE:
                   6475:   md->bsr_anycrlf = FALSE;
                   6476:   break;
                   6477: 
                   6478:   default: return PCRE_ERROR_BADNEWLINE;
                   6479:   }
                   6480: 
                   6481: /* Handle different types of newline. The three bits give eight cases. If
                   6482: nothing is set at run time, whatever was used at compile time applies. */
                   6483: 
                   6484: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6485:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6486:   {
                   6487:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   6488:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6489:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
                   6490:   case PCRE_NEWLINE_CR+
                   6491:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
                   6492:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6493:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6494:   default: return PCRE_ERROR_BADNEWLINE;
                   6495:   }
                   6496: 
                   6497: if (newline == -2)
                   6498:   {
                   6499:   md->nltype = NLTYPE_ANYCRLF;
                   6500:   }
                   6501: else if (newline < 0)
                   6502:   {
                   6503:   md->nltype = NLTYPE_ANY;
                   6504:   }
                   6505: else
                   6506:   {
                   6507:   md->nltype = NLTYPE_FIXED;
                   6508:   if (newline > 255)
                   6509:     {
                   6510:     md->nllen = 2;
                   6511:     md->nl[0] = (newline >> 8) & 255;
                   6512:     md->nl[1] = newline & 255;
                   6513:     }
                   6514:   else
                   6515:     {
                   6516:     md->nllen = 1;
                   6517:     md->nl[0] = newline;
                   6518:     }
                   6519:   }
                   6520: 
                   6521: /* Partial matching was originally supported only for a restricted set of
                   6522: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6523: defined (though never set). So there's no harm in leaving this code. */
                   6524: 
                   6525: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6526:   return PCRE_ERROR_BADPARTIAL;
                   6527: 
                   6528: /* If the expression has got more back references than the offsets supplied can
                   6529: hold, we get a temporary chunk of working store to use during the matching.
                   6530: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6531: of 3. */
                   6532: 
                   6533: ocount = offsetcount - (offsetcount % 3);
1.1.1.2 ! misho    6534: arg_offset_max = (2*ocount)/3;
1.1       misho    6535: 
                   6536: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6537:   {
                   6538:   ocount = re->top_backref * 3 + 3;
1.1.1.2 ! misho    6539:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1       misho    6540:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6541:   using_temporary_offsets = TRUE;
                   6542:   DPRINTF(("Got memory to hold back references\n"));
                   6543:   }
                   6544: else md->offset_vector = offsets;
                   6545: 
                   6546: md->offset_end = ocount;
                   6547: md->offset_max = (2*ocount)/3;
                   6548: md->offset_overflow = FALSE;
                   6549: md->capture_last = -1;
                   6550: 
                   6551: /* Reset the working variable associated with each extraction. These should
                   6552: never be used unless previously set, but they get saved and restored, and so we
1.1.1.2 ! misho    6553: initialize them to avoid reading uninitialized locations. Also, unset the
        !          6554: offsets for the matched string. This is really just for tidiness with callouts,
        !          6555: in case they inspect these fields. */
1.1       misho    6556: 
                   6557: if (md->offset_vector != NULL)
                   6558:   {
                   6559:   register int *iptr = md->offset_vector + ocount;
1.1.1.2 ! misho    6560:   register int *iend = iptr - re->top_bracket;
        !          6561:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
1.1       misho    6562:   while (--iptr >= iend) *iptr = -1;
1.1.1.2 ! misho    6563:   md->offset_vector[0] = md->offset_vector[1] = -1;
1.1       misho    6564:   }
                   6565: 
1.1.1.2 ! misho    6566: /* Set up the first character to match, if available. The first_char value is
1.1       misho    6567: never set for an anchored regular expression, but the anchoring may be forced
                   6568: at run time, so we have to test for anchoring. The first char may be unset for
                   6569: an unanchored pattern, of course. If there's no first char and the pattern was
                   6570: studied, there may be a bitmap of possible first characters. */
                   6571: 
                   6572: if (!anchored)
                   6573:   {
                   6574:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6575:     {
1.1.1.2 ! misho    6576:     has_first_char = TRUE;
        !          6577:     first_char = first_char2 = (pcre_uchar)(re->first_char);
        !          6578:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
        !          6579:       {
        !          6580:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
        !          6581: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
        !          6582:       if (utf && first_char > 127)
        !          6583:         first_char2 = UCD_OTHERCASE(first_char);
        !          6584: #endif
        !          6585:       }
1.1       misho    6586:     }
                   6587:   else
                   6588:     if (!startline && study != NULL &&
                   6589:       (study->flags & PCRE_STUDY_MAPPED) != 0)
                   6590:         start_bits = study->start_bits;
                   6591:   }
                   6592: 
                   6593: /* For anchored or unanchored matches, there may be a "last known required
                   6594: character" set. */
                   6595: 
                   6596: if ((re->flags & PCRE_REQCHSET) != 0)
                   6597:   {
1.1.1.2 ! misho    6598:   has_req_char = TRUE;
        !          6599:   req_char = req_char2 = (pcre_uchar)(re->req_char);
        !          6600:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
        !          6601:     {
        !          6602:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
        !          6603: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
        !          6604:     if (utf && req_char > 127)
        !          6605:       req_char2 = UCD_OTHERCASE(req_char);
        !          6606: #endif
        !          6607:     }
1.1       misho    6608:   }
                   6609: 
                   6610: 
                   6611: /* ==========================================================================*/
                   6612: 
                   6613: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   6614: the loop runs just once. */
                   6615: 
                   6616: for(;;)
                   6617:   {
1.1.1.2 ! misho    6618:   PCRE_PUCHAR save_end_subject = end_subject;
        !          6619:   PCRE_PUCHAR new_start_match;
1.1       misho    6620: 
                   6621:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6622:   line of a multiline string. That is, the match must be before or at the first
                   6623:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6624:   scanning at a newline. If the match fails at the newline, later code breaks
                   6625:   this loop. */
                   6626: 
                   6627:   if (firstline)
                   6628:     {
1.1.1.2 ! misho    6629:     PCRE_PUCHAR t = start_match;
        !          6630: #ifdef SUPPORT_UTF
        !          6631:     if (utf)
1.1       misho    6632:       {
                   6633:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6634:         {
                   6635:         t++;
1.1.1.2 ! misho    6636:         ACROSSCHAR(t < end_subject, *t, t++);
1.1       misho    6637:         }
                   6638:       }
                   6639:     else
                   6640: #endif
                   6641:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6642:     end_subject = t;
                   6643:     }
                   6644: 
                   6645:   /* There are some optimizations that avoid running the match if a known
                   6646:   starting point is not found, or if a known later character is not present.
                   6647:   However, there is an option that disables these, for testing and for ensuring
                   6648:   that all callouts do actually occur. The option can be set in the regex by
                   6649:   (*NO_START_OPT) or passed in match-time options. */
                   6650: 
                   6651:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
                   6652:     {
1.1.1.2 ! misho    6653:     /* Advance to a unique first char if there is one. */
1.1       misho    6654: 
1.1.1.2 ! misho    6655:     if (has_first_char)
1.1       misho    6656:       {
1.1.1.2 ! misho    6657:       pcre_uchar smc;
        !          6658: 
        !          6659:       if (first_char != first_char2)
        !          6660:         while (start_match < end_subject &&
        !          6661:           (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
1.1       misho    6662:           start_match++;
                   6663:       else
1.1.1.2 ! misho    6664:         while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
1.1       misho    6665:           start_match++;
                   6666:       }
                   6667: 
                   6668:     /* Or to just after a linebreak for a multiline match */
                   6669: 
                   6670:     else if (startline)
                   6671:       {
                   6672:       if (start_match > md->start_subject + start_offset)
                   6673:         {
1.1.1.2 ! misho    6674: #ifdef SUPPORT_UTF
        !          6675:         if (utf)
1.1       misho    6676:           {
                   6677:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6678:             {
                   6679:             start_match++;
1.1.1.2 ! misho    6680:             ACROSSCHAR(start_match < end_subject, *start_match,
        !          6681:               start_match++);
1.1       misho    6682:             }
                   6683:           }
                   6684:         else
                   6685: #endif
                   6686:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6687:           start_match++;
                   6688: 
                   6689:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6690:         and we are now at a LF, advance the match position by one more character.
                   6691:         */
                   6692: 
                   6693:         if (start_match[-1] == CHAR_CR &&
                   6694:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6695:              start_match < end_subject &&
1.1.1.2 ! misho    6696:              RAWUCHARTEST(start_match) == CHAR_NL)
1.1       misho    6697:           start_match++;
                   6698:         }
                   6699:       }
                   6700: 
                   6701:     /* Or to a non-unique first byte after study */
                   6702: 
                   6703:     else if (start_bits != NULL)
                   6704:       {
                   6705:       while (start_match < end_subject)
                   6706:         {
1.1.1.2 ! misho    6707:         register pcre_uint32 c = RAWUCHARTEST(start_match);
        !          6708: #ifndef COMPILE_PCRE8
        !          6709:         if (c > 255) c = 255;
        !          6710: #endif
1.1       misho    6711:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   6712:           {
                   6713:           start_match++;
1.1.1.2 ! misho    6714: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
        !          6715:           /* In non 8-bit mode, the iteration will stop for
        !          6716:           characters > 255 at the beginning or not stop at all. */
        !          6717:           if (utf)
        !          6718:             ACROSSCHAR(start_match < end_subject, *start_match,
        !          6719:               start_match++);
1.1       misho    6720: #endif
                   6721:           }
                   6722:         else break;
                   6723:         }
                   6724:       }
                   6725:     }   /* Starting optimizations */
                   6726: 
                   6727:   /* Restore fudged end_subject */
                   6728: 
                   6729:   end_subject = save_end_subject;
                   6730: 
                   6731:   /* The following two optimizations are disabled for partial matching or if
                   6732:   disabling is explicitly requested. */
                   6733: 
1.1.1.2 ! misho    6734:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
1.1       misho    6735:     {
                   6736:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6737:     a lower bound; no actual string of that length may actually match the
                   6738:     pattern. Although the value is, strictly, in characters, we treat it as
                   6739:     bytes to avoid spending too much time in this optimization. */
                   6740: 
                   6741:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6742:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6743:       {
                   6744:       rc = MATCH_NOMATCH;
                   6745:       break;
                   6746:       }
                   6747: 
1.1.1.2 ! misho    6748:     /* If req_char is set, we know that that character must appear in the
        !          6749:     subject for the match to succeed. If the first character is set, req_char
1.1       misho    6750:     must be later in the subject; otherwise the test starts at the match point.
                   6751:     This optimization can save a huge amount of backtracking in patterns with
                   6752:     nested unlimited repeats that aren't going to match. Writing separate code
                   6753:     for cased/caseless versions makes it go faster, as does using an
                   6754:     autoincrement and backing off on a match.
                   6755: 
                   6756:     HOWEVER: when the subject string is very, very long, searching to its end
                   6757:     can take a long time, and give bad performance on quite ordinary patterns.
                   6758:     This showed up when somebody was matching something like /^\d+C/ on a
                   6759:     32-megabyte string... so we don't do this when the string is sufficiently
                   6760:     long. */
                   6761: 
1.1.1.2 ! misho    6762:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1       misho    6763:       {
1.1.1.2 ! misho    6764:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.1       misho    6765: 
                   6766:       /* We don't need to repeat the search if we haven't yet reached the
                   6767:       place we found it at last time. */
                   6768: 
1.1.1.2 ! misho    6769:       if (p > req_char_ptr)
1.1       misho    6770:         {
1.1.1.2 ! misho    6771:         if (req_char != req_char2)
1.1       misho    6772:           {
                   6773:           while (p < end_subject)
                   6774:             {
1.1.1.2 ! misho    6775:             register pcre_uint32 pp = RAWUCHARINCTEST(p);
        !          6776:             if (pp == req_char || pp == req_char2) { p--; break; }
1.1       misho    6777:             }
                   6778:           }
                   6779:         else
                   6780:           {
                   6781:           while (p < end_subject)
                   6782:             {
1.1.1.2 ! misho    6783:             if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
1.1       misho    6784:             }
                   6785:           }
                   6786: 
                   6787:         /* If we can't find the required character, break the matching loop,
                   6788:         forcing a match failure. */
                   6789: 
                   6790:         if (p >= end_subject)
                   6791:           {
                   6792:           rc = MATCH_NOMATCH;
                   6793:           break;
                   6794:           }
                   6795: 
                   6796:         /* If we have found the required character, save the point where we
                   6797:         found it, so that we don't search again next time round the loop if
                   6798:         the start hasn't passed this character yet. */
                   6799: 
1.1.1.2 ! misho    6800:         req_char_ptr = p;
1.1       misho    6801:         }
                   6802:       }
                   6803:     }
                   6804: 
                   6805: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6806:   printf(">>>> Match against: ");
                   6807:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6808:   printf("\n");
                   6809: #endif
                   6810: 
                   6811:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6812:   first starting point for which a partial match was found. */
                   6813: 
                   6814:   md->start_match_ptr = start_match;
                   6815:   md->start_used_ptr = start_match;
                   6816:   md->match_call_count = 0;
1.1.1.2 ! misho    6817:   md->match_function_type = 0;
        !          6818:   md->end_offset_top = 0;
        !          6819:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
1.1       misho    6820:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
                   6821: 
                   6822:   switch(rc)
                   6823:     {
1.1.1.2 ! misho    6824:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
        !          6825:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
        !          6826:     entirely. The only way we can do that is to re-do the match at the same
        !          6827:     point, with a flag to force SKIP with an argument to be ignored. Just
        !          6828:     treating this case as NOMATCH does not work because it does not check other
        !          6829:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
        !          6830: 
        !          6831:     case MATCH_SKIP_ARG:
        !          6832:     new_start_match = start_match;
        !          6833:     md->ignore_skip_arg = TRUE;
        !          6834:     break;
        !          6835: 
1.1       misho    6836:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6837:     same as the match we have just done, treat it as NOMATCH. */
                   6838: 
                   6839:     case MATCH_SKIP:
                   6840:     if (md->start_match_ptr != start_match)
                   6841:       {
                   6842:       new_start_match = md->start_match_ptr;
                   6843:       break;
                   6844:       }
                   6845:     /* Fall through */
                   6846: 
                   6847:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
1.1.1.2 ! misho    6848:     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
1.1       misho    6849: 
                   6850:     case MATCH_NOMATCH:
                   6851:     case MATCH_PRUNE:
                   6852:     case MATCH_THEN:
1.1.1.2 ! misho    6853:     md->ignore_skip_arg = FALSE;
1.1       misho    6854:     new_start_match = start_match + 1;
1.1.1.2 ! misho    6855: #ifdef SUPPORT_UTF
        !          6856:     if (utf)
        !          6857:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
        !          6858:         new_start_match++);
1.1       misho    6859: #endif
                   6860:     break;
                   6861: 
                   6862:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6863: 
                   6864:     case MATCH_COMMIT:
                   6865:     rc = MATCH_NOMATCH;
                   6866:     goto ENDLOOP;
                   6867: 
                   6868:     /* Any other return is either a match, or some kind of error. */
                   6869: 
                   6870:     default:
                   6871:     goto ENDLOOP;
                   6872:     }
                   6873: 
                   6874:   /* Control reaches here for the various types of "no match at this point"
                   6875:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6876: 
                   6877:   rc = MATCH_NOMATCH;
                   6878: 
                   6879:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6880:   newline in the subject (though it may continue over the newline). Therefore,
                   6881:   if we have just failed to match, starting at a newline, do not continue. */
                   6882: 
                   6883:   if (firstline && IS_NEWLINE(start_match)) break;
                   6884: 
                   6885:   /* Advance to new matching position */
                   6886: 
                   6887:   start_match = new_start_match;
                   6888: 
                   6889:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6890:   the subject. */
                   6891: 
                   6892:   if (anchored || start_match > end_subject) break;
                   6893: 
                   6894:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6895:   not contain any explicit matches for \r or \n, and the newline option is CRLF
1.1.1.2 ! misho    6896:   or ANY or ANYCRLF, advance the match position by one more character. In
        !          6897:   normal matching start_match will always be greater than the first position at
        !          6898:   this stage, but a failed *SKIP can cause a return at the same point, which is
        !          6899:   why the first test exists. */
1.1       misho    6900: 
1.1.1.2 ! misho    6901:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
        !          6902:       start_match[-1] == CHAR_CR &&
1.1       misho    6903:       start_match < end_subject &&
                   6904:       *start_match == CHAR_NL &&
                   6905:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6906:         (md->nltype == NLTYPE_ANY ||
                   6907:          md->nltype == NLTYPE_ANYCRLF ||
                   6908:          md->nllen == 2))
                   6909:     start_match++;
                   6910: 
                   6911:   md->mark = NULL;   /* Reset for start of next match attempt */
                   6912:   }                  /* End of for(;;) "bumpalong" loop */
                   6913: 
                   6914: /* ==========================================================================*/
                   6915: 
                   6916: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6917: conditions is true:
                   6918: 
                   6919: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   6920: 
                   6921: (2) We are past the end of the subject;
                   6922: 
                   6923: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6924:     this option requests that a match occur at or before the first newline in
                   6925:     the subject.
                   6926: 
                   6927: When we have a match and the offset vector is big enough to deal with any
                   6928: backreferences, captured substring offsets will already be set up. In the case
                   6929: where we had to get some local store to hold offsets for backreference
                   6930: processing, copy those that we can. In this case there need not be overflow if
                   6931: certain parts of the pattern were not used, even though there are more
                   6932: capturing parentheses than vector slots. */
                   6933: 
                   6934: ENDLOOP:
                   6935: 
                   6936: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
                   6937:   {
                   6938:   if (using_temporary_offsets)
                   6939:     {
1.1.1.2 ! misho    6940:     if (arg_offset_max >= 4)
1.1       misho    6941:       {
                   6942:       memcpy(offsets + 2, md->offset_vector + 2,
1.1.1.2 ! misho    6943:         (arg_offset_max - 2) * sizeof(int));
1.1       misho    6944:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6945:       }
1.1.1.2 ! misho    6946:     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
1.1       misho    6947:     DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho    6948:     (PUBL(free))(md->offset_vector);
1.1       misho    6949:     }
                   6950: 
1.1.1.2 ! misho    6951:   /* Set the return code to the number of captured strings, or 0 if there were
1.1       misho    6952:   too many to fit into the vector. */
                   6953: 
1.1.1.2 ! misho    6954:   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
        !          6955:     0 : md->end_offset_top/2;
        !          6956: 
        !          6957:   /* If there is space in the offset vector, set any unused pairs at the end of
        !          6958:   the pattern to -1 for backwards compatibility. It is documented that this
        !          6959:   happens. In earlier versions, the whole set of potential capturing offsets
        !          6960:   was set to -1 each time round the loop, but this is handled differently now.
        !          6961:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
        !          6962:   those at the end that need unsetting here. We can't just unset them all at
        !          6963:   the start of the whole thing because they may get set in one branch that is
        !          6964:   not the final matching branch. */
        !          6965: 
        !          6966:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
        !          6967:     {
        !          6968:     register int *iptr, *iend;
        !          6969:     int resetcount = 2 + re->top_bracket * 2;
        !          6970:     if (resetcount > offsetcount) resetcount = offsetcount;
        !          6971:     iptr = offsets + md->end_offset_top;
        !          6972:     iend = offsets + resetcount;
        !          6973:     while (iptr < iend) *iptr++ = -1;
        !          6974:     }
1.1       misho    6975: 
                   6976:   /* If there is space, set up the whole thing as substring 0. The value of
                   6977:   md->start_match_ptr might be modified if \K was encountered on the
                   6978:   successful matching path. */
                   6979: 
                   6980:   if (offsetcount < 2) rc = 0; else
                   6981:     {
                   6982:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   6983:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
                   6984:     }
                   6985: 
1.1.1.2 ! misho    6986:   /* Return MARK data if requested */
        !          6987: 
        !          6988:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
        !          6989:     *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1       misho    6990:   DPRINTF((">>>> returning %d\n", rc));
1.1.1.2 ! misho    6991: #ifdef NO_RECURSE
        !          6992:   release_match_heapframes(&frame_zero);
        !          6993: #endif
        !          6994:   return rc;
1.1       misho    6995:   }
                   6996: 
                   6997: /* Control gets here if there has been an error, or if the overall match
                   6998: attempt has failed at all permitted starting positions. */
                   6999: 
                   7000: if (using_temporary_offsets)
                   7001:   {
                   7002:   DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho    7003:   (PUBL(free))(md->offset_vector);
1.1       misho    7004:   }
                   7005: 
                   7006: /* For anything other than nomatch or partial match, just return the code. */
                   7007: 
                   7008: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
                   7009:   {
                   7010:   DPRINTF((">>>> error: returning %d\n", rc));
1.1.1.2 ! misho    7011: #ifdef NO_RECURSE
        !          7012:   release_match_heapframes(&frame_zero);
        !          7013: #endif
1.1       misho    7014:   return rc;
                   7015:   }
                   7016: 
                   7017: /* Handle partial matches - disable any mark data */
                   7018: 
                   7019: if (start_partial != NULL)
                   7020:   {
                   7021:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   7022:   md->mark = NULL;
                   7023:   if (offsetcount > 1)
                   7024:     {
1.1.1.2 ! misho    7025:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
        !          7026:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.1       misho    7027:     }
                   7028:   rc = PCRE_ERROR_PARTIAL;
                   7029:   }
                   7030: 
                   7031: /* This is the classic nomatch case */
                   7032: 
                   7033: else
                   7034:   {
                   7035:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   7036:   rc = PCRE_ERROR_NOMATCH;
                   7037:   }
                   7038: 
                   7039: /* Return the MARK data if it has been requested. */
                   7040: 
                   7041: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2 ! misho    7042:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
        !          7043: #ifdef NO_RECURSE
        !          7044:   release_match_heapframes(&frame_zero);
        !          7045: #endif
1.1       misho    7046: return rc;
                   7047: }
                   7048: 
                   7049: /* End of pcre_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>