File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_exec.c
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language.
    7: 
    8:                        Written by Philip Hazel
    9:            Copyright (c) 1997-2013 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: /* This module contains pcre_exec(), the externally visible function that does
   41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
   42: possible. There are also some static supporting functions. */
   43: 
   44: #ifdef HAVE_CONFIG_H
   45: #include "config.h"
   46: #endif
   47: 
   48: #define NLBLOCK md             /* Block containing newline information */
   49: #define PSSTART start_subject  /* Field containing processed string start */
   50: #define PSEND   end_subject    /* Field containing processed string end */
   51: 
   52: #include "pcre_internal.h"
   53: 
   54: /* Undefine some potentially clashing cpp symbols */
   55: 
   56: #undef min
   57: #undef max
   58: 
   59: /* The md->capture_last field uses the lower 16 bits for the last captured
   60: substring (which can never be greater than 65535) and a bit in the top half
   61: to mean "capture vector overflowed". This odd way of doing things was
   62: implemented when it was realized that preserving and restoring the overflow bit
   63: whenever the last capture number was saved/restored made for a neater
   64: interface, and doing it this way saved on (a) another variable, which would
   65: have increased the stack frame size (a big NO-NO in PCRE) and (b) another
   66: separate set of save/restore instructions. The following defines are used in
   67: implementing this. */
   68: 
   69: #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
   70: #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
   71: #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
   72: 
   73: /* Values for setting in md->match_function_type to indicate two special types
   74: of call to match(). We do it this way to save on using another stack variable,
   75: as stack usage is to be discouraged. */
   76: 
   77: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
   78: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
   79: 
   80: /* Non-error returns from the match() function. Error returns are externally
   81: defined PCRE_ERROR_xxx codes, which are all negative. */
   82: 
   83: #define MATCH_MATCH        1
   84: #define MATCH_NOMATCH      0
   85: 
   86: /* Special internal returns from the match() function. Make them sufficiently
   87: negative to avoid the external error codes. */
   88: 
   89: #define MATCH_ACCEPT       (-999)
   90: #define MATCH_KETRPOS      (-998)
   91: #define MATCH_ONCE         (-997)
   92: /* The next 5 must be kept together and in sequence so that a test that checks
   93: for any one of them can use a range. */
   94: #define MATCH_COMMIT       (-996)
   95: #define MATCH_PRUNE        (-995)
   96: #define MATCH_SKIP         (-994)
   97: #define MATCH_SKIP_ARG     (-993)
   98: #define MATCH_THEN         (-992)
   99: #define MATCH_BACKTRACK_MAX MATCH_THEN
  100: #define MATCH_BACKTRACK_MIN MATCH_COMMIT
  101: 
  102: /* Maximum number of ints of offset to save on the stack for recursive calls.
  103: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  104: because the offset vector is always a multiple of 3 long. */
  105: 
  106: #define REC_STACK_SAVE_MAX 30
  107: 
  108: /* Min and max values for the common repeats; for the maxima, 0 => infinity.
  109: Both tables have 11 entries; NOTE(review): presumably indexed in parallel - confirm at the uses. */
  110: static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };  /* minimum repeat counts */
  111: static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };  /* maximum repeat counts; 0 means no upper limit */
  112: 
  113: #ifdef PCRE_DEBUG
  114: /*************************************************
  115: *        Debugging function to print chars       *
  116: *************************************************/
  117: 
  118: /* Print a sequence of chars in printable format, stopping at the end of the
  119: subject if requested. Values that are not printable are shown as \x{..} in hex.
  120: 
  121: Arguments:
  122:   p           points to characters
  123:   length      number to print; nothing is printed if it is zero or negative
  124:   is_subject  TRUE if printing from within md->start_subject
  125:   md          pointer to matching data block; must always be valid (md->utf is read)
  126: 
  127: Returns:     nothing
  128: */
  129: 
  130: static void
  131: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
  132: {
  133: pcre_uint32 c;
  134: BOOL utf = md->utf;
  135: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
  136: while (length-- > 0)   /* isprint() is undefined for values above the unsigned char range, so test the range first */
  137:   if ((c = RAWUCHARINCTEST(p)) < 256 && isprint(c)) printf("%c", (char)c); else printf("\\x{%02x}", c);
  138: }
  139: #endif
  140: 
  141: 
  142: 
  143: /*************************************************
  144: *          Match a back-reference                *
  145: *************************************************/
  146: 
  147: /* Compare the subject at eptr with a previously captured substring. Normally,
  148: if a back reference hasn't been set, the length that is passed is negative, so
  149: the match always fails. However, in JavaScript compatibility mode, the length
  150: passed is zero. In caseless UTF mode, subject and reference lengths may differ.
  151: 
  152: Arguments:
  153:   offset      index into md->offset_vector of the reference's start offset
  154:   eptr        pointer into the subject
  155:   length      length of reference to be matched (number of data units)
  156:   md          points to match data block
  157:   caseless    TRUE if caseless
  158: 
  159: Returns:      >= 0 the number of subject data units matched (0 if length is 0)
  160:               -1 no match, or length is negative (reference not set)
  161:               -2 partial match: the subject ended before the reference did
  162: */
  163: 
  164: static int
  165: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
  166:   BOOL caseless)
  167: {
  168: PCRE_PUCHAR eptr_start = eptr;
  169: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  /* start of the reference text */
  170: #if defined SUPPORT_UTF && defined SUPPORT_UCP
  171: BOOL utf = md->utf;
  172: #endif
  173: 
  174: #ifdef PCRE_DEBUG  /* length may still be negative here; pchars() then prints nothing */
  175: if (eptr >= md->end_subject)
  176:   printf("matching subject <null>");
  177: else
  178:   {
  179:   printf("matching subject ");
  180:   pchars(eptr, length, TRUE, md);
  181:   }
  182: printf(" against backref ");
  183: pchars(p, length, FALSE, md);
  184: printf("\n");
  185: #endif
  186: 
  187: /* Always fail if reference not set (and not JavaScript compatible - in that
  188: case the length is passed as zero). */
  189: 
  190: if (length < 0) return -1;
  191: 
  192: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
  193: properly if Unicode properties are supported. Otherwise, we can check only
  194: ASCII characters. */
  195: 
  196: if (caseless)
  197:   {
  198: #if defined SUPPORT_UTF && defined SUPPORT_UCP
  199:   if (utf)
  200:     {
  201:     /* Match characters up to the end of the reference. NOTE: the number of
  202:     data units matched may differ, because in UTF-8 there are some characters
  203:     whose upper and lower case versions have different numbers of bytes.
  204:     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
  205:     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
  206:     sequence of two of the latter. It is important, therefore, to check the
  207:     length along the reference, not along the subject (earlier code did this
  208:     wrong). */
  209: 
  210:     PCRE_PUCHAR endptr = p + length;
  211:     while (p < endptr)
  212:       {
  213:       pcre_uint32 c, d;
  214:       const ucd_record *ur;
  215:       if (eptr >= md->end_subject) return -2;   /* Partial match */
  216:       GETCHARINC(c, eptr);
  217:       GETCHARINC(d, p);
  218:       ur = GET_UCD(d);
  219:       if (c != d && c != d + ur->other_case)  /* neither the same character nor d's other case */
  220:         {
  221:         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
  222:         for (;;)  /* scan d's caseless set; presumably ascending with a terminator that stops the scan */
  223:           {
  224:           if (c < *pp) return -1;
  225:           if (c == *pp++) break;
  226:           }
  227:         }
  228:       }
  229:     }
  230:   else
  231: #endif
  232: 
  233:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
  234:   is no UCP support. */
  235:     {
  236:     while (length-- > 0)
  237:       {
  238:       pcre_uint32 cc, cp;
  239:       if (eptr >= md->end_subject) return -2;   /* Partial match */
  240:       cc = RAWUCHARTEST(eptr);
  241:       cp = RAWUCHARTEST(p);
  242:       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;  /* compare after mapping both through md->lcc */
  243:       p++;
  244:       eptr++;
  245:       }
  246:     }
  247:   }
  248: 
  249: /* In the caseful case, we can just compare the data units, whether or not we
  250: are in UTF-8 mode. */
  251: 
  252: else
  253:   {
  254:   while (length-- > 0)
  255:     {
  256:     if (eptr >= md->end_subject) return -2;   /* Partial match */
  257:     if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
  258:     }
  259:   }
  260: 
  261: return (int)(eptr - eptr_start);  /* how far the subject pointer advanced */
  262: }
  263: 
  264: 
  265: 
  266: /***************************************************************************
  267: ****************************************************************************
  268:                    RECURSION IN THE match() FUNCTION
  269: 
  270: The match() function is highly recursive, though not every recursive call
  271: increases the recursive depth. Nevertheless, some regular expressions can cause
  272: it to recurse to a great depth. I was writing for Unix, so I just let it call
  273: itself recursively. This uses the stack for saving everything that has to be
  274: saved for a recursive call. On Unix, the stack can be large, and this works
  275: fine.
  276: 
  277: It turns out that on some non-Unix-like systems there are problems with
  278: programs that use a lot of stack. (This despite the fact that every last chip
  279: has oodles of memory these days, and techniques for extending the stack have
  280: been known for decades.) So....
  281: 
  282: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
  283: calls by keeping local variables that need to be preserved in blocks of memory
  284: obtained from malloc() instead of on the stack. Macros are used to
  285: achieve this so that the actual code doesn't look very different to what it
  286: always used to.
  287: 
  288: The original heap-recursive code used longjmp(). However, it seems that this
  289: can be very slow on some operating systems. Following a suggestion from Stan
  290: Switzer, the use of longjmp() has been abolished, at the cost of having to
  291: provide a unique number for each call to RMATCH. There is no way of generating
  292: a sequence of numbers at compile time in C. I have given them names, to make
  293: them stand out more clearly.
  294: 
  295: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
  296: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
  297: tests. Furthermore, not using longjmp() means that local dynamic variables
  298: don't have indeterminate values; this has meant that the frame size can be
  299: reduced because the result can be "passed back" by straight setting of the
  300: variable instead of being passed in the frame.
  301: ****************************************************************************
  302: ***************************************************************************/
  303: 
  304: /* Unique numbers for RMATCH calls, passed as its "rw" argument. When this list
  305: is changed, the code at HEAP_RETURN below must be updated in sync.  */
  306: 
  307: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  308:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  309:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  310:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  311:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  312:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  313:        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };  /* numbering starts at 1, so RM67 == 67 */
  314: 
  315: /* These versions of the macros use the stack, as normal. There are debugging
  316: versions and production versions. Note that the "rw" argument of RMATCH isn't
  317: actually used in this definition. */
  318: 
  319: #ifndef NO_RECURSE
  320: #define REGISTER register
  321: 
  322: #ifdef PCRE_DEBUG  /* tracing versions: report each call and return on stdout */
  323: #define RMATCH(ra,rb,rc,rd,re,rw) \
  324:   { \
  325:   printf("match() called in line %d\n", __LINE__); \
  326:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* mstart, rdepth and rrc come from the expanding scope */ \
  327:   printf("to line %d\n", __LINE__); \
  328:   }
  329: #define RRETURN(ra) \
  330:   { \
  331:   printf("match() returned %d from line %d\n", ra, __LINE__); \
  332:   return ra; /* note: ra is expanded twice in this version */ \
  333:   }
  334: #else  /* not PCRE_DEBUG: a plain recursive call and a plain return */
  335: #define RMATCH(ra,rb,rc,rd,re,rw) \
  336:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
  337: #define RRETURN(ra) return ra
  338: #endif  /* PCRE_DEBUG */
  339: 
  340: #else
  341: 
  342: 
  343: /* These versions of the macros manage a private stack on the heap. Note that
  344: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
  345: argument of match(), which never changes. */
  346: 
  347: #define REGISTER
  348: 
  349: #define RMATCH(ra,rb,rc,rd,re,rw) /* simulated recursive call: push a frame and jump */\
  350:   {\
  351:   heapframe *newframe = frame->Xnextframe; /* reuse the next frame if one is already chained */\
  352:   if (newframe == NULL)\
  353:     {\
  354:     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
  355:     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  356:     newframe->Xnextframe = NULL;\
  357:     frame->Xnextframe = newframe;\
  358:     }\
  359:   frame->Xwhere = rw; /* records which call site is making this call */\
  360:   newframe->Xeptr = ra;\
  361:   newframe->Xecode = rb;\
  362:   newframe->Xmstart = mstart;\
  363:   newframe->Xoffset_top = rc;\
  364:   newframe->Xeptrb = re;\
  365:   newframe->Xrdepth = frame->Xrdepth + 1;\
  366:   newframe->Xprevframe = frame;\
  367:   frame = newframe;\
  368:   DPRINTF(("restarting from line %d\n", __LINE__));\
  369:   goto HEAP_RECURSE;\
  370:   L_##rw: /* label built from the rw argument, e.g. L_RM1 */\
  371:   DPRINTF(("jumped back to line %d\n", __LINE__));\
  372:   }
  373: 
  374: #define RRETURN(ra) /* simulated return: pop to the previous frame, or really return */\
  375:   {\
  376:   heapframe *oldframe = frame;\
  377:   frame = oldframe->Xprevframe; /* oldframe is not freed here; it stays chained via Xnextframe */\
  378:   if (frame != NULL)\
  379:     {\
  380:     rrc = ra; /* result is handed back through rrc */\
  381:     goto HEAP_RETURN;\
  382:     }\
  383:   return ra; /* no previous frame: this was the outermost call */\
  384:   }
  385: 
  386: 
  387: /* Structure for remembering the local variables in a private frame. RMATCH
  388: fills in a new frame for each simulated call and RRETURN steps back to Xprevframe. */
  389: typedef struct heapframe {
  390:   struct heapframe *Xprevframe;  /* frame that made the call; RRETURN goes back to it */
  391:   struct heapframe *Xnextframe;  /* next frame in the chain, or NULL if not yet allocated */
  392: 
  393:   /* Function arguments that may change */
  394: 
  395:   PCRE_PUCHAR Xeptr;
  396:   const pcre_uchar *Xecode;
  397:   PCRE_PUCHAR Xmstart;
  398:   int Xoffset_top;
  399:   eptrblock *Xeptrb;
  400:   unsigned int Xrdepth;  /* RMATCH sets this to the calling frame's depth plus one */
  401: 
  402:   /* Function local variables */
  403: 
  404:   PCRE_PUCHAR Xcallpat;
  405: #ifdef SUPPORT_UTF
  406:   PCRE_PUCHAR Xcharptr;
  407: #endif
  408:   PCRE_PUCHAR Xdata;
  409:   PCRE_PUCHAR Xnext;
  410:   PCRE_PUCHAR Xpp;
  411:   PCRE_PUCHAR Xprev;
  412:   PCRE_PUCHAR Xsaved_eptr;
  413: 
  414:   recursion_info Xnew_recursive;
  415: 
  416:   BOOL Xcur_is_word;
  417:   BOOL Xcondition;
  418:   BOOL Xprev_is_word;
  419: 
  420: #ifdef SUPPORT_UCP
  421:   int Xprop_type;
  422:   unsigned int Xprop_value;
  423:   int Xprop_fail_result;
  424:   int Xoclength;
  425:   pcre_uchar Xocchars[6];
  426: #endif
  427: 
  428:   int Xcodelink;
  429:   int Xctype;
  430:   unsigned int Xfc;
  431:   int Xfi;
  432:   int Xlength;
  433:   int Xmax;
  434:   int Xmin;
  435:   unsigned int Xnumber;
  436:   int Xoffset;
  437:   unsigned int Xop;
  438:   pcre_int32 Xsave_capture_last;
  439:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  440:   int Xstacksave[REC_STACK_SAVE_MAX];
  441: 
  442:   eptrblock Xnewptrb;
  443: 
  444:   /* Where to jump back to: the "rw" number of the RMATCH call this frame made */
  445: 
  446:   int Xwhere;
  447: 
  448: } heapframe;
  449: 
  450: #endif
  451: 
  452: 
  453: /***************************************************************************
  454: ***************************************************************************/
  455: 
  456: 
  457: 
  458: /*************************************************
  459: *         Match from current position            *
  460: *************************************************/
  461: 
  462: /* This function is called recursively in many circumstances. Whenever it
  463: returns a negative (error) response, the outer incarnation must also return the
  464: same response. */
  465: 
  466: /* These macros pack up tests that are used for partial matching, and which
  467: appear several times in the code. When md->partial is non-zero, we set the
  468: "hit end" flag if the pointer is at the end of the subject and also past
  469: md->start_used_ptr (i.e. something has been matched). For hard partial matching
  470: (md->partial > 1), we then return immediately. The second one omits the end test;
  471: it is used when we already know we are past the end of the subject. */
  472: 
  473: #define CHECK_PARTIAL()\
  474:   if (md->partial != 0 && eptr >= md->end_subject && \
  475:       eptr > md->start_used_ptr) \
  476:     { \
  477:     md->hitend = TRUE; \
  478:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* hard partial: give up at once */ \
  479:     }
  480: 
  481: #define SCHECK_PARTIAL()\
  482:   if (md->partial != 0 && eptr > md->start_used_ptr) /* no end-of-subject test here */ \
  483:     { \
  484:     md->hitend = TRUE; \
  485:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* hard partial: give up at once */ \
  486:     }
  487: 
  488: 
  489: /* Performance note: It might be tempting to extract commonly used fields from
  490: the md structure (e.g. utf, end_subject) into individual variables to improve
  491: performance. Tests using gcc on a SPARC disproved this; in the first case, it
  492: made performance worse.
  493: 
  494: Arguments:
  495:    eptr        pointer to current character in subject
  496:    ecode       pointer to current position in compiled code
  497:    mstart      pointer to the current match start position (can be modified
  498:                  by encountering \K)
  499:    offset_top  current top pointer
  500:    md          pointer to "static" info for the match
  501:    eptrb       pointer to chain of blocks containing eptr at start of
  502:                  brackets - for testing for empty matches
  503:    rdepth      the recursion depth
  504: 
  505: Returns:       MATCH_MATCH if matched            )  these values are >= 0
  506:                MATCH_NOMATCH if failed to match  )
  507:                a negative MATCH_xxx value for PRUNE, SKIP, etc
  508:                a negative PCRE_ERROR_xxx value if aborted by an error condition
  509:                  (e.g. stopped by repeated call or recursion limit)
  510: */
  511: 
  512: static int
  513: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
  514:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
  515:   unsigned int rdepth)
  516: {
  517: /* These variables do not need to be preserved over recursion in this function,
  518: so they can be ordinary variables in all cases. Mark some of them with
  519: "register" because they are used a lot in loops. */
  520: 
  521: register int  rrc;         /* Returns from recursive calls */
  522: register int  i;           /* Used for loops not involving calls to RMATCH() */
  523: register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
  524: register BOOL utf;         /* Local copy of UTF flag for speed */
  525: 
  526: BOOL minimize, possessive; /* Quantifier options */
  527: BOOL caseless;
  528: int condcode;
  529: 
  530: /* When recursion is not being used, all "local" variables that have to be
  531: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
  532: frame on the stack here; subsequent instantiations are obtained from the heap
  533: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
  534: the top-level on the stack rather than malloc-ing them all gives a performance
  535: boost in many cases where there is not much "recursion". */
  536: 
  537: #ifdef NO_RECURSE
  538: heapframe *frame = (heapframe *)md->match_frames_base;
  539: 
  540: /* Copy in the original argument variables */
  541: 
  542: frame->Xeptr = eptr;
  543: frame->Xecode = ecode;
  544: frame->Xmstart = mstart;
  545: frame->Xoffset_top = offset_top;
  546: frame->Xeptrb = eptrb;
  547: frame->Xrdepth = rdepth;
  548: 
  549: /* This is where control jumps back to to effect "recursion" */
  550: 
  551: HEAP_RECURSE:
  552: 
  553: /* Macros make the argument variables come from the current frame */
  554: 
  555: #define eptr               frame->Xeptr
  556: #define ecode              frame->Xecode
  557: #define mstart             frame->Xmstart
  558: #define offset_top         frame->Xoffset_top
  559: #define eptrb              frame->Xeptrb
  560: #define rdepth             frame->Xrdepth
  561: 
  562: /* Ditto for the local variables */
  563: 
  564: #ifdef SUPPORT_UTF
  565: #define charptr            frame->Xcharptr
  566: #endif
  567: #define callpat            frame->Xcallpat
  568: #define codelink           frame->Xcodelink
  569: #define data               frame->Xdata
  570: #define next               frame->Xnext
  571: #define pp                 frame->Xpp
  572: #define prev               frame->Xprev
  573: #define saved_eptr         frame->Xsaved_eptr
  574: 
  575: #define new_recursive      frame->Xnew_recursive
  576: 
  577: #define cur_is_word        frame->Xcur_is_word
  578: #define condition          frame->Xcondition
  579: #define prev_is_word       frame->Xprev_is_word
  580: 
  581: #ifdef SUPPORT_UCP
  582: #define prop_type          frame->Xprop_type
  583: #define prop_value         frame->Xprop_value
  584: #define prop_fail_result   frame->Xprop_fail_result
  585: #define oclength           frame->Xoclength
  586: #define occhars            frame->Xocchars
  587: #endif
  588: 
  589: #define ctype              frame->Xctype
  590: #define fc                 frame->Xfc
  591: #define fi                 frame->Xfi
  592: #define length             frame->Xlength
  593: #define max                frame->Xmax
  594: #define min                frame->Xmin
  595: #define number             frame->Xnumber
  596: #define offset             frame->Xoffset
  597: #define op                 frame->Xop
  598: #define save_capture_last  frame->Xsave_capture_last
  599: #define save_offset1       frame->Xsave_offset1
  600: #define save_offset2       frame->Xsave_offset2
  601: #define save_offset3       frame->Xsave_offset3
  602: #define stacksave          frame->Xstacksave
  603: 
  604: #define newptrb            frame->Xnewptrb
  605: 
  606: /* When recursion is being used, local variables are allocated on the stack and
  607: get preserved during recursion in the normal way. In this environment, fi and
  608: i, and fc and c, can be the same variables. */
  609: 
  610: #else         /* NO_RECURSE not defined */
  611: #define fi i
  612: #define fc c
  613: 
  614: /* Many of the following variables are used only in small blocks of the code.
  615: My normal style of coding would have declared them within each of those blocks.
  616: However, in order to accommodate the version of this code that uses an external
  617: "stack" implemented on the heap, it is easier to declare them all here, so the
  618: declarations can be cut out in a block. The only declarations within blocks
  619: below are for variables that do not have to be preserved over a recursive call
  620: to RMATCH(). */
  621: 
  622: #ifdef SUPPORT_UTF
  623: const pcre_uchar *charptr;
  624: #endif
  625: const pcre_uchar *callpat;
  626: const pcre_uchar *data;
  627: const pcre_uchar *next;
  628: PCRE_PUCHAR       pp;
  629: const pcre_uchar *prev;
  630: PCRE_PUCHAR       saved_eptr;
  631: 
  632: recursion_info new_recursive;
  633: 
  634: BOOL cur_is_word;
  635: BOOL condition;
  636: BOOL prev_is_word;
  637: 
  638: #ifdef SUPPORT_UCP
  639: int prop_type;
  640: unsigned int prop_value;
  641: int prop_fail_result;
  642: int oclength;
  643: pcre_uchar occhars[6];
  644: #endif
  645: 
  646: int codelink;
  647: int ctype;
  648: int length;
  649: int max;
  650: int min;
  651: unsigned int number;
  652: int offset;
  653: unsigned int op;
  654: pcre_int32 save_capture_last;
  655: int save_offset1, save_offset2, save_offset3;
  656: int stacksave[REC_STACK_SAVE_MAX];
  657: 
  658: eptrblock newptrb;
  659: 
  660: /* There is a special fudge for calling match() in a way that causes it to
  661: measure the size of its basic stack frame when the stack is being used for
  662: recursion. The second argument (ecode) being NULL triggers this behaviour. It
  663: cannot normally ever be NULL. The return is the negated value of the frame
  664: size. */
  665: 
  666: if (ecode == NULL)
  667:   {
  668:   if (rdepth == 0)
  669:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
  670:   else
  671:     {
  672:     int len = (char *)&rdepth - (char *)eptr;
  673:     return (len > 0)? -len : len;
  674:     }
  675:   }
  676: #endif     /* NO_RECURSE */
  677: 
  678: /* To save space on the stack and in the heap frame, I have doubled up on some
  679: of the local variables that are used only in localised parts of the code, but
  680: still need to be preserved over recursive calls of match(). These macros define
  681: the alternative names that are used. */
  682: 
  683: #define allow_zero    cur_is_word
  684: #define cbegroup      condition
  685: #define code_offset   codelink
  686: #define condassert    condition
  687: #define matched_once  prev_is_word
  688: #define foc           number
  689: #define save_mark     data
  690: 
  691: /* These statements are here to stop the compiler complaining about unitialized
  692: variables. */
  693: 
  694: #ifdef SUPPORT_UCP
  695: prop_value = 0;
  696: prop_fail_result = 0;
  697: #endif
  698: 
  699: 
  700: /* This label is used for tail recursion, which is used in a few cases even
  701: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
  702: used. Thanks to Ian Taylor for noticing this possibility and sending the
  703: original patch. */
  704: 
  705: TAIL_RECURSE:
  706: 
  707: /* OK, now we can get on with the real code of the function. Recursive calls
  708: are specified by the macro RMATCH and RRETURN is used to return. When
  709: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
  710: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
  711: defined). However, RMATCH isn't like a function call because it's quite a
  712: complicated macro. It has to be used in one particular way. This shouldn't,
  713: however, impact performance when true recursion is being used. */
  714: 
  715: #ifdef SUPPORT_UTF
  716: utf = md->utf;       /* Local copy of the flag */
  717: #else
  718: utf = FALSE;
  719: #endif
  720: 
  721: /* First check that we haven't called match() too many times, or that we
  722: haven't exceeded the recursive call limit. */
  723: 
  724: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
  725: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
  726: 
  727: /* At the start of a group with an unlimited repeat that may match an empty
  728: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
  729: done this way to save having to use another function argument, which would take
  730: up space on the stack. See also MATCH_CONDASSERT below.
  731: 
  732: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
  733: such remembered pointers, to be checked when we hit the closing ket, in order
  734: to break infinite loops that match no characters. When match() is called in
  735: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
  736: NOT be used with tail recursion, because the memory block that is used is on
  737: the stack, so a new one may be required for each match(). */
  738: 
  739: if (md->match_function_type == MATCH_CBEGROUP)
  740:   {
  741:   newptrb.epb_saved_eptr = eptr;
  742:   newptrb.epb_prev = eptrb;
  743:   eptrb = &newptrb;
  744:   md->match_function_type = 0;
  745:   }
  746: 
  747: /* Now start processing the opcodes. */
  748: 
  749: for (;;)
  750:   {
  751:   minimize = possessive = FALSE;
  752:   op = *ecode;
  753: 
  754:   switch(op)
  755:     {
  756:     case OP_MARK:
  757:     md->nomatch_mark = ecode + 2;
  758:     md->mark = NULL;    /* In case previously set by assertion */
  759:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
  760:       eptrb, RM55);
  761:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
  762:          md->mark == NULL) md->mark = ecode + 2;
  763: 
  764:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
  765:     argument, and we must check whether that argument matches this MARK's
  766:     argument. It is passed back in md->start_match_ptr (an overloading of that
  767:     variable). If it does match, we reset that variable to the current subject
  768:     position and return MATCH_SKIP. Otherwise, pass back the return code
  769:     unaltered. */
  770: 
  771:     else if (rrc == MATCH_SKIP_ARG &&
  772:         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
  773:       {
  774:       md->start_match_ptr = eptr;
  775:       RRETURN(MATCH_SKIP);
  776:       }
  777:     RRETURN(rrc);
  778: 
  779:     case OP_FAIL:
  780:     RRETURN(MATCH_NOMATCH);
  781: 
  782:     case OP_COMMIT:
  783:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
  784:       eptrb, RM52);
  785:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  786:     RRETURN(MATCH_COMMIT);
  787: 
  788:     case OP_PRUNE:
  789:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
  790:       eptrb, RM51);
  791:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  792:     RRETURN(MATCH_PRUNE);
  793: 
  794:     case OP_PRUNE_ARG:
  795:     md->nomatch_mark = ecode + 2;
  796:     md->mark = NULL;    /* In case previously set by assertion */
  797:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
  798:       eptrb, RM56);
  799:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
  800:          md->mark == NULL) md->mark = ecode + 2;
  801:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  802:     RRETURN(MATCH_PRUNE);
  803: 
  804:     case OP_SKIP:
  805:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
  806:       eptrb, RM53);
  807:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  808:     md->start_match_ptr = eptr;   /* Pass back current position */
  809:     RRETURN(MATCH_SKIP);
  810: 
  811:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
  812:     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
  813:     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
  814:     that failed and any that precede it (either they also failed, or were not
  815:     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
  816:     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
  817:     set to the count of the one that failed. */
  818: 
  819:     case OP_SKIP_ARG:
  820:     md->skip_arg_count++;
  821:     if (md->skip_arg_count <= md->ignore_skip_arg)
  822:       {
  823:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
  824:       break;
  825:       }
  826:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
  827:       eptrb, RM57);
  828:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  829: 
  830:     /* Pass back the current skip name by overloading md->start_match_ptr and
  831:     returning the special MATCH_SKIP_ARG return code. This will either be
  832:     caught by a matching MARK, or get to the top, where it causes a rematch
  833:     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
  834: 
  835:     md->start_match_ptr = ecode + 2;
  836:     RRETURN(MATCH_SKIP_ARG);
  837: 
  838:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
  839:     the branch in which it occurs can be determined. Overload the start of
  840:     match pointer to do this. */
  841: 
  842:     case OP_THEN:
  843:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
  844:       eptrb, RM54);
  845:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  846:     md->start_match_ptr = ecode;
  847:     RRETURN(MATCH_THEN);
  848: 
  849:     case OP_THEN_ARG:
  850:     md->nomatch_mark = ecode + 2;
  851:     md->mark = NULL;    /* In case previously set by assertion */
  852:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
  853:       md, eptrb, RM58);
  854:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
  855:          md->mark == NULL) md->mark = ecode + 2;
  856:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  857:     md->start_match_ptr = ecode;
  858:     RRETURN(MATCH_THEN);
  859: 
  860:     /* Handle an atomic group that does not contain any capturing parentheses.
  861:     This can be handled like an assertion. Prior to 8.13, all atomic groups
  862:     were handled this way. In 8.13, the code was changed as below for ONCE, so
  863:     that backups pass through the group and thereby reset captured values.
  864:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
  865:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
  866:     less stack intensive way.
  867: 
  868:     Check the alternative branches in turn - the matching won't pass the KET
  869:     for this kind of subpattern. If any one branch matches, we carry on as at
  870:     the end of a normal bracket, leaving the subject pointer, but resetting
  871:     the start-of-match value in case it was changed by \K. */
  872: 
  873:     case OP_ONCE_NC:
  874:     prev = ecode;
  875:     saved_eptr = eptr;
  876:     save_mark = md->mark;
  877:     do
  878:       {
  879:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
  880:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
  881:         {
  882:         mstart = md->start_match_ptr;
  883:         break;
  884:         }
  885:       if (rrc == MATCH_THEN)
  886:         {
  887:         next = ecode + GET(ecode,1);
  888:         if (md->start_match_ptr < next &&
  889:             (*ecode == OP_ALT || *next == OP_ALT))
  890:           rrc = MATCH_NOMATCH;
  891:         }
  892: 
  893:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  894:       ecode += GET(ecode,1);
  895:       md->mark = save_mark;
  896:       }
  897:     while (*ecode == OP_ALT);
  898: 
  899:     /* If we hit the end of the group (which could be repeated), fail */
  900: 
  901:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
  902: 
  903:     /* Continue as from after the group, updating the offsets high water
  904:     mark, since extracts may have been taken. */
  905: 
  906:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
  907: 
  908:     offset_top = md->end_offset_top;
  909:     eptr = md->end_match_ptr;
  910: 
  911:     /* For a non-repeating ket, just continue at this level. This also
  912:     happens for a repeating ket if no characters were matched in the group.
  913:     This is the forcible breaking of infinite loops as implemented in Perl
  914:     5.005. */
  915: 
  916:     if (*ecode == OP_KET || eptr == saved_eptr)
  917:       {
  918:       ecode += 1+LINK_SIZE;
  919:       break;
  920:       }
  921: 
  922:     /* The repeating kets try the rest of the pattern or restart from the
  923:     preceding bracket, in the appropriate order. The second "call" of match()
  924:     uses tail recursion, to avoid using another stack frame. */
  925: 
  926:     if (*ecode == OP_KETRMIN)
  927:       {
  928:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
  929:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  930:       ecode = prev;
  931:       goto TAIL_RECURSE;
  932:       }
  933:     else  /* OP_KETRMAX */
  934:       {
  935:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
  936:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  937:       ecode += 1 + LINK_SIZE;
  938:       goto TAIL_RECURSE;
  939:       }
  940:     /* Control never gets here */
  941: 
  942:     /* Handle a capturing bracket, other than those that are possessive with an
  943:     unlimited repeat. If there is space in the offset vector, save the current
  944:     subject position in the working slot at the top of the vector. We mustn't
  945:     change the current values of the data slot, because they may be set from a
  946:     previous iteration of this group, and be referred to by a reference inside
  947:     the group. A failure to match might occur after the group has succeeded,
  948:     if something later on doesn't match. For this reason, we need to restore
  949:     the working value and also the values of the final offsets, in case they
  950:     were set by a previous iteration of the same bracket.
  951: 
  952:     If there isn't enough space in the offset vector, treat this as if it were
  953:     a non-capturing bracket. Don't worry about setting the flag for the error
  954:     case here; that is handled in the code for KET. */
  955: 
  956:     case OP_CBRA:
  957:     case OP_SCBRA:
  958:     number = GET2(ecode, 1+LINK_SIZE);
  959:     offset = number << 1;
  960: 
  961: #ifdef PCRE_DEBUG
  962:     printf("start bracket %d\n", number);
  963:     printf("subject=");
  964:     pchars(eptr, 16, TRUE, md);
  965:     printf("\n");
  966: #endif
  967: 
  968:     if (offset < md->offset_max)
  969:       {
  970:       save_offset1 = md->offset_vector[offset];
  971:       save_offset2 = md->offset_vector[offset+1];
  972:       save_offset3 = md->offset_vector[md->offset_end - number];
  973:       save_capture_last = md->capture_last;
  974:       save_mark = md->mark;
  975: 
  976:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
  977:       md->offset_vector[md->offset_end - number] =
  978:         (int)(eptr - md->start_subject);
  979: 
  980:       for (;;)
  981:         {
  982:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
  983:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
  984:           eptrb, RM1);
  985:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
  986: 
  987:         /* If we backed up to a THEN, check whether it is within the current
  988:         branch by comparing the address of the THEN that is passed back with
  989:         the end of the branch. If it is within the current branch, and the
  990:         branch is one of two or more alternatives (it either starts or ends
  991:         with OP_ALT), we have reached the limit of THEN's action, so convert
  992:         the return code to NOMATCH, which will cause normal backtracking to
  993:         happen from now on. Otherwise, THEN is passed back to an outer
  994:         alternative. This implements Perl's treatment of parenthesized groups,
  995:         where a group not containing | does not affect the current alternative,
  996:         that is, (X) is NOT the same as (X|(*F)). */
  997: 
  998:         if (rrc == MATCH_THEN)
  999:           {
 1000:           next = ecode + GET(ecode,1);
 1001:           if (md->start_match_ptr < next &&
 1002:               (*ecode == OP_ALT || *next == OP_ALT))
 1003:             rrc = MATCH_NOMATCH;
 1004:           }
 1005: 
 1006:         /* Anything other than NOMATCH is passed back. */
 1007: 
 1008:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1009:         md->capture_last = save_capture_last;
 1010:         ecode += GET(ecode, 1);
 1011:         md->mark = save_mark;
 1012:         if (*ecode != OP_ALT) break;
 1013:         }
 1014: 
 1015:       DPRINTF(("bracket %d failed\n", number));
 1016:       md->offset_vector[offset] = save_offset1;
 1017:       md->offset_vector[offset+1] = save_offset2;
 1018:       md->offset_vector[md->offset_end - number] = save_offset3;
 1019: 
 1020:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
 1021: 
 1022:       RRETURN(rrc);
 1023:       }
 1024: 
 1025:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 1026:     as a non-capturing bracket. */
 1027: 
 1028:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1029:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1030: 
 1031:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 1032: 
 1033:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1034:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1035: 
 1036:     /* Non-capturing or atomic group, except for possessive with unlimited
 1037:     repeat and ONCE group with no captures. Loop for all the alternatives.
 1038: 
 1039:     When we get to the final alternative within the brackets, we used to return
 1040:     the result of a recursive call to match() whatever happened so it was
 1041:     possible to reduce stack usage by turning this into a tail recursion,
 1042:     except in the case of a possibly empty group. However, now that there is
 1043:     the possibility of (*THEN) occurring in the final alternative, this
 1044:     optimization is no longer always possible.
 1045: 
 1046:     We can optimize if we know there are no (*THEN)s in the pattern; at present
 1047:     this is the best that can be done.
 1048: 
 1049:     MATCH_ONCE is returned when the end of an atomic group is successfully
 1050:     reached, but subsequent matching fails. It passes back up the tree (causing
 1051:     captured values to be reset) until the original atomic group level is
 1052:     reached. This is tested by comparing md->once_target with the start of the
 1053:     group. At this point, the return is converted into MATCH_NOMATCH so that
 1054:     previous backup points can be taken. */
 1055: 
 1056:     case OP_ONCE:
 1057:     case OP_BRA:
 1058:     case OP_SBRA:
 1059:     DPRINTF(("start non-capturing bracket\n"));
 1060: 
 1061:     for (;;)
 1062:       {
 1063:       if (op >= OP_SBRA || op == OP_ONCE)
 1064:         md->match_function_type = MATCH_CBEGROUP;
 1065: 
 1066:       /* If this is not a possibly empty group, and there are no (*THEN)s in
 1067:       the pattern, and this is the final alternative, optimize as described
 1068:       above. */
 1069: 
 1070:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
 1071:         {
 1072:         ecode += PRIV(OP_lengths)[*ecode];
 1073:         goto TAIL_RECURSE;
 1074:         }
 1075: 
 1076:       /* In all other cases, we have to make another call to match(). */
 1077: 
 1078:       save_mark = md->mark;
 1079:       save_capture_last = md->capture_last;
 1080:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
 1081:         RM2);
 1082: 
 1083:       /* See comment in the code for capturing groups above about handling
 1084:       THEN. */
 1085: 
 1086:       if (rrc == MATCH_THEN)
 1087:         {
 1088:         next = ecode + GET(ecode,1);
 1089:         if (md->start_match_ptr < next &&
 1090:             (*ecode == OP_ALT || *next == OP_ALT))
 1091:           rrc = MATCH_NOMATCH;
 1092:         }
 1093: 
 1094:       if (rrc != MATCH_NOMATCH)
 1095:         {
 1096:         if (rrc == MATCH_ONCE)
 1097:           {
 1098:           const pcre_uchar *scode = ecode;
 1099:           if (*scode != OP_ONCE)           /* If not at start, find it */
 1100:             {
 1101:             while (*scode == OP_ALT) scode += GET(scode, 1);
 1102:             scode -= GET(scode, 1);
 1103:             }
 1104:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
 1105:           }
 1106:         RRETURN(rrc);
 1107:         }
 1108:       ecode += GET(ecode, 1);
 1109:       md->mark = save_mark;
 1110:       if (*ecode != OP_ALT) break;
 1111:       md->capture_last = save_capture_last;
 1112:       }
 1113: 
 1114:     RRETURN(MATCH_NOMATCH);
 1115: 
 1116:     /* Handle possessive capturing brackets with an unlimited repeat. We come
 1117:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
 1118:     handled similarly to the normal case above. However, the matching is
 1119:     different. The end of these brackets will always be OP_KETRPOS, which
 1120:     returns MATCH_KETRPOS without going further in the pattern. By this means
 1121:     we can handle the group by iteration rather than recursion, thereby
 1122:     reducing the amount of stack needed. */
 1123: 
 1124:     case OP_CBRAPOS:
 1125:     case OP_SCBRAPOS:
 1126:     allow_zero = FALSE;
 1127: 
 1128:     POSSESSIVE_CAPTURE:
 1129:     number = GET2(ecode, 1+LINK_SIZE);
 1130:     offset = number << 1;
 1131: 
 1132: #ifdef PCRE_DEBUG
 1133:     printf("start possessive bracket %d\n", number);
 1134:     printf("subject=");
 1135:     pchars(eptr, 16, TRUE, md);
 1136:     printf("\n");
 1137: #endif
 1138: 
 1139:     if (offset < md->offset_max)
 1140:       {
 1141:       matched_once = FALSE;
 1142:       code_offset = (int)(ecode - md->start_code);
 1143: 
 1144:       save_offset1 = md->offset_vector[offset];
 1145:       save_offset2 = md->offset_vector[offset+1];
 1146:       save_offset3 = md->offset_vector[md->offset_end - number];
 1147:       save_capture_last = md->capture_last;
 1148: 
 1149:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 1150: 
 1151:       /* Each time round the loop, save the current subject position for use
 1152:       when the group matches. For MATCH_MATCH, the group has matched, so we
 1153:       restart it with a new subject starting position, remembering that we had
 1154:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
 1155:       usual. If we haven't matched any alternatives in any iteration, check to
 1156:       see if a previous iteration matched. If so, the group has matched;
 1157:       continue from afterwards. Otherwise it has failed; restore the previous
 1158:       capture values before returning NOMATCH. */
 1159: 
 1160:       for (;;)
 1161:         {
 1162:         md->offset_vector[md->offset_end - number] =
 1163:           (int)(eptr - md->start_subject);
 1164:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
 1165:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 1166:           eptrb, RM63);
 1167:         if (rrc == MATCH_KETRPOS)
 1168:           {
 1169:           offset_top = md->end_offset_top;
 1170:           eptr = md->end_match_ptr;
 1171:           ecode = md->start_code + code_offset;
 1172:           save_capture_last = md->capture_last;
 1173:           matched_once = TRUE;
 1174:           mstart = md->start_match_ptr;    /* In case \K changed it */
 1175:           continue;
 1176:           }
 1177: 
 1178:         /* See comment in the code for capturing groups above about handling
 1179:         THEN. */
 1180: 
 1181:         if (rrc == MATCH_THEN)
 1182:           {
 1183:           next = ecode + GET(ecode,1);
 1184:           if (md->start_match_ptr < next &&
 1185:               (*ecode == OP_ALT || *next == OP_ALT))
 1186:             rrc = MATCH_NOMATCH;
 1187:           }
 1188: 
 1189:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1190:         md->capture_last = save_capture_last;
 1191:         ecode += GET(ecode, 1);
 1192:         if (*ecode != OP_ALT) break;
 1193:         }
 1194: 
 1195:       if (!matched_once)
 1196:         {
 1197:         md->offset_vector[offset] = save_offset1;
 1198:         md->offset_vector[offset+1] = save_offset2;
 1199:         md->offset_vector[md->offset_end - number] = save_offset3;
 1200:         }
 1201: 
 1202:       if (allow_zero || matched_once)
 1203:         {
 1204:         ecode += 1 + LINK_SIZE;
 1205:         break;
 1206:         }
 1207: 
 1208:       RRETURN(MATCH_NOMATCH);
 1209:       }
 1210: 
 1211:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 1212:     as a non-capturing bracket. */
 1213: 
 1214:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1215:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1216: 
 1217:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 1218: 
 1219:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1220:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 1221: 
 1222:     /* Non-capturing possessive bracket with unlimited repeat. We come here
 1223:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
 1224:     without the capturing complication. It is written out separately for speed
 1225:     and cleanliness. */
 1226: 
 1227:     case OP_BRAPOS:
 1228:     case OP_SBRAPOS:
 1229:     allow_zero = FALSE;
 1230: 
 1231:     POSSESSIVE_NON_CAPTURE:
 1232:     matched_once = FALSE;
 1233:     code_offset = (int)(ecode - md->start_code);
 1234:     save_capture_last = md->capture_last;
 1235: 
 1236:     for (;;)
 1237:       {
 1238:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
 1239:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 1240:         eptrb, RM48);
 1241:       if (rrc == MATCH_KETRPOS)
 1242:         {
 1243:         offset_top = md->end_offset_top;
 1244:         eptr = md->end_match_ptr;
 1245:         ecode = md->start_code + code_offset;
 1246:         matched_once = TRUE;
 1247:         mstart = md->start_match_ptr;   /* In case \K reset it */
 1248:         continue;
 1249:         }
 1250: 
 1251:       /* See comment in the code for capturing groups above about handling
 1252:       THEN. */
 1253: 
 1254:       if (rrc == MATCH_THEN)
 1255:         {
 1256:         next = ecode + GET(ecode,1);
 1257:         if (md->start_match_ptr < next &&
 1258:             (*ecode == OP_ALT || *next == OP_ALT))
 1259:           rrc = MATCH_NOMATCH;
 1260:         }
 1261: 
 1262:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1263:       ecode += GET(ecode, 1);
 1264:       if (*ecode != OP_ALT) break;
 1265:       md->capture_last = save_capture_last;
 1266:       }
 1267: 
 1268:     if (matched_once || allow_zero)
 1269:       {
 1270:       ecode += 1 + LINK_SIZE;
 1271:       break;
 1272:       }
 1273:     RRETURN(MATCH_NOMATCH);
 1274: 
 1275:     /* Control never reaches here. */
 1276: 
 1277:     /* Conditional group: compilation checked that there are no more than two
 1278:     branches. If the condition is false, skipping the first branch takes us
 1279:     past the end of the item if there is only one branch, but that's exactly
 1280:     what we want. */
 1281: 
 1282:     case OP_COND:
 1283:     case OP_SCOND:
 1284: 
 1285:     /* The variable codelink will be added to ecode when the condition is
 1286:     false, to get to the second branch. Setting it to the offset to the ALT
 1287:     or KET, then incrementing ecode achieves this effect. We now have ecode
 1288:     pointing to the condition or callout. */
 1289: 
 1290:     codelink = GET(ecode, 1);   /* Offset to the second branch */
 1291:     ecode += 1 + LINK_SIZE;     /* From this opcode */
 1292: 
 1293:     /* Because of the way auto-callout works during compile, a callout item is
 1294:     inserted between OP_COND and an assertion condition. */
 1295: 
 1296:     if (*ecode == OP_CALLOUT)
 1297:       {
 1298:       if (PUBL(callout) != NULL)
 1299:         {
 1300:         PUBL(callout_block) cb;
 1301:         cb.version          = 2;   /* Version 2 of the callout block */
 1302:         cb.callout_number   = ecode[1];
 1303:         cb.offset_vector    = md->offset_vector;
 1304: #if defined COMPILE_PCRE8
 1305:         cb.subject          = (PCRE_SPTR)md->start_subject;
 1306: #elif defined COMPILE_PCRE16
 1307:         cb.subject          = (PCRE_SPTR16)md->start_subject;
 1308: #elif defined COMPILE_PCRE32
 1309:         cb.subject          = (PCRE_SPTR32)md->start_subject;
 1310: #endif
 1311:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
 1312:         cb.start_match      = (int)(mstart - md->start_subject);
 1313:         cb.current_position = (int)(eptr - md->start_subject);
 1314:         cb.pattern_position = GET(ecode, 2);
 1315:         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 1316:         cb.capture_top      = offset_top/2;
 1317:         cb.capture_last     = md->capture_last & CAPLMASK;
 1318:         /* Internal change requires this for API compatibility. */
 1319:         if (cb.capture_last == 0) cb.capture_last = -1;
 1320:         cb.callout_data     = md->callout_data;
 1321:         cb.mark             = md->nomatch_mark;
 1322:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 1323:         if (rrc < 0) RRETURN(rrc);
 1324:         }
 1325: 
 1326:       /* Advance ecode past the callout, so it now points to the condition. We
 1327:       must adjust codelink so that the value of ecode+codelink is unchanged. */
 1328: 
 1329:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
 1330:       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
 1331:       }
 1332: 
 1333:     /* Test the various possible conditions */
 1334: 
 1335:     condition = FALSE;
 1336:     switch(condcode = *ecode)
 1337:       {
 1338:       case OP_RREF:         /* Numbered group recursion test */
 1339:       if (md->recursive != NULL)     /* Not recursing => FALSE */
 1340:         {
 1341:         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
 1342:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
 1343:         }
 1344:       break;
 1345: 
 1346:       case OP_DNRREF:       /* Duplicate named group recursion test */
 1347:       if (md->recursive != NULL)
 1348:         {
 1349:         int count = GET2(ecode, 1 + IMM2_SIZE);
 1350:         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
 1351:         while (count-- > 0)
 1352:           {
 1353:           unsigned int recno = GET2(slot, 0);
 1354:           condition = recno == md->recursive->group_num;
 1355:           if (condition) break;
 1356:           slot += md->name_entry_size;
 1357:           }
 1358:         }
 1359:       break;
 1360: 
 1361:       case OP_CREF:         /* Numbered group used test */
 1362:       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
 1363:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 1364:       break;
 1365: 
 1366:       case OP_DNCREF:      /* Duplicate named group used test */
 1367:         {
 1368:         int count = GET2(ecode, 1 + IMM2_SIZE);
 1369:         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
 1370:         while (count-- > 0)
 1371:           {
 1372:           offset = GET2(slot, 0) << 1;
 1373:           condition = offset < offset_top && md->offset_vector[offset] >= 0;
 1374:           if (condition) break;
 1375:           slot += md->name_entry_size;
 1376:           }
 1377:         }
 1378:       break;
 1379: 
 1380:       case OP_DEF:     /* DEFINE - always false */
 1381:       break;
 1382: 
 1383:       /* The condition is an assertion. Call match() to evaluate it - setting
 1384:       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
 1385:       of an assertion. */
 1386: 
 1387:       default:
 1388:       md->match_function_type = MATCH_CONDASSERT;
 1389:       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
 1390:       if (rrc == MATCH_MATCH)
 1391:         {
 1392:         if (md->end_offset_top > offset_top)
 1393:           offset_top = md->end_offset_top;  /* Captures may have happened */
 1394:         condition = TRUE;
 1395: 
 1396:         /* Advance ecode past the assertion to the start of the first branch,
 1397:         but adjust it so that the general choosing code below works. */
 1398: 
 1399:         ecode += GET(ecode, 1);
 1400:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 1401:         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
 1402:         }
 1403: 
 1404:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
 1405:       assertion; it is therefore treated as NOMATCH. Any other return is an
 1406:       error. */
 1407: 
 1408:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
 1409:         {
 1410:         RRETURN(rrc);         /* Need braces because of following else */
 1411:         }
 1412:       break;
 1413:       }
 1414: 
 1415:     /* Choose branch according to the condition */
 1416: 
 1417:     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
 1418: 
 1419:     /* We are now at the branch that is to be obeyed. As there is only one, we
 1420:     can use tail recursion to avoid using another stack frame, except when
 1421:     there is unlimited repeat of a possibly empty group. In the latter case, a
 1422:     recursive call to match() is always required, unless the second alternative
 1423:     doesn't exist, in which case we can just plough on. Note that, for
 1424:     compatibility with Perl, the | in a conditional group is NOT treated as
 1425:     creating two alternatives. If a THEN is encountered in the branch, it
 1426:     propagates out to the enclosing alternative (unless nested in a deeper set
 1427:     of alternatives, of course). */
 1428: 
 1429:     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
 1430:       {
 1431:       if (op != OP_SCOND)
 1432:         {
 1433:         goto TAIL_RECURSE;
 1434:         }
 1435: 
 1436:       md->match_function_type = MATCH_CBEGROUP;
 1437:       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
 1438:       RRETURN(rrc);
 1439:       }
 1440: 
 1441:      /* Condition false & no alternative; continue after the group. */
 1442: 
 1443:     else
 1444:       {
 1445:       }
 1446:     break;
 1447: 
 1448: 
 1449:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
 1450:     to close any currently open capturing brackets. */
 1451: 
 1452:     case OP_CLOSE:
 1453:     number = GET2(ecode, 1);   /* Must be less than 65536 */
 1454:     offset = number << 1;
 1455: 
 1456: #ifdef PCRE_DEBUG
 1457:       printf("end bracket %d at *ACCEPT", number);
 1458:       printf("\n");
 1459: #endif
 1460: 
 1461:     md->capture_last = (md->capture_last & OVFLMASK) | number;
 1462:     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
 1463:       {
 1464:       md->offset_vector[offset] =
 1465:         md->offset_vector[md->offset_end - number];
 1466:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
 1467:       if (offset_top <= offset) offset_top = offset + 2;
 1468:       }
 1469:     ecode += 1 + IMM2_SIZE;
 1470:     break;
 1471: 
 1472: 
 1473:     /* End of the pattern, either real or forced. */
 1474: 
 1475:     case OP_END:
 1476:     case OP_ACCEPT:
 1477:     case OP_ASSERT_ACCEPT:
 1478: 
 1479:     /* If we have matched an empty string, fail if not in an assertion and not
 1480:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
 1481:     is set and we have matched at the start of the subject. In both cases,
 1482:     backtracking will then try other alternatives, if any. */
 1483: 
 1484:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
 1485:          md->recursive == NULL &&
 1486:          (md->notempty ||
 1487:            (md->notempty_atstart &&
 1488:              mstart == md->start_subject + md->start_offset)))
 1489:       RRETURN(MATCH_NOMATCH);
 1490: 
 1491:     /* Otherwise, we have a match. */
 1492: 
 1493:     md->end_match_ptr = eptr;           /* Record where we ended */
 1494:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
 1495:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
 1496: 
 1497:     /* For some reason, the macros don't work properly if an expression is
 1498:     given as the argument to RRETURN when the heap is in use. */
 1499: 
 1500:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
 1501:     RRETURN(rrc);
 1502: 
 1503:     /* Assertion brackets. Check the alternative branches in turn - the
 1504:     matching won't pass the KET for an assertion. If any one branch matches,
 1505:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 1506:     start of each branch to move the current point backwards, so the code at
 1507:     this level is identical to the lookahead case. When the assertion is part
 1508:     of a condition, we want to return immediately afterwards. The caller of
 1509:     this incarnation of the match() function will have set MATCH_CONDASSERT in
 1510:     md->match_function_type, and one of these opcodes will be the first opcode
 1511:     that is processed. We use a local variable that is preserved over calls to
 1512:     match() to remember this case. */
 1513: 
 1514:     case OP_ASSERT:
 1515:     case OP_ASSERTBACK:
 1516:     save_mark = md->mark;
 1517:     if (md->match_function_type == MATCH_CONDASSERT)
 1518:       {
 1519:       condassert = TRUE;
 1520:       md->match_function_type = 0;
 1521:       }
 1522:     else condassert = FALSE;
 1523: 
 1524:     /* Loop for each branch */
 1525: 
 1526:     do
 1527:       {
 1528:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
 1529: 
 1530:       /* A match means that the assertion is true; break out of the loop
 1531:       that matches its alternatives. */
 1532: 
 1533:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
 1534:         {
 1535:         mstart = md->start_match_ptr;   /* In case \K reset it */
 1536:         break;
 1537:         }
 1538: 
 1539:       /* If not matched, restore the previous mark setting. */
 1540: 
 1541:       md->mark = save_mark;
 1542: 
 1543:       /* See comment in the code for capturing groups above about handling
 1544:       THEN. */
 1545: 
 1546:       if (rrc == MATCH_THEN)
 1547:         {
 1548:         next = ecode + GET(ecode,1);
 1549:         if (md->start_match_ptr < next &&
 1550:             (*ecode == OP_ALT || *next == OP_ALT))
 1551:           rrc = MATCH_NOMATCH;
 1552:         }
 1553: 
 1554:       /* Anything other than NOMATCH causes the entire assertion to fail,
 1555:       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
 1556:       uncaptured THEN, which means they take their normal effect. This
 1557:       consistent approach does not always have exactly the same effect as in
 1558:       Perl. */
 1559: 
 1560:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1561:       ecode += GET(ecode, 1);
 1562:       }
 1563:     while (*ecode == OP_ALT);   /* Continue for next alternative */
 1564: 
 1565:     /* If we have tried all the alternative branches, the assertion has
 1566:     failed. If not, we broke out after a match. */
 1567: 
 1568:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 1569: 
 1570:     /* If checking an assertion for a condition, return MATCH_MATCH. */
 1571: 
 1572:     if (condassert) RRETURN(MATCH_MATCH);
 1573: 
 1574:     /* Continue from after a successful assertion, updating the offsets high
 1575:     water mark, since extracts may have been taken during the assertion. */
 1576: 
 1577:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 1578:     ecode += 1 + LINK_SIZE;
 1579:     offset_top = md->end_offset_top;
 1580:     continue;
 1581: 
 1582:     /* Negative assertion: all branches must fail to match for the assertion to
 1583:     succeed. */
 1584: 
 1585:     case OP_ASSERT_NOT:
 1586:     case OP_ASSERTBACK_NOT:
 1587:     save_mark = md->mark;
 1588:     if (md->match_function_type == MATCH_CONDASSERT)
 1589:       {
 1590:       condassert = TRUE;
 1591:       md->match_function_type = 0;
 1592:       }
 1593:     else condassert = FALSE;
 1594: 
 1595:     /* Loop for each alternative branch. */
 1596: 
 1597:     do
 1598:       {
 1599:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
 1600:       md->mark = save_mark;   /* Always restore the mark setting */
 1601: 
 1602:       switch(rrc)
 1603:         {
 1604:         case MATCH_MATCH:            /* A successful match means */
 1605:         case MATCH_ACCEPT:           /* the assertion has failed. */
 1606:         RRETURN(MATCH_NOMATCH);
 1607: 
 1608:         case MATCH_NOMATCH:          /* Carry on with next branch */
 1609:         break;
 1610: 
 1611:         /* See comment in the code for capturing groups above about handling
 1612:         THEN. */
 1613: 
 1614:         case MATCH_THEN:
 1615:         next = ecode + GET(ecode,1);
 1616:         if (md->start_match_ptr < next &&
 1617:             (*ecode == OP_ALT || *next == OP_ALT))
 1618:           {
 1619:           rrc = MATCH_NOMATCH;
 1620:           break;
 1621:           }
 1622:         /* Otherwise fall through. */
 1623: 
 1624:         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
 1625:         assertion to fail to match, without considering any more alternatives.
 1626:         Failing to match means the assertion is true. This is a consistent
 1627:         approach, but does not always have the same effect as in Perl. */
 1628: 
 1629:         case MATCH_COMMIT:
 1630:         case MATCH_SKIP:
 1631:         case MATCH_SKIP_ARG:
 1632:         case MATCH_PRUNE:
 1633:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 1634:         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
 1635: 
 1636:         /* Anything else is an error */
 1637: 
 1638:         default:
 1639:         RRETURN(rrc);
 1640:         }
 1641: 
 1642:       /* Continue with next branch */
 1643: 
 1644:       ecode += GET(ecode,1);
 1645:       }
 1646:     while (*ecode == OP_ALT);
 1647: 
 1648:     /* All branches in the assertion failed to match. */
 1649: 
 1650:     NEG_ASSERT_TRUE:
 1651:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
 1652:     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
 1653:     continue;
 1654: 
 1655:     /* Move the subject pointer back. This occurs only at the start of
 1656:     each branch of a lookbehind assertion. If we are too close to the start to
 1657:     move back, this match function fails. When working with UTF-8 we move
 1658:     back a number of characters, not bytes. */
 1659: 
 1660:     case OP_REVERSE:
 1661: #ifdef SUPPORT_UTF
 1662:     if (utf)
 1663:       {
 1664:       i = GET(ecode, 1);
 1665:       while (i-- > 0)
 1666:         {
 1667:         eptr--;
 1668:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 1669:         BACKCHAR(eptr);
 1670:         }
 1671:       }
 1672:     else
 1673: #endif
 1674: 
 1675:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 1676: 
 1677:       {
 1678:       eptr -= GET(ecode, 1);
 1679:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 1680:       }
 1681: 
 1682:     /* Save the earliest consulted character, then skip to next op code */
 1683: 
 1684:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
 1685:     ecode += 1 + LINK_SIZE;
 1686:     break;
 1687: 
 1688:     /* The callout item calls an external function, if one is provided, passing
 1689:     details of the match so far. This is mainly for debugging, though the
 1690:     function is able to force a failure. */
 1691: 
 1692:     case OP_CALLOUT:
 1693:     if (PUBL(callout) != NULL)
 1694:       {
 1695:       PUBL(callout_block) cb;
 1696:       cb.version          = 2;   /* Version 2 of the callout block */
 1697:       cb.callout_number   = ecode[1];
 1698:       cb.offset_vector    = md->offset_vector;
 1699: #if defined COMPILE_PCRE8
 1700:       cb.subject          = (PCRE_SPTR)md->start_subject;
 1701: #elif defined COMPILE_PCRE16
 1702:       cb.subject          = (PCRE_SPTR16)md->start_subject;
 1703: #elif defined COMPILE_PCRE32
 1704:       cb.subject          = (PCRE_SPTR32)md->start_subject;
 1705: #endif
 1706:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
 1707:       cb.start_match      = (int)(mstart - md->start_subject);
 1708:       cb.current_position = (int)(eptr - md->start_subject);
 1709:       cb.pattern_position = GET(ecode, 2);
 1710:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 1711:       cb.capture_top      = offset_top/2;
 1712:       cb.capture_last     = md->capture_last & CAPLMASK;
 1713:       /* Internal change requires this for API compatibility. */
 1714:       if (cb.capture_last == 0) cb.capture_last = -1;
 1715:       cb.callout_data     = md->callout_data;
 1716:       cb.mark             = md->nomatch_mark;
 1717:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 1718:       if (rrc < 0) RRETURN(rrc);
 1719:       }
 1720:     ecode += 2 + 2*LINK_SIZE;
 1721:     break;
 1722: 
 1723:     /* Recursion either matches the current regex, or some subexpression. The
 1724:     offset data is the offset to the starting bracket from the start of the
 1725:     whole pattern. (This is so that it works from duplicated subpatterns.)
 1726: 
 1727:     The state of the capturing groups is preserved over recursion, and
 1728:     re-instated afterwards. We don't know how many are started and not yet
 1729:     finished (offset_top records the completed total) so we just have to save
 1730:     all the potential data. There may be up to 65535 such values, which is too
 1731:     large to put on the stack, but using malloc for small numbers seems
 1732:     expensive. As a compromise, the stack is used when there are no more than
 1733:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
 1734: 
 1735:     There are also other values that have to be saved. We use a chained
 1736:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 1737:     for the original version of this logic. It has, however, been hacked around
 1738:     a lot, so he is not to blame for the current way it works. */
 1739: 
 1740:     case OP_RECURSE:
 1741:       {
 1742:       recursion_info *ri;
 1743:       unsigned int recno;
 1744: 
 1745:       callpat = md->start_code + GET(ecode, 1);
 1746:       recno = (callpat == md->start_code)? 0 :
 1747:         GET2(callpat, 1 + LINK_SIZE);
 1748: 
 1749:       /* Check for repeating a recursion without advancing the subject pointer.
 1750:       This should catch convoluted mutual recursions. (Some simple cases are
 1751:       caught at compile time.) */
 1752: 
 1753:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
 1754:         if (recno == ri->group_num && eptr == ri->subject_position)
 1755:           RRETURN(PCRE_ERROR_RECURSELOOP);
 1756: 
 1757:       /* Add to "recursing stack" */
 1758: 
 1759:       new_recursive.group_num = recno;
 1760:       new_recursive.saved_capture_last = md->capture_last;
 1761:       new_recursive.subject_position = eptr;
 1762:       new_recursive.prevrec = md->recursive;
 1763:       md->recursive = &new_recursive;
 1764: 
 1765:       /* Where to continue from afterwards */
 1766: 
 1767:       ecode += 1 + LINK_SIZE;
 1768: 
 1769:       /* Now save the offset data */
 1770: 
 1771:       new_recursive.saved_max = md->offset_end;
 1772:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
 1773:         new_recursive.offset_save = stacksave;
 1774:       else
 1775:         {
 1776:         new_recursive.offset_save =
 1777:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
 1778:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
 1779:         }
 1780:       memcpy(new_recursive.offset_save, md->offset_vector,
 1781:             new_recursive.saved_max * sizeof(int));
 1782: 
 1783:       /* OK, now we can do the recursion. After processing each alternative,
 1784:       restore the offset data and the last captured value. If there were nested
 1785:       recursions, md->recursive might be changed, so reset it before looping.
 1786:       */
 1787: 
 1788:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
 1789:       cbegroup = (*callpat >= OP_SBRA);
 1790:       do
 1791:         {
 1792:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
 1793:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
 1794:           md, eptrb, RM6);
 1795:         memcpy(md->offset_vector, new_recursive.offset_save,
 1796:             new_recursive.saved_max * sizeof(int));
 1797:         md->capture_last = new_recursive.saved_capture_last;
 1798:         md->recursive = new_recursive.prevrec;
 1799:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
 1800:           {
 1801:           DPRINTF(("Recursion matched\n"));
 1802:           if (new_recursive.offset_save != stacksave)
 1803:             (PUBL(free))(new_recursive.offset_save);
 1804: 
 1805:           /* Set where we got to in the subject, and reset the start in case
 1806:           it was changed by \K. This *is* propagated back out of a recursion,
 1807:           for Perl compatibility. */
 1808: 
 1809:           eptr = md->end_match_ptr;
 1810:           mstart = md->start_match_ptr;
 1811:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
 1812:           }
 1813: 
 1814:         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
 1815:         recursion; they cause a NOMATCH for the entire recursion. These codes
 1816:         are defined in a range that can be tested for. */
 1817: 
 1818:         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
 1819:           RRETURN(MATCH_NOMATCH);
 1820: 
 1821:         /* Any return code other than NOMATCH is an error. */
 1822: 
 1823:         if (rrc != MATCH_NOMATCH)
 1824:           {
 1825:           DPRINTF(("Recursion gave error %d\n", rrc));
 1826:           if (new_recursive.offset_save != stacksave)
 1827:             (PUBL(free))(new_recursive.offset_save);
 1828:           RRETURN(rrc);
 1829:           }
 1830: 
 1831:         md->recursive = &new_recursive;
 1832:         callpat += GET(callpat, 1);
 1833:         }
 1834:       while (*callpat == OP_ALT);
 1835: 
 1836:       DPRINTF(("Recursion didn't match\n"));
 1837:       md->recursive = new_recursive.prevrec;
 1838:       if (new_recursive.offset_save != stacksave)
 1839:         (PUBL(free))(new_recursive.offset_save);
 1840:       RRETURN(MATCH_NOMATCH);
 1841:       }
 1842: 
 1843:     RECURSION_MATCHED:
 1844:     break;
 1845: 
 1846:     /* An alternation is the end of a branch; scan along to find the end of the
 1847:     bracketed group and go to there. */
 1848: 
 1849:     case OP_ALT:
 1850:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 1851:     break;
 1852: 
 1853:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
 1854:     indicating that it may occur zero times. It may repeat infinitely, or not
 1855:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
 1856:     with fixed upper repeat limits are compiled as a number of copies, with the
 1857:     optional ones preceded by BRAZERO or BRAMINZERO. */
 1858: 
 1859:     case OP_BRAZERO:
 1860:     next = ecode + 1;
 1861:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
 1862:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1863:     do next += GET(next, 1); while (*next == OP_ALT);
 1864:     ecode = next + 1 + LINK_SIZE;
 1865:     break;
 1866: 
 1867:     case OP_BRAMINZERO:
 1868:     next = ecode + 1;
 1869:     do next += GET(next, 1); while (*next == OP_ALT);
 1870:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
 1871:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1872:     ecode++;
 1873:     break;
 1874: 
 1875:     case OP_SKIPZERO:
 1876:     next = ecode+1;
 1877:     do next += GET(next,1); while (*next == OP_ALT);
 1878:     ecode = next + 1 + LINK_SIZE;
 1879:     break;
 1880: 
 1881:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
 1882:     here; just jump to the group, with allow_zero set TRUE. */
 1883: 
 1884:     case OP_BRAPOSZERO:
 1885:     op = *(++ecode);
 1886:     allow_zero = TRUE;
 1887:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
 1888:       goto POSSESSIVE_NON_CAPTURE;
 1889: 
 1890:     /* End of a group, repeated or non-repeating. */
 1891: 
 1892:     case OP_KET:
 1893:     case OP_KETRMIN:
 1894:     case OP_KETRMAX:
 1895:     case OP_KETRPOS:
 1896:     prev = ecode - GET(ecode, 1);
 1897: 
 1898:     /* If this was a group that remembered the subject start, in order to break
 1899:     infinite repeats of empty string matches, retrieve the subject start from
 1900:     the chain. Otherwise, set it NULL. */
 1901: 
 1902:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
 1903:       {
 1904:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
 1905:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
 1906:       }
 1907:     else saved_eptr = NULL;
 1908: 
 1909:     /* If we are at the end of an assertion group or a non-capturing atomic
 1910:     group, stop matching and return MATCH_MATCH, but record the current high
 1911:     water mark for use by positive assertions. We also need to record the match
 1912:     start in case it was changed by \K. */
 1913: 
 1914:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
 1915:          *prev == OP_ONCE_NC)
 1916:       {
 1917:       md->end_match_ptr = eptr;      /* For ONCE_NC */
 1918:       md->end_offset_top = offset_top;
 1919:       md->start_match_ptr = mstart;
 1920:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
 1921:       }
 1922: 
 1923:     /* For capturing groups we have to check the group number back at the start
 1924:     and if necessary complete handling an extraction by setting the offsets and
 1925:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
 1926:     into group 0, so it won't be picked up here. Instead, we catch it when the
 1927:     OP_END is reached. Other recursion is handled here. We just have to record
 1928:     the current subject position and start match pointer and give a MATCH
 1929:     return. */
 1930: 
 1931:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
 1932:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
 1933:       {
 1934:       number = GET2(prev, 1+LINK_SIZE);
 1935:       offset = number << 1;
 1936: 
 1937: #ifdef PCRE_DEBUG
 1938:       printf("end bracket %d", number);
 1939:       printf("\n");
 1940: #endif
 1941: 
 1942:       /* Handle a recursively called group. */
 1943: 
 1944:       if (md->recursive != NULL && md->recursive->group_num == number)
 1945:         {
 1946:         md->end_match_ptr = eptr;
 1947:         md->start_match_ptr = mstart;
 1948:         RRETURN(MATCH_MATCH);
 1949:         }
 1950: 
 1951:       /* Deal with capturing */
 1952: 
 1953:       md->capture_last = (md->capture_last & OVFLMASK) | number;
 1954:       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
 1955:         {
 1956:         /* If offset is greater than offset_top, it means that we are
 1957:         "skipping" a capturing group, and that group's offsets must be marked
 1958:         unset. In earlier versions of PCRE, all the offsets were unset at the
 1959:         start of matching, but this doesn't work because atomic groups and
 1960:         assertions can cause a value to be set that should later be unset.
 1961:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
 1962:         part of the atomic group, but this is not on the final matching path,
 1963:         so must be unset when 2 is set. (If there is no group 2, there is no
 1964:         problem, because offset_top will then be 2, indicating no capture.) */
 1965: 
 1966:         if (offset > offset_top)
 1967:           {
 1968:           register int *iptr = md->offset_vector + offset_top;
 1969:           register int *iend = md->offset_vector + offset;
 1970:           while (iptr < iend) *iptr++ = -1;
 1971:           }
 1972: 
 1973:         /* Now make the extraction */
 1974: 
 1975:         md->offset_vector[offset] =
 1976:           md->offset_vector[md->offset_end - number];
 1977:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
 1978:         if (offset_top <= offset) offset_top = offset + 2;
 1979:         }
 1980:       }
 1981: 
 1982:     /* For an ordinary non-repeating ket, just continue at this level. This
 1983:     also happens for a repeating ket if no characters were matched in the
 1984:     group. This is the forcible breaking of infinite loops as implemented in
 1985:     Perl 5.005. For a non-repeating atomic group that includes captures,
 1986:     establish a backup point by processing the rest of the pattern at a lower
 1987:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
 1988:     original OP_ONCE level, thereby bypassing intermediate backup points, but
 1989:     resetting any captures that happened along the way. */
 1990: 
 1991:     if (*ecode == OP_KET || eptr == saved_eptr)
 1992:       {
 1993:       if (*prev == OP_ONCE)
 1994:         {
 1995:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
 1996:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 1997:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
 1998:         RRETURN(MATCH_ONCE);
 1999:         }
 2000:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
 2001:       break;
 2002:       }
 2003: 
 2004:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
 2005:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
 2006:     at a time from the outer level, thus saving stack. */
 2007: 
 2008:     if (*ecode == OP_KETRPOS)
 2009:       {
 2010:       md->start_match_ptr = mstart;    /* In case \K reset it */
 2011:       md->end_match_ptr = eptr;
 2012:       md->end_offset_top = offset_top;
 2013:       RRETURN(MATCH_KETRPOS);
 2014:       }
 2015: 
 2016:     /* The normal repeating kets try the rest of the pattern or restart from
 2017:     the preceding bracket, in the appropriate order. In the second case, we can
 2018:     use tail recursion to avoid using another stack frame, unless we have
 2019:     an atomic group or an unlimited repeat of a group that can match an empty
 2020:     string. */
 2021: 
 2022:     if (*ecode == OP_KETRMIN)
 2023:       {
 2024:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
 2025:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2026:       if (*prev == OP_ONCE)
 2027:         {
 2028:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
 2029:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2030:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
 2031:         RRETURN(MATCH_ONCE);
 2032:         }
 2033:       if (*prev >= OP_SBRA)    /* Could match an empty string */
 2034:         {
 2035:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
 2036:         RRETURN(rrc);
 2037:         }
 2038:       ecode = prev;
 2039:       goto TAIL_RECURSE;
 2040:       }
 2041:     else  /* OP_KETRMAX */
 2042:       {
 2043:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
 2044:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
 2045:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2046:       if (*prev == OP_ONCE)
 2047:         {
 2048:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
 2049:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2050:         md->once_target = prev;
 2051:         RRETURN(MATCH_ONCE);
 2052:         }
 2053:       ecode += 1 + LINK_SIZE;
 2054:       goto TAIL_RECURSE;
 2055:       }
 2056:     /* Control never gets here */
 2057: 
 2058:     /* Not multiline mode: start of subject assertion, unless notbol. */
 2059: 
 2060:     case OP_CIRC:
 2061:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
 2062: 
 2063:     /* Start of subject assertion */
 2064: 
 2065:     case OP_SOD:
 2066:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
 2067:     ecode++;
 2068:     break;
 2069: 
 2070:     /* Multiline mode: start of subject unless notbol, or after any newline. */
 2071: 
 2072:     case OP_CIRCM:
 2073:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
 2074:     if (eptr != md->start_subject &&
 2075:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
 2076:       RRETURN(MATCH_NOMATCH);
 2077:     ecode++;
 2078:     break;
 2079: 
 2080:     /* Start of match assertion */
 2081: 
 2082:     case OP_SOM:
 2083:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
 2084:     ecode++;
 2085:     break;
 2086: 
 2087:     /* Reset the start of match point */
 2088: 
 2089:     case OP_SET_SOM:
 2090:     mstart = eptr;
 2091:     ecode++;
 2092:     break;
 2093: 
 2094:     /* Multiline mode: assert before any newline, or before end of subject
 2095:     unless noteol is set. */
 2096: 
 2097:     case OP_DOLLM:
 2098:     if (eptr < md->end_subject)
 2099:       {
 2100:       if (!IS_NEWLINE(eptr))
 2101:         {
 2102:         if (md->partial != 0 &&
 2103:             eptr + 1 >= md->end_subject &&
 2104:             NLBLOCK->nltype == NLTYPE_FIXED &&
 2105:             NLBLOCK->nllen == 2 &&
 2106:             RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
 2107:           {
 2108:           md->hitend = TRUE;
 2109:           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 2110:           }
 2111:         RRETURN(MATCH_NOMATCH);
 2112:         }
 2113:       }
 2114:     else
 2115:       {
 2116:       if (md->noteol) RRETURN(MATCH_NOMATCH);
 2117:       SCHECK_PARTIAL();
 2118:       }
 2119:     ecode++;
 2120:     break;
 2121: 
 2122:     /* Not multiline mode: assert before a terminating newline or before end of
 2123:     subject unless noteol is set. */
 2124: 
 2125:     case OP_DOLL:
 2126:     if (md->noteol) RRETURN(MATCH_NOMATCH);
 2127:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
 2128: 
 2129:     /* ... else fall through for endonly */
 2130: 
 2131:     /* End of subject assertion (\z) */
 2132: 
 2133:     case OP_EOD:
 2134:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
 2135:     SCHECK_PARTIAL();
 2136:     ecode++;
 2137:     break;
 2138: 
 2139:     /* End of subject or ending \n assertion (\Z) */
 2140: 
 2141:     case OP_EODN:
 2142:     ASSERT_NL_OR_EOS:
 2143:     if (eptr < md->end_subject &&
 2144:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
 2145:       {
 2146:       if (md->partial != 0 &&
 2147:           eptr + 1 >= md->end_subject &&
 2148:           NLBLOCK->nltype == NLTYPE_FIXED &&
 2149:           NLBLOCK->nllen == 2 &&
 2150:           RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
 2151:         {
 2152:         md->hitend = TRUE;
 2153:         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 2154:         }
 2155:       RRETURN(MATCH_NOMATCH);
 2156:       }
 2157: 
 2158:     /* Either at end of string or \n before end. */
 2159: 
 2160:     SCHECK_PARTIAL();
 2161:     ecode++;
 2162:     break;
 2163: 
 2164:     /* Word boundary assertions */
 2165: 
 2166:     case OP_NOT_WORD_BOUNDARY:
 2167:     case OP_WORD_BOUNDARY:
 2168:       {
 2169: 
 2170:       /* Find out if the previous and current characters are "word" characters.
 2171:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
 2172:       be "non-word" characters. Remember the earliest consulted character for
 2173:       partial matching. */
 2174: 
 2175: #ifdef SUPPORT_UTF
 2176:       if (utf)
 2177:         {
 2178:         /* Get status of previous character */
 2179: 
 2180:         if (eptr == md->start_subject) prev_is_word = FALSE; else
 2181:           {
 2182:           PCRE_PUCHAR lastptr = eptr - 1;
 2183:           BACKCHAR(lastptr);
 2184:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
 2185:           GETCHAR(c, lastptr);
 2186: #ifdef SUPPORT_UCP
 2187:           if (md->use_ucp)
 2188:             {
 2189:             if (c == '_') prev_is_word = TRUE; else
 2190:               {
 2191:               int cat = UCD_CATEGORY(c);
 2192:               prev_is_word = (cat == ucp_L || cat == ucp_N);
 2193:               }
 2194:             }
 2195:           else
 2196: #endif
 2197:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
 2198:           }
 2199: 
 2200:         /* Get status of next character */
 2201: 
 2202:         if (eptr >= md->end_subject)
 2203:           {
 2204:           SCHECK_PARTIAL();
 2205:           cur_is_word = FALSE;
 2206:           }
 2207:         else
 2208:           {
 2209:           GETCHAR(c, eptr);
 2210: #ifdef SUPPORT_UCP
 2211:           if (md->use_ucp)
 2212:             {
 2213:             if (c == '_') cur_is_word = TRUE; else
 2214:               {
 2215:               int cat = UCD_CATEGORY(c);
 2216:               cur_is_word = (cat == ucp_L || cat == ucp_N);
 2217:               }
 2218:             }
 2219:           else
 2220: #endif
 2221:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
 2222:           }
 2223:         }
 2224:       else
 2225: #endif
 2226: 
 2227:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
 2228:       consistency with the behaviour of \w we do use it in this case. */
 2229: 
 2230:         {
 2231:         /* Get status of previous character */
 2232: 
 2233:         if (eptr == md->start_subject) prev_is_word = FALSE; else
 2234:           {
 2235:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
 2236: #ifdef SUPPORT_UCP
 2237:           if (md->use_ucp)
 2238:             {
 2239:             c = eptr[-1];
 2240:             if (c == '_') prev_is_word = TRUE; else
 2241:               {
 2242:               int cat = UCD_CATEGORY(c);
 2243:               prev_is_word = (cat == ucp_L || cat == ucp_N);
 2244:               }
 2245:             }
 2246:           else
 2247: #endif
 2248:           prev_is_word = MAX_255(eptr[-1])
 2249:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
 2250:           }
 2251: 
 2252:         /* Get status of next character */
 2253: 
 2254:         if (eptr >= md->end_subject)
 2255:           {
 2256:           SCHECK_PARTIAL();
 2257:           cur_is_word = FALSE;
 2258:           }
 2259:         else
 2260: #ifdef SUPPORT_UCP
 2261:         if (md->use_ucp)
 2262:           {
 2263:           c = *eptr;
 2264:           if (c == '_') cur_is_word = TRUE; else
 2265:             {
 2266:             int cat = UCD_CATEGORY(c);
 2267:             cur_is_word = (cat == ucp_L || cat == ucp_N);
 2268:             }
 2269:           }
 2270:         else
 2271: #endif
 2272:         cur_is_word = MAX_255(*eptr)
 2273:           && ((md->ctypes[*eptr] & ctype_word) != 0);
 2274:         }
 2275: 
 2276:       /* Now see if the situation is what we want */
 2277: 
 2278:       if ((*ecode++ == OP_WORD_BOUNDARY)?
 2279:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
 2280:         RRETURN(MATCH_NOMATCH);
 2281:       }
 2282:     break;
 2283: 
 2284:     /* Match any single character type except newline; have to take care with
 2285:     CRLF newlines and partial matching. */
 2286: 
 2287:     case OP_ANY:
 2288:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
 2289:     if (md->partial != 0 &&
 2290:         eptr + 1 >= md->end_subject &&
 2291:         NLBLOCK->nltype == NLTYPE_FIXED &&
 2292:         NLBLOCK->nllen == 2 &&
 2293:         RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
 2294:       {
 2295:       md->hitend = TRUE;
 2296:       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 2297:       }
 2298: 
 2299:     /* Fall through */
 2300: 
 2301:     /* Match any single character whatsoever. */
 2302: 
 2303:     case OP_ALLANY:
 2304:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
 2305:       {                            /* not be updated before SCHECK_PARTIAL. */
 2306:       SCHECK_PARTIAL();
 2307:       RRETURN(MATCH_NOMATCH);
 2308:       }
 2309:     eptr++;
 2310: #ifdef SUPPORT_UTF
 2311:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 2312: #endif
 2313:     ecode++;
 2314:     break;
 2315: 
 2316:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
 2317:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
 2318: 
 2319:     case OP_ANYBYTE:
 2320:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
 2321:       {                            /* not be updated before SCHECK_PARTIAL. */
 2322:       SCHECK_PARTIAL();
 2323:       RRETURN(MATCH_NOMATCH);
 2324:       }
 2325:     eptr++;
 2326:     ecode++;
 2327:     break;
 2328: 
 2329:     case OP_NOT_DIGIT:
 2330:     if (eptr >= md->end_subject)
 2331:       {
 2332:       SCHECK_PARTIAL();
 2333:       RRETURN(MATCH_NOMATCH);
 2334:       }
 2335:     GETCHARINCTEST(c, eptr);
 2336:     if (
 2337: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2338:        c < 256 &&
 2339: #endif
 2340:        (md->ctypes[c] & ctype_digit) != 0
 2341:        )
 2342:       RRETURN(MATCH_NOMATCH);
 2343:     ecode++;
 2344:     break;
 2345: 
 2346:     case OP_DIGIT:
 2347:     if (eptr >= md->end_subject)
 2348:       {
 2349:       SCHECK_PARTIAL();
 2350:       RRETURN(MATCH_NOMATCH);
 2351:       }
 2352:     GETCHARINCTEST(c, eptr);
 2353:     if (
 2354: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2355:        c > 255 ||
 2356: #endif
 2357:        (md->ctypes[c] & ctype_digit) == 0
 2358:        )
 2359:       RRETURN(MATCH_NOMATCH);
 2360:     ecode++;
 2361:     break;
 2362: 
 2363:     case OP_NOT_WHITESPACE:
 2364:     if (eptr >= md->end_subject)
 2365:       {
 2366:       SCHECK_PARTIAL();
 2367:       RRETURN(MATCH_NOMATCH);
 2368:       }
 2369:     GETCHARINCTEST(c, eptr);
 2370:     if (
 2371: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2372:        c < 256 &&
 2373: #endif
 2374:        (md->ctypes[c] & ctype_space) != 0
 2375:        )
 2376:       RRETURN(MATCH_NOMATCH);
 2377:     ecode++;
 2378:     break;
 2379: 
 2380:     case OP_WHITESPACE:
 2381:     if (eptr >= md->end_subject)
 2382:       {
 2383:       SCHECK_PARTIAL();
 2384:       RRETURN(MATCH_NOMATCH);
 2385:       }
 2386:     GETCHARINCTEST(c, eptr);
 2387:     if (
 2388: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2389:        c > 255 ||
 2390: #endif
 2391:        (md->ctypes[c] & ctype_space) == 0
 2392:        )
 2393:       RRETURN(MATCH_NOMATCH);
 2394:     ecode++;
 2395:     break;
 2396: 
 2397:     case OP_NOT_WORDCHAR:
 2398:     if (eptr >= md->end_subject)
 2399:       {
 2400:       SCHECK_PARTIAL();
 2401:       RRETURN(MATCH_NOMATCH);
 2402:       }
 2403:     GETCHARINCTEST(c, eptr);
 2404:     if (
 2405: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2406:        c < 256 &&
 2407: #endif
 2408:        (md->ctypes[c] & ctype_word) != 0
 2409:        )
 2410:       RRETURN(MATCH_NOMATCH);
 2411:     ecode++;
 2412:     break;
 2413: 
 2414:     case OP_WORDCHAR:
 2415:     if (eptr >= md->end_subject)
 2416:       {
 2417:       SCHECK_PARTIAL();
 2418:       RRETURN(MATCH_NOMATCH);
 2419:       }
 2420:     GETCHARINCTEST(c, eptr);
 2421:     if (
 2422: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
 2423:        c > 255 ||
 2424: #endif
 2425:        (md->ctypes[c] & ctype_word) == 0
 2426:        )
 2427:       RRETURN(MATCH_NOMATCH);
 2428:     ecode++;
 2429:     break;
 2430: 
 2431:     case OP_ANYNL:
 2432:     if (eptr >= md->end_subject)
 2433:       {
 2434:       SCHECK_PARTIAL();
 2435:       RRETURN(MATCH_NOMATCH);
 2436:       }
 2437:     GETCHARINCTEST(c, eptr);
 2438:     switch(c)
 2439:       {
 2440:       default: RRETURN(MATCH_NOMATCH);
 2441: 
 2442:       case CHAR_CR:
 2443:       if (eptr >= md->end_subject)
 2444:         {
 2445:         SCHECK_PARTIAL();
 2446:         }
 2447:       else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
 2448:       break;
 2449: 
 2450:       case CHAR_LF:
 2451:       break;
 2452: 
 2453:       case CHAR_VT:
 2454:       case CHAR_FF:
 2455:       case CHAR_NEL:
 2456: #ifndef EBCDIC
 2457:       case 0x2028:
 2458:       case 0x2029:
 2459: #endif  /* Not EBCDIC */
 2460:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
 2461:       break;
 2462:       }
 2463:     ecode++;
 2464:     break;
 2465: 
 2466:     case OP_NOT_HSPACE:
 2467:     if (eptr >= md->end_subject)
 2468:       {
 2469:       SCHECK_PARTIAL();
 2470:       RRETURN(MATCH_NOMATCH);
 2471:       }
 2472:     GETCHARINCTEST(c, eptr);
 2473:     switch(c)
 2474:       {
 2475:       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
 2476:       default: break;
 2477:       }
 2478:     ecode++;
 2479:     break;
 2480: 
 2481:     case OP_HSPACE:
 2482:     if (eptr >= md->end_subject)
 2483:       {
 2484:       SCHECK_PARTIAL();
 2485:       RRETURN(MATCH_NOMATCH);
 2486:       }
 2487:     GETCHARINCTEST(c, eptr);
 2488:     switch(c)
 2489:       {
 2490:       HSPACE_CASES: break;  /* Byte and multibyte cases */
 2491:       default: RRETURN(MATCH_NOMATCH);
 2492:       }
 2493:     ecode++;
 2494:     break;
 2495: 
 2496:     case OP_NOT_VSPACE:
 2497:     if (eptr >= md->end_subject)
 2498:       {
 2499:       SCHECK_PARTIAL();
 2500:       RRETURN(MATCH_NOMATCH);
 2501:       }
 2502:     GETCHARINCTEST(c, eptr);
 2503:     switch(c)
 2504:       {
 2505:       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
 2506:       default: break;
 2507:       }
 2508:     ecode++;
 2509:     break;
 2510: 
 2511:     case OP_VSPACE:
 2512:     if (eptr >= md->end_subject)
 2513:       {
 2514:       SCHECK_PARTIAL();
 2515:       RRETURN(MATCH_NOMATCH);
 2516:       }
 2517:     GETCHARINCTEST(c, eptr);
 2518:     switch(c)
 2519:       {
 2520:       VSPACE_CASES: break;
 2521:       default: RRETURN(MATCH_NOMATCH);
 2522:       }
 2523:     ecode++;
 2524:     break;
 2525: 
 2526: #ifdef SUPPORT_UCP
 2527:     /* Check the next character by Unicode property. We will get here only
 2528:     if the support is in the binary; otherwise a compile-time error occurs. */
 2529: 
 2530:     case OP_PROP:
 2531:     case OP_NOTPROP:
 2532:     if (eptr >= md->end_subject)
 2533:       {
 2534:       SCHECK_PARTIAL();
 2535:       RRETURN(MATCH_NOMATCH);
 2536:       }
 2537:     GETCHARINCTEST(c, eptr);
 2538:       {
 2539:       const pcre_uint32 *cp;
 2540:       const ucd_record *prop = GET_UCD(c);
 2541: 
 2542:       switch(ecode[1])
 2543:         {
 2544:         case PT_ANY:
 2545:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
 2546:         break;
 2547: 
 2548:         case PT_LAMP:
 2549:         if ((prop->chartype == ucp_Lu ||
 2550:              prop->chartype == ucp_Ll ||
 2551:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
 2552:           RRETURN(MATCH_NOMATCH);
 2553:         break;
 2554: 
 2555:         case PT_GC:
 2556:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
 2557:           RRETURN(MATCH_NOMATCH);
 2558:         break;
 2559: 
 2560:         case PT_PC:
 2561:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
 2562:           RRETURN(MATCH_NOMATCH);
 2563:         break;
 2564: 
 2565:         case PT_SC:
 2566:         if ((ecode[2] != prop->script) == (op == OP_PROP))
 2567:           RRETURN(MATCH_NOMATCH);
 2568:         break;
 2569: 
 2570:         /* These are specials */
 2571: 
 2572:         case PT_ALNUM:
 2573:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 2574:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
 2575:           RRETURN(MATCH_NOMATCH);
 2576:         break;
 2577: 
 2578:         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 2579:         which means that Perl space and POSIX space are now identical. PCRE
 2580:         was changed at release 8.34. */
 2581: 
 2582:         case PT_SPACE:    /* Perl space */
 2583:         case PT_PXSPACE:  /* POSIX space */
 2584:         switch(c)
 2585:           {
 2586:           HSPACE_CASES:
 2587:           VSPACE_CASES:
 2588:           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
 2589:           break;
 2590: 
 2591:           default:
 2592:           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
 2593:             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
 2594:           break;
 2595:           }
 2596:         break;
 2597: 
 2598:         case PT_WORD:
 2599:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
 2600:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
 2601:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
 2602:           RRETURN(MATCH_NOMATCH);
 2603:         break;
 2604: 
 2605:         case PT_CLIST:
 2606:         cp = PRIV(ucd_caseless_sets) + ecode[2];
 2607:         for (;;)
 2608:           {
 2609:           if (c < *cp)
 2610:             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
 2611:           if (c == *cp++)
 2612:             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
 2613:           }
 2614:         break;
 2615: 
 2616:         case PT_UCNC:
 2617:         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 2618:              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 2619:              c >= 0xe000) == (op == OP_NOTPROP))
 2620:           RRETURN(MATCH_NOMATCH);
 2621:         break;
 2622: 
 2623:         /* This should never occur */
 2624: 
 2625:         default:
 2626:         RRETURN(PCRE_ERROR_INTERNAL);
 2627:         }
 2628: 
 2629:       ecode += 3;
 2630:       }
 2631:     break;
 2632: 
 2633:     /* Match an extended Unicode sequence. We will get here only if the support
 2634:     is in the binary; otherwise a compile-time error occurs. */
 2635: 
 2636:     case OP_EXTUNI:
 2637:     if (eptr >= md->end_subject)
 2638:       {
 2639:       SCHECK_PARTIAL();
 2640:       RRETURN(MATCH_NOMATCH);
 2641:       }
 2642:     else
 2643:       {
 2644:       int lgb, rgb;
 2645:       GETCHARINCTEST(c, eptr);
 2646:       lgb = UCD_GRAPHBREAK(c);
 2647:       while (eptr < md->end_subject)
 2648:         {
 2649:         int len = 1;
 2650:         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
 2651:         rgb = UCD_GRAPHBREAK(c);
 2652:         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 2653:         lgb = rgb;
 2654:         eptr += len;
 2655:         }
 2656:       }
 2657:     CHECK_PARTIAL();
 2658:     ecode++;
 2659:     break;
 2660: #endif  /* SUPPORT_UCP */
 2661: 
 2662: 
 2663:     /* Match a back reference, possibly repeatedly. Look past the end of the
 2664:     item to see if there is repeat information following. The code is similar
 2665:     to that for character classes, but repeated for efficiency. Then obey
 2666:     similar code to character type repeats - written out again for speed.
 2667:     However, if the referenced string is the empty string, always treat
 2668:     it as matched, any number of times (otherwise there could be infinite
 2669:     loops). If the reference is unset, there are two possibilities:
 2670: 
 2671:     (a) In the default, Perl-compatible state, set the length negative;
 2672:     this ensures that every attempt at a match fails. We can't just fail
 2673:     here, because of the possibility of quantifiers with zero minima.
 2674: 
 2675:     (b) If the JavaScript compatibility flag is set, set the length to zero
 2676:     so that the back reference matches an empty string.
 2677: 
 2678:     Otherwise, set the length to the length of what was matched by the
 2679:     referenced subpattern.
 2680: 
 2681:     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
 2682:     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
 2683:     and OP_DNREFI are used. In this case we must scan the list of groups to
 2684:     which the name refers, and use the first one that is set. */
 2685: 
 2686:     case OP_DNREF:
 2687:     case OP_DNREFI:
 2688:     caseless = op == OP_DNREFI;
 2689:       {
 2690:       int count = GET2(ecode, 1+IMM2_SIZE);
 2691:       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
 2692:       ecode += 1 + 2*IMM2_SIZE;
 2693: 
 2694:       while (count-- > 0)
 2695:         {
 2696:         offset = GET2(slot, 0) << 1;
 2697:         if (offset < offset_top && md->offset_vector[offset] >= 0) break;
 2698:         slot += md->name_entry_size;
 2699:         }
 2700:       if (count < 0)
 2701:         length = (md->jscript_compat)? 0 : -1;
 2702:       else
 2703:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
 2704:       }
 2705:     goto REF_REPEAT;
 2706: 
 2707:     case OP_REF:
 2708:     case OP_REFI:
 2709:     caseless = op == OP_REFI;
 2710:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
 2711:     ecode += 1 + IMM2_SIZE;
 2712:     if (offset >= offset_top || md->offset_vector[offset] < 0)
 2713:       length = (md->jscript_compat)? 0 : -1;
 2714:     else
 2715:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
 2716: 
 2717:     /* Set up for repetition, or handle the non-repeated case */
 2718: 
 2719:     REF_REPEAT:
 2720:     switch (*ecode)
 2721:       {
 2722:       case OP_CRSTAR:
 2723:       case OP_CRMINSTAR:
 2724:       case OP_CRPLUS:
 2725:       case OP_CRMINPLUS:
 2726:       case OP_CRQUERY:
 2727:       case OP_CRMINQUERY:
 2728:       c = *ecode++ - OP_CRSTAR;
 2729:       minimize = (c & 1) != 0;
 2730:       min = rep_min[c];                 /* Pick up values from tables; */
 2731:       max = rep_max[c];                 /* zero for max => infinity */
 2732:       if (max == 0) max = INT_MAX;
 2733:       break;
 2734: 
 2735:       case OP_CRRANGE:
 2736:       case OP_CRMINRANGE:
 2737:       minimize = (*ecode == OP_CRMINRANGE);
 2738:       min = GET2(ecode, 1);
 2739:       max = GET2(ecode, 1 + IMM2_SIZE);
 2740:       if (max == 0) max = INT_MAX;
 2741:       ecode += 1 + 2 * IMM2_SIZE;
 2742:       break;
 2743: 
 2744:       default:               /* No repeat follows */
 2745:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
 2746:         {
 2747:         if (length == -2) eptr = md->end_subject;   /* Partial match */
 2748:         CHECK_PARTIAL();
 2749:         RRETURN(MATCH_NOMATCH);
 2750:         }
 2751:       eptr += length;
 2752:       continue;              /* With the main loop */
 2753:       }
 2754: 
 2755:     /* Handle repeated back references. If the length of the reference is
 2756:     zero, just continue with the main loop. If the length is negative, it
 2757:     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
 2758:     zero, we can continue at the same level without recursion. For any other
 2759:     minimum, carrying on will result in NOMATCH. */
 2760: 
 2761:     if (length == 0) continue;
 2762:     if (length < 0 && min == 0) continue;
 2763: 
 2764:     /* First, ensure the minimum number of matches are present. We get back
 2765:     the length of the reference string explicitly rather than passing the
 2766:     address of eptr, so that eptr can be a register variable. */
 2767: 
 2768:     for (i = 1; i <= min; i++)
 2769:       {
 2770:       int slength;
 2771:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
 2772:         {
 2773:         if (slength == -2) eptr = md->end_subject;   /* Partial match */
 2774:         CHECK_PARTIAL();
 2775:         RRETURN(MATCH_NOMATCH);
 2776:         }
 2777:       eptr += slength;
 2778:       }
 2779: 
 2780:     /* If min = max, continue at the same level without recursion.
 2781:     They are not both allowed to be zero. */
 2782: 
 2783:     if (min == max) continue;
 2784: 
 2785:     /* If minimizing, keep trying and advancing the pointer */
 2786: 
 2787:     if (minimize)
 2788:       {
 2789:       for (fi = min;; fi++)
 2790:         {
 2791:         int slength;
 2792:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
 2793:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2794:         if (fi >= max) RRETURN(MATCH_NOMATCH);
 2795:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
 2796:           {
 2797:           if (slength == -2) eptr = md->end_subject;   /* Partial match */
 2798:           CHECK_PARTIAL();
 2799:           RRETURN(MATCH_NOMATCH);
 2800:           }
 2801:         eptr += slength;
 2802:         }
 2803:       /* Control never gets here */
 2804:       }
 2805: 
 2806:     /* If maximizing, find the longest string and work backwards */
 2807: 
 2808:     else
 2809:       {
 2810:       pp = eptr;
 2811:       for (i = min; i < max; i++)
 2812:         {
 2813:         int slength;
 2814:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
 2815:           {
 2816:           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
 2817:           the soft partial matching case. */
 2818: 
 2819:           if (slength == -2 && md->partial != 0 &&
 2820:               md->end_subject > md->start_used_ptr)
 2821:             {
 2822:             md->hitend = TRUE;
 2823:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 2824:             }
 2825:           break;
 2826:           }
 2827:         eptr += slength;
 2828:         }
 2829: 
 2830:       while (eptr >= pp)
 2831:         {
 2832:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
 2833:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2834:         eptr -= length;
 2835:         }
 2836:       RRETURN(MATCH_NOMATCH);
 2837:       }
 2838:     /* Control never gets here */
 2839: 
 2840:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
 2841:     used when all the characters in the class have values in the range 0-255,
 2842:     and either the matching is caseful, or the characters are in the range
 2843:     0-127 when UTF-8 processing is enabled. The only difference between
 2844:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
 2845:     encountered.
 2846: 
 2847:     First, look past the end of the item to see if there is repeat information
 2848:     following. Then obey similar code to character type repeats - written out
 2849:     again for speed. */
 2850: 
 2851:     case OP_NCLASS:
 2852:     case OP_CLASS:
 2853:       {
 2854:       /* The data variable is saved across frames, so the byte map needs to
 2855:       be stored there. */
 2856: #define BYTE_MAP ((pcre_uint8 *)data)
 2857:       data = ecode + 1;                /* Save for matching */
 2858:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
 2859: 
 2860:       switch (*ecode)
 2861:         {
 2862:         case OP_CRSTAR:
 2863:         case OP_CRMINSTAR:
 2864:         case OP_CRPLUS:
 2865:         case OP_CRMINPLUS:
 2866:         case OP_CRQUERY:
 2867:         case OP_CRMINQUERY:
 2868:         case OP_CRPOSSTAR:
 2869:         case OP_CRPOSPLUS:
 2870:         case OP_CRPOSQUERY:
 2871:         c = *ecode++ - OP_CRSTAR;
 2872:         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
 2873:         else possessive = TRUE;
 2874:         min = rep_min[c];                 /* Pick up values from tables; */
 2875:         max = rep_max[c];                 /* zero for max => infinity */
 2876:         if (max == 0) max = INT_MAX;
 2877:         break;
 2878: 
 2879:         case OP_CRRANGE:
 2880:         case OP_CRMINRANGE:
 2881:         case OP_CRPOSRANGE:
 2882:         minimize = (*ecode == OP_CRMINRANGE);
 2883:         possessive = (*ecode == OP_CRPOSRANGE);
 2884:         min = GET2(ecode, 1);
 2885:         max = GET2(ecode, 1 + IMM2_SIZE);
 2886:         if (max == 0) max = INT_MAX;
 2887:         ecode += 1 + 2 * IMM2_SIZE;
 2888:         break;
 2889: 
 2890:         default:               /* No repeat follows */
 2891:         min = max = 1;
 2892:         break;
 2893:         }
 2894: 
 2895:       /* First, ensure the minimum number of matches are present. */
 2896: 
 2897: #ifdef SUPPORT_UTF
 2898:       if (utf)
 2899:         {
 2900:         for (i = 1; i <= min; i++)
 2901:           {
 2902:           if (eptr >= md->end_subject)
 2903:             {
 2904:             SCHECK_PARTIAL();
 2905:             RRETURN(MATCH_NOMATCH);
 2906:             }
 2907:           GETCHARINC(c, eptr);
 2908:           if (c > 255)
 2909:             {
 2910:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 2911:             }
 2912:           else
 2913:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 2914:           }
 2915:         }
 2916:       else
 2917: #endif
 2918:       /* Not UTF mode */
 2919:         {
 2920:         for (i = 1; i <= min; i++)
 2921:           {
 2922:           if (eptr >= md->end_subject)
 2923:             {
 2924:             SCHECK_PARTIAL();
 2925:             RRETURN(MATCH_NOMATCH);
 2926:             }
 2927:           c = *eptr++;
 2928: #ifndef COMPILE_PCRE8
 2929:           if (c > 255)
 2930:             {
 2931:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 2932:             }
 2933:           else
 2934: #endif
 2935:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 2936:           }
 2937:         }
 2938: 
 2939:       /* If max == min we can continue with the main loop without the
 2940:       need to recurse. */
 2941: 
 2942:       if (min == max) continue;
 2943: 
 2944:       /* If minimizing, keep testing the rest of the expression and advancing
 2945:       the pointer while it matches the class. */
 2946: 
 2947:       if (minimize)
 2948:         {
 2949: #ifdef SUPPORT_UTF
 2950:         if (utf)
 2951:           {
 2952:           for (fi = min;; fi++)
 2953:             {
 2954:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
 2955:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2956:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 2957:             if (eptr >= md->end_subject)
 2958:               {
 2959:               SCHECK_PARTIAL();
 2960:               RRETURN(MATCH_NOMATCH);
 2961:               }
 2962:             GETCHARINC(c, eptr);
 2963:             if (c > 255)
 2964:               {
 2965:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 2966:               }
 2967:             else
 2968:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 2969:             }
 2970:           }
 2971:         else
 2972: #endif
 2973:         /* Not UTF mode */
 2974:           {
 2975:           for (fi = min;; fi++)
 2976:             {
 2977:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
 2978:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 2979:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 2980:             if (eptr >= md->end_subject)
 2981:               {
 2982:               SCHECK_PARTIAL();
 2983:               RRETURN(MATCH_NOMATCH);
 2984:               }
 2985:             c = *eptr++;
 2986: #ifndef COMPILE_PCRE8
 2987:             if (c > 255)
 2988:               {
 2989:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
 2990:               }
 2991:             else
 2992: #endif
 2993:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
 2994:             }
 2995:           }
 2996:         /* Control never gets here */
 2997:         }
 2998: 
 2999:       /* If maximizing, find the longest possible run, then work backwards. */
 3000: 
 3001:       else
 3002:         {
 3003:         pp = eptr;
 3004: 
 3005: #ifdef SUPPORT_UTF
 3006:         if (utf)
 3007:           {
 3008:           for (i = min; i < max; i++)
 3009:             {
 3010:             int len = 1;
 3011:             if (eptr >= md->end_subject)
 3012:               {
 3013:               SCHECK_PARTIAL();
 3014:               break;
 3015:               }
 3016:             GETCHARLEN(c, eptr, len);
 3017:             if (c > 255)
 3018:               {
 3019:               if (op == OP_CLASS) break;
 3020:               }
 3021:             else
 3022:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
 3023:             eptr += len;
 3024:             }
 3025: 
 3026:           if (possessive) continue;    /* No backtracking */
 3027: 
 3028:           for (;;)
 3029:             {
 3030:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
 3031:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3032:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
 3033:             BACKCHAR(eptr);
 3034:             }
 3035:           }
 3036:         else
 3037: #endif
 3038:           /* Not UTF mode */
 3039:           {
 3040:           for (i = min; i < max; i++)
 3041:             {
 3042:             if (eptr >= md->end_subject)
 3043:               {
 3044:               SCHECK_PARTIAL();
 3045:               break;
 3046:               }
 3047:             c = *eptr;
 3048: #ifndef COMPILE_PCRE8
 3049:             if (c > 255)
 3050:               {
 3051:               if (op == OP_CLASS) break;
 3052:               }
 3053:             else
 3054: #endif
 3055:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
 3056:             eptr++;
 3057:             }
 3058: 
 3059:           if (possessive) continue;    /* No backtracking */
 3060: 
 3061:           while (eptr >= pp)
 3062:             {
 3063:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
 3064:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3065:             eptr--;
 3066:             }
 3067:           }
 3068: 
 3069:         RRETURN(MATCH_NOMATCH);
 3070:         }
 3071: #undef BYTE_MAP
 3072:       }
 3073:     /* Control never gets here */
 3074: 
 3075: 
 3076:     /* Match an extended character class. In the 8-bit library, this opcode is
 3077:     encountered only when UTF-8 mode is supported. In the 16-bit and
 3078:     32-bit libraries, codepoints greater than 255 may be encountered even when
 3079:     UTF is not supported. */
 3080: 
 3081: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
 3082:     case OP_XCLASS:
 3083:       {
 3084:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
 3085:       ecode += GET(ecode, 1);                      /* Advance past the item */
 3086: 
 3087:       switch (*ecode)
 3088:         {
 3089:         case OP_CRSTAR:
 3090:         case OP_CRMINSTAR:
 3091:         case OP_CRPLUS:
 3092:         case OP_CRMINPLUS:
 3093:         case OP_CRQUERY:
 3094:         case OP_CRMINQUERY:
 3095:         case OP_CRPOSSTAR:
 3096:         case OP_CRPOSPLUS:
 3097:         case OP_CRPOSQUERY:
 3098:         c = *ecode++ - OP_CRSTAR;
 3099:         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
 3100:         else possessive = TRUE;
 3101:         min = rep_min[c];                 /* Pick up values from tables; */
 3102:         max = rep_max[c];                 /* zero for max => infinity */
 3103:         if (max == 0) max = INT_MAX;
 3104:         break;
 3105: 
 3106:         case OP_CRRANGE:
 3107:         case OP_CRMINRANGE:
 3108:         case OP_CRPOSRANGE:
 3109:         minimize = (*ecode == OP_CRMINRANGE);
 3110:         possessive = (*ecode == OP_CRPOSRANGE);
 3111:         min = GET2(ecode, 1);
 3112:         max = GET2(ecode, 1 + IMM2_SIZE);
 3113:         if (max == 0) max = INT_MAX;
 3114:         ecode += 1 + 2 * IMM2_SIZE;
 3115:         break;
 3116: 
 3117:         default:               /* No repeat follows */
 3118:         min = max = 1;
 3119:         break;
 3120:         }
 3121: 
 3122:       /* First, ensure the minimum number of matches are present. */
 3123: 
 3124:       for (i = 1; i <= min; i++)
 3125:         {
 3126:         if (eptr >= md->end_subject)
 3127:           {
 3128:           SCHECK_PARTIAL();
 3129:           RRETURN(MATCH_NOMATCH);
 3130:           }
 3131:         GETCHARINCTEST(c, eptr);
 3132:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
 3133:         }
 3134: 
 3135:       /* If max == min we can continue with the main loop without the
 3136:       need to recurse. */
 3137: 
 3138:       if (min == max) continue;
 3139: 
 3140:       /* If minimizing, keep testing the rest of the expression and advancing
 3141:       the pointer while it matches the class. */
 3142: 
 3143:       if (minimize)
 3144:         {
 3145:         for (fi = min;; fi++)
 3146:           {
 3147:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
 3148:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3149:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 3150:           if (eptr >= md->end_subject)
 3151:             {
 3152:             SCHECK_PARTIAL();
 3153:             RRETURN(MATCH_NOMATCH);
 3154:             }
 3155:           GETCHARINCTEST(c, eptr);
 3156:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
 3157:           }
 3158:         /* Control never gets here */
 3159:         }
 3160: 
 3161:       /* If maximizing, find the longest possible run, then work backwards. */
 3162: 
 3163:       else
 3164:         {
 3165:         pp = eptr;
 3166:         for (i = min; i < max; i++)
 3167:           {
 3168:           int len = 1;
 3169:           if (eptr >= md->end_subject)
 3170:             {
 3171:             SCHECK_PARTIAL();
 3172:             break;
 3173:             }
 3174: #ifdef SUPPORT_UTF
 3175:           GETCHARLENTEST(c, eptr, len);
 3176: #else
 3177:           c = *eptr;
 3178: #endif
 3179:           if (!PRIV(xclass)(c, data, utf)) break;
 3180:           eptr += len;
 3181:           }
 3182: 
 3183:         if (possessive) continue;    /* No backtracking */
 3184: 
 3185:         for(;;)
 3186:           {
 3187:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
 3188:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3189:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
 3190: #ifdef SUPPORT_UTF
 3191:           if (utf) BACKCHAR(eptr);
 3192: #endif
 3193:           }
 3194:         RRETURN(MATCH_NOMATCH);
 3195:         }
 3196: 
 3197:       /* Control never gets here */
 3198:       }
 3199: #endif    /* End of XCLASS */
 3200: 
 3201:     /* Match a single character, casefully */
 3202: 
 3203:     case OP_CHAR:
 3204: #ifdef SUPPORT_UTF
 3205:     if (utf)
 3206:       {
 3207:       length = 1;
 3208:       ecode++;
 3209:       GETCHARLEN(fc, ecode, length);
 3210:       if (length > md->end_subject - eptr)
 3211:         {
 3212:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
 3213:         RRETURN(MATCH_NOMATCH);
 3214:         }
 3215:       while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
 3216:       }
 3217:     else
 3218: #endif
 3219:     /* Not UTF mode */
 3220:       {
 3221:       if (md->end_subject - eptr < 1)
 3222:         {
 3223:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
 3224:         RRETURN(MATCH_NOMATCH);
 3225:         }
 3226:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
 3227:       ecode += 2;
 3228:       }
 3229:     break;
 3230: 
 3231:     /* Match a single character, caselessly. If we are at the end of the
 3232:     subject, give up immediately. */
 3233: 
 3234:     case OP_CHARI:
 3235:     if (eptr >= md->end_subject)
 3236:       {
 3237:       SCHECK_PARTIAL();
 3238:       RRETURN(MATCH_NOMATCH);
 3239:       }
 3240: 
 3241: #ifdef SUPPORT_UTF
 3242:     if (utf)
 3243:       {
 3244:       length = 1;
 3245:       ecode++;
 3246:       GETCHARLEN(fc, ecode, length);
 3247: 
 3248:       /* If the pattern character's value is < 128, we have only one byte, and
 3249:       we know that its other case must also be one byte long, so we can use the
 3250:       fast lookup table. We know that there is at least one byte left in the
 3251:       subject. */
 3252: 
 3253:       if (fc < 128)
 3254:         {
 3255:         pcre_uint32 cc = RAWUCHAR(eptr);
 3256:         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
 3257:         ecode++;
 3258:         eptr++;
 3259:         }
 3260: 
 3261:       /* Otherwise we must pick up the subject character. Note that we cannot
 3262:       use the value of "length" to check for sufficient bytes left, because the
 3263:       other case of the character may have more or fewer bytes.  */
 3264: 
 3265:       else
 3266:         {
 3267:         pcre_uint32 dc;
 3268:         GETCHARINC(dc, eptr);
 3269:         ecode += length;
 3270: 
 3271:         /* If we have Unicode property support, we can use it to test the other
 3272:         case of the character, if there is one. */
 3273: 
 3274:         if (fc != dc)
 3275:           {
 3276: #ifdef SUPPORT_UCP
 3277:           if (dc != UCD_OTHERCASE(fc))
 3278: #endif
 3279:             RRETURN(MATCH_NOMATCH);
 3280:           }
 3281:         }
 3282:       }
 3283:     else
 3284: #endif   /* SUPPORT_UTF */
 3285: 
 3286:     /* Not UTF mode */
 3287:       {
 3288:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
 3289:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
 3290:       eptr++;
 3291:       ecode += 2;
 3292:       }
 3293:     break;
 3294: 
 3295:     /* Match a single character repeatedly. */
 3296: 
 3297:     case OP_EXACT:
 3298:     case OP_EXACTI:
 3299:     min = max = GET2(ecode, 1);
 3300:     ecode += 1 + IMM2_SIZE;
 3301:     goto REPEATCHAR;
 3302: 
 3303:     case OP_POSUPTO:
 3304:     case OP_POSUPTOI:
 3305:     possessive = TRUE;
 3306:     /* Fall through */
 3307: 
 3308:     case OP_UPTO:
 3309:     case OP_UPTOI:
 3310:     case OP_MINUPTO:
 3311:     case OP_MINUPTOI:
 3312:     min = 0;
 3313:     max = GET2(ecode, 1);
 3314:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
 3315:     ecode += 1 + IMM2_SIZE;
 3316:     goto REPEATCHAR;
 3317: 
 3318:     case OP_POSSTAR:
 3319:     case OP_POSSTARI:
 3320:     possessive = TRUE;
 3321:     min = 0;
 3322:     max = INT_MAX;
 3323:     ecode++;
 3324:     goto REPEATCHAR;
 3325: 
 3326:     case OP_POSPLUS:
 3327:     case OP_POSPLUSI:
 3328:     possessive = TRUE;
 3329:     min = 1;
 3330:     max = INT_MAX;
 3331:     ecode++;
 3332:     goto REPEATCHAR;
 3333: 
 3334:     case OP_POSQUERY:
 3335:     case OP_POSQUERYI:
 3336:     possessive = TRUE;
 3337:     min = 0;
 3338:     max = 1;
 3339:     ecode++;
 3340:     goto REPEATCHAR;
 3341: 
 3342:     case OP_STAR:
 3343:     case OP_STARI:
 3344:     case OP_MINSTAR:
 3345:     case OP_MINSTARI:
 3346:     case OP_PLUS:
 3347:     case OP_PLUSI:
 3348:     case OP_MINPLUS:
 3349:     case OP_MINPLUSI:
 3350:     case OP_QUERY:
 3351:     case OP_QUERYI:
 3352:     case OP_MINQUERY:
 3353:     case OP_MINQUERYI:
 3354:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
 3355:     minimize = (c & 1) != 0;
 3356:     min = rep_min[c];                 /* Pick up values from tables; */
 3357:     max = rep_max[c];                 /* zero for max => infinity */
 3358:     if (max == 0) max = INT_MAX;
 3359: 
 3360:     /* Common code for all repeated single-character matches. We first check
 3361:     for the minimum number of characters. If the minimum equals the maximum, we
 3362:     are done. Otherwise, if minimizing, check the rest of the pattern for a
 3363:     match; if there isn't one, advance up to the maximum, one character at a
 3364:     time.
 3365: 
 3366:     If maximizing, advance up to the maximum number of matching characters,
 3367:     until eptr is past the end of the maximum run. If possessive, we are
 3368:     then done (no backing up). Otherwise, match at this position; anything
 3369:     other than no match is immediately returned. For nomatch, back up one
 3370:     character, unless we are matching \R and the last thing matched was
 3371:     \r\n, in which case, back up two bytes. When we reach the first optional
 3372:     character position, we can save stack by doing a tail recurse.
 3373: 
 3374:     The various UTF/non-UTF and caseful/caseless cases are handled separately,
 3375:     for speed. */
 3376: 
 3377:     REPEATCHAR:
 3378: #ifdef SUPPORT_UTF
 3379:     if (utf)
 3380:       {
 3381:       length = 1;
 3382:       charptr = ecode;
 3383:       GETCHARLEN(fc, ecode, length);
 3384:       ecode += length;
 3385: 
 3386:       /* Handle multibyte character matching specially here. There is
 3387:       support for caseless matching if UCP support is present. */
 3388: 
 3389:       if (length > 1)
 3390:         {
 3391: #ifdef SUPPORT_UCP
 3392:         pcre_uint32 othercase;
 3393:         if (op >= OP_STARI &&     /* Caseless */
 3394:             (othercase = UCD_OTHERCASE(fc)) != fc)
 3395:           oclength = PRIV(ord2utf)(othercase, occhars);
 3396:         else oclength = 0;
 3397: #endif  /* SUPPORT_UCP */
 3398: 
 3399:         for (i = 1; i <= min; i++)
 3400:           {
 3401:           if (eptr <= md->end_subject - length &&
 3402:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 3403: #ifdef SUPPORT_UCP
 3404:           else if (oclength > 0 &&
 3405:                    eptr <= md->end_subject - oclength &&
 3406:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 3407: #endif  /* SUPPORT_UCP */
 3408:           else
 3409:             {
 3410:             CHECK_PARTIAL();
 3411:             RRETURN(MATCH_NOMATCH);
 3412:             }
 3413:           }
 3414: 
 3415:         if (min == max) continue;
 3416: 
 3417:         if (minimize)
 3418:           {
 3419:           for (fi = min;; fi++)
 3420:             {
 3421:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
 3422:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3423:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 3424:             if (eptr <= md->end_subject - length &&
 3425:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 3426: #ifdef SUPPORT_UCP
 3427:             else if (oclength > 0 &&
 3428:                      eptr <= md->end_subject - oclength &&
 3429:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 3430: #endif  /* SUPPORT_UCP */
 3431:             else
 3432:               {
 3433:               CHECK_PARTIAL();
 3434:               RRETURN(MATCH_NOMATCH);
 3435:               }
 3436:             }
 3437:           /* Control never gets here */
 3438:           }
 3439: 
 3440:         else  /* Maximize */
 3441:           {
 3442:           pp = eptr;
 3443:           for (i = min; i < max; i++)
 3444:             {
 3445:             if (eptr <= md->end_subject - length &&
 3446:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 3447: #ifdef SUPPORT_UCP
 3448:             else if (oclength > 0 &&
 3449:                      eptr <= md->end_subject - oclength &&
 3450:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 3451: #endif  /* SUPPORT_UCP */
 3452:             else
 3453:               {
 3454:               CHECK_PARTIAL();
 3455:               break;
 3456:               }
 3457:             }
 3458: 
 3459:           if (possessive) continue;    /* No backtracking */
 3460:           for(;;)
 3461:             {
 3462:             if (eptr == pp) goto TAIL_RECURSE;
 3463:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
 3464:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3465: #ifdef SUPPORT_UCP
 3466:             eptr--;
 3467:             BACKCHAR(eptr);
 3468: #else   /* without SUPPORT_UCP */
 3469:             eptr -= length;
 3470: #endif  /* SUPPORT_UCP */
 3471:             }
 3472:           }
 3473:         /* Control never gets here */
 3474:         }
 3475: 
 3476:       /* If the length of a UTF-8 character is 1, we fall through here, and
 3477:       obey the code as for non-UTF-8 characters below, though in this case the
 3478:       value of fc will always be < 128. */
 3479:       }
 3480:     else
 3481: #endif  /* SUPPORT_UTF */
 3482:       /* When not in UTF-8 mode, load a single-byte character. */
 3483:       fc = *ecode++;
 3484: 
 3485:     /* The value of fc at this point is always one character, though we may
 3486:     or may not be in UTF mode. The code is duplicated for the caseless and
 3487:     caseful cases, for speed, since matching characters is likely to be quite
 3488:     common. First, ensure the minimum number of matches are present. If min =
 3489:     max, continue at the same level without recursing. Otherwise, if
 3490:     minimizing, keep trying the rest of the expression and advancing one
 3491:     matching character if failing, up to the maximum. Alternatively, if
 3492:     maximizing, find the maximum number of characters and work backwards. */
 3493: 
 3494:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
 3495:       max, (char *)eptr));
 3496: 
 3497:     if (op >= OP_STARI)  /* Caseless */
 3498:       {
 3499: #ifdef COMPILE_PCRE8
 3500:       /* fc must be < 128 if UTF is enabled. */
 3501:       foc = md->fcc[fc];
 3502: #else
 3503: #ifdef SUPPORT_UTF
 3504: #ifdef SUPPORT_UCP
 3505:       if (utf && fc > 127)
 3506:         foc = UCD_OTHERCASE(fc);
 3507: #else
 3508:       if (utf && fc > 127)
 3509:         foc = fc;
 3510: #endif /* SUPPORT_UCP */
 3511:       else
 3512: #endif /* SUPPORT_UTF */
 3513:         foc = TABLE_GET(fc, md->fcc, fc);
 3514: #endif /* COMPILE_PCRE8 */
 3515: 
 3516:       for (i = 1; i <= min; i++)
 3517:         {
 3518:         pcre_uint32 cc;                 /* Faster than pcre_uchar */
 3519:         if (eptr >= md->end_subject)
 3520:           {
 3521:           SCHECK_PARTIAL();
 3522:           RRETURN(MATCH_NOMATCH);
 3523:           }
 3524:         cc = RAWUCHARTEST(eptr);
 3525:         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
 3526:         eptr++;
 3527:         }
 3528:       if (min == max) continue;
 3529:       if (minimize)
 3530:         {
 3531:         for (fi = min;; fi++)
 3532:           {
 3533:           pcre_uint32 cc;               /* Faster than pcre_uchar */
 3534:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
 3535:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3536:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 3537:           if (eptr >= md->end_subject)
 3538:             {
 3539:             SCHECK_PARTIAL();
 3540:             RRETURN(MATCH_NOMATCH);
 3541:             }
 3542:           cc = RAWUCHARTEST(eptr);
 3543:           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
 3544:           eptr++;
 3545:           }
 3546:         /* Control never gets here */
 3547:         }
 3548:       else  /* Maximize */
 3549:         {
 3550:         pp = eptr;
 3551:         for (i = min; i < max; i++)
 3552:           {
 3553:           pcre_uint32 cc;               /* Faster than pcre_uchar */
 3554:           if (eptr >= md->end_subject)
 3555:             {
 3556:             SCHECK_PARTIAL();
 3557:             break;
 3558:             }
 3559:           cc = RAWUCHARTEST(eptr);
 3560:           if (fc != cc && foc != cc) break;
 3561:           eptr++;
 3562:           }
 3563:         if (possessive) continue;       /* No backtracking */
 3564:         for (;;)
 3565:           {
 3566:           if (eptr == pp) goto TAIL_RECURSE;
 3567:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
 3568:           eptr--;
 3569:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3570:           }
 3571:         /* Control never gets here */
 3572:         }
 3573:       }
 3574: 
 3575:     /* Caseful comparisons (includes all multi-byte characters) */
 3576: 
 3577:     else
 3578:       {
 3579:       for (i = 1; i <= min; i++)
 3580:         {
 3581:         if (eptr >= md->end_subject)
 3582:           {
 3583:           SCHECK_PARTIAL();
 3584:           RRETURN(MATCH_NOMATCH);
 3585:           }
 3586:         if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
 3587:         }
 3588: 
 3589:       if (min == max) continue;
 3590: 
 3591:       if (minimize)
 3592:         {
 3593:         for (fi = min;; fi++)
 3594:           {
 3595:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
 3596:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3597:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 3598:           if (eptr >= md->end_subject)
 3599:             {
 3600:             SCHECK_PARTIAL();
 3601:             RRETURN(MATCH_NOMATCH);
 3602:             }
 3603:           if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
 3604:           }
 3605:         /* Control never gets here */
 3606:         }
 3607:       else  /* Maximize */
 3608:         {
 3609:         pp = eptr;
 3610:         for (i = min; i < max; i++)
 3611:           {
 3612:           if (eptr >= md->end_subject)
 3613:             {
 3614:             SCHECK_PARTIAL();
 3615:             break;
 3616:             }
 3617:           if (fc != RAWUCHARTEST(eptr)) break;
 3618:           eptr++;
 3619:           }
 3620:         if (possessive) continue;    /* No backtracking */
 3621:         for (;;)
 3622:           {
 3623:           if (eptr == pp) goto TAIL_RECURSE;
 3624:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
 3625:           eptr--;
 3626:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3627:           }
 3628:         /* Control never gets here */
 3629:         }
 3630:       }
 3631:     /* Control never gets here */
 3632: 
 3633:     /* Match a negated single character. In UTF mode both the pattern
 3634:     character and the subject character can be multibyte. */
 3635: 
 3636:     case OP_NOT:
 3637:     case OP_NOTI:
 3638:     if (eptr >= md->end_subject)
 3639:       {
 3640:       SCHECK_PARTIAL();
 3641:       RRETURN(MATCH_NOMATCH);
 3642:       }
 3643: #ifdef SUPPORT_UTF
 3644:     if (utf)
 3645:       {
 3646:       register pcre_uint32 ch, och;
 3647: 
 3648:       ecode++;
 3649:       GETCHARINC(ch, ecode);
 3650:       GETCHARINC(c, eptr);
 3651: 
 3652:       if (op == OP_NOT)
 3653:         {
 3654:         if (ch == c) RRETURN(MATCH_NOMATCH);
 3655:         }
 3656:       else
 3657:         {
 3658: #ifdef SUPPORT_UCP
 3659:         if (ch > 127)
 3660:           och = UCD_OTHERCASE(ch);
 3661: #else
 3662:         if (ch > 127)
 3663:           och = ch;
 3664: #endif /* SUPPORT_UCP */
 3665:         else
 3666:           och = TABLE_GET(ch, md->fcc, ch);
 3667:         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
 3668:         }
 3669:       }
 3670:     else
 3671: #endif
 3672:       {
 3673:       register pcre_uint32 ch = ecode[1];
 3674:       c = *eptr++;
 3675:       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
 3676:         RRETURN(MATCH_NOMATCH);
 3677:       ecode += 2;
 3678:       }
 3679:     break;
 3680: 
 3681:     /* Match a negated single character repeatedly. This is almost a
 3682:     repeat of the code for a repeated single character, but I haven't found a
 3683:     nice way of commoning these up that doesn't require a test of the
 3684:     positive/negative option for each character match. Maybe that wouldn't add
 3685:     very much to the time taken, but character matching *is* what this is all
 3686:     about... */
 3687: 
 3688:     case OP_NOTEXACT:
 3689:     case OP_NOTEXACTI:
 3690:     min = max = GET2(ecode, 1);
 3691:     ecode += 1 + IMM2_SIZE;
 3692:     goto REPEATNOTCHAR;
 3693: 
 3694:     case OP_NOTUPTO:
 3695:     case OP_NOTUPTOI:
 3696:     case OP_NOTMINUPTO:
 3697:     case OP_NOTMINUPTOI:
 3698:     min = 0;
 3699:     max = GET2(ecode, 1);
 3700:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
 3701:     ecode += 1 + IMM2_SIZE;
 3702:     goto REPEATNOTCHAR;
 3703: 
 3704:     case OP_NOTPOSSTAR:
 3705:     case OP_NOTPOSSTARI:
 3706:     possessive = TRUE;
 3707:     min = 0;
 3708:     max = INT_MAX;
 3709:     ecode++;
 3710:     goto REPEATNOTCHAR;
 3711: 
 3712:     case OP_NOTPOSPLUS:
 3713:     case OP_NOTPOSPLUSI:
 3714:     possessive = TRUE;
 3715:     min = 1;
 3716:     max = INT_MAX;
 3717:     ecode++;
 3718:     goto REPEATNOTCHAR;
 3719: 
 3720:     case OP_NOTPOSQUERY:
 3721:     case OP_NOTPOSQUERYI:
 3722:     possessive = TRUE;
 3723:     min = 0;
 3724:     max = 1;
 3725:     ecode++;
 3726:     goto REPEATNOTCHAR;
 3727: 
 3728:     case OP_NOTPOSUPTO:
 3729:     case OP_NOTPOSUPTOI:
 3730:     possessive = TRUE;
 3731:     min = 0;
 3732:     max = GET2(ecode, 1);
 3733:     ecode += 1 + IMM2_SIZE;
 3734:     goto REPEATNOTCHAR;
 3735: 
 3736:     case OP_NOTSTAR:
 3737:     case OP_NOTSTARI:
 3738:     case OP_NOTMINSTAR:
 3739:     case OP_NOTMINSTARI:
 3740:     case OP_NOTPLUS:
 3741:     case OP_NOTPLUSI:
 3742:     case OP_NOTMINPLUS:
 3743:     case OP_NOTMINPLUSI:
 3744:     case OP_NOTQUERY:
 3745:     case OP_NOTQUERYI:
 3746:     case OP_NOTMINQUERY:
 3747:     case OP_NOTMINQUERYI:
 3748:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
 3749:     minimize = (c & 1) != 0;
 3750:     min = rep_min[c];                 /* Pick up values from tables; */
 3751:     max = rep_max[c];                 /* zero for max => infinity */
 3752:     if (max == 0) max = INT_MAX;
 3753: 
 3754:     /* Common code for all repeated negated single-character matches. */
 3755: 
 3756:     REPEATNOTCHAR:
 3757:     GETCHARINCTEST(fc, ecode);
 3758: 
 3759:     /* The code is duplicated for the caseless and caseful cases, for speed,
 3760:     since matching characters is likely to be quite common. First, ensure the
 3761:     minimum number of matches are present. If min = max, continue at the same
 3762:     level without recursing. Otherwise, if minimizing, keep trying the rest of
 3763:     the expression and advancing one matching character if failing, up to the
 3764:     maximum. Alternatively, if maximizing, find the maximum number of
 3765:     characters and work backwards. */
 3766: 
 3767:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
 3768:       max, (char *)eptr));
 3769: 
 3770:     if (op >= OP_NOTSTARI)     /* Caseless */
 3771:       {
 3772: #ifdef SUPPORT_UTF
 3773: #ifdef SUPPORT_UCP
 3774:       if (utf && fc > 127)
 3775:         foc = UCD_OTHERCASE(fc);
 3776: #else
 3777:       if (utf && fc > 127)
 3778:         foc = fc;
 3779: #endif /* SUPPORT_UCP */
 3780:       else
 3781: #endif /* SUPPORT_UTF */
 3782:         foc = TABLE_GET(fc, md->fcc, fc);
 3783: 
 3784: #ifdef SUPPORT_UTF
 3785:       if (utf)
 3786:         {
 3787:         register pcre_uint32 d;
 3788:         for (i = 1; i <= min; i++)
 3789:           {
 3790:           if (eptr >= md->end_subject)
 3791:             {
 3792:             SCHECK_PARTIAL();
 3793:             RRETURN(MATCH_NOMATCH);
 3794:             }
 3795:           GETCHARINC(d, eptr);
 3796:           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
 3797:           }
 3798:         }
 3799:       else
 3800: #endif  /* SUPPORT_UTF */
 3801:       /* Not UTF mode */
 3802:         {
 3803:         for (i = 1; i <= min; i++)
 3804:           {
 3805:           if (eptr >= md->end_subject)
 3806:             {
 3807:             SCHECK_PARTIAL();
 3808:             RRETURN(MATCH_NOMATCH);
 3809:             }
 3810:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
 3811:           eptr++;
 3812:           }
 3813:         }
 3814: 
 3815:       if (min == max) continue;
 3816: 
 3817:       if (minimize)
 3818:         {
 3819: #ifdef SUPPORT_UTF
 3820:         if (utf)
 3821:           {
 3822:           register pcre_uint32 d;
 3823:           for (fi = min;; fi++)
 3824:             {
 3825:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
 3826:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3827:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 3828:             if (eptr >= md->end_subject)
 3829:               {
 3830:               SCHECK_PARTIAL();
 3831:               RRETURN(MATCH_NOMATCH);
 3832:               }
 3833:             GETCHARINC(d, eptr);
 3834:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
 3835:             }
 3836:           }
 3837:         else
 3838: #endif  /*SUPPORT_UTF */
 3839:         /* Not UTF mode */
 3840:           {
 3841:           for (fi = min;; fi++)
 3842:             {
 3843:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
 3844:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3845:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 3846:             if (eptr >= md->end_subject)
 3847:               {
 3848:               SCHECK_PARTIAL();
 3849:               RRETURN(MATCH_NOMATCH);
 3850:               }
 3851:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
 3852:             eptr++;
 3853:             }
 3854:           }
 3855:         /* Control never gets here */
 3856:         }
 3857: 
 3858:       /* Maximize case */
 3859: 
 3860:       else
 3861:         {
 3862:         pp = eptr;
 3863: 
 3864: #ifdef SUPPORT_UTF
 3865:         if (utf)
 3866:           {
 3867:           register pcre_uint32 d;
 3868:           for (i = min; i < max; i++)
 3869:             {
 3870:             int len = 1;
 3871:             if (eptr >= md->end_subject)
 3872:               {
 3873:               SCHECK_PARTIAL();
 3874:               break;
 3875:               }
 3876:             GETCHARLEN(d, eptr, len);
 3877:             if (fc == d || (unsigned int)foc == d) break;
 3878:             eptr += len;
 3879:             }
 3880:           if (possessive) continue;    /* No backtracking */
 3881:           for(;;)
 3882:             {
 3883:             if (eptr == pp) goto TAIL_RECURSE;
 3884:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
 3885:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3886:             eptr--;
 3887:             BACKCHAR(eptr);
 3888:             }
 3889:           }
 3890:         else
 3891: #endif  /* SUPPORT_UTF */
 3892:         /* Not UTF mode */
 3893:           {
 3894:           for (i = min; i < max; i++)
 3895:             {
 3896:             if (eptr >= md->end_subject)
 3897:               {
 3898:               SCHECK_PARTIAL();
 3899:               break;
 3900:               }
 3901:             if (fc == *eptr || foc == *eptr) break;
 3902:             eptr++;
 3903:             }
 3904:           if (possessive) continue;    /* No backtracking */
 3905:           for (;;)
 3906:             {
 3907:             if (eptr == pp) goto TAIL_RECURSE;
 3908:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
 3909:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3910:             eptr--;
 3911:             }
 3912:           }
 3913:         /* Control never gets here */
 3914:         }
 3915:       }
 3916: 
 3917:     /* Caseful comparisons */
 3918: 
 3919:     else
 3920:       {
 3921: #ifdef SUPPORT_UTF
 3922:       if (utf)
 3923:         {
 3924:         register pcre_uint32 d;
 3925:         for (i = 1; i <= min; i++)
 3926:           {
 3927:           if (eptr >= md->end_subject)
 3928:             {
 3929:             SCHECK_PARTIAL();
 3930:             RRETURN(MATCH_NOMATCH);
 3931:             }
 3932:           GETCHARINC(d, eptr);
 3933:           if (fc == d) RRETURN(MATCH_NOMATCH);
 3934:           }
 3935:         }
 3936:       else
 3937: #endif
 3938:       /* Not UTF mode */
 3939:         {
 3940:         for (i = 1; i <= min; i++)
 3941:           {
 3942:           if (eptr >= md->end_subject)
 3943:             {
 3944:             SCHECK_PARTIAL();
 3945:             RRETURN(MATCH_NOMATCH);
 3946:             }
 3947:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
 3948:           }
 3949:         }
 3950: 
 3951:       if (min == max) continue;
 3952: 
 3953:       if (minimize)
 3954:         {
 3955: #ifdef SUPPORT_UTF
 3956:         if (utf)
 3957:           {
 3958:           register pcre_uint32 d;
 3959:           for (fi = min;; fi++)
 3960:             {
 3961:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
 3962:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3963:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 3964:             if (eptr >= md->end_subject)
 3965:               {
 3966:               SCHECK_PARTIAL();
 3967:               RRETURN(MATCH_NOMATCH);
 3968:               }
 3969:             GETCHARINC(d, eptr);
 3970:             if (fc == d) RRETURN(MATCH_NOMATCH);
 3971:             }
 3972:           }
 3973:         else
 3974: #endif
 3975:         /* Not UTF mode */
 3976:           {
 3977:           for (fi = min;; fi++)
 3978:             {
 3979:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
 3980:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 3981:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 3982:             if (eptr >= md->end_subject)
 3983:               {
 3984:               SCHECK_PARTIAL();
 3985:               RRETURN(MATCH_NOMATCH);
 3986:               }
 3987:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
 3988:             }
 3989:           }
 3990:         /* Control never gets here */
 3991:         }
 3992: 
 3993:       /* Maximize case */
 3994: 
 3995:       else
 3996:         {
 3997:         pp = eptr;
 3998: 
 3999: #ifdef SUPPORT_UTF
 4000:         if (utf)
 4001:           {
 4002:           register pcre_uint32 d;
 4003:           for (i = min; i < max; i++)
 4004:             {
 4005:             int len = 1;
 4006:             if (eptr >= md->end_subject)
 4007:               {
 4008:               SCHECK_PARTIAL();
 4009:               break;
 4010:               }
 4011:             GETCHARLEN(d, eptr, len);
 4012:             if (fc == d) break;
 4013:             eptr += len;
 4014:             }
 4015:           if (possessive) continue;    /* No backtracking */
 4016:           for(;;)
 4017:             {
 4018:             if (eptr == pp) goto TAIL_RECURSE;
 4019:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
 4020:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4021:             eptr--;
 4022:             BACKCHAR(eptr);
 4023:             }
 4024:           }
 4025:         else
 4026: #endif
 4027:         /* Not UTF mode */
 4028:           {
 4029:           for (i = min; i < max; i++)
 4030:             {
 4031:             if (eptr >= md->end_subject)
 4032:               {
 4033:               SCHECK_PARTIAL();
 4034:               break;
 4035:               }
 4036:             if (fc == *eptr) break;
 4037:             eptr++;
 4038:             }
 4039:           if (possessive) continue;    /* No backtracking */
 4040:           for (;;)
 4041:             {
 4042:             if (eptr == pp) goto TAIL_RECURSE;
 4043:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
 4044:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4045:             eptr--;
 4046:             }
 4047:           }
 4048:         /* Control never gets here */
 4049:         }
 4050:       }
 4051:     /* Control never gets here */
 4052: 
 4053:     /* Match a single character type repeatedly; several different opcodes
 4054:     share code. This is very similar to the code for single characters, but we
 4055:     repeat it in the interests of efficiency. */
 4056: 
 4057:     case OP_TYPEEXACT:
 4058:     min = max = GET2(ecode, 1);
 4059:     minimize = TRUE;
 4060:     ecode += 1 + IMM2_SIZE;
 4061:     goto REPEATTYPE;
 4062: 
 4063:     case OP_TYPEUPTO:
 4064:     case OP_TYPEMINUPTO:
 4065:     min = 0;
 4066:     max = GET2(ecode, 1);
 4067:     minimize = *ecode == OP_TYPEMINUPTO;
 4068:     ecode += 1 + IMM2_SIZE;
 4069:     goto REPEATTYPE;
 4070: 
 4071:     case OP_TYPEPOSSTAR:
 4072:     possessive = TRUE;
 4073:     min = 0;
 4074:     max = INT_MAX;
 4075:     ecode++;
 4076:     goto REPEATTYPE;
 4077: 
 4078:     case OP_TYPEPOSPLUS:
 4079:     possessive = TRUE;
 4080:     min = 1;
 4081:     max = INT_MAX;
 4082:     ecode++;
 4083:     goto REPEATTYPE;
 4084: 
 4085:     case OP_TYPEPOSQUERY:
 4086:     possessive = TRUE;
 4087:     min = 0;
 4088:     max = 1;
 4089:     ecode++;
 4090:     goto REPEATTYPE;
 4091: 
 4092:     case OP_TYPEPOSUPTO:
 4093:     possessive = TRUE;
 4094:     min = 0;
 4095:     max = GET2(ecode, 1);
 4096:     ecode += 1 + IMM2_SIZE;
 4097:     goto REPEATTYPE;
 4098: 
 4099:     case OP_TYPESTAR:
 4100:     case OP_TYPEMINSTAR:
 4101:     case OP_TYPEPLUS:
 4102:     case OP_TYPEMINPLUS:
 4103:     case OP_TYPEQUERY:
 4104:     case OP_TYPEMINQUERY:
 4105:     c = *ecode++ - OP_TYPESTAR;
 4106:     minimize = (c & 1) != 0;
 4107:     min = rep_min[c];                 /* Pick up values from tables; */
 4108:     max = rep_max[c];                 /* zero for max => infinity */
 4109:     if (max == 0) max = INT_MAX;
 4110: 
 4111:     /* Common code for all repeated single character type matches. Note that
 4112:     in UTF-8 mode, '.' matches a character of any length, but for the other
 4113:     character types, the valid characters are all one-byte long. */
 4114: 
 4115:     REPEATTYPE:
 4116:     ctype = *ecode++;      /* Code for the character type */
 4117: 
 4118: #ifdef SUPPORT_UCP
 4119:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
 4120:       {
 4121:       prop_fail_result = ctype == OP_NOTPROP;
 4122:       prop_type = *ecode++;
 4123:       prop_value = *ecode++;
 4124:       }
 4125:     else prop_type = -1;
 4126: #endif
 4127: 
 4128:     /* First, ensure the minimum number of matches are present. Use inline
 4129:     code for maximizing the speed, and do the type test once at the start
 4130:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
 4131:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
 4132:     and single-bytes. */
 4133: 
 4134:     if (min > 0)
 4135:       {
 4136: #ifdef SUPPORT_UCP
 4137:       if (prop_type >= 0)
 4138:         {
 4139:         switch(prop_type)
 4140:           {
 4141:           case PT_ANY:
 4142:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 4143:           for (i = 1; i <= min; i++)
 4144:             {
 4145:             if (eptr >= md->end_subject)
 4146:               {
 4147:               SCHECK_PARTIAL();
 4148:               RRETURN(MATCH_NOMATCH);
 4149:               }
 4150:             GETCHARINCTEST(c, eptr);
 4151:             }
 4152:           break;
 4153: 
 4154:           case PT_LAMP:
 4155:           for (i = 1; i <= min; i++)
 4156:             {
 4157:             int chartype;
 4158:             if (eptr >= md->end_subject)
 4159:               {
 4160:               SCHECK_PARTIAL();
 4161:               RRETURN(MATCH_NOMATCH);
 4162:               }
 4163:             GETCHARINCTEST(c, eptr);
 4164:             chartype = UCD_CHARTYPE(c);
 4165:             if ((chartype == ucp_Lu ||
 4166:                  chartype == ucp_Ll ||
 4167:                  chartype == ucp_Lt) == prop_fail_result)
 4168:               RRETURN(MATCH_NOMATCH);
 4169:             }
 4170:           break;
 4171: 
 4172:           case PT_GC:
 4173:           for (i = 1; i <= min; i++)
 4174:             {
 4175:             if (eptr >= md->end_subject)
 4176:               {
 4177:               SCHECK_PARTIAL();
 4178:               RRETURN(MATCH_NOMATCH);
 4179:               }
 4180:             GETCHARINCTEST(c, eptr);
 4181:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
 4182:               RRETURN(MATCH_NOMATCH);
 4183:             }
 4184:           break;
 4185: 
 4186:           case PT_PC:
 4187:           for (i = 1; i <= min; i++)
 4188:             {
 4189:             if (eptr >= md->end_subject)
 4190:               {
 4191:               SCHECK_PARTIAL();
 4192:               RRETURN(MATCH_NOMATCH);
 4193:               }
 4194:             GETCHARINCTEST(c, eptr);
 4195:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
 4196:               RRETURN(MATCH_NOMATCH);
 4197:             }
 4198:           break;
 4199: 
 4200:           case PT_SC:
 4201:           for (i = 1; i <= min; i++)
 4202:             {
 4203:             if (eptr >= md->end_subject)
 4204:               {
 4205:               SCHECK_PARTIAL();
 4206:               RRETURN(MATCH_NOMATCH);
 4207:               }
 4208:             GETCHARINCTEST(c, eptr);
 4209:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
 4210:               RRETURN(MATCH_NOMATCH);
 4211:             }
 4212:           break;
 4213: 
 4214:           case PT_ALNUM:
 4215:           for (i = 1; i <= min; i++)
 4216:             {
 4217:             int category;
 4218:             if (eptr >= md->end_subject)
 4219:               {
 4220:               SCHECK_PARTIAL();
 4221:               RRETURN(MATCH_NOMATCH);
 4222:               }
 4223:             GETCHARINCTEST(c, eptr);
 4224:             category = UCD_CATEGORY(c);
 4225:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
 4226:               RRETURN(MATCH_NOMATCH);
 4227:             }
 4228:           break;
 4229: 
 4230:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 4231:           which means that Perl space and POSIX space are now identical. PCRE
 4232:           was changed at release 8.34. */
 4233: 
 4234:           case PT_SPACE:    /* Perl space */
 4235:           case PT_PXSPACE:  /* POSIX space */
 4236:           for (i = 1; i <= min; i++)
 4237:             {
 4238:             if (eptr >= md->end_subject)
 4239:               {
 4240:               SCHECK_PARTIAL();
 4241:               RRETURN(MATCH_NOMATCH);
 4242:               }
 4243:             GETCHARINCTEST(c, eptr);
 4244:             switch(c)
 4245:               {
 4246:               HSPACE_CASES:
 4247:               VSPACE_CASES:
 4248:               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 4249:               break;
 4250: 
 4251:               default:
 4252:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
 4253:                 RRETURN(MATCH_NOMATCH);
 4254:               break;
 4255:               }
 4256:             }
 4257:           break;
 4258: 
 4259:           case PT_WORD:
 4260:           for (i = 1; i <= min; i++)
 4261:             {
 4262:             int category;
 4263:             if (eptr >= md->end_subject)
 4264:               {
 4265:               SCHECK_PARTIAL();
 4266:               RRETURN(MATCH_NOMATCH);
 4267:               }
 4268:             GETCHARINCTEST(c, eptr);
 4269:             category = UCD_CATEGORY(c);
 4270:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
 4271:                    == prop_fail_result)
 4272:               RRETURN(MATCH_NOMATCH);
 4273:             }
 4274:           break;
 4275: 
 4276:           case PT_CLIST:
 4277:           for (i = 1; i <= min; i++)
 4278:             {
 4279:             const pcre_uint32 *cp;
 4280:             if (eptr >= md->end_subject)
 4281:               {
 4282:               SCHECK_PARTIAL();
 4283:               RRETURN(MATCH_NOMATCH);
 4284:               }
 4285:             GETCHARINCTEST(c, eptr);
 4286:             cp = PRIV(ucd_caseless_sets) + prop_value;
 4287:             for (;;)
 4288:               {
 4289:               if (c < *cp)
 4290:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
 4291:               if (c == *cp++)
 4292:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
 4293:               }
 4294:             }
 4295:           break;
 4296: 
 4297:           case PT_UCNC:
 4298:           for (i = 1; i <= min; i++)
 4299:             {
 4300:             if (eptr >= md->end_subject)
 4301:               {
 4302:               SCHECK_PARTIAL();
 4303:               RRETURN(MATCH_NOMATCH);
 4304:               }
 4305:             GETCHARINCTEST(c, eptr);
 4306:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 4307:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 4308:                  c >= 0xe000) == prop_fail_result)
 4309:               RRETURN(MATCH_NOMATCH);
 4310:             }
 4311:           break;
 4312: 
 4313:           /* This should not occur */
 4314: 
 4315:           default:
 4316:           RRETURN(PCRE_ERROR_INTERNAL);
 4317:           }
 4318:         }
 4319: 
 4320:       /* Match extended Unicode sequences. We will get here only if the
 4321:       support is in the binary; otherwise a compile-time error occurs. */
 4322: 
 4323:       else if (ctype == OP_EXTUNI)
 4324:         {
 4325:         for (i = 1; i <= min; i++)
 4326:           {
 4327:           if (eptr >= md->end_subject)
 4328:             {
 4329:             SCHECK_PARTIAL();
 4330:             RRETURN(MATCH_NOMATCH);
 4331:             }
 4332:           else
 4333:             {
 4334:             int lgb, rgb;
 4335:             GETCHARINCTEST(c, eptr);
 4336:             lgb = UCD_GRAPHBREAK(c);
 4337:            while (eptr < md->end_subject)
 4338:               {
 4339:               int len = 1;
 4340:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
 4341:               rgb = UCD_GRAPHBREAK(c);
 4342:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 4343:               lgb = rgb;
 4344:               eptr += len;
 4345:               }
 4346:             }
 4347:           CHECK_PARTIAL();
 4348:           }
 4349:         }
 4350: 
 4351:       else
 4352: #endif     /* SUPPORT_UCP */
 4353: 
 4354: /* Handle all other cases when the coding is UTF-8 */
 4355: 
 4356: #ifdef SUPPORT_UTF
 4357:       if (utf) switch(ctype)
 4358:         {
 4359:         case OP_ANY:
 4360:         for (i = 1; i <= min; i++)
 4361:           {
 4362:           if (eptr >= md->end_subject)
 4363:             {
 4364:             SCHECK_PARTIAL();
 4365:             RRETURN(MATCH_NOMATCH);
 4366:             }
 4367:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
 4368:           if (md->partial != 0 &&
 4369:               eptr + 1 >= md->end_subject &&
 4370:               NLBLOCK->nltype == NLTYPE_FIXED &&
 4371:               NLBLOCK->nllen == 2 &&
 4372:               RAWUCHAR(eptr) == NLBLOCK->nl[0])
 4373:             {
 4374:             md->hitend = TRUE;
 4375:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 4376:             }
 4377:           eptr++;
 4378:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 4379:           }
 4380:         break;
 4381: 
 4382:         case OP_ALLANY:
 4383:         for (i = 1; i <= min; i++)
 4384:           {
 4385:           if (eptr >= md->end_subject)
 4386:             {
 4387:             SCHECK_PARTIAL();
 4388:             RRETURN(MATCH_NOMATCH);
 4389:             }
 4390:           eptr++;
 4391:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 4392:           }
 4393:         break;
 4394: 
 4395:         case OP_ANYBYTE:
 4396:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
 4397:         eptr += min;
 4398:         break;
 4399: 
 4400:         case OP_ANYNL:
 4401:         for (i = 1; i <= min; i++)
 4402:           {
 4403:           if (eptr >= md->end_subject)
 4404:             {
 4405:             SCHECK_PARTIAL();
 4406:             RRETURN(MATCH_NOMATCH);
 4407:             }
 4408:           GETCHARINC(c, eptr);
 4409:           switch(c)
 4410:             {
 4411:             default: RRETURN(MATCH_NOMATCH);
 4412: 
 4413:             case CHAR_CR:
 4414:             if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
 4415:             break;
 4416: 
 4417:             case CHAR_LF:
 4418:             break;
 4419: 
 4420:             case CHAR_VT:
 4421:             case CHAR_FF:
 4422:             case CHAR_NEL:
 4423: #ifndef EBCDIC
 4424:             case 0x2028:
 4425:             case 0x2029:
 4426: #endif  /* Not EBCDIC */
 4427:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
 4428:             break;
 4429:             }
 4430:           }
 4431:         break;
 4432: 
 4433:         case OP_NOT_HSPACE:
 4434:         for (i = 1; i <= min; i++)
 4435:           {
 4436:           if (eptr >= md->end_subject)
 4437:             {
 4438:             SCHECK_PARTIAL();
 4439:             RRETURN(MATCH_NOMATCH);
 4440:             }
 4441:           GETCHARINC(c, eptr);
 4442:           switch(c)
 4443:             {
 4444:             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
 4445:             default: break;
 4446:             }
 4447:           }
 4448:         break;
 4449: 
 4450:         case OP_HSPACE:
 4451:         for (i = 1; i <= min; i++)
 4452:           {
 4453:           if (eptr >= md->end_subject)
 4454:             {
 4455:             SCHECK_PARTIAL();
 4456:             RRETURN(MATCH_NOMATCH);
 4457:             }
 4458:           GETCHARINC(c, eptr);
 4459:           switch(c)
 4460:             {
 4461:             HSPACE_CASES: break;  /* Byte and multibyte cases */
 4462:             default: RRETURN(MATCH_NOMATCH);
 4463:             }
 4464:           }
 4465:         break;
 4466: 
 4467:         case OP_NOT_VSPACE:
 4468:         for (i = 1; i <= min; i++)
 4469:           {
 4470:           if (eptr >= md->end_subject)
 4471:             {
 4472:             SCHECK_PARTIAL();
 4473:             RRETURN(MATCH_NOMATCH);
 4474:             }
 4475:           GETCHARINC(c, eptr);
 4476:           switch(c)
 4477:             {
 4478:             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
 4479:             default: break;
 4480:             }
 4481:           }
 4482:         break;
 4483: 
 4484:         case OP_VSPACE:
 4485:         for (i = 1; i <= min; i++)
 4486:           {
 4487:           if (eptr >= md->end_subject)
 4488:             {
 4489:             SCHECK_PARTIAL();
 4490:             RRETURN(MATCH_NOMATCH);
 4491:             }
 4492:           GETCHARINC(c, eptr);
 4493:           switch(c)
 4494:             {
 4495:             VSPACE_CASES: break;
 4496:             default: RRETURN(MATCH_NOMATCH);
 4497:             }
 4498:           }
 4499:         break;
 4500: 
 4501:         case OP_NOT_DIGIT:
 4502:         for (i = 1; i <= min; i++)
 4503:           {
 4504:           if (eptr >= md->end_subject)
 4505:             {
 4506:             SCHECK_PARTIAL();
 4507:             RRETURN(MATCH_NOMATCH);
 4508:             }
 4509:           GETCHARINC(c, eptr);
 4510:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
 4511:             RRETURN(MATCH_NOMATCH);
 4512:           }
 4513:         break;
 4514: 
 4515:         case OP_DIGIT:
 4516:         for (i = 1; i <= min; i++)
 4517:           {
 4518:           pcre_uint32 cc;
 4519:           if (eptr >= md->end_subject)
 4520:             {
 4521:             SCHECK_PARTIAL();
 4522:             RRETURN(MATCH_NOMATCH);
 4523:             }
 4524:           cc = RAWUCHAR(eptr);
 4525:           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
 4526:             RRETURN(MATCH_NOMATCH);
 4527:           eptr++;
 4528:           /* No need to skip more bytes - we know it's a 1-byte character */
 4529:           }
 4530:         break;
 4531: 
 4532:         case OP_NOT_WHITESPACE:
 4533:         for (i = 1; i <= min; i++)
 4534:           {
 4535:           pcre_uint32 cc;
 4536:           if (eptr >= md->end_subject)
 4537:             {
 4538:             SCHECK_PARTIAL();
 4539:             RRETURN(MATCH_NOMATCH);
 4540:             }
 4541:           cc = RAWUCHAR(eptr);
 4542:           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
 4543:             RRETURN(MATCH_NOMATCH);
 4544:           eptr++;
 4545:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 4546:           }
 4547:         break;
 4548: 
 4549:         case OP_WHITESPACE:
 4550:         for (i = 1; i <= min; i++)
 4551:           {
 4552:           pcre_uint32 cc;
 4553:           if (eptr >= md->end_subject)
 4554:             {
 4555:             SCHECK_PARTIAL();
 4556:             RRETURN(MATCH_NOMATCH);
 4557:             }
 4558:           cc = RAWUCHAR(eptr);
 4559:           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
 4560:             RRETURN(MATCH_NOMATCH);
 4561:           eptr++;
 4562:           /* No need to skip more bytes - we know it's a 1-byte character */
 4563:           }
 4564:         break;
 4565: 
 4566:         case OP_NOT_WORDCHAR:
 4567:         for (i = 1; i <= min; i++)
 4568:           {
 4569:           pcre_uint32 cc;
 4570:           if (eptr >= md->end_subject)
 4571:             {
 4572:             SCHECK_PARTIAL();
 4573:             RRETURN(MATCH_NOMATCH);
 4574:             }
 4575:           cc = RAWUCHAR(eptr);
 4576:           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
 4577:             RRETURN(MATCH_NOMATCH);
 4578:           eptr++;
 4579:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 4580:           }
 4581:         break;
 4582: 
 4583:         case OP_WORDCHAR:
 4584:         for (i = 1; i <= min; i++)
 4585:           {
 4586:           pcre_uint32 cc;
 4587:           if (eptr >= md->end_subject)
 4588:             {
 4589:             SCHECK_PARTIAL();
 4590:             RRETURN(MATCH_NOMATCH);
 4591:             }
 4592:           cc = RAWUCHAR(eptr);
 4593:           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
 4594:             RRETURN(MATCH_NOMATCH);
 4595:           eptr++;
 4596:           /* No need to skip more bytes - we know it's a 1-byte character */
 4597:           }
 4598:         break;
 4599: 
 4600:         default:
 4601:         RRETURN(PCRE_ERROR_INTERNAL);
 4602:         }  /* End switch(ctype) */
 4603: 
 4604:       else
 4605: #endif     /* SUPPORT_UTF */
 4606: 
 4607:       /* Code for the non-UTF-8 case for minimum matching of operators other
 4608:       than OP_PROP and OP_NOTPROP. */
 4609: 
 4610:       switch(ctype)
 4611:         {
 4612:         case OP_ANY:
 4613:         for (i = 1; i <= min; i++)
 4614:           {
 4615:           if (eptr >= md->end_subject)
 4616:             {
 4617:             SCHECK_PARTIAL();
 4618:             RRETURN(MATCH_NOMATCH);
 4619:             }
 4620:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
 4621:           if (md->partial != 0 &&
 4622:               eptr + 1 >= md->end_subject &&
 4623:               NLBLOCK->nltype == NLTYPE_FIXED &&
 4624:               NLBLOCK->nllen == 2 &&
 4625:               *eptr == NLBLOCK->nl[0])
 4626:             {
 4627:             md->hitend = TRUE;
 4628:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 4629:             }
 4630:           eptr++;
 4631:           }
 4632:         break;
 4633: 
 4634:         case OP_ALLANY:
 4635:         if (eptr > md->end_subject - min)
 4636:           {
 4637:           SCHECK_PARTIAL();
 4638:           RRETURN(MATCH_NOMATCH);
 4639:           }
 4640:         eptr += min;
 4641:         break;
 4642: 
 4643:         case OP_ANYBYTE:
 4644:         if (eptr > md->end_subject - min)
 4645:           {
 4646:           SCHECK_PARTIAL();
 4647:           RRETURN(MATCH_NOMATCH);
 4648:           }
 4649:         eptr += min;
 4650:         break;
 4651: 
 4652:         case OP_ANYNL:
 4653:         for (i = 1; i <= min; i++)
 4654:           {
 4655:           if (eptr >= md->end_subject)
 4656:             {
 4657:             SCHECK_PARTIAL();
 4658:             RRETURN(MATCH_NOMATCH);
 4659:             }
 4660:           switch(*eptr++)
 4661:             {
 4662:             default: RRETURN(MATCH_NOMATCH);
 4663: 
 4664:             case CHAR_CR:
 4665:             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
 4666:             break;
 4667: 
 4668:             case CHAR_LF:
 4669:             break;
 4670: 
 4671:             case CHAR_VT:
 4672:             case CHAR_FF:
 4673:             case CHAR_NEL:
 4674: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 4675:             case 0x2028:
 4676:             case 0x2029:
 4677: #endif
 4678:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
 4679:             break;
 4680:             }
 4681:           }
 4682:         break;
 4683: 
 4684:         case OP_NOT_HSPACE:
 4685:         for (i = 1; i <= min; i++)
 4686:           {
 4687:           if (eptr >= md->end_subject)
 4688:             {
 4689:             SCHECK_PARTIAL();
 4690:             RRETURN(MATCH_NOMATCH);
 4691:             }
 4692:           switch(*eptr++)
 4693:             {
 4694:             default: break;
 4695:             HSPACE_BYTE_CASES:
 4696: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 4697:             HSPACE_MULTIBYTE_CASES:
 4698: #endif
 4699:             RRETURN(MATCH_NOMATCH);
 4700:             }
 4701:           }
 4702:         break;
 4703: 
 4704:         case OP_HSPACE:
 4705:         for (i = 1; i <= min; i++)
 4706:           {
 4707:           if (eptr >= md->end_subject)
 4708:             {
 4709:             SCHECK_PARTIAL();
 4710:             RRETURN(MATCH_NOMATCH);
 4711:             }
 4712:           switch(*eptr++)
 4713:             {
 4714:             default: RRETURN(MATCH_NOMATCH);
 4715:             HSPACE_BYTE_CASES:
 4716: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 4717:             HSPACE_MULTIBYTE_CASES:
 4718: #endif
 4719:             break;
 4720:             }
 4721:           }
 4722:         break;
 4723: 
 4724:         case OP_NOT_VSPACE:
 4725:         for (i = 1; i <= min; i++)
 4726:           {
 4727:           if (eptr >= md->end_subject)
 4728:             {
 4729:             SCHECK_PARTIAL();
 4730:             RRETURN(MATCH_NOMATCH);
 4731:             }
 4732:           switch(*eptr++)
 4733:             {
 4734:             VSPACE_BYTE_CASES:
 4735: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 4736:             VSPACE_MULTIBYTE_CASES:
 4737: #endif
 4738:             RRETURN(MATCH_NOMATCH);
 4739:             default: break;
 4740:             }
 4741:           }
 4742:         break;
 4743: 
 4744:         case OP_VSPACE:
 4745:         for (i = 1; i <= min; i++)
 4746:           {
 4747:           if (eptr >= md->end_subject)
 4748:             {
 4749:             SCHECK_PARTIAL();
 4750:             RRETURN(MATCH_NOMATCH);
 4751:             }
 4752:           switch(*eptr++)
 4753:             {
 4754:             default: RRETURN(MATCH_NOMATCH);
 4755:             VSPACE_BYTE_CASES:
 4756: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 4757:             VSPACE_MULTIBYTE_CASES:
 4758: #endif
 4759:             break;
 4760:             }
 4761:           }
 4762:         break;
 4763: 
 4764:         case OP_NOT_DIGIT:
 4765:         for (i = 1; i <= min; i++)
 4766:           {
 4767:           if (eptr >= md->end_subject)
 4768:             {
 4769:             SCHECK_PARTIAL();
 4770:             RRETURN(MATCH_NOMATCH);
 4771:             }
 4772:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
 4773:             RRETURN(MATCH_NOMATCH);
 4774:           eptr++;
 4775:           }
 4776:         break;
 4777: 
 4778:         case OP_DIGIT:
 4779:         for (i = 1; i <= min; i++)
 4780:           {
 4781:           if (eptr >= md->end_subject)
 4782:             {
 4783:             SCHECK_PARTIAL();
 4784:             RRETURN(MATCH_NOMATCH);
 4785:             }
 4786:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
 4787:             RRETURN(MATCH_NOMATCH);
 4788:           eptr++;
 4789:           }
 4790:         break;
 4791: 
 4792:         case OP_NOT_WHITESPACE:
 4793:         for (i = 1; i <= min; i++)
 4794:           {
 4795:           if (eptr >= md->end_subject)
 4796:             {
 4797:             SCHECK_PARTIAL();
 4798:             RRETURN(MATCH_NOMATCH);
 4799:             }
 4800:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
 4801:             RRETURN(MATCH_NOMATCH);
 4802:           eptr++;
 4803:           }
 4804:         break;
 4805: 
 4806:         case OP_WHITESPACE:
 4807:         for (i = 1; i <= min; i++)
 4808:           {
 4809:           if (eptr >= md->end_subject)
 4810:             {
 4811:             SCHECK_PARTIAL();
 4812:             RRETURN(MATCH_NOMATCH);
 4813:             }
 4814:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
 4815:             RRETURN(MATCH_NOMATCH);
 4816:           eptr++;
 4817:           }
 4818:         break;
 4819: 
 4820:         case OP_NOT_WORDCHAR:
 4821:         for (i = 1; i <= min; i++)
 4822:           {
 4823:           if (eptr >= md->end_subject)
 4824:             {
 4825:             SCHECK_PARTIAL();
 4826:             RRETURN(MATCH_NOMATCH);
 4827:             }
 4828:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
 4829:             RRETURN(MATCH_NOMATCH);
 4830:           eptr++;
 4831:           }
 4832:         break;
 4833: 
 4834:         case OP_WORDCHAR:
 4835:         for (i = 1; i <= min; i++)
 4836:           {
 4837:           if (eptr >= md->end_subject)
 4838:             {
 4839:             SCHECK_PARTIAL();
 4840:             RRETURN(MATCH_NOMATCH);
 4841:             }
 4842:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
 4843:             RRETURN(MATCH_NOMATCH);
 4844:           eptr++;
 4845:           }
 4846:         break;
 4847: 
 4848:         default:
 4849:         RRETURN(PCRE_ERROR_INTERNAL);
 4850:         }
 4851:       }
 4852: 
 4853:     /* If min = max, continue at the same level without recursing */
 4854: 
 4855:     if (min == max) continue;
 4856: 
 4857:     /* If minimizing, we have to test the rest of the pattern before each
 4858:     subsequent match. Again, separate the UTF-8 case for speed, and also
 4859:     separate the UCP cases. */
 4860: 
 4861:     if (minimize)
 4862:       {
 4863: #ifdef SUPPORT_UCP
 4864:       if (prop_type >= 0)
 4865:         {
 4866:         switch(prop_type)
 4867:           {
 4868:           case PT_ANY:
 4869:           for (fi = min;; fi++)
 4870:             {
 4871:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
 4872:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4873:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4874:             if (eptr >= md->end_subject)
 4875:               {
 4876:               SCHECK_PARTIAL();
 4877:               RRETURN(MATCH_NOMATCH);
 4878:               }
 4879:             GETCHARINCTEST(c, eptr);
 4880:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 4881:             }
 4882:           /* Control never gets here */
 4883: 
 4884:           case PT_LAMP:
 4885:           for (fi = min;; fi++)
 4886:             {
 4887:             int chartype;
 4888:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
 4889:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4890:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4891:             if (eptr >= md->end_subject)
 4892:               {
 4893:               SCHECK_PARTIAL();
 4894:               RRETURN(MATCH_NOMATCH);
 4895:               }
 4896:             GETCHARINCTEST(c, eptr);
 4897:             chartype = UCD_CHARTYPE(c);
 4898:             if ((chartype == ucp_Lu ||
 4899:                  chartype == ucp_Ll ||
 4900:                  chartype == ucp_Lt) == prop_fail_result)
 4901:               RRETURN(MATCH_NOMATCH);
 4902:             }
 4903:           /* Control never gets here */
 4904: 
 4905:           case PT_GC:
 4906:           for (fi = min;; fi++)
 4907:             {
 4908:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
 4909:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4910:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4911:             if (eptr >= md->end_subject)
 4912:               {
 4913:               SCHECK_PARTIAL();
 4914:               RRETURN(MATCH_NOMATCH);
 4915:               }
 4916:             GETCHARINCTEST(c, eptr);
 4917:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
 4918:               RRETURN(MATCH_NOMATCH);
 4919:             }
 4920:           /* Control never gets here */
 4921: 
 4922:           case PT_PC:
 4923:           for (fi = min;; fi++)
 4924:             {
 4925:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
 4926:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4927:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4928:             if (eptr >= md->end_subject)
 4929:               {
 4930:               SCHECK_PARTIAL();
 4931:               RRETURN(MATCH_NOMATCH);
 4932:               }
 4933:             GETCHARINCTEST(c, eptr);
 4934:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
 4935:               RRETURN(MATCH_NOMATCH);
 4936:             }
 4937:           /* Control never gets here */
 4938: 
 4939:           case PT_SC:
 4940:           for (fi = min;; fi++)
 4941:             {
 4942:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
 4943:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4944:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4945:             if (eptr >= md->end_subject)
 4946:               {
 4947:               SCHECK_PARTIAL();
 4948:               RRETURN(MATCH_NOMATCH);
 4949:               }
 4950:             GETCHARINCTEST(c, eptr);
 4951:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
 4952:               RRETURN(MATCH_NOMATCH);
 4953:             }
 4954:           /* Control never gets here */
 4955: 
 4956:           case PT_ALNUM:
 4957:           for (fi = min;; fi++)
 4958:             {
 4959:             int category;
 4960:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
 4961:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4962:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4963:             if (eptr >= md->end_subject)
 4964:               {
 4965:               SCHECK_PARTIAL();
 4966:               RRETURN(MATCH_NOMATCH);
 4967:               }
 4968:             GETCHARINCTEST(c, eptr);
 4969:             category = UCD_CATEGORY(c);
 4970:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
 4971:               RRETURN(MATCH_NOMATCH);
 4972:             }
 4973:           /* Control never gets here */
 4974: 
 4975:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 4976:           which means that Perl space and POSIX space are now identical. PCRE
 4977:           was changed at release 8.34. */
 4978: 
 4979:           case PT_SPACE:    /* Perl space */
 4980:           case PT_PXSPACE:  /* POSIX space */
 4981:           for (fi = min;; fi++)
 4982:             {
 4983:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
 4984:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 4985:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 4986:             if (eptr >= md->end_subject)
 4987:               {
 4988:               SCHECK_PARTIAL();
 4989:               RRETURN(MATCH_NOMATCH);
 4990:               }
 4991:             GETCHARINCTEST(c, eptr);
 4992:             switch(c)
 4993:               {
 4994:               HSPACE_CASES:
 4995:               VSPACE_CASES:
 4996:               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
 4997:               break;
 4998: 
 4999:               default:
 5000:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
 5001:                 RRETURN(MATCH_NOMATCH);
 5002:               break;
 5003:               }
 5004:             }
 5005:           /* Control never gets here */
 5006: 
 5007:           case PT_WORD:
 5008:           for (fi = min;; fi++)
 5009:             {
 5010:             int category;
 5011:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
 5012:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5013:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 5014:             if (eptr >= md->end_subject)
 5015:               {
 5016:               SCHECK_PARTIAL();
 5017:               RRETURN(MATCH_NOMATCH);
 5018:               }
 5019:             GETCHARINCTEST(c, eptr);
 5020:             category = UCD_CATEGORY(c);
 5021:             if ((category == ucp_L ||
 5022:                  category == ucp_N ||
 5023:                  c == CHAR_UNDERSCORE)
 5024:                    == prop_fail_result)
 5025:               RRETURN(MATCH_NOMATCH);
 5026:             }
 5027:           /* Control never gets here */
 5028: 
 5029:           case PT_CLIST:
 5030:           for (fi = min;; fi++)
 5031:             {
 5032:             const pcre_uint32 *cp;
 5033:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
 5034:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5035:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 5036:             if (eptr >= md->end_subject)
 5037:               {
 5038:               SCHECK_PARTIAL();
 5039:               RRETURN(MATCH_NOMATCH);
 5040:               }
 5041:             GETCHARINCTEST(c, eptr);
 5042:             cp = PRIV(ucd_caseless_sets) + prop_value;
 5043:             for (;;)
 5044:               {
 5045:               if (c < *cp)
 5046:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
 5047:               if (c == *cp++)
 5048:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
 5049:               }
 5050:             }
 5051:           /* Control never gets here */
 5052: 
 5053:           case PT_UCNC:
 5054:           for (fi = min;; fi++)
 5055:             {
 5056:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
 5057:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5058:             if (fi >= max) RRETURN(MATCH_NOMATCH);
 5059:             if (eptr >= md->end_subject)
 5060:               {
 5061:               SCHECK_PARTIAL();
 5062:               RRETURN(MATCH_NOMATCH);
 5063:               }
 5064:             GETCHARINCTEST(c, eptr);
 5065:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 5066:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 5067:                  c >= 0xe000) == prop_fail_result)
 5068:               RRETURN(MATCH_NOMATCH);
 5069:             }
 5070:           /* Control never gets here */
 5071: 
 5072:           /* This should never occur */
 5073:           default:
 5074:           RRETURN(PCRE_ERROR_INTERNAL);
 5075:           }
 5076:         }
 5077: 
 5078:       /* Match extended Unicode sequences. We will get here only if the
 5079:       support is in the binary; otherwise a compile-time error occurs. */
 5080: 
 5081:       else if (ctype == OP_EXTUNI)
 5082:         {
 5083:         for (fi = min;; fi++)
 5084:           {
 5085:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
 5086:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5087:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 5088:           if (eptr >= md->end_subject)
 5089:             {
 5090:             SCHECK_PARTIAL();
 5091:             RRETURN(MATCH_NOMATCH);
 5092:             }
 5093:           else
 5094:             {
 5095:             int lgb, rgb;
 5096:             GETCHARINCTEST(c, eptr);
 5097:             lgb = UCD_GRAPHBREAK(c);
 5098:             while (eptr < md->end_subject)
 5099:               {
 5100:               int len = 1;
 5101:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
 5102:               rgb = UCD_GRAPHBREAK(c);
 5103:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 5104:               lgb = rgb;
 5105:               eptr += len;
 5106:               }
 5107:             }
 5108:           CHECK_PARTIAL();
 5109:           }
 5110:         }
 5111:       else
 5112: #endif     /* SUPPORT_UCP */
 5113: 
 5114: #ifdef SUPPORT_UTF
 5115:       if (utf)
 5116:         {
 5117:         for (fi = min;; fi++)
 5118:           {
 5119:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
 5120:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5121:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 5122:           if (eptr >= md->end_subject)
 5123:             {
 5124:             SCHECK_PARTIAL();
 5125:             RRETURN(MATCH_NOMATCH);
 5126:             }
 5127:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
 5128:             RRETURN(MATCH_NOMATCH);
 5129:           GETCHARINC(c, eptr);
 5130:           switch(ctype)
 5131:             {
 5132:             case OP_ANY:               /* This is the non-NL case */
 5133:             if (md->partial != 0 &&    /* Take care with CRLF partial */
 5134:                 eptr >= md->end_subject &&
 5135:                 NLBLOCK->nltype == NLTYPE_FIXED &&
 5136:                 NLBLOCK->nllen == 2 &&
 5137:                 c == NLBLOCK->nl[0])
 5138:               {
 5139:               md->hitend = TRUE;
 5140:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 5141:               }
 5142:             break;
 5143: 
 5144:             case OP_ALLANY:
 5145:             case OP_ANYBYTE:
 5146:             break;
 5147: 
 5148:             case OP_ANYNL:
 5149:             switch(c)
 5150:               {
 5151:               default: RRETURN(MATCH_NOMATCH);
 5152:               case CHAR_CR:
 5153:               if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
 5154:               break;
 5155: 
 5156:               case CHAR_LF:
 5157:               break;
 5158: 
 5159:               case CHAR_VT:
 5160:               case CHAR_FF:
 5161:               case CHAR_NEL:
 5162: #ifndef EBCDIC
 5163:               case 0x2028:
 5164:               case 0x2029:
 5165: #endif  /* Not EBCDIC */
 5166:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
 5167:               break;
 5168:               }
 5169:             break;
 5170: 
 5171:             case OP_NOT_HSPACE:
 5172:             switch(c)
 5173:               {
 5174:               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
 5175:               default: break;
 5176:               }
 5177:             break;
 5178: 
 5179:             case OP_HSPACE:
 5180:             switch(c)
 5181:               {
 5182:               HSPACE_CASES: break;
 5183:               default: RRETURN(MATCH_NOMATCH);
 5184:               }
 5185:             break;
 5186: 
 5187:             case OP_NOT_VSPACE:
 5188:             switch(c)
 5189:               {
 5190:               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
 5191:               default: break;
 5192:               }
 5193:             break;
 5194: 
 5195:             case OP_VSPACE:
 5196:             switch(c)
 5197:               {
 5198:               VSPACE_CASES: break;
 5199:               default: RRETURN(MATCH_NOMATCH);
 5200:               }
 5201:             break;
 5202: 
 5203:             case OP_NOT_DIGIT:
 5204:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
 5205:               RRETURN(MATCH_NOMATCH);
 5206:             break;
 5207: 
 5208:             case OP_DIGIT:
 5209:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
 5210:               RRETURN(MATCH_NOMATCH);
 5211:             break;
 5212: 
 5213:             case OP_NOT_WHITESPACE:
 5214:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
 5215:               RRETURN(MATCH_NOMATCH);
 5216:             break;
 5217: 
 5218:             case OP_WHITESPACE:
 5219:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
 5220:               RRETURN(MATCH_NOMATCH);
 5221:             break;
 5222: 
 5223:             case OP_NOT_WORDCHAR:
 5224:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
 5225:               RRETURN(MATCH_NOMATCH);
 5226:             break;
 5227: 
 5228:             case OP_WORDCHAR:
 5229:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
 5230:               RRETURN(MATCH_NOMATCH);
 5231:             break;
 5232: 
 5233:             default:
 5234:             RRETURN(PCRE_ERROR_INTERNAL);
 5235:             }
 5236:           }
 5237:         }
 5238:       else
 5239: #endif
 5240:       /* Not UTF mode */
 5241:         {
 5242:         for (fi = min;; fi++)
 5243:           {
 5244:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
 5245:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5246:           if (fi >= max) RRETURN(MATCH_NOMATCH);
 5247:           if (eptr >= md->end_subject)
 5248:             {
 5249:             SCHECK_PARTIAL();
 5250:             RRETURN(MATCH_NOMATCH);
 5251:             }
 5252:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
 5253:             RRETURN(MATCH_NOMATCH);
 5254:           c = *eptr++;
 5255:           switch(ctype)
 5256:             {
 5257:             case OP_ANY:               /* This is the non-NL case */
 5258:             if (md->partial != 0 &&    /* Take care with CRLF partial */
 5259:                 eptr >= md->end_subject &&
 5260:                 NLBLOCK->nltype == NLTYPE_FIXED &&
 5261:                 NLBLOCK->nllen == 2 &&
 5262:                 c == NLBLOCK->nl[0])
 5263:               {
 5264:               md->hitend = TRUE;
 5265:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 5266:               }
 5267:             break;
 5268: 
 5269:             case OP_ALLANY:
 5270:             case OP_ANYBYTE:
 5271:             break;
 5272: 
 5273:             case OP_ANYNL:
 5274:             switch(c)
 5275:               {
 5276:               default: RRETURN(MATCH_NOMATCH);
 5277:               case CHAR_CR:
 5278:               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
 5279:               break;
 5280: 
 5281:               case CHAR_LF:
 5282:               break;
 5283: 
 5284:               case CHAR_VT:
 5285:               case CHAR_FF:
 5286:               case CHAR_NEL:
 5287: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 5288:               case 0x2028:
 5289:               case 0x2029:
 5290: #endif
 5291:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
 5292:               break;
 5293:               }
 5294:             break;
 5295: 
 5296:             case OP_NOT_HSPACE:
 5297:             switch(c)
 5298:               {
 5299:               default: break;
 5300:               HSPACE_BYTE_CASES:
 5301: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 5302:               HSPACE_MULTIBYTE_CASES:
 5303: #endif
 5304:               RRETURN(MATCH_NOMATCH);
 5305:               }
 5306:             break;
 5307: 
 5308:             case OP_HSPACE:
 5309:             switch(c)
 5310:               {
 5311:               default: RRETURN(MATCH_NOMATCH);
 5312:               HSPACE_BYTE_CASES:
 5313: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 5314:               HSPACE_MULTIBYTE_CASES:
 5315: #endif
 5316:               break;
 5317:               }
 5318:             break;
 5319: 
 5320:             case OP_NOT_VSPACE:
 5321:             switch(c)
 5322:               {
 5323:               default: break;
 5324:               VSPACE_BYTE_CASES:
 5325: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 5326:               VSPACE_MULTIBYTE_CASES:
 5327: #endif
 5328:               RRETURN(MATCH_NOMATCH);
 5329:               }
 5330:             break;
 5331: 
 5332:             case OP_VSPACE:
 5333:             switch(c)
 5334:               {
 5335:               default: RRETURN(MATCH_NOMATCH);
 5336:               VSPACE_BYTE_CASES:
 5337: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 5338:               VSPACE_MULTIBYTE_CASES:
 5339: #endif
 5340:               break;
 5341:               }
 5342:             break;
 5343: 
 5344:             case OP_NOT_DIGIT:
 5345:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
 5346:             break;
 5347: 
 5348:             case OP_DIGIT:
 5349:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
 5350:             break;
 5351: 
 5352:             case OP_NOT_WHITESPACE:
 5353:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
 5354:             break;
 5355: 
 5356:             case OP_WHITESPACE:
 5357:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
 5358:             break;
 5359: 
 5360:             case OP_NOT_WORDCHAR:
 5361:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
 5362:             break;
 5363: 
 5364:             case OP_WORDCHAR:
 5365:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
 5366:             break;
 5367: 
 5368:             default:
 5369:             RRETURN(PCRE_ERROR_INTERNAL);
 5370:             }
 5371:           }
 5372:         }
 5373:       /* Control never gets here */
 5374:       }
 5375: 
 5376:     /* If maximizing, it is worth using inline code for speed, doing the type
 5377:     test once at the start (i.e. keep it out of the loop). Again, keep the
 5378:     UTF-8 and UCP stuff separate. */
 5379: 
 5380:     else
 5381:       {
 5382:       pp = eptr;  /* Remember where we started */
 5383: 
 5384: #ifdef SUPPORT_UCP
 5385:       if (prop_type >= 0)
 5386:         {
 5387:         switch(prop_type)
 5388:           {
 5389:           case PT_ANY:
 5390:           for (i = min; i < max; i++)
 5391:             {
 5392:             int len = 1;
 5393:             if (eptr >= md->end_subject)
 5394:               {
 5395:               SCHECK_PARTIAL();
 5396:               break;
 5397:               }
 5398:             GETCHARLENTEST(c, eptr, len);
 5399:             if (prop_fail_result) break;
 5400:             eptr+= len;
 5401:             }
 5402:           break;
 5403: 
 5404:           case PT_LAMP:
 5405:           for (i = min; i < max; i++)
 5406:             {
 5407:             int chartype;
 5408:             int len = 1;
 5409:             if (eptr >= md->end_subject)
 5410:               {
 5411:               SCHECK_PARTIAL();
 5412:               break;
 5413:               }
 5414:             GETCHARLENTEST(c, eptr, len);
 5415:             chartype = UCD_CHARTYPE(c);
 5416:             if ((chartype == ucp_Lu ||
 5417:                  chartype == ucp_Ll ||
 5418:                  chartype == ucp_Lt) == prop_fail_result)
 5419:               break;
 5420:             eptr+= len;
 5421:             }
 5422:           break;
 5423: 
 5424:           case PT_GC:
 5425:           for (i = min; i < max; i++)
 5426:             {
 5427:             int len = 1;
 5428:             if (eptr >= md->end_subject)
 5429:               {
 5430:               SCHECK_PARTIAL();
 5431:               break;
 5432:               }
 5433:             GETCHARLENTEST(c, eptr, len);
 5434:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
 5435:             eptr+= len;
 5436:             }
 5437:           break;
 5438: 
 5439:           case PT_PC:
 5440:           for (i = min; i < max; i++)
 5441:             {
 5442:             int len = 1;
 5443:             if (eptr >= md->end_subject)
 5444:               {
 5445:               SCHECK_PARTIAL();
 5446:               break;
 5447:               }
 5448:             GETCHARLENTEST(c, eptr, len);
 5449:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
 5450:             eptr+= len;
 5451:             }
 5452:           break;
 5453: 
 5454:           case PT_SC:
 5455:           for (i = min; i < max; i++)
 5456:             {
 5457:             int len = 1;
 5458:             if (eptr >= md->end_subject)
 5459:               {
 5460:               SCHECK_PARTIAL();
 5461:               break;
 5462:               }
 5463:             GETCHARLENTEST(c, eptr, len);
 5464:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
 5465:             eptr+= len;
 5466:             }
 5467:           break;
 5468: 
 5469:           case PT_ALNUM:
 5470:           for (i = min; i < max; i++)
 5471:             {
 5472:             int category;
 5473:             int len = 1;
 5474:             if (eptr >= md->end_subject)
 5475:               {
 5476:               SCHECK_PARTIAL();
 5477:               break;
 5478:               }
 5479:             GETCHARLENTEST(c, eptr, len);
 5480:             category = UCD_CATEGORY(c);
 5481:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
 5482:               break;
 5483:             eptr+= len;
 5484:             }
 5485:           break;
 5486: 
 5487:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
 5488:           which means that Perl space and POSIX space are now identical. PCRE
 5489:           was changed at release 8.34. */
 5490: 
 5491:           case PT_SPACE:    /* Perl space */
 5492:           case PT_PXSPACE:  /* POSIX space */
 5493:           for (i = min; i < max; i++)
 5494:             {
 5495:             int len = 1;
 5496:             if (eptr >= md->end_subject)
 5497:               {
 5498:               SCHECK_PARTIAL();
 5499:               break;
 5500:               }
 5501:             GETCHARLENTEST(c, eptr, len);
 5502:             switch(c)
 5503:               {
 5504:               HSPACE_CASES:
 5505:               VSPACE_CASES:
 5506:               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
 5507:               break;
 5508: 
 5509:               default:
 5510:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
 5511:                 goto ENDLOOP99;   /* Break the loop */
 5512:               break;
 5513:               }
 5514:             eptr+= len;
 5515:             }
 5516:           ENDLOOP99:
 5517:           break;
 5518: 
 5519:           case PT_WORD:
 5520:           for (i = min; i < max; i++)
 5521:             {
 5522:             int category;
 5523:             int len = 1;
 5524:             if (eptr >= md->end_subject)
 5525:               {
 5526:               SCHECK_PARTIAL();
 5527:               break;
 5528:               }
 5529:             GETCHARLENTEST(c, eptr, len);
 5530:             category = UCD_CATEGORY(c);
 5531:             if ((category == ucp_L || category == ucp_N ||
 5532:                  c == CHAR_UNDERSCORE) == prop_fail_result)
 5533:               break;
 5534:             eptr+= len;
 5535:             }
 5536:           break;
 5537: 
 5538:           case PT_CLIST:
 5539:           for (i = min; i < max; i++)
 5540:             {
 5541:             const pcre_uint32 *cp;
 5542:             int len = 1;
 5543:             if (eptr >= md->end_subject)
 5544:               {
 5545:               SCHECK_PARTIAL();
 5546:               break;
 5547:               }
 5548:             GETCHARLENTEST(c, eptr, len);
 5549:             cp = PRIV(ucd_caseless_sets) + prop_value;
 5550:             for (;;)
 5551:               {
 5552:               if (c < *cp)
 5553:                 { if (prop_fail_result) break; else goto GOT_MAX; }
 5554:               if (c == *cp++)
 5555:                 { if (prop_fail_result) goto GOT_MAX; else break; }
 5556:               }
 5557:             eptr += len;
 5558:             }
 5559:           GOT_MAX:
 5560:           break;
 5561: 
 5562:           case PT_UCNC:
 5563:           for (i = min; i < max; i++)
 5564:             {
 5565:             int len = 1;
 5566:             if (eptr >= md->end_subject)
 5567:               {
 5568:               SCHECK_PARTIAL();
 5569:               break;
 5570:               }
 5571:             GETCHARLENTEST(c, eptr, len);
 5572:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
 5573:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
 5574:                  c >= 0xe000) == prop_fail_result)
 5575:               break;
 5576:             eptr += len;
 5577:             }
 5578:           break;
 5579: 
 5580:           default:
 5581:           RRETURN(PCRE_ERROR_INTERNAL);
 5582:           }
 5583: 
 5584:         /* eptr is now past the end of the maximum run */
 5585: 
 5586:         if (possessive) continue;    /* No backtracking */
 5587:         for(;;)
 5588:           {
 5589:           if (eptr == pp) goto TAIL_RECURSE;
 5590:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
 5591:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5592:           eptr--;
 5593:           if (utf) BACKCHAR(eptr);
 5594:           }
 5595:         }
 5596: 
 5597:       /* Match extended Unicode grapheme clusters. We will get here only if the
 5598:       support is in the binary; otherwise a compile-time error occurs. */
 5599: 
 5600:       else if (ctype == OP_EXTUNI)
 5601:         {
 5602:         for (i = min; i < max; i++)
 5603:           {
 5604:           if (eptr >= md->end_subject)
 5605:             {
 5606:             SCHECK_PARTIAL();
 5607:             break;
 5608:             }
 5609:           else
 5610:             {
 5611:             int lgb, rgb;
 5612:             GETCHARINCTEST(c, eptr);
 5613:             lgb = UCD_GRAPHBREAK(c);
 5614:             while (eptr < md->end_subject)
 5615:               {
 5616:               int len = 1;
 5617:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
 5618:               rgb = UCD_GRAPHBREAK(c);
 5619:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 5620:               lgb = rgb;
 5621:               eptr += len;
 5622:               }
 5623:             }
 5624:           CHECK_PARTIAL();
 5625:           }
 5626: 
 5627:         /* eptr is now past the end of the maximum run */
 5628: 
 5629:         if (possessive) continue;    /* No backtracking */
 5630: 
 5631:         for(;;)
 5632:           {
 5633:           int lgb, rgb;
 5634:           PCRE_PUCHAR fptr;
 5635: 
 5636:           if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
 5637:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
 5638:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5639: 
 5640:           /* Backtracking over an extended grapheme cluster involves inspecting
 5641:           the previous two characters (if present) to see if a break is
 5642:           permitted between them. */
 5643: 
 5644:           eptr--;
 5645:           if (!utf) c = *eptr; else
 5646:             {
 5647:             BACKCHAR(eptr);
 5648:             GETCHAR(c, eptr);
 5649:             }
 5650:           rgb = UCD_GRAPHBREAK(c);
 5651: 
 5652:           for (;;)
 5653:             {
 5654:             if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
 5655:             fptr = eptr - 1;
 5656:             if (!utf) c = *fptr; else
 5657:               {
 5658:               BACKCHAR(fptr);
 5659:               GETCHAR(c, fptr);
 5660:               }
 5661:             lgb = UCD_GRAPHBREAK(c);
 5662:             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
 5663:             eptr = fptr;
 5664:             rgb = lgb;
 5665:             }
 5666:           }
 5667:         }
 5668: 
 5669:       else
 5670: #endif   /* SUPPORT_UCP */
 5671: 
 5672: #ifdef SUPPORT_UTF
 5673:       if (utf)
 5674:         {
 5675:         switch(ctype)
 5676:           {
 5677:           case OP_ANY:
 5678:           if (max < INT_MAX)
 5679:             {
 5680:             for (i = min; i < max; i++)
 5681:               {
 5682:               if (eptr >= md->end_subject)
 5683:                 {
 5684:                 SCHECK_PARTIAL();
 5685:                 break;
 5686:                 }
 5687:               if (IS_NEWLINE(eptr)) break;
 5688:               if (md->partial != 0 &&    /* Take care with CRLF partial */
 5689:                   eptr + 1 >= md->end_subject &&
 5690:                   NLBLOCK->nltype == NLTYPE_FIXED &&
 5691:                   NLBLOCK->nllen == 2 &&
 5692:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
 5693:                 {
 5694:                 md->hitend = TRUE;
 5695:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 5696:                 }
 5697:               eptr++;
 5698:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 5699:               }
 5700:             }
 5701: 
 5702:           /* Handle unlimited UTF-8 repeat */
 5703: 
 5704:           else
 5705:             {
 5706:             for (i = min; i < max; i++)
 5707:               {
 5708:               if (eptr >= md->end_subject)
 5709:                 {
 5710:                 SCHECK_PARTIAL();
 5711:                 break;
 5712:                 }
 5713:               if (IS_NEWLINE(eptr)) break;
 5714:               if (md->partial != 0 &&    /* Take care with CRLF partial */
 5715:                   eptr + 1 >= md->end_subject &&
 5716:                   NLBLOCK->nltype == NLTYPE_FIXED &&
 5717:                   NLBLOCK->nllen == 2 &&
 5718:                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
 5719:                 {
 5720:                 md->hitend = TRUE;
 5721:                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 5722:                 }
 5723:               eptr++;
 5724:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 5725:               }
 5726:             }
 5727:           break;
 5728: 
 5729:           case OP_ALLANY:
 5730:           if (max < INT_MAX)
 5731:             {
 5732:             for (i = min; i < max; i++)
 5733:               {
 5734:               if (eptr >= md->end_subject)
 5735:                 {
 5736:                 SCHECK_PARTIAL();
 5737:                 break;
 5738:                 }
 5739:               eptr++;
 5740:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
 5741:               }
 5742:             }
 5743:           else
 5744:             {
 5745:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
 5746:             SCHECK_PARTIAL();
 5747:             }
 5748:           break;
 5749: 
 5750:           /* The byte case is the same as non-UTF8 */
 5751: 
 5752:           case OP_ANYBYTE:
 5753:           c = max - min;
 5754:           if (c > (unsigned int)(md->end_subject - eptr))
 5755:             {
 5756:             eptr = md->end_subject;
 5757:             SCHECK_PARTIAL();
 5758:             }
 5759:           else eptr += c;
 5760:           break;
 5761: 
 5762:           case OP_ANYNL:
 5763:           for (i = min; i < max; i++)
 5764:             {
 5765:             int len = 1;
 5766:             if (eptr >= md->end_subject)
 5767:               {
 5768:               SCHECK_PARTIAL();
 5769:               break;
 5770:               }
 5771:             GETCHARLEN(c, eptr, len);
 5772:             if (c == CHAR_CR)
 5773:               {
 5774:               if (++eptr >= md->end_subject) break;
 5775:               if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
 5776:               }
 5777:             else
 5778:               {
 5779:               if (c != CHAR_LF &&
 5780:                   (md->bsr_anycrlf ||
 5781:                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
 5782: #ifndef EBCDIC
 5783:                     && c != 0x2028 && c != 0x2029
 5784: #endif  /* Not EBCDIC */
 5785:                     )))
 5786:                 break;
 5787:               eptr += len;
 5788:               }
 5789:             }
 5790:           break;
 5791: 
 5792:           case OP_NOT_HSPACE:
 5793:           case OP_HSPACE:
 5794:           for (i = min; i < max; i++)
 5795:             {
 5796:             BOOL gotspace;
 5797:             int len = 1;
 5798:             if (eptr >= md->end_subject)
 5799:               {
 5800:               SCHECK_PARTIAL();
 5801:               break;
 5802:               }
 5803:             GETCHARLEN(c, eptr, len);
 5804:             switch(c)
 5805:               {
 5806:               HSPACE_CASES: gotspace = TRUE; break;
 5807:               default: gotspace = FALSE; break;
 5808:               }
 5809:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
 5810:             eptr += len;
 5811:             }
 5812:           break;
 5813: 
 5814:           case OP_NOT_VSPACE:
 5815:           case OP_VSPACE:
 5816:           for (i = min; i < max; i++)
 5817:             {
 5818:             BOOL gotspace;
 5819:             int len = 1;
 5820:             if (eptr >= md->end_subject)
 5821:               {
 5822:               SCHECK_PARTIAL();
 5823:               break;
 5824:               }
 5825:             GETCHARLEN(c, eptr, len);
 5826:             switch(c)
 5827:               {
 5828:               VSPACE_CASES: gotspace = TRUE; break;
 5829:               default: gotspace = FALSE; break;
 5830:               }
 5831:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
 5832:             eptr += len;
 5833:             }
 5834:           break;
 5835: 
 5836:           case OP_NOT_DIGIT:
 5837:           for (i = min; i < max; i++)
 5838:             {
 5839:             int len = 1;
 5840:             if (eptr >= md->end_subject)
 5841:               {
 5842:               SCHECK_PARTIAL();
 5843:               break;
 5844:               }
 5845:             GETCHARLEN(c, eptr, len);
 5846:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
 5847:             eptr+= len;
 5848:             }
 5849:           break;
 5850: 
 5851:           case OP_DIGIT:
 5852:           for (i = min; i < max; i++)
 5853:             {
 5854:             int len = 1;
 5855:             if (eptr >= md->end_subject)
 5856:               {
 5857:               SCHECK_PARTIAL();
 5858:               break;
 5859:               }
 5860:             GETCHARLEN(c, eptr, len);
 5861:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
 5862:             eptr+= len;
 5863:             }
 5864:           break;
 5865: 
 5866:           case OP_NOT_WHITESPACE:
 5867:           for (i = min; i < max; i++)
 5868:             {
 5869:             int len = 1;
 5870:             if (eptr >= md->end_subject)
 5871:               {
 5872:               SCHECK_PARTIAL();
 5873:               break;
 5874:               }
 5875:             GETCHARLEN(c, eptr, len);
 5876:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
 5877:             eptr+= len;
 5878:             }
 5879:           break;
 5880: 
 5881:           case OP_WHITESPACE:
 5882:           for (i = min; i < max; i++)
 5883:             {
 5884:             int len = 1;
 5885:             if (eptr >= md->end_subject)
 5886:               {
 5887:               SCHECK_PARTIAL();
 5888:               break;
 5889:               }
 5890:             GETCHARLEN(c, eptr, len);
 5891:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
 5892:             eptr+= len;
 5893:             }
 5894:           break;
 5895: 
 5896:           case OP_NOT_WORDCHAR:
 5897:           for (i = min; i < max; i++)
 5898:             {
 5899:             int len = 1;
 5900:             if (eptr >= md->end_subject)
 5901:               {
 5902:               SCHECK_PARTIAL();
 5903:               break;
 5904:               }
 5905:             GETCHARLEN(c, eptr, len);
 5906:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
 5907:             eptr+= len;
 5908:             }
 5909:           break;
 5910: 
 5911:           case OP_WORDCHAR:
 5912:           for (i = min; i < max; i++)
 5913:             {
 5914:             int len = 1;
 5915:             if (eptr >= md->end_subject)
 5916:               {
 5917:               SCHECK_PARTIAL();
 5918:               break;
 5919:               }
 5920:             GETCHARLEN(c, eptr, len);
 5921:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
 5922:             eptr+= len;
 5923:             }
 5924:           break;
 5925: 
 5926:           default:
 5927:           RRETURN(PCRE_ERROR_INTERNAL);
 5928:           }
 5929: 
 5930:         if (possessive) continue;    /* No backtracking */
 5931:         for(;;)
 5932:           {
 5933:           if (eptr == pp) goto TAIL_RECURSE;
 5934:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
 5935:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 5936:           eptr--;
 5937:           BACKCHAR(eptr);
 5938:           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
 5939:               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
 5940:           }
 5941:         }
 5942:       else
 5943: #endif  /* SUPPORT_UTF */
 5944:       /* Not UTF mode */
 5945:         {
 5946:         switch(ctype)
 5947:           {
 5948:           case OP_ANY:
 5949:           for (i = min; i < max; i++)
 5950:             {
 5951:             if (eptr >= md->end_subject)
 5952:               {
 5953:               SCHECK_PARTIAL();
 5954:               break;
 5955:               }
 5956:             if (IS_NEWLINE(eptr)) break;
 5957:             if (md->partial != 0 &&    /* Take care with CRLF partial */
 5958:                 eptr + 1 >= md->end_subject &&
 5959:                 NLBLOCK->nltype == NLTYPE_FIXED &&
 5960:                 NLBLOCK->nllen == 2 &&
 5961:                 *eptr == NLBLOCK->nl[0])
 5962:               {
 5963:               md->hitend = TRUE;
 5964:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
 5965:               }
 5966:             eptr++;
 5967:             }
 5968:           break;
 5969: 
 5970:           case OP_ALLANY:
 5971:           case OP_ANYBYTE:
 5972:           c = max - min;
 5973:           if (c > (unsigned int)(md->end_subject - eptr))
 5974:             {
 5975:             eptr = md->end_subject;
 5976:             SCHECK_PARTIAL();
 5977:             }
 5978:           else eptr += c;
 5979:           break;
 5980: 
 5981:           case OP_ANYNL:
 5982:           for (i = min; i < max; i++)
 5983:             {
 5984:             if (eptr >= md->end_subject)
 5985:               {
 5986:               SCHECK_PARTIAL();
 5987:               break;
 5988:               }
 5989:             c = *eptr;
 5990:             if (c == CHAR_CR)
 5991:               {
 5992:               if (++eptr >= md->end_subject) break;
 5993:               if (*eptr == CHAR_LF) eptr++;
 5994:               }
 5995:             else
 5996:               {
 5997:               if (c != CHAR_LF && (md->bsr_anycrlf ||
 5998:                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
 5999: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 6000:                  && c != 0x2028 && c != 0x2029
 6001: #endif
 6002:                  ))) break;
 6003:               eptr++;
 6004:               }
 6005:             }
 6006:           break;
 6007: 
 6008:           case OP_NOT_HSPACE:
 6009:           for (i = min; i < max; i++)
 6010:             {
 6011:             if (eptr >= md->end_subject)
 6012:               {
 6013:               SCHECK_PARTIAL();
 6014:               break;
 6015:               }
 6016:             switch(*eptr)
 6017:               {
 6018:               default: eptr++; break;
 6019:               HSPACE_BYTE_CASES:
 6020: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 6021:               HSPACE_MULTIBYTE_CASES:
 6022: #endif
 6023:               goto ENDLOOP00;
 6024:               }
 6025:             }
 6026:           ENDLOOP00:
 6027:           break;
 6028: 
 6029:           case OP_HSPACE:
 6030:           for (i = min; i < max; i++)
 6031:             {
 6032:             if (eptr >= md->end_subject)
 6033:               {
 6034:               SCHECK_PARTIAL();
 6035:               break;
 6036:               }
 6037:             switch(*eptr)
 6038:               {
 6039:               default: goto ENDLOOP01;
 6040:               HSPACE_BYTE_CASES:
 6041: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 6042:               HSPACE_MULTIBYTE_CASES:
 6043: #endif
 6044:               eptr++; break;
 6045:               }
 6046:             }
 6047:           ENDLOOP01:
 6048:           break;
 6049: 
 6050:           case OP_NOT_VSPACE:
 6051:           for (i = min; i < max; i++)
 6052:             {
 6053:             if (eptr >= md->end_subject)
 6054:               {
 6055:               SCHECK_PARTIAL();
 6056:               break;
 6057:               }
 6058:             switch(*eptr)
 6059:               {
 6060:               default: eptr++; break;
 6061:               VSPACE_BYTE_CASES:
 6062: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 6063:               VSPACE_MULTIBYTE_CASES:
 6064: #endif
 6065:               goto ENDLOOP02;
 6066:               }
 6067:             }
 6068:           ENDLOOP02:
 6069:           break;
 6070: 
 6071:           case OP_VSPACE:
 6072:           for (i = min; i < max; i++)
 6073:             {
 6074:             if (eptr >= md->end_subject)
 6075:               {
 6076:               SCHECK_PARTIAL();
 6077:               break;
 6078:               }
 6079:             switch(*eptr)
 6080:               {
 6081:               default: goto ENDLOOP03;
 6082:               VSPACE_BYTE_CASES:
 6083: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
 6084:               VSPACE_MULTIBYTE_CASES:
 6085: #endif
 6086:               eptr++; break;
 6087:               }
 6088:             }
 6089:           ENDLOOP03:
 6090:           break;
 6091: 
 6092:           case OP_NOT_DIGIT:
 6093:           for (i = min; i < max; i++)
 6094:             {
 6095:             if (eptr >= md->end_subject)
 6096:               {
 6097:               SCHECK_PARTIAL();
 6098:               break;
 6099:               }
 6100:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
 6101:             eptr++;
 6102:             }
 6103:           break;
 6104: 
 6105:           case OP_DIGIT:
 6106:           for (i = min; i < max; i++)
 6107:             {
 6108:             if (eptr >= md->end_subject)
 6109:               {
 6110:               SCHECK_PARTIAL();
 6111:               break;
 6112:               }
 6113:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
 6114:             eptr++;
 6115:             }
 6116:           break;
 6117: 
 6118:           case OP_NOT_WHITESPACE:
 6119:           for (i = min; i < max; i++)
 6120:             {
 6121:             if (eptr >= md->end_subject)
 6122:               {
 6123:               SCHECK_PARTIAL();
 6124:               break;
 6125:               }
 6126:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
 6127:             eptr++;
 6128:             }
 6129:           break;
 6130: 
 6131:           case OP_WHITESPACE:
 6132:           for (i = min; i < max; i++)
 6133:             {
 6134:             if (eptr >= md->end_subject)
 6135:               {
 6136:               SCHECK_PARTIAL();
 6137:               break;
 6138:               }
 6139:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
 6140:             eptr++;
 6141:             }
 6142:           break;
 6143: 
 6144:           case OP_NOT_WORDCHAR:
 6145:           for (i = min; i < max; i++)
 6146:             {
 6147:             if (eptr >= md->end_subject)
 6148:               {
 6149:               SCHECK_PARTIAL();
 6150:               break;
 6151:               }
 6152:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
 6153:             eptr++;
 6154:             }
 6155:           break;
 6156: 
 6157:           case OP_WORDCHAR:
 6158:           for (i = min; i < max; i++)
 6159:             {
 6160:             if (eptr >= md->end_subject)
 6161:               {
 6162:               SCHECK_PARTIAL();
 6163:               break;
 6164:               }
 6165:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
 6166:             eptr++;
 6167:             }
 6168:           break;
 6169: 
 6170:           default:
 6171:           RRETURN(PCRE_ERROR_INTERNAL);
 6172:           }
 6173: 
 6174:         if (possessive) continue;    /* No backtracking */
 6175:         for (;;)
 6176:           {
 6177:           if (eptr == pp) goto TAIL_RECURSE;
 6178:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
 6179:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 6180:           eptr--;
 6181:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
 6182:               eptr[-1] == CHAR_CR) eptr--;
 6183:           }
 6184:         }
 6185: 
 6186:       /* Control never gets here */
 6187:       }
 6188: 
 6189:     /* There's been some horrible disaster. Arrival here can only mean there is
 6190:     something seriously wrong in the code above or the OP_xxx definitions. */
 6191: 
 6192:     default:
 6193:     DPRINTF(("Unknown opcode %d\n", *ecode));
 6194:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
 6195:     }
 6196: 
 6197:   /* Do not stick any code in here without much thought; it is assumed
 6198:   that "continue" in the code above comes out to here to repeat the main
 6199:   loop. */
 6200: 
 6201:   }             /* End of main loop */
 6202: /* Control never reaches here */
 6203: 
 6204: 
 6205: /* When compiling to use the heap rather than the stack for recursive calls to
 6206: match(), the RRETURN() macro jumps here. The number that is saved in
 6207: frame->Xwhere indicates which label we actually want to return to. */
 6208: 
 6209: #ifdef NO_RECURSE
 6210: #define LBL(val) case val: goto L_RM##val;
 6211: HEAP_RETURN:
 6212: switch (frame->Xwhere)
 6213:   {
 6214:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
 6215:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
 6216:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
 6217:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
 6218:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
 6219:   LBL(65) LBL(66)
 6220: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
 6221:   LBL(20) LBL(21)
 6222: #endif
 6223: #ifdef SUPPORT_UTF
 6224:   LBL(16) LBL(18)
 6225:   LBL(22) LBL(23) LBL(28) LBL(30)
 6226:   LBL(32) LBL(34) LBL(42) LBL(46)
 6227: #ifdef SUPPORT_UCP
 6228:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
 6229:   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
 6230: #endif  /* SUPPORT_UCP */
 6231: #endif  /* SUPPORT_UTF */
 6232:   default:
 6233:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
 6234:   return PCRE_ERROR_INTERNAL;
 6235:   }
 6236: #undef LBL
 6237: #endif  /* NO_RECURSE */
 6238: }
 6239: 
 6240: 
 6241: /***************************************************************************
 6242: ****************************************************************************
 6243:                    RECURSION IN THE match() FUNCTION
 6244: 
 6245: Undefine all the macros that were defined above to handle this. */
 6246: 
 6247: #ifdef NO_RECURSE   /* the names in this group are macros only in this build */
 6248: #undef eptr
 6249: #undef ecode
 6250: #undef mstart
 6251: #undef offset_top
 6252: #undef eptrb
 6253: #undef flags        /* pcre_exec() below declares a local with this name */
 6254: 
 6255: #undef callpat
 6256: #undef charptr
 6257: #undef data
 6258: #undef next
 6259: #undef pp           /* reused as an ordinary local inside pcre_exec() */
 6260: #undef prev
 6261: #undef saved_eptr
 6262: 
 6263: #undef new_recursive
 6264: 
 6265: #undef cur_is_word
 6266: #undef condition
 6267: #undef prev_is_word
 6268: 
 6269: #undef ctype
 6270: #undef length       /* also the name of a pcre_exec() parameter */
 6271: #undef max
 6272: #undef min
 6273: #undef number
 6274: #undef offset
 6275: #undef op
 6276: #undef save_capture_last
 6277: #undef save_offset1
 6278: #undef save_offset2
 6279: #undef save_offset3
 6280: #undef stacksave
 6281: 
 6282: #undef newptrb
 6283: 
 6284: #endif  /* NO_RECURSE */
 6285: 
 6286: /* These two are defined as macros in both cases */
 6287: 
 6288: #undef fc
 6289: #undef fi
 6290: 
 6291: /***************************************************************************
 6292: ***************************************************************************/
 6293: 
 6294: 
 6295: #ifdef NO_RECURSE
 6296: /*************************************************
 6297: *          Release allocated heap frames         *
 6298: *************************************************/
 6299: 
 6300: /* Walk the chain of frames that hangs off the base frame via Xnextframe and
 6301: hand each one to the stack_free routine. The base frame itself is skipped,
 6302: because it lives on the machine stack and not in allocated store.
 6303: 
 6304: Argument: the address of the base frame (head of the chain)
 6305: Returns:  nothing */
 6306: 
 6307: static void
 6308: release_match_heapframes (heapframe *frame_base)
 6309: {
 6310: heapframe *doomed, *successor;
 6311: for (doomed = frame_base->Xnextframe; doomed != NULL; doomed = successor)
 6312:   {
 6313:   /* Pick up the link first; it must not be read once the frame is released. */
 6314:   successor = doomed->Xnextframe;
 6315:   (PUBL(stack_free))(doomed);
 6316:   }
 6317: }
 6318: #endif
 6319: 
 6320: 
 6321: /*************************************************
 6322: *         Execute a Regular Expression           *
 6323: *************************************************/
 6324: 
 6325: /* This function applies a compiled re to a subject string and picks out
 6326: portions of the string if it matches. Two elements in the vector are set for
 6327: each substring: the offsets to the start and end of the substring.
 6328: 
 6329: Arguments:
 6330:   argument_re     points to the compiled expression
 6331:   extra_data      points to extra data or is NULL
 6332:   subject         points to the subject string
 6333:   length          length of subject string (may contain binary zeros)
 6334:   start_offset    where to start in the subject string
 6335:   options         option bits
 6336:   offsets         points to a vector of ints to be filled in with offsets
 6337:   offsetcount     the number of elements in the vector
 6338: 
 6339: Returns:          > 0 => success; value is the number of elements filled in
 6340:                   = 0 => success, but offsets is not big enough
 6341:                    -1 => failed to match
 6342:                  < -1 => some kind of unexpected problem
 6343: */
 6344: 
 6345: #if defined COMPILE_PCRE8
 6346: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 6347: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
 6348:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
 6349:   int offsetcount)
 6350: #elif defined COMPILE_PCRE16
 6351: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 6352: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
 6353:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
 6354:   int offsetcount)
 6355: #elif defined COMPILE_PCRE32
 6356: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 6357: pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
 6358:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
 6359:   int offsetcount)
 6360: #endif
 6361: {
 6362: int rc, ocount, arg_offset_max;
 6363: int newline;
 6364: BOOL using_temporary_offsets = FALSE;
 6365: BOOL anchored;
 6366: BOOL startline;
 6367: BOOL firstline;
 6368: BOOL utf;
 6369: BOOL has_first_char = FALSE;
 6370: BOOL has_req_char = FALSE;
 6371: pcre_uchar first_char = 0;
 6372: pcre_uchar first_char2 = 0;
 6373: pcre_uchar req_char = 0;
 6374: pcre_uchar req_char2 = 0;
 6375: match_data match_block;
 6376: match_data *md = &match_block;
 6377: const pcre_uint8 *tables;
 6378: const pcre_uint8 *start_bits = NULL;
 6379: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
 6380: PCRE_PUCHAR end_subject;
 6381: PCRE_PUCHAR start_partial = NULL;
 6382: PCRE_PUCHAR match_partial = NULL;
 6383: PCRE_PUCHAR req_char_ptr = start_match - 1;
 6384: 
 6385: const pcre_study_data *study;
 6386: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
 6387: 
 6388: #ifdef NO_RECURSE
 6389: heapframe frame_zero;
 6390: frame_zero.Xprevframe = NULL;            /* Marks the top level */
 6391: frame_zero.Xnextframe = NULL;            /* None are allocated yet */
 6392: md->match_frames_base = &frame_zero;
 6393: #endif
 6394: 
 6395: /* Check for the special magic call that measures the size of the stack used
 6396: per recursive call of match(). Without the funny casting for sizeof, a Windows
 6397: compiler gave this error: "unary minus operator applied to unsigned type,
 6398: result still unsigned". Hopefully the cast fixes that. */
 6399: 
 6400: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
 6401:     start_offset == -999)
 6402: #ifdef NO_RECURSE
 6403:   return -((int)sizeof(heapframe));
 6404: #else
 6405:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
 6406: #endif
 6407: 
 6408: /* Plausibility checks */
 6409: 
 6410: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
 6411: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
 6412:   return PCRE_ERROR_NULL;
 6413: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 6414: if (length < 0) return PCRE_ERROR_BADLENGTH;
 6415: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 6416: 
 6417: /* Check that the first field in the block is the magic number. If it is not,
 6418: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
 6419: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
 6420: means that the pattern is likely compiled with different endianness. */
 6421: 
 6422: if (re->magic_number != MAGIC_NUMBER)
 6423:   return re->magic_number == REVERSED_MAGIC_NUMBER?
 6424:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
 6425: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 6426: 
 6427: /* These two settings are used in the code for checking a UTF-8 string that
 6428: follows immediately afterwards. Other values in the md block are used only
 6429: during "normal" pcre_exec() processing, not when the JIT support is in use,
 6430: so they are set up later. */
 6431: 
 6432: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 6433: utf = md->utf = (re->options & PCRE_UTF8) != 0;
 6434: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
 6435:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
 6436: 
 6437: /* Check a UTF-8 string if required. Pass back the character offset and error
 6438: code for an invalid string if a results vector is available. */
 6439: 
 6440: #ifdef SUPPORT_UTF
 6441: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
 6442:   {
 6443:   int erroroffset;
 6444:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
 6445:   if (errorcode != 0)
 6446:     {
 6447:     if (offsetcount >= 2)
 6448:       {
 6449:       offsets[0] = erroroffset;
 6450:       offsets[1] = errorcode;
 6451:       }
 6452: #if defined COMPILE_PCRE8
 6453:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
 6454:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
 6455: #elif defined COMPILE_PCRE16
 6456:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
 6457:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
 6458: #elif defined COMPILE_PCRE32
 6459:     return PCRE_ERROR_BADUTF32;
 6460: #endif
 6461:     }
 6462: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
 6463:   /* Check that a start_offset points to the start of a UTF character. */
 6464:   if (start_offset > 0 && start_offset < length &&
 6465:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
 6466:     return PCRE_ERROR_BADUTF8_OFFSET;
 6467: #endif
 6468:   }
 6469: #endif
 6470: 
 6471: /* If the pattern was successfully studied with JIT support, run the JIT
 6472: executable instead of the rest of this function. Most options must be set at
 6473: compile time for the JIT code to be usable. Fallback to the normal code path if
 6474: an unsupported flag is set. */
 6475: 
 6476: #ifdef SUPPORT_JIT
 6477: if (extra_data != NULL
 6478:     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
 6479:                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
 6480:     && extra_data->executable_jit != NULL
 6481:     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
 6482:   {
 6483:   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
 6484:        start_offset, options, offsets, offsetcount);
 6485: 
 6486:   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
 6487:   mode is not compiled. In this case we simply fall back to the interpreter. */
 6488: 
 6489:   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
 6490:   }
 6491: #endif
 6492: 
 6493: /* Carry on with non-JIT matching. This information is for finding all the
 6494: numbers associated with a given name, for condition testing. */
 6495: 
 6496: md->name_table = (pcre_uchar *)re + re->name_table_offset;
 6497: md->name_count = re->name_count;
 6498: md->name_entry_size = re->name_entry_size;
 6499: 
 6500: /* Fish out the optional data from the extra_data structure, first setting
 6501: the default values. */
 6502: 
 6503: study = NULL;
 6504: md->match_limit = MATCH_LIMIT;
 6505: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
 6506: md->callout_data = NULL;
 6507: 
 6508: /* The table pointer is always in native byte order. */
 6509: 
 6510: tables = re->tables;
 6511: 
 6512: /* The two limit values override the defaults, whatever their value. */
 6513: 
 6514: if (extra_data != NULL)
 6515:   {
 6516:   register unsigned int flags = extra_data->flags;
 6517:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
 6518:     study = (const pcre_study_data *)extra_data->study_data;
 6519:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
 6520:     md->match_limit = extra_data->match_limit;
 6521:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
 6522:     md->match_limit_recursion = extra_data->match_limit_recursion;
 6523:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
 6524:     md->callout_data = extra_data->callout_data;
 6525:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
 6526:   }
 6527: 
 6528: /* Limits in the regex override only if they are smaller. */
 6529: 
 6530: if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
 6531:   md->match_limit = re->limit_match;
 6532: 
 6533: if ((re->flags & PCRE_RLSET) != 0 &&
 6534:     re->limit_recursion < md->match_limit_recursion)
 6535:   md->match_limit_recursion = re->limit_recursion;
 6536: 
 6537: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
 6538: is a feature that makes it possible to save compiled regex and re-use them
 6539: in other programs later. */
 6540: 
 6541: if (tables == NULL) tables = PRIV(default_tables);
 6542: 
 6543: /* Set up other data */
 6544: 
 6545: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
 6546: startline = (re->flags & PCRE_STARTLINE) != 0;
 6547: firstline = (re->options & PCRE_FIRSTLINE) != 0;
 6548: 
 6549: /* The code starts after the real_pcre block and the capture name table. */
 6550: 
 6551: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
 6552:   re->name_count * re->name_entry_size;
 6553: 
 6554: md->start_subject = (PCRE_PUCHAR)subject;
 6555: md->start_offset = start_offset;
 6556: md->end_subject = md->start_subject + length;
 6557: end_subject = md->end_subject;
 6558: 
 6559: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 6560: md->use_ucp = (re->options & PCRE_UCP) != 0;
 6561: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
 6562: md->ignore_skip_arg = 0;
 6563: 
 6564: /* Some options are unpacked into BOOL variables in the hope that testing
 6565: them will be faster than individual option bits. */
 6566: 
 6567: md->notbol = (options & PCRE_NOTBOL) != 0;
 6568: md->noteol = (options & PCRE_NOTEOL) != 0;
 6569: md->notempty = (options & PCRE_NOTEMPTY) != 0;
 6570: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 6571: 
 6572: md->hitend = FALSE;
 6573: md->mark = md->nomatch_mark = NULL;     /* In case never set */
 6574: 
 6575: md->recursive = NULL;                   /* No recursion at top level */
 6576: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
 6577: 
 6578: md->lcc = tables + lcc_offset;
 6579: md->fcc = tables + fcc_offset;
 6580: md->ctypes = tables + ctypes_offset;
 6581: 
 6582: /* Handle different \R options. */
 6583: 
 6584: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
 6585:   {
 6586:   case 0:
 6587:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
 6588:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
 6589:   else
 6590: #ifdef BSR_ANYCRLF
 6591:   md->bsr_anycrlf = TRUE;
 6592: #else
 6593:   md->bsr_anycrlf = FALSE;
 6594: #endif
 6595:   break;
 6596: 
 6597:   case PCRE_BSR_ANYCRLF:
 6598:   md->bsr_anycrlf = TRUE;
 6599:   break;
 6600: 
 6601:   case PCRE_BSR_UNICODE:
 6602:   md->bsr_anycrlf = FALSE;
 6603:   break;
 6604: 
 6605:   default: return PCRE_ERROR_BADNEWLINE;
 6606:   }
 6607: 
 6608: /* Handle different types of newline. The three bits give eight cases. If
 6609: nothing is set at run time, whatever was used at compile time applies. */
 6610: 
 6611: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
 6612:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
 6613:   {
 6614:   case 0: newline = NEWLINE; break;   /* Compile-time default */
 6615:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
 6616:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
 6617:   case PCRE_NEWLINE_CR+
 6618:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
 6619:   case PCRE_NEWLINE_ANY: newline = -1; break;
 6620:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
 6621:   default: return PCRE_ERROR_BADNEWLINE;
 6622:   }
 6623: 
 6624: if (newline == -2)
 6625:   {
 6626:   md->nltype = NLTYPE_ANYCRLF;
 6627:   }
 6628: else if (newline < 0)
 6629:   {
 6630:   md->nltype = NLTYPE_ANY;
 6631:   }
 6632: else
 6633:   {
 6634:   md->nltype = NLTYPE_FIXED;
 6635:   if (newline > 255)
 6636:     {
 6637:     md->nllen = 2;
 6638:     md->nl[0] = (newline >> 8) & 255;
 6639:     md->nl[1] = newline & 255;
 6640:     }
 6641:   else
 6642:     {
 6643:     md->nllen = 1;
 6644:     md->nl[0] = newline;
 6645:     }
 6646:   }
 6647: 
 6648: /* Partial matching was originally supported only for a restricted set of
 6649: regexes; from release 8.00 there are no restrictions, but the bits are still
 6650: defined (though never set). So there's no harm in leaving this code. */
 6651: 
 6652: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
 6653:   return PCRE_ERROR_BADPARTIAL;
 6654: 
 6655: /* If the expression has got more back references than the offsets supplied can
 6656: hold, we get a temporary chunk of working store to use during the matching.
 6657: Otherwise, we can use the vector supplied, rounding down its size to a multiple
 6658: of 3. */
 6659: 
 6660: ocount = offsetcount - (offsetcount % 3);
 6661: arg_offset_max = (2*ocount)/3;
 6662: 
 6663: if (re->top_backref > 0 && re->top_backref >= ocount/3)
 6664:   {
 6665:   ocount = re->top_backref * 3 + 3;
 6666:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
 6667:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
 6668:   using_temporary_offsets = TRUE;
 6669:   DPRINTF(("Got memory to hold back references\n"));
 6670:   }
 6671: else md->offset_vector = offsets;
 6672: md->offset_end = ocount;
 6673: md->offset_max = (2*ocount)/3;
 6674: md->capture_last = 0;
 6675: 
 6676: /* Reset the working variable associated with each extraction. These should
 6677: never be used unless previously set, but they get saved and restored, and so we
 6678: initialize them to avoid reading uninitialized locations. Also, unset the
 6679: offsets for the matched string. This is really just for tidiness with callouts,
 6680: in case they inspect these fields. */
 6681: 
 6682: if (md->offset_vector != NULL)
 6683:   {
 6684:   register int *iptr = md->offset_vector + ocount;
 6685:   register int *iend = iptr - re->top_bracket;
 6686:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
 6687:   while (--iptr >= iend) *iptr = -1;
 6688:   md->offset_vector[0] = md->offset_vector[1] = -1;
 6689:   }
 6690: 
 6691: /* Set up the first character to match, if available. The first_char value is
 6692: never set for an anchored regular expression, but the anchoring may be forced
 6693: at run time, so we have to test for anchoring. The first char may be unset for
 6694: an unanchored pattern, of course. If there's no first char and the pattern was
 6695: studied, there may be a bitmap of possible first characters. */
 6696: 
 6697: if (!anchored)
 6698:   {
 6699:   if ((re->flags & PCRE_FIRSTSET) != 0)
 6700:     {
 6701:     has_first_char = TRUE;
 6702:     first_char = first_char2 = (pcre_uchar)(re->first_char);
 6703:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
 6704:       {
 6705:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
 6706: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
 6707:       if (utf && first_char > 127)
 6708:         first_char2 = UCD_OTHERCASE(first_char);
 6709: #endif
 6710:       }
 6711:     }
 6712:   else
 6713:     if (!startline && study != NULL &&
 6714:       (study->flags & PCRE_STUDY_MAPPED) != 0)
 6715:         start_bits = study->start_bits;
 6716:   }
 6717: 
 6718: /* For anchored or unanchored matches, there may be a "last known required
 6719: character" set. */
 6720: 
 6721: if ((re->flags & PCRE_REQCHSET) != 0)
 6722:   {
 6723:   has_req_char = TRUE;
 6724:   req_char = req_char2 = (pcre_uchar)(re->req_char);
 6725:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
 6726:     {
 6727:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
 6728: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
 6729:     if (utf && req_char > 127)
 6730:       req_char2 = UCD_OTHERCASE(req_char);
 6731: #endif
 6732:     }
 6733:   }
 6734: 
 6735: 
 6736: /* ==========================================================================*/
 6737: 
 6738: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
 6739: the loop runs just once. */
 6740: 
 6741: for(;;)
 6742:   {
 6743:   PCRE_PUCHAR save_end_subject = end_subject;
 6744:   PCRE_PUCHAR new_start_match;
 6745: 
 6746:   /* If firstline is TRUE, the start of the match is constrained to the first
 6747:   line of a multiline string. That is, the match must be before or at the first
 6748:   newline. Implement this by temporarily adjusting end_subject so that we stop
 6749:   scanning at a newline. If the match fails at the newline, later code breaks
 6750:   this loop. */
 6751: 
 6752:   if (firstline)
 6753:     {
 6754:     PCRE_PUCHAR t = start_match;
 6755: #ifdef SUPPORT_UTF
 6756:     if (utf)
 6757:       {
 6758:       while (t < md->end_subject && !IS_NEWLINE(t))
 6759:         {
 6760:         t++;
 6761:         ACROSSCHAR(t < end_subject, *t, t++);
 6762:         }
 6763:       }
 6764:     else
 6765: #endif
 6766:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
 6767:     end_subject = t;
 6768:     }
 6769: 
 6770:   /* There are some optimizations that avoid running the match if a known
 6771:   starting point is not found, or if a known later character is not present.
 6772:   However, there is an option that disables these, for testing and for ensuring
 6773:   that all callouts do actually occur. The option can be set in the regex by
 6774:   (*NO_START_OPT) or passed in match-time options. */
 6775: 
 6776:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
 6777:     {
 6778:     /* Advance to a unique first char if there is one. */
 6779: 
 6780:     if (has_first_char)
 6781:       {
 6782:       pcre_uchar smc;
 6783: 
 6784:       if (first_char != first_char2)
 6785:         while (start_match < end_subject &&
 6786:           (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
 6787:           start_match++;
 6788:       else
 6789:         while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
 6790:           start_match++;
 6791:       }
 6792: 
 6793:     /* Or to just after a linebreak for a multiline match */
 6794: 
 6795:     else if (startline)
 6796:       {
 6797:       if (start_match > md->start_subject + start_offset)
 6798:         {
 6799: #ifdef SUPPORT_UTF
 6800:         if (utf)
 6801:           {
 6802:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
 6803:             {
 6804:             start_match++;
 6805:             ACROSSCHAR(start_match < end_subject, *start_match,
 6806:               start_match++);
 6807:             }
 6808:           }
 6809:         else
 6810: #endif
 6811:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
 6812:           start_match++;
 6813: 
 6814:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
 6815:         and we are now at a LF, advance the match position by one more character.
 6816:         */
 6817: 
 6818:         if (start_match[-1] == CHAR_CR &&
 6819:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
 6820:              start_match < end_subject &&
 6821:              RAWUCHARTEST(start_match) == CHAR_NL)
 6822:           start_match++;
 6823:         }
 6824:       }
 6825: 
 6826:     /* Or to a non-unique first byte after study */
 6827: 
 6828:     else if (start_bits != NULL)
 6829:       {
 6830:       while (start_match < end_subject)
 6831:         {
 6832:         register pcre_uint32 c = RAWUCHARTEST(start_match);
 6833: #ifndef COMPILE_PCRE8
 6834:         if (c > 255) c = 255;
 6835: #endif
 6836:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
 6837:           {
 6838:           start_match++;
 6839: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
 6840:           /* In non 8-bit mode, the iteration will stop for
 6841:           characters > 255 at the beginning or not stop at all. */
 6842:           if (utf)
 6843:             ACROSSCHAR(start_match < end_subject, *start_match,
 6844:               start_match++);
 6845: #endif
 6846:           }
 6847:         else break;
 6848:         }
 6849:       }
 6850:     }   /* Starting optimizations */
 6851: 
 6852:   /* Restore fudged end_subject */
 6853: 
 6854:   end_subject = save_end_subject;
 6855: 
 6856:   /* The following two optimizations are disabled for partial matching or if
 6857:   disabling is explicitly requested. */
 6858: 
 6859:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
 6860:     {
 6861:     /* If the pattern was studied, a minimum subject length may be set. This is
 6862:     a lower bound; no actual string of that length may actually match the
 6863:     pattern. Although the value is, strictly, in characters, we treat it as
 6864:     bytes to avoid spending too much time in this optimization. */
 6865: 
 6866:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
 6867:         (pcre_uint32)(end_subject - start_match) < study->minlength)
 6868:       {
 6869:       rc = MATCH_NOMATCH;
 6870:       break;
 6871:       }
 6872: 
 6873:     /* If req_char is set, we know that that character must appear in the
 6874:     subject for the match to succeed. If the first character is set, req_char
 6875:     must be later in the subject; otherwise the test starts at the match point.
 6876:     This optimization can save a huge amount of backtracking in patterns with
 6877:     nested unlimited repeats that aren't going to match. Writing separate code
 6878:     for cased/caseless versions makes it go faster, as does using an
 6879:     autoincrement and backing off on a match.
 6880: 
 6881:     HOWEVER: when the subject string is very, very long, searching to its end
 6882:     can take a long time, and give bad performance on quite ordinary patterns.
 6883:     This showed up when somebody was matching something like /^\d+C/ on a
 6884:     32-megabyte string... so we don't do this when the string is sufficiently
 6885:     long. */
 6886: 
 6887:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
 6888:       {
 6889:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
 6890: 
 6891:       /* We don't need to repeat the search if we haven't yet reached the
 6892:       place we found it at last time. */
 6893: 
 6894:       if (p > req_char_ptr)
 6895:         {
 6896:         if (req_char != req_char2)
 6897:           {
 6898:           while (p < end_subject)
 6899:             {
 6900:             register pcre_uint32 pp = RAWUCHARINCTEST(p);
 6901:             if (pp == req_char || pp == req_char2) { p--; break; }
 6902:             }
 6903:           }
 6904:         else
 6905:           {
 6906:           while (p < end_subject)
 6907:             {
 6908:             if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
 6909:             }
 6910:           }
 6911: 
 6912:         /* If we can't find the required character, break the matching loop,
 6913:         forcing a match failure. */
 6914: 
 6915:         if (p >= end_subject)
 6916:           {
 6917:           rc = MATCH_NOMATCH;
 6918:           break;
 6919:           }
 6920: 
 6921:         /* If we have found the required character, save the point where we
 6922:         found it, so that we don't search again next time round the loop if
 6923:         the start hasn't passed this character yet. */
 6924: 
 6925:         req_char_ptr = p;
 6926:         }
 6927:       }
 6928:     }
 6929: 
 6930: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
 6931:   printf(">>>> Match against: ");
 6932:   pchars(start_match, end_subject - start_match, TRUE, md);
 6933:   printf("\n");
 6934: #endif
 6935: 
 6936:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
 6937:   first starting point for which a partial match was found. */
 6938: 
 6939:   md->start_match_ptr = start_match;
 6940:   md->start_used_ptr = start_match;
 6941:   md->match_call_count = 0;
 6942:   md->match_function_type = 0;
 6943:   md->end_offset_top = 0;
 6944:   md->skip_arg_count = 0;
 6945:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
 6946:   if (md->hitend && start_partial == NULL)
 6947:     {
 6948:     start_partial = md->start_used_ptr;
 6949:     match_partial = start_match;
 6950:     }
 6951: 
 6952:   switch(rc)
 6953:     {
 6954:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
 6955:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
 6956:     entirely. The only way we can do that is to re-do the match at the same
 6957:     point, with a flag to force SKIP with an argument to be ignored. Just
 6958:     treating this case as NOMATCH does not work because it does not check other
 6959:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
 6960: 
 6961:     case MATCH_SKIP_ARG:
 6962:     new_start_match = start_match;
 6963:     md->ignore_skip_arg = md->skip_arg_count;
 6964:     break;
 6965: 
 6966:     /* SKIP passes back the next starting point explicitly, but if it is no
 6967:     greater than the match we have just done, treat it as NOMATCH. */
 6968: 
 6969:     case MATCH_SKIP:
 6970:     if (md->start_match_ptr > start_match)
 6971:       {
 6972:       new_start_match = md->start_match_ptr;
 6973:       break;
 6974:       }
 6975:     /* Fall through */
 6976: 
 6977:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
 6978:     exactly like PRUNE. Unset ignore SKIP-with-argument. */
 6979: 
 6980:     case MATCH_NOMATCH:
 6981:     case MATCH_PRUNE:
 6982:     case MATCH_THEN:
 6983:     md->ignore_skip_arg = 0;
 6984:     new_start_match = start_match + 1;
 6985: #ifdef SUPPORT_UTF
 6986:     if (utf)
 6987:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
 6988:         new_start_match++);
 6989: #endif
 6990:     break;
 6991: 
 6992:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
 6993: 
 6994:     case MATCH_COMMIT:
 6995:     rc = MATCH_NOMATCH;
 6996:     goto ENDLOOP;
 6997: 
 6998:     /* Any other return is either a match, or some kind of error. */
 6999: 
 7000:     default:
 7001:     goto ENDLOOP;
 7002:     }
 7003: 
 7004:   /* Control reaches here for the various types of "no match at this point"
 7005:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
 7006: 
 7007:   rc = MATCH_NOMATCH;
 7008: 
 7009:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
 7010:   newline in the subject (though it may continue over the newline). Therefore,
 7011:   if we have just failed to match, starting at a newline, do not continue. */
 7012: 
 7013:   if (firstline && IS_NEWLINE(start_match)) break;
 7014: 
 7015:   /* Advance to new matching position */
 7016: 
 7017:   start_match = new_start_match;
 7018: 
 7019:   /* Break the loop if the pattern is anchored or if we have passed the end of
 7020:   the subject. */
 7021: 
 7022:   if (anchored || start_match > end_subject) break;
 7023: 
 7024:   /* If we have just passed a CR and we are now at a LF, and the pattern does
 7025:   not contain any explicit matches for \r or \n, and the newline option is CRLF
 7026:   or ANY or ANYCRLF, advance the match position by one more character. In
 7027:   normal matching start_match will always be greater than the first position at
 7028:   this stage, but a failed *SKIP can cause a return at the same point, which is
 7029:   why the first test exists. */
 7030: 
 7031:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
 7032:       start_match[-1] == CHAR_CR &&
 7033:       start_match < end_subject &&
 7034:       *start_match == CHAR_NL &&
 7035:       (re->flags & PCRE_HASCRORLF) == 0 &&
 7036:         (md->nltype == NLTYPE_ANY ||
 7037:          md->nltype == NLTYPE_ANYCRLF ||
 7038:          md->nllen == 2))
 7039:     start_match++;
 7040: 
 7041:   md->mark = NULL;   /* Reset for start of next match attempt */
 7042:   }                  /* End of for(;;) "bumpalong" loop */
 7043: 
 7044: /* ==========================================================================*/
 7045: 
 7046: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
 7047: conditions is true:
 7048: 
 7049: (1) The pattern is anchored or the match was failed by (*COMMIT);
 7050: 
 7051: (2) We are past the end of the subject;
 7052: 
 7053: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
 7054:     this option requests that a match occur at or before the first newline in
 7055:     the subject.
 7056: 
 7057: When we have a match and the offset vector is big enough to deal with any
 7058: backreferences, captured substring offsets will already be set up. In the case
 7059: where we had to get some local store to hold offsets for backreference
 7060: processing, copy those that we can. In this case there need not be overflow if
 7061: certain parts of the pattern were not used, even though there are more
 7062: capturing parentheses than vector slots. */
 7063: 
 7064: ENDLOOP:
 7065: 
 7066: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
 7067:   {
 7068:   if (using_temporary_offsets)
 7069:     {
 7070:     if (arg_offset_max >= 4)
 7071:       {
 7072:       memcpy(offsets + 2, md->offset_vector + 2,
 7073:         (arg_offset_max - 2) * sizeof(int));
 7074:       DPRINTF(("Copied offsets from temporary memory\n"));
 7075:       }
 7076:     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
 7077:     DPRINTF(("Freeing temporary memory\n"));
 7078:     (PUBL(free))(md->offset_vector);
 7079:     }
 7080: 
 7081:   /* Set the return code to the number of captured strings, or 0 if there were
 7082:   too many to fit into the vector. */
 7083: 
 7084:   rc = ((md->capture_last & OVFLBIT) != 0 &&
 7085:          md->end_offset_top >= arg_offset_max)?
 7086:     0 : md->end_offset_top/2;
 7087: 
 7088:   /* If there is space in the offset vector, set any unused pairs at the end of
 7089:   the pattern to -1 for backwards compatibility. It is documented that this
 7090:   happens. In earlier versions, the whole set of potential capturing offsets
 7091:   was set to -1 each time round the loop, but this is handled differently now.
 7092:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
 7093:   those at the end that need unsetting here. We can't just unset them all at
 7094:   the start of the whole thing because they may get set in one branch that is
 7095:   not the final matching branch. */
 7096: 
 7097:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
 7098:     {
 7099:     register int *iptr, *iend;
 7100:     int resetcount = 2 + re->top_bracket * 2;
 7101:     if (resetcount > offsetcount) resetcount = offsetcount;
 7102:     iptr = offsets + md->end_offset_top;
 7103:     iend = offsets + resetcount;
 7104:     while (iptr < iend) *iptr++ = -1;
 7105:     }
 7106: 
 7107:   /* If there is space, set up the whole thing as substring 0. The value of
 7108:   md->start_match_ptr might be modified if \K was encountered on the successful
 7109:   matching path. */
 7110: 
 7111:   if (offsetcount < 2) rc = 0; else
 7112:     {
 7113:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
 7114:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
 7115:     }
 7116: 
 7117:   /* Return MARK data if requested */
 7118: 
 7119:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
 7120:     *(extra_data->mark) = (pcre_uchar *)md->mark;
 7121:   DPRINTF((">>>> returning %d\n", rc));
 7122: #ifdef NO_RECURSE
 7123:   release_match_heapframes(&frame_zero);
 7124: #endif
 7125:   return rc;
 7126:   }
 7127: 
 7128: /* Control gets here if there has been an error, or if the overall match
 7129: attempt has failed at all permitted starting positions. */
 7130: 
 7131: if (using_temporary_offsets)
 7132:   {
 7133:   DPRINTF(("Freeing temporary memory\n"));
 7134:   (PUBL(free))(md->offset_vector);
 7135:   }
 7136: 
 7137: /* For anything other than nomatch or partial match, just return the code. */
 7138: 
 7139: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
 7140:   {
 7141:   DPRINTF((">>>> error: returning %d\n", rc));
 7142: #ifdef NO_RECURSE
 7143:   release_match_heapframes(&frame_zero);
 7144: #endif
 7145:   return rc;
 7146:   }
 7147: 
 7148: /* Handle partial matches - disable any mark data */
 7149: 
 7150: if (match_partial != NULL)
 7151:   {
 7152:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
 7153:   md->mark = NULL;
 7154:   if (offsetcount > 1)
 7155:     {
 7156:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
 7157:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
 7158:     if (offsetcount > 2)
 7159:       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
 7160:     }
 7161:   rc = PCRE_ERROR_PARTIAL;
 7162:   }
 7163: 
 7164: /* This is the classic nomatch case */
 7165: 
 7166: else
 7167:   {
 7168:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
 7169:   rc = PCRE_ERROR_NOMATCH;
 7170:   }
 7171: 
 7172: /* Return the MARK data if it has been requested. */
 7173: 
 7174: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
 7175:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
 7176: #ifdef NO_RECURSE
 7177:   release_match_heapframes(&frame_zero);
 7178: #endif
 7179: return rc;
 7180: }
 7181: 
 7182: /* End of pcre_exec.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>