Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_exec.c, revision 1.1
1.1 ! misho 1: /*************************************************
! 2: * Perl-Compatible Regular Expressions *
! 3: *************************************************/
! 4:
! 5: /* PCRE is a library of functions to support regular expressions whose syntax
! 6: and semantics are as close as possible to those of the Perl 5 language.
! 7:
! 8: Written by Philip Hazel
! 9: Copyright (c) 1997-2010 University of Cambridge
! 10:
! 11: -----------------------------------------------------------------------------
! 12: Redistribution and use in source and binary forms, with or without
! 13: modification, are permitted provided that the following conditions are met:
! 14:
! 15: * Redistributions of source code must retain the above copyright notice,
! 16: this list of conditions and the following disclaimer.
! 17:
! 18: * Redistributions in binary form must reproduce the above copyright
! 19: notice, this list of conditions and the following disclaimer in the
! 20: documentation and/or other materials provided with the distribution.
! 21:
! 22: * Neither the name of the University of Cambridge nor the names of its
! 23: contributors may be used to endorse or promote products derived from
! 24: this software without specific prior written permission.
! 25:
! 26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! 27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
! 30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 36: POSSIBILITY OF SUCH DAMAGE.
! 37: -----------------------------------------------------------------------------
! 38: */
! 39:
! 40:
! 41: /* This module contains pcre_exec(), the externally visible function that does
! 42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
! 43: possible. There are also some static supporting functions. */
! 44:
! 45: #include "config.h"
! 46:
! 47: #define NLBLOCK md /* Block containing newline information */
! 48: #define PSSTART start_subject /* Field containing processed string start */
! 49: #define PSEND end_subject /* Field containing processed string end */
! 50:
! 51: #include "pcre_internal.h"
! 52:
! 53: /* Undefine some potentially clashing cpp symbols */
! 54:
! 55: #undef min
! 56: #undef max
! 57:
! 58: /* Flag bits for the match() function */
! 59:
! 60: #define match_condassert 0x01 /* Called to check a condition assertion */
! 61: #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
! 62:
! 63: /* Non-error returns from the match() function. Error returns are externally
! 64: defined PCRE_ERROR_xxx codes, which are all negative. */
! 65:
! 66: #define MATCH_MATCH 1
! 67: #define MATCH_NOMATCH 0
! 68:
! 69: /* Special internal returns from the match() function. Make them sufficiently
! 70: negative to avoid the external error codes. */
! 71:
! 72: #define MATCH_ACCEPT (-999)
! 73: #define MATCH_COMMIT (-998)
! 74: #define MATCH_PRUNE (-997)
! 75: #define MATCH_SKIP (-996)
! 76: #define MATCH_SKIP_ARG (-995)
! 77: #define MATCH_THEN (-994)
! 78:
! 79: /* This is a convenience macro for code that occurs many times. */
! 80:
! 81: #define MRRETURN(ra) \
! 82: { \
! 83: md->mark = markptr; \
! 84: RRETURN(ra); \
! 85: }
! 86:
! 87: /* Maximum number of ints of offset to save on the stack for recursive calls.
! 88: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
! 89: because the offset vector is always a multiple of 3 long. */
! 90:
! 91: #define REC_STACK_SAVE_MAX 30
! 92:
! 93: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
! 94:
/* Lower and upper repeat bounds for the six common repeat forms, held in two
parallel tables that share one index. An upper bound of 0 stands for "no
limit". Read pairwise, the entries describe: {0, unlimited} twice,
{1, unlimited} twice, then {0, 1} twice. The mapping from opcode to index is
set by the code that consults these tables, which is outside this excerpt. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
! 97:
! 98:
! 99:
! 100: #ifdef PCRE_DEBUG
! 101: /*************************************************
! 102: * Debugging function to print chars *
! 103: *************************************************/
! 104:
! 105: /* Print a sequence of chars in printable format, stopping at the end of the
! 106: subject if requested.
! 107:
! 108: Arguments:
! 109: p points to characters
! 110: length number to print
! 111: is_subject TRUE if printing from within md->start_subject
! 112: md pointer to matching data block, if is_subject is TRUE
! 113:
! 114: Returns: nothing
! 115: */
! 116:
! 117: static void
! 118: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
! 119: {
! 120: unsigned int c;
! 121: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
! 122: while (length-- > 0)
! 123: if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
! 124: }
! 125: #endif
! 126:
! 127:
! 128:
! 129: /*************************************************
! 130: * Match a back-reference *
! 131: *************************************************/
! 132:
! 133: /* If a back reference hasn't been set, the length that is passed is greater
! 134: than the number of characters left in the string, so the match fails.
! 135:
! 136: Arguments:
! 137: offset index into the offset vector
! 138: eptr points into the subject
! 139: length length to be matched
! 140: md points to match data block
! 141: ims the ims flags
! 142:
! 143: Returns: TRUE if matched
! 144: */
! 145:
! 146: static BOOL
! 147: match_ref(int offset, register USPTR eptr, int length, match_data *md,
! 148: unsigned long int ims)
! 149: {
! 150: USPTR p = md->start_subject + md->offset_vector[offset];
! 151:
! 152: #ifdef PCRE_DEBUG
! 153: if (eptr >= md->end_subject)
! 154: printf("matching subject <null>");
! 155: else
! 156: {
! 157: printf("matching subject ");
! 158: pchars(eptr, length, TRUE, md);
! 159: }
! 160: printf(" against backref ");
! 161: pchars(p, length, FALSE, md);
! 162: printf("\n");
! 163: #endif
! 164:
! 165: /* Always fail if not enough characters left */
! 166:
! 167: if (length > md->end_subject - eptr) return FALSE;
! 168:
! 169: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
! 170: properly if Unicode properties are supported. Otherwise, we can check only
! 171: ASCII characters. */
! 172:
! 173: if ((ims & PCRE_CASELESS) != 0)
! 174: {
! 175: #ifdef SUPPORT_UTF8
! 176: #ifdef SUPPORT_UCP
! 177: if (md->utf8)
! 178: {
! 179: USPTR endptr = eptr + length;
! 180: while (eptr < endptr)
! 181: {
! 182: int c, d;
! 183: GETCHARINC(c, eptr);
! 184: GETCHARINC(d, p);
! 185: if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
! 186: }
! 187: }
! 188: else
! 189: #endif
! 190: #endif
! 191:
! 192: /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
! 193: is no UCP support. */
! 194:
! 195: while (length-- > 0)
! 196: { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
! 197: }
! 198:
! 199: /* In the caseful case, we can just compare the bytes, whether or not we
! 200: are in UTF-8 mode. */
! 201:
! 202: else
! 203: { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
! 204:
! 205: return TRUE;
! 206: }
! 207:
! 208:
! 209:
! 210: /***************************************************************************
! 211: ****************************************************************************
! 212: RECURSION IN THE match() FUNCTION
! 213:
! 214: The match() function is highly recursive, though not every recursive call
! 215: increases the recursive depth. Nevertheless, some regular expressions can cause
! 216: it to recurse to a great depth. I was writing for Unix, so I just let it call
! 217: itself recursively. This uses the stack for saving everything that has to be
! 218: saved for a recursive call. On Unix, the stack can be large, and this works
! 219: fine.
! 220:
! 221: It turns out that on some non-Unix-like systems there are problems with
! 222: programs that use a lot of stack. (This despite the fact that every last chip
! 223: has oodles of memory these days, and techniques for extending the stack have
! 224: been known for decades.) So....
! 225:
! 226: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
! 227: calls by keeping local variables that need to be preserved in blocks of memory
! 228: obtained from malloc() instead of on the stack. Macros are used to
! 229: achieve this so that the actual code doesn't look very different to what it
! 230: always used to.
! 231:
! 232: The original heap-recursive code used longjmp(). However, it seems that this
! 233: can be very slow on some operating systems. Following a suggestion from Stan
! 234: Switzer, the use of longjmp() has been abolished, at the cost of having to
! 235: provide a unique number for each call to RMATCH. There is no way of generating
! 236: a sequence of numbers at compile time in C. I have given them names, to make
! 237: them stand out more clearly.
! 238:
! 239: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
! 240: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
! 241: tests. Furthermore, not using longjmp() means that local dynamic variables
! 242: don't have indeterminate values; this has meant that the frame size can be
! 243: reduced because the result can be "passed back" by straight setting of the
! 244: variable instead of being passed in the frame.
! 245: ****************************************************************************
! 246: ***************************************************************************/
! 247:
! 248: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
! 249: below must be updated in sync. */
! 250:
/* One distinct identifier per RMATCH call site. Numbering starts at 1 and
rises by one in list order, so RMn has the value n. */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,     RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17,    RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33,    RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41,    RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57,    RM58, RM59, RM60, RM61, RM62
};
! 258:
! 259: /* These versions of the macros use the stack, as normal. There are debugging
! 260: versions and production versions. Note that the "rw" argument of RMATCH isn't
! 261: actually used in this definition. */
! 262:
! 263: #ifndef NO_RECURSE
! 264: #define REGISTER register
! 265:
! 266: #ifdef PCRE_DEBUG
! 267: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
! 268: { \
! 269: printf("match() called in line %d\n", __LINE__); \
! 270: rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
! 271: printf("to line %d\n", __LINE__); \
! 272: }
! 273: #define RRETURN(ra) \
! 274: { \
! 275: printf("match() returned %d from line %d ", ra, __LINE__); \
! 276: return ra; \
! 277: }
! 278: #else
! 279: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
! 280: rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
! 281: #define RRETURN(ra) return ra
! 282: #endif
! 283:
! 284: #else
! 285:
! 286:
! 287: /* These versions of the macros manage a private stack on the heap. Note that
! 288: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
! 289: argument of match(), which never changes. */
! 290:
! 291: #define REGISTER
! 292:
! 293: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
! 294: {\
! 295: heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
! 296: if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
! 297: frame->Xwhere = rw; \
! 298: newframe->Xeptr = ra;\
! 299: newframe->Xecode = rb;\
! 300: newframe->Xmstart = mstart;\
! 301: newframe->Xmarkptr = markptr;\
! 302: newframe->Xoffset_top = rc;\
! 303: newframe->Xims = re;\
! 304: newframe->Xeptrb = rf;\
! 305: newframe->Xflags = rg;\
! 306: newframe->Xrdepth = frame->Xrdepth + 1;\
! 307: newframe->Xprevframe = frame;\
! 308: frame = newframe;\
! 309: DPRINTF(("restarting from line %d\n", __LINE__));\
! 310: goto HEAP_RECURSE;\
! 311: L_##rw:\
! 312: DPRINTF(("jumped back to line %d\n", __LINE__));\
! 313: }
! 314:
! 315: #define RRETURN(ra)\
! 316: {\
! 317: heapframe *oldframe = frame;\
! 318: frame = oldframe->Xprevframe;\
! 319: (pcre_stack_free)(oldframe);\
! 320: if (frame != NULL)\
! 321: {\
! 322: rrc = ra;\
! 323: goto HEAP_RETURN;\
! 324: }\
! 325: return ra;\
! 326: }
! 327:
! 328:
! 329: /* Structure for remembering the local variables in a private frame */
! 330:
! 331: typedef struct heapframe {
! 332: struct heapframe *Xprevframe;
! 333:
! 334: /* Function arguments that may change */
! 335:
! 336: USPTR Xeptr;
! 337: const uschar *Xecode;
! 338: USPTR Xmstart;
! 339: USPTR Xmarkptr;
! 340: int Xoffset_top;
! 341: long int Xims;
! 342: eptrblock *Xeptrb;
! 343: int Xflags;
! 344: unsigned int Xrdepth;
! 345:
! 346: /* Function local variables */
! 347:
! 348: USPTR Xcallpat;
! 349: #ifdef SUPPORT_UTF8
! 350: USPTR Xcharptr;
! 351: #endif
! 352: USPTR Xdata;
! 353: USPTR Xnext;
! 354: USPTR Xpp;
! 355: USPTR Xprev;
! 356: USPTR Xsaved_eptr;
! 357:
! 358: recursion_info Xnew_recursive;
! 359:
! 360: BOOL Xcur_is_word;
! 361: BOOL Xcondition;
! 362: BOOL Xprev_is_word;
! 363:
! 364: unsigned long int Xoriginal_ims;
! 365:
! 366: #ifdef SUPPORT_UCP
! 367: int Xprop_type;
! 368: int Xprop_value;
! 369: int Xprop_fail_result;
! 370: int Xprop_category;
! 371: int Xprop_chartype;
! 372: int Xprop_script;
! 373: int Xoclength;
! 374: uschar Xocchars[8];
! 375: #endif
! 376:
! 377: int Xcodelink;
! 378: int Xctype;
! 379: unsigned int Xfc;
! 380: int Xfi;
! 381: int Xlength;
! 382: int Xmax;
! 383: int Xmin;
! 384: int Xnumber;
! 385: int Xoffset;
! 386: int Xop;
! 387: int Xsave_capture_last;
! 388: int Xsave_offset1, Xsave_offset2, Xsave_offset3;
! 389: int Xstacksave[REC_STACK_SAVE_MAX];
! 390:
! 391: eptrblock Xnewptrb;
! 392:
! 393: /* Where to jump back to */
! 394:
! 395: int Xwhere;
! 396:
! 397: } heapframe;
! 398:
! 399: #endif
! 400:
! 401:
! 402: /***************************************************************************
! 403: ***************************************************************************/
! 404:
! 405:
! 406:
! 407: /*************************************************
! 408: * Match from current position *
! 409: *************************************************/
! 410:
! 411: /* This function is called recursively in many circumstances. Whenever it
! 412: returns a negative (error) response, the outer incarnation must also return the
! 413: same response. */
! 414:
! 415: /* These macros pack up tests that are used for partial matching, and which
! 416: appear several times in the code. We set the "hit end" flag if the pointer is
! 417: at the end of the subject and also past the start of the subject (i.e.
! 418: something has been matched). For hard partial matching, we then return
! 419: immediately. The second one is used when we already know we are past the end of
! 420: the subject. */
! 421:
! 422: #define CHECK_PARTIAL()\
! 423: if (md->partial != 0 && eptr >= md->end_subject && \
! 424: eptr > md->start_used_ptr) \
! 425: { \
! 426: md->hitend = TRUE; \
! 427: if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
! 428: }
! 429:
! 430: #define SCHECK_PARTIAL()\
! 431: if (md->partial != 0 && eptr > md->start_used_ptr) \
! 432: { \
! 433: md->hitend = TRUE; \
! 434: if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
! 435: }
! 436:
! 437:
! 438: /* Performance note: It might be tempting to extract commonly used fields from
! 439: the md structure (e.g. utf8, end_subject) into individual variables to improve
! 440: performance. Tests using gcc on a SPARC disproved this; in the first case, it
! 441: made performance worse.
! 442:
! 443: Arguments:
! 444: eptr pointer to current character in subject
! 445: ecode pointer to current position in compiled code
! 446: mstart pointer to the current match start position (can be modified
! 447: by encountering \K)
! 448: markptr pointer to the most recent MARK name, or NULL
! 449: offset_top current top pointer
! 450: md pointer to "static" info for the match
! 451: ims current /i, /m, and /s options
! 452: eptrb pointer to chain of blocks containing eptr at start of
! 453: brackets - for testing for empty matches
! 454: flags can contain
! 455: match_condassert - this is an assertion condition
! 456: match_cbegroup - this is the start of an unlimited repeat
! 457: group that can match an empty string
! 458: rdepth the recursion depth
! 459:
! 460: Returns: MATCH_MATCH if matched ) these values are >= 0
! 461: MATCH_NOMATCH if failed to match )
! 462: a negative MATCH_xxx value for PRUNE, SKIP, etc
! 463: a negative PCRE_ERROR_xxx value if aborted by an error condition
! 464: (e.g. stopped by repeated call or recursion limit)
! 465: */
! 466:
! 467: static int
! 468: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
! 469: const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
! 470: eptrblock *eptrb, int flags, unsigned int rdepth)
! 471: {
! 472: /* These variables do not need to be preserved over recursion in this function,
! 473: so they can be ordinary variables in all cases. Mark some of them with
! 474: "register" because they are used a lot in loops. */
! 475:
! 476: register int rrc; /* Returns from recursive calls */
! 477: register int i; /* Used for loops not involving calls to RMATCH() */
! 478: register unsigned int c; /* Character values not kept over RMATCH() calls */
! 479: register BOOL utf8; /* Local copy of UTF-8 flag for speed */
! 480:
! 481: BOOL minimize, possessive; /* Quantifier options */
! 482: int condcode;
! 483:
! 484: /* When recursion is not being used, all "local" variables that have to be
! 485: preserved over calls to RMATCH() are part of a "frame" which is obtained from
! 486: heap storage. Set up the top-level frame here; others are obtained from the
! 487: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
! 488:
! 489: #ifdef NO_RECURSE
! 490: heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
! 491: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
! 492: frame->Xprevframe = NULL; /* Marks the top level */
! 493:
! 494: /* Copy in the original argument variables */
! 495:
! 496: frame->Xeptr = eptr;
! 497: frame->Xecode = ecode;
! 498: frame->Xmstart = mstart;
! 499: frame->Xmarkptr = markptr;
! 500: frame->Xoffset_top = offset_top;
! 501: frame->Xims = ims;
! 502: frame->Xeptrb = eptrb;
! 503: frame->Xflags = flags;
! 504: frame->Xrdepth = rdepth;
! 505:
! 506: /* This is where control jumps back to in order to effect "recursion" */
! 507:
! 508: HEAP_RECURSE:
! 509:
! 510: /* Macros make the argument variables come from the current frame */
! 511:
! 512: #define eptr frame->Xeptr
! 513: #define ecode frame->Xecode
! 514: #define mstart frame->Xmstart
! 515: #define markptr frame->Xmarkptr
! 516: #define offset_top frame->Xoffset_top
! 517: #define ims frame->Xims
! 518: #define eptrb frame->Xeptrb
! 519: #define flags frame->Xflags
! 520: #define rdepth frame->Xrdepth
! 521:
! 522: /* Ditto for the local variables */
! 523:
! 524: #ifdef SUPPORT_UTF8
! 525: #define charptr frame->Xcharptr
! 526: #endif
! 527: #define callpat frame->Xcallpat
! 528: #define codelink frame->Xcodelink
! 529: #define data frame->Xdata
! 530: #define next frame->Xnext
! 531: #define pp frame->Xpp
! 532: #define prev frame->Xprev
! 533: #define saved_eptr frame->Xsaved_eptr
! 534:
! 535: #define new_recursive frame->Xnew_recursive
! 536:
! 537: #define cur_is_word frame->Xcur_is_word
! 538: #define condition frame->Xcondition
! 539: #define prev_is_word frame->Xprev_is_word
! 540:
! 541: #define original_ims frame->Xoriginal_ims
! 542:
! 543: #ifdef SUPPORT_UCP
! 544: #define prop_type frame->Xprop_type
! 545: #define prop_value frame->Xprop_value
! 546: #define prop_fail_result frame->Xprop_fail_result
! 547: #define prop_category frame->Xprop_category
! 548: #define prop_chartype frame->Xprop_chartype
! 549: #define prop_script frame->Xprop_script
! 550: #define oclength frame->Xoclength
! 551: #define occhars frame->Xocchars
! 552: #endif
! 553:
! 554: #define ctype frame->Xctype
! 555: #define fc frame->Xfc
! 556: #define fi frame->Xfi
! 557: #define length frame->Xlength
! 558: #define max frame->Xmax
! 559: #define min frame->Xmin
! 560: #define number frame->Xnumber
! 561: #define offset frame->Xoffset
! 562: #define op frame->Xop
! 563: #define save_capture_last frame->Xsave_capture_last
! 564: #define save_offset1 frame->Xsave_offset1
! 565: #define save_offset2 frame->Xsave_offset2
! 566: #define save_offset3 frame->Xsave_offset3
! 567: #define stacksave frame->Xstacksave
! 568:
! 569: #define newptrb frame->Xnewptrb
! 570:
! 571: /* When recursion is being used, local variables are allocated on the stack and
! 572: get preserved during recursion in the normal way. In this environment, fi and
! 573: i, and fc and c, can be the same variables. */
! 574:
! 575: #else /* NO_RECURSE not defined */
! 576: #define fi i
! 577: #define fc c
! 578:
! 579:
! 580: #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
! 581: const uschar *charptr; /* in small blocks of the code. My normal */
! 582: #endif /* style of coding would have declared */
! 583: const uschar *callpat; /* them within each of those blocks. */
! 584: const uschar *data; /* However, in order to accommodate the */
! 585: const uschar *next; /* version of this code that uses an */
! 586: USPTR pp; /* external "stack" implemented on the */
! 587: const uschar *prev; /* heap, it is easier to declare them all */
! 588: USPTR saved_eptr; /* here, so the declarations can be cut */
! 589: /* out in a block. The only declarations */
! 590: recursion_info new_recursive; /* within blocks below are for variables */
! 591: /* that do not have to be preserved over */
! 592: BOOL cur_is_word; /* a recursive call to RMATCH(). */
! 593: BOOL condition;
! 594: BOOL prev_is_word;
! 595:
! 596: unsigned long int original_ims;
! 597:
! 598: #ifdef SUPPORT_UCP
! 599: int prop_type;
! 600: int prop_value;
! 601: int prop_fail_result;
! 602: int prop_category;
! 603: int prop_chartype;
! 604: int prop_script;
! 605: int oclength;
! 606: uschar occhars[8];
! 607: #endif
! 608:
! 609: int codelink;
! 610: int ctype;
! 611: int length;
! 612: int max;
! 613: int min;
! 614: int number;
! 615: int offset;
! 616: int op;
! 617: int save_capture_last;
! 618: int save_offset1, save_offset2, save_offset3;
! 619: int stacksave[REC_STACK_SAVE_MAX];
! 620:
! 621: eptrblock newptrb;
! 622: #endif /* NO_RECURSE */
! 623:
! 624: /* These statements are here to stop the compiler complaining about uninitialized
! 625: variables. */
! 626:
! 627: #ifdef SUPPORT_UCP
! 628: prop_value = 0;
! 629: prop_fail_result = 0;
! 630: #endif
! 631:
! 632:
! 633: /* This label is used for tail recursion, which is used in a few cases even
! 634: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
! 635: used. Thanks to Ian Taylor for noticing this possibility and sending the
! 636: original patch. */
! 637:
! 638: TAIL_RECURSE:
! 639:
! 640: /* OK, now we can get on with the real code of the function. Recursive calls
! 641: are specified by the macro RMATCH and RRETURN is used to return. When
! 642: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
! 643: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
! 644: defined). However, RMATCH isn't like a function call because it's quite a
! 645: complicated macro. It has to be used in one particular way. This shouldn't,
! 646: however, impact performance when true recursion is being used. */
! 647:
! 648: #ifdef SUPPORT_UTF8
! 649: utf8 = md->utf8; /* Local copy of the flag */
! 650: #else
! 651: utf8 = FALSE;
! 652: #endif
! 653:
! 654: /* First check that we haven't called match() too many times, or that we
! 655: haven't exceeded the recursive call limit. */
! 656:
! 657: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
! 658: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
! 659:
! 660: original_ims = ims; /* Save for resetting on ')' */
! 661:
! 662: /* At the start of a group with an unlimited repeat that may match an empty
! 663: string, the match_cbegroup flag is set. When this is the case, add the current
! 664: subject pointer to the chain of such remembered pointers, to be checked when we
! 665: hit the closing ket, in order to break infinite loops that match no characters.
! 666: When match() is called in other circumstances, don't add to the chain. The
! 667: match_cbegroup flag must NOT be used with tail recursion, because the memory
! 668: block that is used is on the stack, so a new one may be required for each
! 669: match(). */
! 670:
! 671: if ((flags & match_cbegroup) != 0)
! 672: {
! 673: newptrb.epb_saved_eptr = eptr;
! 674: newptrb.epb_prev = eptrb;
! 675: eptrb = &newptrb;
! 676: }
! 677:
! 678: /* Now start processing the opcodes. */
! 679:
! 680: for (;;)
! 681: {
! 682: minimize = possessive = FALSE;
! 683: op = *ecode;
! 684:
! 685: switch(op)
! 686: {
! 687: case OP_MARK:
! 688: markptr = ecode + 2;
! 689: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 690: ims, eptrb, flags, RM55);
! 691:
! 692: /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
! 693: argument, and we must check whether that argument matches this MARK's
! 694: argument. It is passed back in md->start_match_ptr (an overloading of that
! 695: variable). If it does match, we reset that variable to the current subject
! 696: position and return MATCH_SKIP. Otherwise, pass back the return code
! 697: unaltered. */
! 698:
! 699: if (rrc == MATCH_SKIP_ARG &&
! 700: strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
! 701: {
! 702: md->start_match_ptr = eptr;
! 703: RRETURN(MATCH_SKIP);
! 704: }
! 705:
! 706: if (md->mark == NULL) md->mark = markptr;
! 707: RRETURN(rrc);
! 708:
! 709: case OP_FAIL:
! 710: MRRETURN(MATCH_NOMATCH);
! 711:
! 712: /* COMMIT overrides PRUNE, SKIP, and THEN */
! 713:
! 714: case OP_COMMIT:
! 715: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 716: ims, eptrb, flags, RM52);
! 717: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
! 718: rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
! 719: rrc != MATCH_THEN)
! 720: RRETURN(rrc);
! 721: MRRETURN(MATCH_COMMIT);
! 722:
! 723: /* PRUNE overrides THEN */
! 724:
! 725: case OP_PRUNE:
! 726: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 727: ims, eptrb, flags, RM51);
! 728: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 729: MRRETURN(MATCH_PRUNE);
! 730:
! 731: case OP_PRUNE_ARG:
! 732: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 733: ims, eptrb, flags, RM56);
! 734: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 735: md->mark = ecode + 2;
! 736: RRETURN(MATCH_PRUNE);
! 737:
! 738: /* SKIP overrides PRUNE and THEN */
! 739:
! 740: case OP_SKIP:
! 741: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 742: ims, eptrb, flags, RM53);
! 743: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
! 744: RRETURN(rrc);
! 745: md->start_match_ptr = eptr; /* Pass back current position */
! 746: MRRETURN(MATCH_SKIP);
! 747:
! 748: case OP_SKIP_ARG:
! 749: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 750: ims, eptrb, flags, RM57);
! 751: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
! 752: RRETURN(rrc);
! 753:
! 754: /* Pass back the current skip name by overloading md->start_match_ptr and
! 755: returning the special MATCH_SKIP_ARG return code. This will either be
! 756: caught by a matching MARK, or get to the top, where it is treated the same
! 757: as PRUNE. */
! 758:
! 759: md->start_match_ptr = ecode + 2;
! 760: RRETURN(MATCH_SKIP_ARG);
! 761:
! 762: /* For THEN (and THEN_ARG) we pass back the address of the bracket or
! 763: the alt that is at the start of the current branch. This makes it possible
! 764: to skip back past alternatives that precede the THEN within the current
! 765: branch. */
! 766:
! 767: case OP_THEN:
! 768: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 769: ims, eptrb, flags, RM54);
! 770: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 771: md->start_match_ptr = ecode - GET(ecode, 1);
! 772: MRRETURN(MATCH_THEN);
! 773:
! 774: case OP_THEN_ARG:
! 775: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
! 776: offset_top, md, ims, eptrb, flags, RM58);
! 777: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 778: md->start_match_ptr = ecode - GET(ecode, 1);
! 779: md->mark = ecode + LINK_SIZE + 2;
! 780: RRETURN(MATCH_THEN);
! 781:
! 782: /* Handle a capturing bracket. If there is space in the offset vector, save
! 783: the current subject position in the working slot at the top of the vector.
! 784: We mustn't change the current values of the data slot, because they may be
! 785: set from a previous iteration of this group, and be referred to by a
! 786: reference inside the group.
! 787:
! 788: If the bracket fails to match, we need to restore this value and also the
! 789: values of the final offsets, in case they were set by a previous iteration
! 790: of the same bracket.
! 791:
! 792: If there isn't enough space in the offset vector, treat this as if it were
! 793: a non-capturing bracket. Don't worry about setting the flag for the error
! 794: case here; that is handled in the code for KET. */
! 795:
! 796: case OP_CBRA:
! 797: case OP_SCBRA:
! 798: number = GET2(ecode, 1+LINK_SIZE);
! 799: offset = number << 1;
! 800:
! 801: #ifdef PCRE_DEBUG
! 802: printf("start bracket %d\n", number);
! 803: printf("subject=");
! 804: pchars(eptr, 16, TRUE, md);
! 805: printf("\n");
! 806: #endif
! 807:
! 808: if (offset < md->offset_max)
! 809: {
! 810: save_offset1 = md->offset_vector[offset];
! 811: save_offset2 = md->offset_vector[offset+1];
! 812: save_offset3 = md->offset_vector[md->offset_end - number];
! 813: save_capture_last = md->capture_last;
! 814:
! 815: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
! 816: md->offset_vector[md->offset_end - number] =
! 817: (int)(eptr - md->start_subject);
! 818:
! 819: flags = (op == OP_SCBRA)? match_cbegroup : 0;
! 820: do
! 821: {
! 822: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 823: ims, eptrb, flags, RM1);
! 824: if (rrc != MATCH_NOMATCH &&
! 825: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 826: RRETURN(rrc);
! 827: md->capture_last = save_capture_last;
! 828: ecode += GET(ecode, 1);
! 829: }
! 830: while (*ecode == OP_ALT);
! 831:
! 832: DPRINTF(("bracket %d failed\n", number));
! 833:
! 834: md->offset_vector[offset] = save_offset1;
! 835: md->offset_vector[offset+1] = save_offset2;
! 836: md->offset_vector[md->offset_end - number] = save_offset3;
! 837:
! 838: if (rrc != MATCH_THEN) md->mark = markptr;
! 839: RRETURN(MATCH_NOMATCH);
! 840: }
! 841:
! 842: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
! 843: as a non-capturing bracket. */
! 844:
! 845: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 846: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 847:
! 848: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
! 849:
! 850: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 851: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 852:
! 853: /* Non-capturing bracket. Loop for all the alternatives. When we get to the
! 854: final alternative within the brackets, we would return the result of a
! 855: recursive call to match() whatever happened. We can reduce stack usage by
! 856: turning this into a tail recursion, except in the case when match_cbegroup
! 857: is set.*/
! 858:
! 859: case OP_BRA:
! 860: case OP_SBRA:
! 861: DPRINTF(("start non-capturing bracket\n"));
! 862: flags = (op >= OP_SBRA)? match_cbegroup : 0;
! 863: for (;;)
! 864: {
! 865: if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
! 866: {
! 867: if (flags == 0) /* Not a possibly empty group */
! 868: {
! 869: ecode += _pcre_OP_lengths[*ecode];
! 870: DPRINTF(("bracket 0 tail recursion\n"));
! 871: goto TAIL_RECURSE;
! 872: }
! 873:
! 874: /* Possibly empty group; can't use tail recursion. */
! 875:
! 876: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
! 877: eptrb, flags, RM48);
! 878: if (rrc == MATCH_NOMATCH) md->mark = markptr;
! 879: RRETURN(rrc);
! 880: }
! 881:
! 882: /* For non-final alternatives, continue the loop for a NOMATCH result;
! 883: otherwise return. */
! 884:
! 885: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
! 886: eptrb, flags, RM2);
! 887: if (rrc != MATCH_NOMATCH &&
! 888: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 889: RRETURN(rrc);
! 890: ecode += GET(ecode, 1);
! 891: }
! 892: /* Control never reaches here. */
! 893:
! 894: /* Conditional group: compilation checked that there are no more than
! 895: two branches. If the condition is false, skipping the first branch takes us
! 896: past the end if there is only one branch, but that's OK because that is
! 897: exactly what going to the ket would do. As there is only one branch to be
! 898: obeyed, we can use tail recursion to avoid using another stack frame. */
! 899:
! 900: case OP_COND:
! 901: case OP_SCOND:
! 902: codelink= GET(ecode, 1);
! 903:
! 904: /* Because of the way auto-callout works during compile, a callout item is
! 905: inserted between OP_COND and an assertion condition. */
! 906:
! 907: if (ecode[LINK_SIZE+1] == OP_CALLOUT)
! 908: {
! 909: if (pcre_callout != NULL)
! 910: {
! 911: pcre_callout_block cb;
! 912: cb.version = 1; /* Version 1 of the callout block */
! 913: cb.callout_number = ecode[LINK_SIZE+2];
! 914: cb.offset_vector = md->offset_vector;
! 915: cb.subject = (PCRE_SPTR)md->start_subject;
! 916: cb.subject_length = (int)(md->end_subject - md->start_subject);
! 917: cb.start_match = (int)(mstart - md->start_subject);
! 918: cb.current_position = (int)(eptr - md->start_subject);
! 919: cb.pattern_position = GET(ecode, LINK_SIZE + 3);
! 920: cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
! 921: cb.capture_top = offset_top/2;
! 922: cb.capture_last = md->capture_last;
! 923: cb.callout_data = md->callout_data;
! 924: if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
! 925: if (rrc < 0) RRETURN(rrc);
! 926: }
! 927: ecode += _pcre_OP_lengths[OP_CALLOUT];
! 928: }
! 929:
! 930: condcode = ecode[LINK_SIZE+1];
! 931:
! 932: /* Now see what the actual condition is */
! 933:
! 934: if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
! 935: {
! 936: if (md->recursive == NULL) /* Not recursing => FALSE */
! 937: {
! 938: condition = FALSE;
! 939: ecode += GET(ecode, 1);
! 940: }
! 941: else
! 942: {
! 943: int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
! 944: condition = (recno == RREF_ANY || recno == md->recursive->group_num);
! 945:
! 946: /* If the test is for recursion into a specific subpattern, and it is
! 947: false, but the test was set up by name, scan the table to see if the
! 948: name refers to any other numbers, and test them. The condition is true
! 949: if any one is set. */
! 950:
! 951: if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
! 952: {
! 953: uschar *slotA = md->name_table;
! 954: for (i = 0; i < md->name_count; i++)
! 955: {
! 956: if (GET2(slotA, 0) == recno) break;
! 957: slotA += md->name_entry_size;
! 958: }
! 959:
! 960: /* Found a name for the number - there can be only one; duplicate
! 961: names for different numbers are allowed, but not vice versa. First
! 962: scan down for duplicates. */
! 963:
! 964: if (i < md->name_count)
! 965: {
! 966: uschar *slotB = slotA;
! 967: while (slotB > md->name_table)
! 968: {
! 969: slotB -= md->name_entry_size;
! 970: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 971: {
! 972: condition = GET2(slotB, 0) == md->recursive->group_num;
! 973: if (condition) break;
! 974: }
! 975: else break;
! 976: }
! 977:
! 978: /* Scan up for duplicates */
! 979:
! 980: if (!condition)
! 981: {
! 982: slotB = slotA;
! 983: for (i++; i < md->name_count; i++)
! 984: {
! 985: slotB += md->name_entry_size;
! 986: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 987: {
! 988: condition = GET2(slotB, 0) == md->recursive->group_num;
! 989: if (condition) break;
! 990: }
! 991: else break;
! 992: }
! 993: }
! 994: }
! 995: }
! 996:
! 997: /* Choose branch according to the condition */
! 998:
! 999: ecode += condition? 3 : GET(ecode, 1);
! 1000: }
! 1001: }
! 1002:
! 1003: else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
! 1004: {
! 1005: offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
! 1006: condition = offset < offset_top && md->offset_vector[offset] >= 0;
! 1007:
! 1008: /* If the numbered capture is unset, but the reference was by name,
! 1009: scan the table to see if the name refers to any other numbers, and test
! 1010: them. The condition is true if any one is set. This is tediously similar
! 1011: to the code above, but not close enough to try to amalgamate. */
! 1012:
! 1013: if (!condition && condcode == OP_NCREF)
! 1014: {
! 1015: int refno = offset >> 1;
! 1016: uschar *slotA = md->name_table;
! 1017:
! 1018: for (i = 0; i < md->name_count; i++)
! 1019: {
! 1020: if (GET2(slotA, 0) == refno) break;
! 1021: slotA += md->name_entry_size;
! 1022: }
! 1023:
! 1024: /* Found a name for the number - there can be only one; duplicate names
! 1025: for different numbers are allowed, but not vice versa. First scan down
! 1026: for duplicates. */
! 1027:
! 1028: if (i < md->name_count)
! 1029: {
! 1030: uschar *slotB = slotA;
! 1031: while (slotB > md->name_table)
! 1032: {
! 1033: slotB -= md->name_entry_size;
! 1034: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1035: {
! 1036: offset = GET2(slotB, 0) << 1;
! 1037: condition = offset < offset_top &&
! 1038: md->offset_vector[offset] >= 0;
! 1039: if (condition) break;
! 1040: }
! 1041: else break;
! 1042: }
! 1043:
! 1044: /* Scan up for duplicates */
! 1045:
! 1046: if (!condition)
! 1047: {
! 1048: slotB = slotA;
! 1049: for (i++; i < md->name_count; i++)
! 1050: {
! 1051: slotB += md->name_entry_size;
! 1052: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1053: {
! 1054: offset = GET2(slotB, 0) << 1;
! 1055: condition = offset < offset_top &&
! 1056: md->offset_vector[offset] >= 0;
! 1057: if (condition) break;
! 1058: }
! 1059: else break;
! 1060: }
! 1061: }
! 1062: }
! 1063: }
! 1064:
! 1065: /* Choose branch according to the condition */
! 1066:
! 1067: ecode += condition? 3 : GET(ecode, 1);
! 1068: }
! 1069:
! 1070: else if (condcode == OP_DEF) /* DEFINE - always false */
! 1071: {
! 1072: condition = FALSE;
! 1073: ecode += GET(ecode, 1);
! 1074: }
! 1075:
! 1076: /* The condition is an assertion. Call match() to evaluate it - setting
! 1077: the final argument match_condassert causes it to stop at the end of an
! 1078: assertion. */
! 1079:
! 1080: else
! 1081: {
! 1082: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
! 1083: match_condassert, RM3);
! 1084: if (rrc == MATCH_MATCH)
! 1085: {
! 1086: condition = TRUE;
! 1087: ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
! 1088: while (*ecode == OP_ALT) ecode += GET(ecode, 1);
! 1089: }
! 1090: else if (rrc != MATCH_NOMATCH &&
! 1091: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 1092: {
! 1093: RRETURN(rrc); /* Need braces because of following else */
! 1094: }
! 1095: else
! 1096: {
! 1097: condition = FALSE;
! 1098: ecode += codelink;
! 1099: }
! 1100: }
! 1101:
! 1102: /* We are now at the branch that is to be obeyed. As there is only one,
! 1103: we can use tail recursion to avoid using another stack frame, except when
! 1104: match_cbegroup is required for an unlimited repeat of a possibly empty
! 1105: group. If the second alternative doesn't exist, we can just plough on. */
! 1106:
! 1107: if (condition || *ecode == OP_ALT)
! 1108: {
! 1109: ecode += 1 + LINK_SIZE;
! 1110: if (op == OP_SCOND) /* Possibly empty group */
! 1111: {
! 1112: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
! 1113: RRETURN(rrc);
! 1114: }
! 1115: else /* Group must match something */
! 1116: {
! 1117: flags = 0;
! 1118: goto TAIL_RECURSE;
! 1119: }
! 1120: }
! 1121: else /* Condition false & no alternative */
! 1122: {
! 1123: ecode += 1 + LINK_SIZE;
! 1124: }
! 1125: break;
! 1126:
! 1127:
! 1128: /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
! 1129: to close any currently open capturing brackets. */
! 1130:
! 1131: case OP_CLOSE:
! 1132: number = GET2(ecode, 1);
! 1133: offset = number << 1;
! 1134:
! 1135: #ifdef PCRE_DEBUG
! 1136: printf("end bracket %d at *ACCEPT", number);
! 1137: printf("\n");
! 1138: #endif
! 1139:
! 1140: md->capture_last = number;
! 1141: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
! 1142: {
! 1143: md->offset_vector[offset] =
! 1144: md->offset_vector[md->offset_end - number];
! 1145: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
! 1146: if (offset_top <= offset) offset_top = offset + 2;
! 1147: }
! 1148: ecode += 3;
! 1149: break;
! 1150:
! 1151:
! 1152: /* End of the pattern, either real or forced. If we are in a top-level
! 1153: recursion, we should restore the offsets appropriately and continue from
! 1154: after the call. */
! 1155:
! 1156: case OP_ACCEPT:
! 1157: case OP_END:
! 1158: if (md->recursive != NULL && md->recursive->group_num == 0)
! 1159: {
! 1160: recursion_info *rec = md->recursive;
! 1161: DPRINTF(("End of pattern in a (?0) recursion\n"));
! 1162: md->recursive = rec->prevrec;
! 1163: memmove(md->offset_vector, rec->offset_save,
! 1164: rec->saved_max * sizeof(int));
! 1165: offset_top = rec->save_offset_top;
! 1166: ims = original_ims;
! 1167: ecode = rec->after_call;
! 1168: break;
! 1169: }
! 1170:
! 1171: /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
! 1172: set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
! 1173: the subject. In both cases, backtracking will then try other alternatives,
! 1174: if any. */
! 1175:
! 1176: if (eptr == mstart &&
! 1177: (md->notempty ||
! 1178: (md->notempty_atstart &&
! 1179: mstart == md->start_subject + md->start_offset)))
! 1180: MRRETURN(MATCH_NOMATCH);
! 1181:
! 1182: /* Otherwise, we have a match. */
! 1183:
! 1184: md->end_match_ptr = eptr; /* Record where we ended */
! 1185: md->end_offset_top = offset_top; /* and how many extracts were taken */
! 1186: md->start_match_ptr = mstart; /* and the start (\K can modify) */
! 1187:
! 1188: /* For some reason, the macros don't work properly if an expression is
! 1189: given as the argument to MRRETURN when the heap is in use. */
! 1190:
! 1191: rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
! 1192: MRRETURN(rrc);
! 1193:
! 1194: /* Change option settings */
! 1195:
! 1196: case OP_OPT:
! 1197: ims = ecode[1];
! 1198: ecode += 2;
! 1199: DPRINTF(("ims set to %02lx\n", ims));
! 1200: break;
! 1201:
! 1202: /* Assertion brackets. Check the alternative branches in turn - the
! 1203: matching won't pass the KET for an assertion. If any one branch matches,
! 1204: the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
! 1205: start of each branch to move the current point backwards, so the code at
! 1206: this level is identical to the lookahead case. */
! 1207:
! 1208: case OP_ASSERT:
! 1209: case OP_ASSERTBACK:
! 1210: do
! 1211: {
! 1212: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
! 1213: RM4);
! 1214: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
! 1215: {
! 1216: mstart = md->start_match_ptr; /* In case \K reset it */
! 1217: break;
! 1218: }
! 1219: if (rrc != MATCH_NOMATCH &&
! 1220: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 1221: RRETURN(rrc);
! 1222: ecode += GET(ecode, 1);
! 1223: }
! 1224: while (*ecode == OP_ALT);
! 1225: if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
! 1226:
! 1227: /* If checking an assertion for a condition, return MATCH_MATCH. */
! 1228:
! 1229: if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
! 1230:
! 1231: /* Continue from after the assertion, updating the offsets high water
! 1232: mark, since extracts may have been taken during the assertion. */
! 1233:
! 1234: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1235: ecode += 1 + LINK_SIZE;
! 1236: offset_top = md->end_offset_top;
! 1237: continue;
! 1238:
! 1239: /* Negative assertion: all branches must fail to match. Encountering SKIP,
! 1240: PRUNE, or COMMIT means we must assume failure without checking subsequent
! 1241: branches. */
! 1242:
! 1243: case OP_ASSERT_NOT:
! 1244: case OP_ASSERTBACK_NOT:
! 1245: do
! 1246: {
! 1247: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
! 1248: RM5);
! 1249: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
! 1250: if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
! 1251: {
! 1252: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1253: break;
! 1254: }
! 1255: if (rrc != MATCH_NOMATCH &&
! 1256: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 1257: RRETURN(rrc);
! 1258: ecode += GET(ecode,1);
! 1259: }
! 1260: while (*ecode == OP_ALT);
! 1261:
! 1262: if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
! 1263:
! 1264: ecode += 1 + LINK_SIZE;
! 1265: continue;
! 1266:
! 1267: /* Move the subject pointer back. This occurs only at the start of
! 1268: each branch of a lookbehind assertion. If we are too close to the start to
! 1269: move back, this match function fails. When working with UTF-8 we move
! 1270: back a number of characters, not bytes. */
! 1271:
! 1272: case OP_REVERSE:
! 1273: #ifdef SUPPORT_UTF8
! 1274: if (utf8)
! 1275: {
! 1276: i = GET(ecode, 1);
! 1277: while (i-- > 0)
! 1278: {
! 1279: eptr--;
! 1280: if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
! 1281: BACKCHAR(eptr);
! 1282: }
! 1283: }
! 1284: else
! 1285: #endif
! 1286:
! 1287: /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
! 1288:
! 1289: {
! 1290: eptr -= GET(ecode, 1);
! 1291: if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
! 1292: }
! 1293:
! 1294: /* Save the earliest consulted character, then skip to next op code */
! 1295:
! 1296: if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
! 1297: ecode += 1 + LINK_SIZE;
! 1298: break;
! 1299:
! 1300: /* The callout item calls an external function, if one is provided, passing
! 1301: details of the match so far. This is mainly for debugging, though the
! 1302: function is able to force a failure. */
! 1303:
! 1304: case OP_CALLOUT:
! 1305: if (pcre_callout != NULL)
! 1306: {
! 1307: pcre_callout_block cb;
! 1308: cb.version = 1; /* Version 1 of the callout block */
! 1309: cb.callout_number = ecode[1];
! 1310: cb.offset_vector = md->offset_vector;
! 1311: cb.subject = (PCRE_SPTR)md->start_subject;
! 1312: cb.subject_length = (int)(md->end_subject - md->start_subject);
! 1313: cb.start_match = (int)(mstart - md->start_subject);
! 1314: cb.current_position = (int)(eptr - md->start_subject);
! 1315: cb.pattern_position = GET(ecode, 2);
! 1316: cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
! 1317: cb.capture_top = offset_top/2;
! 1318: cb.capture_last = md->capture_last;
! 1319: cb.callout_data = md->callout_data;
! 1320: if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
! 1321: if (rrc < 0) RRETURN(rrc);
! 1322: }
! 1323: ecode += 2 + 2*LINK_SIZE;
! 1324: break;
! 1325:
! 1326: /* Recursion either matches the current regex, or some subexpression. The
! 1327: offset data is the offset to the starting bracket from the start of the
! 1328: whole pattern. (This is so that it works from duplicated subpatterns.)
! 1329:
! 1330: If there are any capturing brackets started but not finished, we have to
! 1331: save their starting points and reinstate them after the recursion. However,
! 1332: we don't know how many such there are (offset_top records the completed
! 1333: total) so we just have to save all the potential data. There may be up to
! 1334: 65535 such values, which is too large to put on the stack, but using malloc
! 1335: for small numbers seems expensive. As a compromise, the stack is used when
! 1336: there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
! 1337: is used. If the malloc fails, the offsets cannot be saved, so no attempt
! 1338: is made to carry on with a partial save: match() gives up at this point and
! 1339: returns PCRE_ERROR_NOMEMORY.
! 1340:
! 1341: There are also other values that have to be saved. We use a chained
! 1342: sequence of blocks that actually live on the stack. Thanks to Robin Houston
! 1343: for the original version of this logic. */
! 1344:
! 1345: case OP_RECURSE:
! 1346: {
! 1347: callpat = md->start_code + GET(ecode, 1);
! 1348: new_recursive.group_num = (callpat == md->start_code)? 0 :
! 1349: GET2(callpat, 1 + LINK_SIZE);
! 1350:
! 1351: /* Add to "recursing stack" */
! 1352:
! 1353: new_recursive.prevrec = md->recursive;
! 1354: md->recursive = &new_recursive;
! 1355:
! 1356: /* Find where to continue from afterwards */
! 1357:
! 1358: ecode += 1 + LINK_SIZE;
! 1359: new_recursive.after_call = ecode;
! 1360:
! 1361: /* Now save the offset data. */
! 1362:
! 1363: new_recursive.saved_max = md->offset_end;
! 1364: if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
! 1365: new_recursive.offset_save = stacksave;
! 1366: else
! 1367: {
! 1368: new_recursive.offset_save =
! 1369: (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
! 1370: if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
! 1371: }
! 1372:
! 1373: memcpy(new_recursive.offset_save, md->offset_vector,
! 1374: new_recursive.saved_max * sizeof(int));
! 1375: new_recursive.save_offset_top = offset_top;
! 1376:
! 1377: /* OK, now we can do the recursion. For each top-level alternative we
! 1378: restore the offset and recursion data. */
! 1379:
! 1380: DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
! 1381: flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
! 1382: do
! 1383: {
! 1384: RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
! 1385: md, ims, eptrb, flags, RM6);
! 1386: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
! 1387: {
! 1388: DPRINTF(("Recursion matched\n"));
! 1389: md->recursive = new_recursive.prevrec;
! 1390: if (new_recursive.offset_save != stacksave)
! 1391: (pcre_free)(new_recursive.offset_save);
! 1392: MRRETURN(MATCH_MATCH);
! 1393: }
! 1394: else if (rrc != MATCH_NOMATCH &&
! 1395: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 1396: {
! 1397: DPRINTF(("Recursion gave error %d\n", rrc));
! 1398: if (new_recursive.offset_save != stacksave)
! 1399: (pcre_free)(new_recursive.offset_save);
! 1400: RRETURN(rrc);
! 1401: }
! 1402:
! 1403: md->recursive = &new_recursive;
! 1404: memcpy(md->offset_vector, new_recursive.offset_save,
! 1405: new_recursive.saved_max * sizeof(int));
! 1406: callpat += GET(callpat, 1);
! 1407: }
! 1408: while (*callpat == OP_ALT);
! 1409:
! 1410: DPRINTF(("Recursion didn't match\n"));
! 1411: md->recursive = new_recursive.prevrec;
! 1412: if (new_recursive.offset_save != stacksave)
! 1413: (pcre_free)(new_recursive.offset_save);
! 1414: MRRETURN(MATCH_NOMATCH);
! 1415: }
! 1416: /* Control never reaches here */
! 1417:
! 1418: /* "Once" brackets are like assertion brackets except that after a match,
! 1419: the point in the subject string is not moved back. Thus there can never be
! 1420: a move back into the brackets. Friedl calls these "atomic" subpatterns.
! 1421: Check the alternative branches in turn - the matching won't pass the KET
! 1422: for this kind of subpattern. If any one branch matches, we carry on as at
! 1423: the end of a normal bracket, leaving the subject pointer, but resetting
! 1424: the start-of-match value in case it was changed by \K. */
! 1425:
! 1426: case OP_ONCE:
! 1427: prev = ecode;
! 1428: saved_eptr = eptr;
! 1429:
! 1430: do
! 1431: {
! 1432: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
! 1433: if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
! 1434: {
! 1435: mstart = md->start_match_ptr;
! 1436: break;
! 1437: }
! 1438: if (rrc != MATCH_NOMATCH &&
! 1439: (rrc != MATCH_THEN || md->start_match_ptr != ecode))
! 1440: RRETURN(rrc);
! 1441: ecode += GET(ecode,1);
! 1442: }
! 1443: while (*ecode == OP_ALT);
! 1444:
! 1445: /* If we hit the end of the group (which could be repeated), fail */
! 1446:
! 1447: if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
! 1448:
! 1449: /* Continue as from after the assertion, updating the offsets high water
! 1450: mark, since extracts may have been taken. */
! 1451:
! 1452: do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
! 1453:
! 1454: offset_top = md->end_offset_top;
! 1455: eptr = md->end_match_ptr;
! 1456:
! 1457: /* For a non-repeating ket, just continue at this level. This also
! 1458: happens for a repeating ket if no characters were matched in the group.
! 1459: This is the forcible breaking of infinite loops as implemented in Perl
! 1460: 5.005. If there is an options reset, it will get obeyed in the normal
! 1461: course of events. */
! 1462:
! 1463: if (*ecode == OP_KET || eptr == saved_eptr)
! 1464: {
! 1465: ecode += 1+LINK_SIZE;
! 1466: break;
! 1467: }
! 1468:
! 1469: /* The repeating kets try the rest of the pattern or restart from the
! 1470: preceding bracket, in the appropriate order. The second "call" of match()
! 1471: uses tail recursion, to avoid using another stack frame. We need to reset
! 1472: any options that changed within the bracket before re-running it, so
! 1473: check the next opcode. */
! 1474:
! 1475: if (ecode[1+LINK_SIZE] == OP_OPT)
! 1476: {
! 1477: ims = (ims & ~PCRE_IMS) | ecode[4];
! 1478: DPRINTF(("ims set to %02lx at group repeat\n", ims));
! 1479: }
! 1480:
! 1481: if (*ecode == OP_KETRMIN)
! 1482: {
! 1483: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
! 1484: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1485: ecode = prev;
! 1486: flags = 0;
! 1487: goto TAIL_RECURSE;
! 1488: }
! 1489: else /* OP_KETRMAX */
! 1490: {
! 1491: RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
! 1492: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1493: ecode += 1 + LINK_SIZE;
! 1494: flags = 0;
! 1495: goto TAIL_RECURSE;
! 1496: }
! 1497: /* Control never gets here */
! 1498:
! 1499: /* An alternation is the end of a branch; scan along to find the end of the
! 1500: bracketed group and go to there. */
! 1501:
! 1502: case OP_ALT:
! 1503: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1504: break;
! 1505:
! 1506: /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
! 1507: indicating that it may occur zero times. It may repeat infinitely, or not
! 1508: at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
! 1509: with fixed upper repeat limits are compiled as a number of copies, with the
! 1510: optional ones preceded by BRAZERO or BRAMINZERO. */
! 1511:
! 1512: case OP_BRAZERO:
! 1513: {
! 1514: next = ecode+1;
! 1515: RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
! 1516: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1517: do next += GET(next,1); while (*next == OP_ALT);
! 1518: ecode = next + 1 + LINK_SIZE;
! 1519: }
! 1520: break;
! 1521:
! 1522: case OP_BRAMINZERO:
! 1523: {
! 1524: next = ecode+1;
! 1525: do next += GET(next, 1); while (*next == OP_ALT);
! 1526: RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
! 1527: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1528: ecode++;
! 1529: }
! 1530: break;
! 1531:
! 1532: case OP_SKIPZERO:
! 1533: {
! 1534: next = ecode+1;
! 1535: do next += GET(next,1); while (*next == OP_ALT);
! 1536: ecode = next + 1 + LINK_SIZE;
! 1537: }
! 1538: break;
! 1539:
! 1540: /* End of a group, repeated or non-repeating. */
! 1541:
! 1542: case OP_KET:
! 1543: case OP_KETRMIN:
! 1544: case OP_KETRMAX:
! 1545: prev = ecode - GET(ecode, 1);
! 1546:
! 1547: /* If this was a group that remembered the subject start, in order to break
! 1548: infinite repeats of empty string matches, retrieve the subject start from
! 1549: the chain. Otherwise, set it NULL. */
! 1550:
! 1551: if (*prev >= OP_SBRA)
! 1552: {
! 1553: saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
! 1554: eptrb = eptrb->epb_prev; /* Backup to previous group */
! 1555: }
! 1556: else saved_eptr = NULL;
! 1557:
! 1558: /* If we are at the end of an assertion group or an atomic group, stop
! 1559: matching and return MATCH_MATCH, but record the current high water mark for
! 1560: use by positive assertions. We also need to record the match start in case
! 1561: it was changed by \K. */
! 1562:
! 1563: if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
! 1564: *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
! 1565: *prev == OP_ONCE)
! 1566: {
! 1567: md->end_match_ptr = eptr; /* For ONCE */
! 1568: md->end_offset_top = offset_top;
! 1569: md->start_match_ptr = mstart;
! 1570: MRRETURN(MATCH_MATCH);
! 1571: }
! 1572:
! 1573: /* For capturing groups we have to check the group number back at the start
! 1574: and if necessary complete handling an extraction by setting the offsets and
! 1575: bumping the high water mark. Note that whole-pattern recursion is coded as
! 1576: a recurse into group 0, so it won't be picked up here. Instead, we catch it
! 1577: when the OP_END is reached. Other recursion is handled here. */
! 1578:
! 1579: if (*prev == OP_CBRA || *prev == OP_SCBRA)
! 1580: {
! 1581: number = GET2(prev, 1+LINK_SIZE);
! 1582: offset = number << 1;
! 1583:
! 1584: #ifdef PCRE_DEBUG
! 1585: printf("end bracket %d", number);
! 1586: printf("\n");
! 1587: #endif
! 1588:
! 1589: md->capture_last = number;
! 1590: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
! 1591: {
! 1592: md->offset_vector[offset] =
! 1593: md->offset_vector[md->offset_end - number];
! 1594: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
! 1595: if (offset_top <= offset) offset_top = offset + 2;
! 1596: }
! 1597:
! 1598: /* Handle a recursively called group. Restore the offsets
! 1599: appropriately and continue from after the call. */
! 1600:
! 1601: if (md->recursive != NULL && md->recursive->group_num == number)
! 1602: {
! 1603: recursion_info *rec = md->recursive;
! 1604: DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
! 1605: md->recursive = rec->prevrec;
! 1606: memcpy(md->offset_vector, rec->offset_save,
! 1607: rec->saved_max * sizeof(int));
! 1608: offset_top = rec->save_offset_top;
! 1609: ecode = rec->after_call;
! 1610: ims = original_ims;
! 1611: break;
! 1612: }
! 1613: }
! 1614:
! 1615: /* For both capturing and non-capturing groups, reset the value of the ims
! 1616: flags, in case they got changed during the group. */
! 1617:
! 1618: ims = original_ims;
! 1619: DPRINTF(("ims reset to %02lx\n", ims));
! 1620:
! 1621: /* For a non-repeating ket, just continue at this level. This also
! 1622: happens for a repeating ket if no characters were matched in the group.
! 1623: This is the forcible breaking of infinite loops as implemented in Perl
! 1624: 5.005. If there is an options reset, it will get obeyed in the normal
! 1625: course of events. */
! 1626:
! 1627: if (*ecode == OP_KET || eptr == saved_eptr)
! 1628: {
! 1629: ecode += 1 + LINK_SIZE;
! 1630: break;
! 1631: }
! 1632:
! 1633: /* The repeating kets try the rest of the pattern or restart from the
! 1634: preceding bracket, in the appropriate order. In the second case, we can use
! 1635: tail recursion to avoid using another stack frame, unless we have an
! 1636: unlimited repeat of a group that can match an empty string. */
! 1637:
! 1638: flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
! 1639:
! 1640: if (*ecode == OP_KETRMIN)
! 1641: {
! 1642: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
! 1643: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1644: if (flags != 0) /* Could match an empty string */
! 1645: {
! 1646: RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
! 1647: RRETURN(rrc);
! 1648: }
! 1649: ecode = prev;
! 1650: goto TAIL_RECURSE;
! 1651: }
! 1652: else /* OP_KETRMAX */
! 1653: {
! 1654: RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
! 1655: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1656: ecode += 1 + LINK_SIZE;
! 1657: flags = 0;
! 1658: goto TAIL_RECURSE;
! 1659: }
! 1660: /* Control never gets here */
! 1661:
! 1662: /* Start of subject unless notbol, or after internal newline if multiline */
! 1663:
! 1664: case OP_CIRC:
! 1665: if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
! 1666: if ((ims & PCRE_MULTILINE) != 0)
! 1667: {
! 1668: if (eptr != md->start_subject &&
! 1669: (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
! 1670: MRRETURN(MATCH_NOMATCH);
! 1671: ecode++;
! 1672: break;
! 1673: }
! 1674: /* ... else fall through */
! 1675:
! 1676: /* Start of subject assertion */
! 1677:
! 1678: case OP_SOD:
! 1679: if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
! 1680: ecode++;
! 1681: break;
! 1682:
! 1683: /* Start of match assertion */
! 1684:
! 1685: case OP_SOM:
! 1686: if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
! 1687: ecode++;
! 1688: break;
! 1689:
! 1690: /* Reset the start of match point */
! 1691:
! 1692: case OP_SET_SOM:
! 1693: mstart = eptr;
! 1694: ecode++;
! 1695: break;
! 1696:
! 1697: /* Assert before internal newline if multiline, or before a terminating
! 1698: newline unless endonly is set, else end of subject unless noteol is set. */
! 1699:
! 1700: case OP_DOLL:
! 1701: if ((ims & PCRE_MULTILINE) != 0)
! 1702: {
! 1703: if (eptr < md->end_subject)
! 1704: { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
! 1705: else
! 1706: {
! 1707: if (md->noteol) MRRETURN(MATCH_NOMATCH);
! 1708: SCHECK_PARTIAL();
! 1709: }
! 1710: ecode++;
! 1711: break;
! 1712: }
! 1713: else /* Not multiline */
! 1714: {
! 1715: if (md->noteol) MRRETURN(MATCH_NOMATCH);
! 1716: if (!md->endonly) goto ASSERT_NL_OR_EOS;
! 1717: }
! 1718:
! 1719: /* ... else fall through for endonly */
! 1720:
! 1721: /* End of subject assertion (\z) */
! 1722:
! 1723: case OP_EOD:
! 1724: if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
! 1725: SCHECK_PARTIAL();
! 1726: ecode++;
! 1727: break;
! 1728:
! 1729: /* End of subject or ending \n assertion (\Z) */
! 1730:
! 1731: case OP_EODN:
! 1732: ASSERT_NL_OR_EOS:
! 1733: if (eptr < md->end_subject &&
! 1734: (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
! 1735: MRRETURN(MATCH_NOMATCH);
! 1736:
! 1737: /* Either at end of string or \n before end. */
! 1738:
! 1739: SCHECK_PARTIAL();
! 1740: ecode++;
! 1741: break;
! 1742:
! 1743: /* Word boundary assertions */
! 1744:
! 1745: case OP_NOT_WORD_BOUNDARY:
! 1746: case OP_WORD_BOUNDARY:
! 1747: {
! 1748:
! 1749: /* Find out if the previous and current characters are "word" characters.
! 1750: It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
! 1751: be "non-word" characters. Remember the earliest consulted character for
! 1752: partial matching. */
! 1753:
! 1754: #ifdef SUPPORT_UTF8
! 1755: if (utf8)
! 1756: {
! 1757: /* Get status of previous character */
! 1758:
! 1759: if (eptr == md->start_subject) prev_is_word = FALSE; else
! 1760: {
! 1761: USPTR lastptr = eptr - 1;
! 1762: while((*lastptr & 0xc0) == 0x80) lastptr--;
! 1763: if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
! 1764: GETCHAR(c, lastptr);
! 1765: #ifdef SUPPORT_UCP
! 1766: if (md->use_ucp)
! 1767: {
! 1768: if (c == '_') prev_is_word = TRUE; else
! 1769: {
! 1770: int cat = UCD_CATEGORY(c);
! 1771: prev_is_word = (cat == ucp_L || cat == ucp_N);
! 1772: }
! 1773: }
! 1774: else
! 1775: #endif
! 1776: prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
! 1777: }
! 1778:
! 1779: /* Get status of next character */
! 1780:
! 1781: if (eptr >= md->end_subject)
! 1782: {
! 1783: SCHECK_PARTIAL();
! 1784: cur_is_word = FALSE;
! 1785: }
! 1786: else
! 1787: {
! 1788: GETCHAR(c, eptr);
! 1789: #ifdef SUPPORT_UCP
! 1790: if (md->use_ucp)
! 1791: {
! 1792: if (c == '_') cur_is_word = TRUE; else
! 1793: {
! 1794: int cat = UCD_CATEGORY(c);
! 1795: cur_is_word = (cat == ucp_L || cat == ucp_N);
! 1796: }
! 1797: }
! 1798: else
! 1799: #endif
! 1800: cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
! 1801: }
! 1802: }
! 1803: else
! 1804: #endif
! 1805:
! 1806: /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
! 1807: consistency with the behaviour of \w we do use it in this case. */
! 1808:
! 1809: {
! 1810: /* Get status of previous character */
! 1811:
! 1812: if (eptr == md->start_subject) prev_is_word = FALSE; else
! 1813: {
! 1814: if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
! 1815: #ifdef SUPPORT_UCP
! 1816: if (md->use_ucp)
! 1817: {
! 1818: c = eptr[-1];
! 1819: if (c == '_') prev_is_word = TRUE; else
! 1820: {
! 1821: int cat = UCD_CATEGORY(c);
! 1822: prev_is_word = (cat == ucp_L || cat == ucp_N);
! 1823: }
! 1824: }
! 1825: else
! 1826: #endif
! 1827: prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
! 1828: }
! 1829:
! 1830: /* Get status of next character */
! 1831:
! 1832: if (eptr >= md->end_subject)
! 1833: {
! 1834: SCHECK_PARTIAL();
! 1835: cur_is_word = FALSE;
! 1836: }
! 1837: else
! 1838: #ifdef SUPPORT_UCP
! 1839: if (md->use_ucp)
! 1840: {
! 1841: c = *eptr;
! 1842: if (c == '_') cur_is_word = TRUE; else
! 1843: {
! 1844: int cat = UCD_CATEGORY(c);
! 1845: cur_is_word = (cat == ucp_L || cat == ucp_N);
! 1846: }
! 1847: }
! 1848: else
! 1849: #endif
! 1850: cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
! 1851: }
! 1852:
! 1853: /* Now see if the situation is what we want */
! 1854:
! 1855: if ((*ecode++ == OP_WORD_BOUNDARY)?
! 1856: cur_is_word == prev_is_word : cur_is_word != prev_is_word)
! 1857: MRRETURN(MATCH_NOMATCH);
! 1858: }
! 1859: break;
! 1860:
! 1861: /* Match a single character type; inline for speed */
! 1862:
! 1863: case OP_ANY:
! 1864: if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
! 1865: /* Fall through */
! 1866:
! 1867: case OP_ALLANY:
! 1868: if (eptr++ >= md->end_subject)
! 1869: {
! 1870: SCHECK_PARTIAL();
! 1871: MRRETURN(MATCH_NOMATCH);
! 1872: }
! 1873: if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 1874: ecode++;
! 1875: break;
! 1876:
! 1877: /* Match a single byte, even in UTF-8 mode. This opcode really does match
! 1878: any byte, even newline, independent of the setting of PCRE_DOTALL. */
! 1879:
! 1880: case OP_ANYBYTE:
! 1881: if (eptr++ >= md->end_subject)
! 1882: {
! 1883: SCHECK_PARTIAL();
! 1884: MRRETURN(MATCH_NOMATCH);
! 1885: }
! 1886: ecode++;
! 1887: break;
! 1888:
! 1889: case OP_NOT_DIGIT:
! 1890: if (eptr >= md->end_subject)
! 1891: {
! 1892: SCHECK_PARTIAL();
! 1893: MRRETURN(MATCH_NOMATCH);
! 1894: }
! 1895: GETCHARINCTEST(c, eptr);
! 1896: if (
! 1897: #ifdef SUPPORT_UTF8
! 1898: c < 256 &&
! 1899: #endif
! 1900: (md->ctypes[c] & ctype_digit) != 0
! 1901: )
! 1902: MRRETURN(MATCH_NOMATCH);
! 1903: ecode++;
! 1904: break;
! 1905:
! 1906: case OP_DIGIT:
! 1907: if (eptr >= md->end_subject)
! 1908: {
! 1909: SCHECK_PARTIAL();
! 1910: MRRETURN(MATCH_NOMATCH);
! 1911: }
! 1912: GETCHARINCTEST(c, eptr);
! 1913: if (
! 1914: #ifdef SUPPORT_UTF8
! 1915: c >= 256 ||
! 1916: #endif
! 1917: (md->ctypes[c] & ctype_digit) == 0
! 1918: )
! 1919: MRRETURN(MATCH_NOMATCH);
! 1920: ecode++;
! 1921: break;
! 1922:
! 1923: case OP_NOT_WHITESPACE:
! 1924: if (eptr >= md->end_subject)
! 1925: {
! 1926: SCHECK_PARTIAL();
! 1927: MRRETURN(MATCH_NOMATCH);
! 1928: }
! 1929: GETCHARINCTEST(c, eptr);
! 1930: if (
! 1931: #ifdef SUPPORT_UTF8
! 1932: c < 256 &&
! 1933: #endif
! 1934: (md->ctypes[c] & ctype_space) != 0
! 1935: )
! 1936: MRRETURN(MATCH_NOMATCH);
! 1937: ecode++;
! 1938: break;
! 1939:
! 1940: case OP_WHITESPACE:
! 1941: if (eptr >= md->end_subject)
! 1942: {
! 1943: SCHECK_PARTIAL();
! 1944: MRRETURN(MATCH_NOMATCH);
! 1945: }
! 1946: GETCHARINCTEST(c, eptr);
! 1947: if (
! 1948: #ifdef SUPPORT_UTF8
! 1949: c >= 256 ||
! 1950: #endif
! 1951: (md->ctypes[c] & ctype_space) == 0
! 1952: )
! 1953: MRRETURN(MATCH_NOMATCH);
! 1954: ecode++;
! 1955: break;
! 1956:
! 1957: case OP_NOT_WORDCHAR:
! 1958: if (eptr >= md->end_subject)
! 1959: {
! 1960: SCHECK_PARTIAL();
! 1961: MRRETURN(MATCH_NOMATCH);
! 1962: }
! 1963: GETCHARINCTEST(c, eptr);
! 1964: if (
! 1965: #ifdef SUPPORT_UTF8
! 1966: c < 256 &&
! 1967: #endif
! 1968: (md->ctypes[c] & ctype_word) != 0
! 1969: )
! 1970: MRRETURN(MATCH_NOMATCH);
! 1971: ecode++;
! 1972: break;
! 1973:
! 1974: case OP_WORDCHAR:
! 1975: if (eptr >= md->end_subject)
! 1976: {
! 1977: SCHECK_PARTIAL();
! 1978: MRRETURN(MATCH_NOMATCH);
! 1979: }
! 1980: GETCHARINCTEST(c, eptr);
! 1981: if (
! 1982: #ifdef SUPPORT_UTF8
! 1983: c >= 256 ||
! 1984: #endif
! 1985: (md->ctypes[c] & ctype_word) == 0
! 1986: )
! 1987: MRRETURN(MATCH_NOMATCH);
! 1988: ecode++;
! 1989: break;
! 1990:
! 1991: case OP_ANYNL:
! 1992: if (eptr >= md->end_subject)
! 1993: {
! 1994: SCHECK_PARTIAL();
! 1995: MRRETURN(MATCH_NOMATCH);
! 1996: }
! 1997: GETCHARINCTEST(c, eptr);
! 1998: switch(c)
! 1999: {
! 2000: default: MRRETURN(MATCH_NOMATCH);
! 2001: case 0x000d:
! 2002: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 2003: break;
! 2004:
! 2005: case 0x000a:
! 2006: break;
! 2007:
! 2008: case 0x000b:
! 2009: case 0x000c:
! 2010: case 0x0085:
! 2011: case 0x2028:
! 2012: case 0x2029:
! 2013: if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
! 2014: break;
! 2015: }
! 2016: ecode++;
! 2017: break;
! 2018:
! 2019: case OP_NOT_HSPACE:
! 2020: if (eptr >= md->end_subject)
! 2021: {
! 2022: SCHECK_PARTIAL();
! 2023: MRRETURN(MATCH_NOMATCH);
! 2024: }
! 2025: GETCHARINCTEST(c, eptr);
! 2026: switch(c)
! 2027: {
! 2028: default: break;
! 2029: case 0x09: /* HT */
! 2030: case 0x20: /* SPACE */
! 2031: case 0xa0: /* NBSP */
! 2032: case 0x1680: /* OGHAM SPACE MARK */
! 2033: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 2034: case 0x2000: /* EN QUAD */
! 2035: case 0x2001: /* EM QUAD */
! 2036: case 0x2002: /* EN SPACE */
! 2037: case 0x2003: /* EM SPACE */
! 2038: case 0x2004: /* THREE-PER-EM SPACE */
! 2039: case 0x2005: /* FOUR-PER-EM SPACE */
! 2040: case 0x2006: /* SIX-PER-EM SPACE */
! 2041: case 0x2007: /* FIGURE SPACE */
! 2042: case 0x2008: /* PUNCTUATION SPACE */
! 2043: case 0x2009: /* THIN SPACE */
! 2044: case 0x200A: /* HAIR SPACE */
! 2045: case 0x202f: /* NARROW NO-BREAK SPACE */
! 2046: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 2047: case 0x3000: /* IDEOGRAPHIC SPACE */
! 2048: MRRETURN(MATCH_NOMATCH);
! 2049: }
! 2050: ecode++;
! 2051: break;
! 2052:
! 2053: case OP_HSPACE:
! 2054: if (eptr >= md->end_subject)
! 2055: {
! 2056: SCHECK_PARTIAL();
! 2057: MRRETURN(MATCH_NOMATCH);
! 2058: }
! 2059: GETCHARINCTEST(c, eptr);
! 2060: switch(c)
! 2061: {
! 2062: default: MRRETURN(MATCH_NOMATCH);
! 2063: case 0x09: /* HT */
! 2064: case 0x20: /* SPACE */
! 2065: case 0xa0: /* NBSP */
! 2066: case 0x1680: /* OGHAM SPACE MARK */
! 2067: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 2068: case 0x2000: /* EN QUAD */
! 2069: case 0x2001: /* EM QUAD */
! 2070: case 0x2002: /* EN SPACE */
! 2071: case 0x2003: /* EM SPACE */
! 2072: case 0x2004: /* THREE-PER-EM SPACE */
! 2073: case 0x2005: /* FOUR-PER-EM SPACE */
! 2074: case 0x2006: /* SIX-PER-EM SPACE */
! 2075: case 0x2007: /* FIGURE SPACE */
! 2076: case 0x2008: /* PUNCTUATION SPACE */
! 2077: case 0x2009: /* THIN SPACE */
! 2078: case 0x200A: /* HAIR SPACE */
! 2079: case 0x202f: /* NARROW NO-BREAK SPACE */
! 2080: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 2081: case 0x3000: /* IDEOGRAPHIC SPACE */
! 2082: break;
! 2083: }
! 2084: ecode++;
! 2085: break;
! 2086:
! 2087: case OP_NOT_VSPACE:
! 2088: if (eptr >= md->end_subject)
! 2089: {
! 2090: SCHECK_PARTIAL();
! 2091: MRRETURN(MATCH_NOMATCH);
! 2092: }
! 2093: GETCHARINCTEST(c, eptr);
! 2094: switch(c)
! 2095: {
! 2096: default: break;
! 2097: case 0x0a: /* LF */
! 2098: case 0x0b: /* VT */
! 2099: case 0x0c: /* FF */
! 2100: case 0x0d: /* CR */
! 2101: case 0x85: /* NEL */
! 2102: case 0x2028: /* LINE SEPARATOR */
! 2103: case 0x2029: /* PARAGRAPH SEPARATOR */
! 2104: MRRETURN(MATCH_NOMATCH);
! 2105: }
! 2106: ecode++;
! 2107: break;
! 2108:
! 2109: case OP_VSPACE:
! 2110: if (eptr >= md->end_subject)
! 2111: {
! 2112: SCHECK_PARTIAL();
! 2113: MRRETURN(MATCH_NOMATCH);
! 2114: }
! 2115: GETCHARINCTEST(c, eptr);
! 2116: switch(c)
! 2117: {
! 2118: default: MRRETURN(MATCH_NOMATCH);
! 2119: case 0x0a: /* LF */
! 2120: case 0x0b: /* VT */
! 2121: case 0x0c: /* FF */
! 2122: case 0x0d: /* CR */
! 2123: case 0x85: /* NEL */
! 2124: case 0x2028: /* LINE SEPARATOR */
! 2125: case 0x2029: /* PARAGRAPH SEPARATOR */
! 2126: break;
! 2127: }
! 2128: ecode++;
! 2129: break;
! 2130:
! 2131: #ifdef SUPPORT_UCP
! 2132: /* Check the next character by Unicode property. We will get here only
! 2133: if the support is in the binary; otherwise a compile-time error occurs. */
! 2134:
! 2135: case OP_PROP:
! 2136: case OP_NOTPROP:
! 2137: if (eptr >= md->end_subject)
! 2138: {
! 2139: SCHECK_PARTIAL();
! 2140: MRRETURN(MATCH_NOMATCH);
! 2141: }
! 2142: GETCHARINCTEST(c, eptr);
! 2143: {
! 2144: const ucd_record *prop = GET_UCD(c);
! 2145:
! 2146: switch(ecode[1])
! 2147: {
! 2148: case PT_ANY:
! 2149: if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
! 2150: break;
! 2151:
! 2152: case PT_LAMP:
! 2153: if ((prop->chartype == ucp_Lu ||
! 2154: prop->chartype == ucp_Ll ||
! 2155: prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
! 2156: MRRETURN(MATCH_NOMATCH);
! 2157: break;
! 2158:
! 2159: case PT_GC:
! 2160: if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
! 2161: MRRETURN(MATCH_NOMATCH);
! 2162: break;
! 2163:
! 2164: case PT_PC:
! 2165: if ((ecode[2] != prop->chartype) == (op == OP_PROP))
! 2166: MRRETURN(MATCH_NOMATCH);
! 2167: break;
! 2168:
! 2169: case PT_SC:
! 2170: if ((ecode[2] != prop->script) == (op == OP_PROP))
! 2171: MRRETURN(MATCH_NOMATCH);
! 2172: break;
! 2173:
! 2174: /* These are specials */
! 2175:
! 2176: case PT_ALNUM:
! 2177: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
! 2178: _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
! 2179: MRRETURN(MATCH_NOMATCH);
! 2180: break;
! 2181:
! 2182: case PT_SPACE: /* Perl space */
! 2183: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
! 2184: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
! 2185: == (op == OP_NOTPROP))
! 2186: MRRETURN(MATCH_NOMATCH);
! 2187: break;
! 2188:
! 2189: case PT_PXSPACE: /* POSIX space */
! 2190: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
! 2191: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
! 2192: c == CHAR_FF || c == CHAR_CR)
! 2193: == (op == OP_NOTPROP))
! 2194: MRRETURN(MATCH_NOMATCH);
! 2195: break;
! 2196:
! 2197: case PT_WORD:
! 2198: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
! 2199: _pcre_ucp_gentype[prop->chartype] == ucp_N ||
! 2200: c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
! 2201: MRRETURN(MATCH_NOMATCH);
! 2202: break;
! 2203:
! 2204: /* This should never occur */
! 2205:
! 2206: default:
! 2207: RRETURN(PCRE_ERROR_INTERNAL);
! 2208: }
! 2209:
! 2210: ecode += 3;
! 2211: }
! 2212: break;
! 2213:
! 2214: /* Match an extended Unicode sequence. We will get here only if the support
! 2215: is in the binary; otherwise a compile-time error occurs. */
! 2216:
! 2217: case OP_EXTUNI:
! 2218: if (eptr >= md->end_subject)
! 2219: {
! 2220: SCHECK_PARTIAL();
! 2221: MRRETURN(MATCH_NOMATCH);
! 2222: }
! 2223: GETCHARINCTEST(c, eptr);
! 2224: {
! 2225: int category = UCD_CATEGORY(c);
! 2226: if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
! 2227: while (eptr < md->end_subject)
! 2228: {
! 2229: int len = 1;
! 2230: if (!utf8) c = *eptr; else
! 2231: {
! 2232: GETCHARLEN(c, eptr, len);
! 2233: }
! 2234: category = UCD_CATEGORY(c);
! 2235: if (category != ucp_M) break;
! 2236: eptr += len;
! 2237: }
! 2238: }
! 2239: ecode++;
! 2240: break;
! 2241: #endif
! 2242:
! 2243:
! 2244: /* Match a back reference, possibly repeatedly. Look past the end of the
! 2245: item to see if there is repeat information following. The code is similar
! 2246: to that for character classes, but repeated for efficiency. Then obey
! 2247: similar code to character type repeats - written out again for speed.
! 2248: However, if the referenced string is the empty string, always treat
! 2249: it as matched, any number of times (otherwise there could be infinite
! 2250: loops). */
! 2251:
! 2252: case OP_REF:
! 2253: {
! 2254: offset = GET2(ecode, 1) << 1; /* Doubled ref number */
! 2255: ecode += 3;
! 2256:
! 2257: /* If the reference is unset, there are two possibilities:
! 2258:
! 2259: (a) In the default, Perl-compatible state, set the length to be longer
! 2260: than the amount of subject left; this ensures that every attempt at a
! 2261: match fails. We can't just fail here, because of the possibility of
! 2262: quantifiers with zero minima.
! 2263:
! 2264: (b) If the JavaScript compatibility flag is set, set the length to zero
! 2265: so that the back reference matches an empty string.
! 2266:
! 2267: Otherwise, set the length to the length of what was matched by the
! 2268: referenced subpattern. */
! 2269:
! 2270: if (offset >= offset_top || md->offset_vector[offset] < 0)
! 2271: length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
! 2272: else
! 2273: length = md->offset_vector[offset+1] - md->offset_vector[offset];
! 2274:
! 2275: /* Set up for repetition, or handle the non-repeated case */
! 2276:
! 2277: switch (*ecode)
! 2278: {
! 2279: case OP_CRSTAR:
! 2280: case OP_CRMINSTAR:
! 2281: case OP_CRPLUS:
! 2282: case OP_CRMINPLUS:
! 2283: case OP_CRQUERY:
! 2284: case OP_CRMINQUERY:
! 2285: c = *ecode++ - OP_CRSTAR;
! 2286: minimize = (c & 1) != 0;
! 2287: min = rep_min[c]; /* Pick up values from tables; */
! 2288: max = rep_max[c]; /* zero for max => infinity */
! 2289: if (max == 0) max = INT_MAX;
! 2290: break;
! 2291:
! 2292: case OP_CRRANGE:
! 2293: case OP_CRMINRANGE:
! 2294: minimize = (*ecode == OP_CRMINRANGE);
! 2295: min = GET2(ecode, 1);
! 2296: max = GET2(ecode, 3);
! 2297: if (max == 0) max = INT_MAX;
! 2298: ecode += 5;
! 2299: break;
! 2300:
! 2301: default: /* No repeat follows */
! 2302: if (!match_ref(offset, eptr, length, md, ims))
! 2303: {
! 2304: CHECK_PARTIAL();
! 2305: MRRETURN(MATCH_NOMATCH);
! 2306: }
! 2307: eptr += length;
! 2308: continue; /* With the main loop */
! 2309: }
! 2310:
! 2311: /* If the length of the reference is zero, just continue with the
! 2312: main loop. */
! 2313:
! 2314: if (length == 0) continue;
! 2315:
! 2316: /* First, ensure the minimum number of matches are present. We get back
! 2317: the length of the reference string explicitly rather than passing the
! 2318: address of eptr, so that eptr can be a register variable. */
! 2319:
! 2320: for (i = 1; i <= min; i++)
! 2321: {
! 2322: if (!match_ref(offset, eptr, length, md, ims))
! 2323: {
! 2324: CHECK_PARTIAL();
! 2325: MRRETURN(MATCH_NOMATCH);
! 2326: }
! 2327: eptr += length;
! 2328: }
! 2329:
! 2330: /* If min = max, continue at the same level without recursion.
! 2331: They are not both allowed to be zero. */
! 2332:
! 2333: if (min == max) continue;
! 2334:
! 2335: /* If minimizing, keep trying and advancing the pointer */
! 2336:
! 2337: if (minimize)
! 2338: {
! 2339: for (fi = min;; fi++)
! 2340: {
! 2341: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
! 2342: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2343: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2344: if (!match_ref(offset, eptr, length, md, ims))
! 2345: {
! 2346: CHECK_PARTIAL();
! 2347: MRRETURN(MATCH_NOMATCH);
! 2348: }
! 2349: eptr += length;
! 2350: }
! 2351: /* Control never gets here */
! 2352: }
! 2353:
! 2354: /* If maximizing, find the longest string and work backwards */
! 2355:
! 2356: else
! 2357: {
! 2358: pp = eptr;
! 2359: for (i = min; i < max; i++)
! 2360: {
! 2361: if (!match_ref(offset, eptr, length, md, ims))
! 2362: {
! 2363: CHECK_PARTIAL();
! 2364: break;
! 2365: }
! 2366: eptr += length;
! 2367: }
! 2368: while (eptr >= pp)
! 2369: {
! 2370: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
! 2371: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2372: eptr -= length;
! 2373: }
! 2374: MRRETURN(MATCH_NOMATCH);
! 2375: }
! 2376: }
! 2377: /* Control never gets here */
! 2378:
! 2379: /* Match a bit-mapped character class, possibly repeatedly. This op code is
! 2380: used when all the characters in the class have values in the range 0-255,
! 2381: and either the matching is caseful, or the characters are in the range
! 2382: 0-127 when UTF-8 processing is enabled. The only difference between
! 2383: OP_CLASS and OP_NCLASS occurs when a data character outside the range is
! 2384: encountered.
! 2385:
! 2386: First, look past the end of the item to see if there is repeat information
! 2387: following. Then obey similar code to character type repeats - written out
! 2388: again for speed. */
! 2389:
! 2390: case OP_NCLASS:
! 2391: case OP_CLASS:
! 2392: {
! 2393: data = ecode + 1; /* Save for matching */
! 2394: ecode += 33; /* Advance past the item */
! 2395:
! 2396: switch (*ecode)
! 2397: {
! 2398: case OP_CRSTAR:
! 2399: case OP_CRMINSTAR:
! 2400: case OP_CRPLUS:
! 2401: case OP_CRMINPLUS:
! 2402: case OP_CRQUERY:
! 2403: case OP_CRMINQUERY:
! 2404: c = *ecode++ - OP_CRSTAR;
! 2405: minimize = (c & 1) != 0;
! 2406: min = rep_min[c]; /* Pick up values from tables; */
! 2407: max = rep_max[c]; /* zero for max => infinity */
! 2408: if (max == 0) max = INT_MAX;
! 2409: break;
! 2410:
! 2411: case OP_CRRANGE:
! 2412: case OP_CRMINRANGE:
! 2413: minimize = (*ecode == OP_CRMINRANGE);
! 2414: min = GET2(ecode, 1);
! 2415: max = GET2(ecode, 3);
! 2416: if (max == 0) max = INT_MAX;
! 2417: ecode += 5;
! 2418: break;
! 2419:
! 2420: default: /* No repeat follows */
! 2421: min = max = 1;
! 2422: break;
! 2423: }
! 2424:
! 2425: /* First, ensure the minimum number of matches are present. */
! 2426:
! 2427: #ifdef SUPPORT_UTF8
! 2428: /* UTF-8 mode */
! 2429: if (utf8)
! 2430: {
! 2431: for (i = 1; i <= min; i++)
! 2432: {
! 2433: if (eptr >= md->end_subject)
! 2434: {
! 2435: SCHECK_PARTIAL();
! 2436: MRRETURN(MATCH_NOMATCH);
! 2437: }
! 2438: GETCHARINC(c, eptr);
! 2439: if (c > 255)
! 2440: {
! 2441: if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
! 2442: }
! 2443: else
! 2444: {
! 2445: if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
! 2446: }
! 2447: }
! 2448: }
! 2449: else
! 2450: #endif
! 2451: /* Not UTF-8 mode */
! 2452: {
! 2453: for (i = 1; i <= min; i++)
! 2454: {
! 2455: if (eptr >= md->end_subject)
! 2456: {
! 2457: SCHECK_PARTIAL();
! 2458: MRRETURN(MATCH_NOMATCH);
! 2459: }
! 2460: c = *eptr++;
! 2461: if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
! 2462: }
! 2463: }
! 2464:
! 2465: /* If max == min we can continue with the main loop without the
! 2466: need to recurse. */
! 2467:
! 2468: if (min == max) continue;
! 2469:
! 2470: /* If minimizing, keep testing the rest of the expression and advancing
! 2471: the pointer while it matches the class. */
! 2472:
! 2473: if (minimize)
! 2474: {
! 2475: #ifdef SUPPORT_UTF8
! 2476: /* UTF-8 mode */
! 2477: if (utf8)
! 2478: {
! 2479: for (fi = min;; fi++)
! 2480: {
! 2481: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
! 2482: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2483: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2484: if (eptr >= md->end_subject)
! 2485: {
! 2486: SCHECK_PARTIAL();
! 2487: MRRETURN(MATCH_NOMATCH);
! 2488: }
! 2489: GETCHARINC(c, eptr);
! 2490: if (c > 255)
! 2491: {
! 2492: if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
! 2493: }
! 2494: else
! 2495: {
! 2496: if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
! 2497: }
! 2498: }
! 2499: }
! 2500: else
! 2501: #endif
! 2502: /* Not UTF-8 mode */
! 2503: {
! 2504: for (fi = min;; fi++)
! 2505: {
! 2506: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
! 2507: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2508: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2509: if (eptr >= md->end_subject)
! 2510: {
! 2511: SCHECK_PARTIAL();
! 2512: MRRETURN(MATCH_NOMATCH);
! 2513: }
! 2514: c = *eptr++;
! 2515: if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
! 2516: }
! 2517: }
! 2518: /* Control never gets here */
! 2519: }
! 2520:
! 2521: /* If maximizing, find the longest possible run, then work backwards. */
! 2522:
! 2523: else
! 2524: {
! 2525: pp = eptr;
! 2526:
! 2527: #ifdef SUPPORT_UTF8
! 2528: /* UTF-8 mode */
! 2529: if (utf8)
! 2530: {
! 2531: for (i = min; i < max; i++)
! 2532: {
! 2533: int len = 1;
! 2534: if (eptr >= md->end_subject)
! 2535: {
! 2536: SCHECK_PARTIAL();
! 2537: break;
! 2538: }
! 2539: GETCHARLEN(c, eptr, len);
! 2540: if (c > 255)
! 2541: {
! 2542: if (op == OP_CLASS) break;
! 2543: }
! 2544: else
! 2545: {
! 2546: if ((data[c/8] & (1 << (c&7))) == 0) break;
! 2547: }
! 2548: eptr += len;
! 2549: }
! 2550: for (;;)
! 2551: {
! 2552: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
! 2553: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2554: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 2555: BACKCHAR(eptr);
! 2556: }
! 2557: }
! 2558: else
! 2559: #endif
! 2560: /* Not UTF-8 mode */
! 2561: {
! 2562: for (i = min; i < max; i++)
! 2563: {
! 2564: if (eptr >= md->end_subject)
! 2565: {
! 2566: SCHECK_PARTIAL();
! 2567: break;
! 2568: }
! 2569: c = *eptr;
! 2570: if ((data[c/8] & (1 << (c&7))) == 0) break;
! 2571: eptr++;
! 2572: }
! 2573: while (eptr >= pp)
! 2574: {
! 2575: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
! 2576: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2577: eptr--;
! 2578: }
! 2579: }
! 2580:
! 2581: MRRETURN(MATCH_NOMATCH);
! 2582: }
! 2583: }
! 2584: /* Control never gets here */
! 2585:
! 2586:
! 2587: /* Match an extended character class. This opcode is encountered only
! 2588: when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
! 2589: mode, because Unicode properties are supported in non-UTF-8 mode. */
! 2590:
! 2591: #ifdef SUPPORT_UTF8
! 2592: case OP_XCLASS:
! 2593: {
! 2594: data = ecode + 1 + LINK_SIZE; /* Save for matching */
! 2595: ecode += GET(ecode, 1); /* Advance past the item */
! 2596:
! 2597: switch (*ecode)
! 2598: {
! 2599: case OP_CRSTAR:
! 2600: case OP_CRMINSTAR:
! 2601: case OP_CRPLUS:
! 2602: case OP_CRMINPLUS:
! 2603: case OP_CRQUERY:
! 2604: case OP_CRMINQUERY:
! 2605: c = *ecode++ - OP_CRSTAR;
! 2606: minimize = (c & 1) != 0;
! 2607: min = rep_min[c]; /* Pick up values from tables; */
! 2608: max = rep_max[c]; /* zero for max => infinity */
! 2609: if (max == 0) max = INT_MAX;
! 2610: break;
! 2611:
! 2612: case OP_CRRANGE:
! 2613: case OP_CRMINRANGE:
! 2614: minimize = (*ecode == OP_CRMINRANGE);
! 2615: min = GET2(ecode, 1);
! 2616: max = GET2(ecode, 3);
! 2617: if (max == 0) max = INT_MAX;
! 2618: ecode += 5;
! 2619: break;
! 2620:
! 2621: default: /* No repeat follows */
! 2622: min = max = 1;
! 2623: break;
! 2624: }
! 2625:
! 2626: /* First, ensure the minimum number of matches are present. */
! 2627:
! 2628: for (i = 1; i <= min; i++)
! 2629: {
! 2630: if (eptr >= md->end_subject)
! 2631: {
! 2632: SCHECK_PARTIAL();
! 2633: MRRETURN(MATCH_NOMATCH);
! 2634: }
! 2635: GETCHARINCTEST(c, eptr);
! 2636: if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
! 2637: }
! 2638:
! 2639: /* If max == min we can continue with the main loop without the
! 2640: need to recurse. */
! 2641:
! 2642: if (min == max) continue;
! 2643:
! 2644: /* If minimizing, keep testing the rest of the expression and advancing
! 2645: the pointer while it matches the class. */
! 2646:
! 2647: if (minimize)
! 2648: {
! 2649: for (fi = min;; fi++)
! 2650: {
! 2651: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
! 2652: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2653: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2654: if (eptr >= md->end_subject)
! 2655: {
! 2656: SCHECK_PARTIAL();
! 2657: MRRETURN(MATCH_NOMATCH);
! 2658: }
! 2659: GETCHARINCTEST(c, eptr);
! 2660: if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
! 2661: }
! 2662: /* Control never gets here */
! 2663: }
! 2664:
! 2665: /* If maximizing, find the longest possible run, then work backwards. */
! 2666:
! 2667: else
! 2668: {
! 2669: pp = eptr;
! 2670: for (i = min; i < max; i++)
! 2671: {
! 2672: int len = 1;
! 2673: if (eptr >= md->end_subject)
! 2674: {
! 2675: SCHECK_PARTIAL();
! 2676: break;
! 2677: }
! 2678: GETCHARLENTEST(c, eptr, len);
! 2679: if (!_pcre_xclass(c, data)) break;
! 2680: eptr += len;
! 2681: }
! 2682: for(;;)
! 2683: {
! 2684: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
! 2685: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2686: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 2687: if (utf8) BACKCHAR(eptr);
! 2688: }
! 2689: MRRETURN(MATCH_NOMATCH);
! 2690: }
! 2691:
! 2692: /* Control never gets here */
! 2693: }
! 2694: #endif /* End of XCLASS */
! 2695:
! 2696: /* Match a single character, casefully */
! 2697:
! 2698: case OP_CHAR:
! 2699: #ifdef SUPPORT_UTF8
! 2700: if (utf8)
! 2701: {
! 2702: length = 1;
! 2703: ecode++;
! 2704: GETCHARLEN(fc, ecode, length);
! 2705: if (length > md->end_subject - eptr)
! 2706: {
! 2707: CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
! 2708: MRRETURN(MATCH_NOMATCH);
! 2709: }
! 2710: while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
! 2711: }
! 2712: else
! 2713: #endif
! 2714:
! 2715: /* Non-UTF-8 mode */
! 2716: {
! 2717: if (md->end_subject - eptr < 1)
! 2718: {
! 2719: SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
! 2720: MRRETURN(MATCH_NOMATCH);
! 2721: }
! 2722: if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
! 2723: ecode += 2;
! 2724: }
! 2725: break;
! 2726:
! 2727: /* Match a single character, caselessly */
! 2728:
! 2729: case OP_CHARNC:
! 2730: #ifdef SUPPORT_UTF8
! 2731: if (utf8)
! 2732: {
! 2733: length = 1;
! 2734: ecode++;
! 2735: GETCHARLEN(fc, ecode, length);
! 2736:
! 2737: if (length > md->end_subject - eptr)
! 2738: {
! 2739: CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
! 2740: MRRETURN(MATCH_NOMATCH);
! 2741: }
! 2742:
! 2743: /* If the pattern character's value is < 128, we have only one byte, and
! 2744: can use the fast lookup table. */
! 2745:
! 2746: if (fc < 128)
! 2747: {
! 2748: if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 2749: }
! 2750:
! 2751: /* Otherwise we must pick up the subject character */
! 2752:
! 2753: else
! 2754: {
! 2755: unsigned int dc;
! 2756: GETCHARINC(dc, eptr);
! 2757: ecode += length;
! 2758:
! 2759: /* If we have Unicode property support, we can use it to test the other
! 2760: case of the character, if there is one. */
! 2761:
! 2762: if (fc != dc)
! 2763: {
! 2764: #ifdef SUPPORT_UCP
! 2765: if (dc != UCD_OTHERCASE(fc))
! 2766: #endif
! 2767: MRRETURN(MATCH_NOMATCH);
! 2768: }
! 2769: }
! 2770: }
! 2771: else
! 2772: #endif /* SUPPORT_UTF8 */
! 2773:
! 2774: /* Non-UTF-8 mode */
! 2775: {
! 2776: if (md->end_subject - eptr < 1)
! 2777: {
! 2778: SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
! 2779: MRRETURN(MATCH_NOMATCH);
! 2780: }
! 2781: if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 2782: ecode += 2;
! 2783: }
! 2784: break;
! 2785:
! 2786: /* Match a single character repeatedly. */
! 2787:
! 2788: case OP_EXACT:
! 2789: min = max = GET2(ecode, 1);
! 2790: ecode += 3;
! 2791: goto REPEATCHAR;
! 2792:
! 2793: case OP_POSUPTO:
! 2794: possessive = TRUE;
! 2795: /* Fall through */
! 2796:
! 2797: case OP_UPTO:
! 2798: case OP_MINUPTO:
! 2799: min = 0;
! 2800: max = GET2(ecode, 1);
! 2801: minimize = *ecode == OP_MINUPTO;
! 2802: ecode += 3;
! 2803: goto REPEATCHAR;
! 2804:
! 2805: case OP_POSSTAR:
! 2806: possessive = TRUE;
! 2807: min = 0;
! 2808: max = INT_MAX;
! 2809: ecode++;
! 2810: goto REPEATCHAR;
! 2811:
! 2812: case OP_POSPLUS:
! 2813: possessive = TRUE;
! 2814: min = 1;
! 2815: max = INT_MAX;
! 2816: ecode++;
! 2817: goto REPEATCHAR;
! 2818:
! 2819: case OP_POSQUERY:
! 2820: possessive = TRUE;
! 2821: min = 0;
! 2822: max = 1;
! 2823: ecode++;
! 2824: goto REPEATCHAR;
! 2825:
! 2826: case OP_STAR:
! 2827: case OP_MINSTAR:
! 2828: case OP_PLUS:
! 2829: case OP_MINPLUS:
! 2830: case OP_QUERY:
! 2831: case OP_MINQUERY:
! 2832: c = *ecode++ - OP_STAR;
! 2833: minimize = (c & 1) != 0;
! 2834:
! 2835: min = rep_min[c]; /* Pick up values from tables; */
! 2836: max = rep_max[c]; /* zero for max => infinity */
! 2837: if (max == 0) max = INT_MAX;
! 2838:
! 2839: /* Common code for all repeated single-character matches. */
! 2840:
! 2841: REPEATCHAR:
! 2842: #ifdef SUPPORT_UTF8
! 2843: if (utf8)
! 2844: {
! 2845: length = 1;
! 2846: charptr = ecode;
! 2847: GETCHARLEN(fc, ecode, length);
! 2848: ecode += length;
! 2849:
! 2850: /* Handle multibyte character matching specially here. There is
! 2851: support for caseless matching if UCP support is present. */
! 2852:
! 2853: if (length > 1)
! 2854: {
! 2855: #ifdef SUPPORT_UCP
! 2856: unsigned int othercase;
! 2857: if ((ims & PCRE_CASELESS) != 0 &&
! 2858: (othercase = UCD_OTHERCASE(fc)) != fc)
! 2859: oclength = _pcre_ord2utf8(othercase, occhars);
! 2860: else oclength = 0;
! 2861: #endif /* SUPPORT_UCP */
! 2862:
! 2863: for (i = 1; i <= min; i++)
! 2864: {
! 2865: if (eptr <= md->end_subject - length &&
! 2866: memcmp(eptr, charptr, length) == 0) eptr += length;
! 2867: #ifdef SUPPORT_UCP
! 2868: else if (oclength > 0 &&
! 2869: eptr <= md->end_subject - oclength &&
! 2870: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 2871: #endif /* SUPPORT_UCP */
! 2872: else
! 2873: {
! 2874: CHECK_PARTIAL();
! 2875: MRRETURN(MATCH_NOMATCH);
! 2876: }
! 2877: }
! 2878:
! 2879: if (min == max) continue;
! 2880:
! 2881: if (minimize)
! 2882: {
! 2883: for (fi = min;; fi++)
! 2884: {
! 2885: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
! 2886: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2887: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2888: if (eptr <= md->end_subject - length &&
! 2889: memcmp(eptr, charptr, length) == 0) eptr += length;
! 2890: #ifdef SUPPORT_UCP
! 2891: else if (oclength > 0 &&
! 2892: eptr <= md->end_subject - oclength &&
! 2893: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 2894: #endif /* SUPPORT_UCP */
! 2895: else
! 2896: {
! 2897: CHECK_PARTIAL();
! 2898: MRRETURN(MATCH_NOMATCH);
! 2899: }
! 2900: }
! 2901: /* Control never gets here */
! 2902: }
! 2903:
! 2904: else /* Maximize */
! 2905: {
! 2906: pp = eptr;
! 2907: for (i = min; i < max; i++)
! 2908: {
! 2909: if (eptr <= md->end_subject - length &&
! 2910: memcmp(eptr, charptr, length) == 0) eptr += length;
! 2911: #ifdef SUPPORT_UCP
! 2912: else if (oclength > 0 &&
! 2913: eptr <= md->end_subject - oclength &&
! 2914: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 2915: #endif /* SUPPORT_UCP */
! 2916: else
! 2917: {
! 2918: CHECK_PARTIAL();
! 2919: break;
! 2920: }
! 2921: }
! 2922:
! 2923: if (possessive) continue;
! 2924:
! 2925: for(;;)
! 2926: {
! 2927: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
! 2928: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2929: if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
! 2930: #ifdef SUPPORT_UCP
! 2931: eptr--;
! 2932: BACKCHAR(eptr);
! 2933: #else /* without SUPPORT_UCP */
! 2934: eptr -= length;
! 2935: #endif /* SUPPORT_UCP */
! 2936: }
! 2937: }
! 2938: /* Control never gets here */
! 2939: }
! 2940:
! 2941: /* If the length of a UTF-8 character is 1, we fall through here, and
! 2942: obey the code as for non-UTF-8 characters below, though in this case the
! 2943: value of fc will always be < 128. */
! 2944: }
! 2945: else
! 2946: #endif /* SUPPORT_UTF8 */
! 2947:
! 2948: /* When not in UTF-8 mode, load a single-byte character. */
! 2949:
! 2950: fc = *ecode++;
! 2951:
! 2952: /* The value of fc at this point is always less than 256, though we may or
! 2953: may not be in UTF-8 mode. The code is duplicated for the caseless and
! 2954: caseful cases, for speed, since matching characters is likely to be quite
! 2955: common. First, ensure the minimum number of matches are present. If min =
! 2956: max, continue at the same level without recursing. Otherwise, if
! 2957: minimizing, keep trying the rest of the expression and advancing one
! 2958: matching character if failing, up to the maximum. Alternatively, if
! 2959: maximizing, find the maximum number of characters and work backwards. */
! 2960:
! 2961: DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
! 2962: max, eptr));
! 2963:
! 2964: if ((ims & PCRE_CASELESS) != 0)
! 2965: {
! 2966: fc = md->lcc[fc];
! 2967: for (i = 1; i <= min; i++)
! 2968: {
! 2969: if (eptr >= md->end_subject)
! 2970: {
! 2971: SCHECK_PARTIAL();
! 2972: MRRETURN(MATCH_NOMATCH);
! 2973: }
! 2974: if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 2975: }
! 2976: if (min == max) continue;
! 2977: if (minimize)
! 2978: {
! 2979: for (fi = min;; fi++)
! 2980: {
! 2981: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
! 2982: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2983: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 2984: if (eptr >= md->end_subject)
! 2985: {
! 2986: SCHECK_PARTIAL();
! 2987: MRRETURN(MATCH_NOMATCH);
! 2988: }
! 2989: if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 2990: }
! 2991: /* Control never gets here */
! 2992: }
! 2993: else /* Maximize */
! 2994: {
! 2995: pp = eptr;
! 2996: for (i = min; i < max; i++)
! 2997: {
! 2998: if (eptr >= md->end_subject)
! 2999: {
! 3000: SCHECK_PARTIAL();
! 3001: break;
! 3002: }
! 3003: if (fc != md->lcc[*eptr]) break;
! 3004: eptr++;
! 3005: }
! 3006:
! 3007: if (possessive) continue;
! 3008:
! 3009: while (eptr >= pp)
! 3010: {
! 3011: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
! 3012: eptr--;
! 3013: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3014: }
! 3015: MRRETURN(MATCH_NOMATCH);
! 3016: }
! 3017: /* Control never gets here */
! 3018: }
! 3019:
! 3020: /* Caseful comparisons (includes all multi-byte characters) */
! 3021:
! 3022: else
! 3023: {
! 3024: for (i = 1; i <= min; i++)
! 3025: {
! 3026: if (eptr >= md->end_subject)
! 3027: {
! 3028: SCHECK_PARTIAL();
! 3029: MRRETURN(MATCH_NOMATCH);
! 3030: }
! 3031: if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
! 3032: }
! 3033:
! 3034: if (min == max) continue;
! 3035:
! 3036: if (minimize)
! 3037: {
! 3038: for (fi = min;; fi++)
! 3039: {
! 3040: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
! 3041: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3042: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 3043: if (eptr >= md->end_subject)
! 3044: {
! 3045: SCHECK_PARTIAL();
! 3046: MRRETURN(MATCH_NOMATCH);
! 3047: }
! 3048: if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
! 3049: }
! 3050: /* Control never gets here */
! 3051: }
! 3052: else /* Maximize */
! 3053: {
! 3054: pp = eptr;
! 3055: for (i = min; i < max; i++)
! 3056: {
! 3057: if (eptr >= md->end_subject)
! 3058: {
! 3059: SCHECK_PARTIAL();
! 3060: break;
! 3061: }
! 3062: if (fc != *eptr) break;
! 3063: eptr++;
! 3064: }
! 3065: if (possessive) continue;
! 3066:
! 3067: while (eptr >= pp)
! 3068: {
! 3069: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
! 3070: eptr--;
! 3071: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3072: }
! 3073: MRRETURN(MATCH_NOMATCH);
! 3074: }
! 3075: }
! 3076: /* Control never gets here */
! 3077:
! 3078: /* Match a negated single one-byte character. The character we are
! 3079: checking can be multibyte. */
! 3080:
! 3081: case OP_NOT:
! 3082: if (eptr >= md->end_subject)
! 3083: {
! 3084: SCHECK_PARTIAL();
! 3085: MRRETURN(MATCH_NOMATCH);
! 3086: }
! 3087: ecode++;
! 3088: GETCHARINCTEST(c, eptr);
! 3089: if ((ims & PCRE_CASELESS) != 0)
! 3090: {
! 3091: #ifdef SUPPORT_UTF8
! 3092: if (c < 256)
! 3093: #endif
! 3094: c = md->lcc[c];
! 3095: if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
! 3096: }
! 3097: else
! 3098: {
! 3099: if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
! 3100: }
! 3101: break;
! 3102:
! 3103: /* Match a negated single one-byte character repeatedly. This is almost a
! 3104: repeat of the code for a repeated single character, but I haven't found a
! 3105: nice way of commoning these up that doesn't require a test of the
! 3106: positive/negative option for each character match. Maybe that wouldn't add
! 3107: very much to the time taken, but character matching *is* what this is all
! 3108: about... */
! 3109:
! 3110: case OP_NOTEXACT:
! 3111: min = max = GET2(ecode, 1);
! 3112: ecode += 3;
! 3113: goto REPEATNOTCHAR;
! 3114:
! 3115: case OP_NOTUPTO:
! 3116: case OP_NOTMINUPTO:
! 3117: min = 0;
! 3118: max = GET2(ecode, 1);
! 3119: minimize = *ecode == OP_NOTMINUPTO;
! 3120: ecode += 3;
! 3121: goto REPEATNOTCHAR;
! 3122:
! 3123: case OP_NOTPOSSTAR:
! 3124: possessive = TRUE;
! 3125: min = 0;
! 3126: max = INT_MAX;
! 3127: ecode++;
! 3128: goto REPEATNOTCHAR;
! 3129:
! 3130: case OP_NOTPOSPLUS:
! 3131: possessive = TRUE;
! 3132: min = 1;
! 3133: max = INT_MAX;
! 3134: ecode++;
! 3135: goto REPEATNOTCHAR;
! 3136:
! 3137: case OP_NOTPOSQUERY:
! 3138: possessive = TRUE;
! 3139: min = 0;
! 3140: max = 1;
! 3141: ecode++;
! 3142: goto REPEATNOTCHAR;
! 3143:
! 3144: case OP_NOTPOSUPTO:
! 3145: possessive = TRUE;
! 3146: min = 0;
! 3147: max = GET2(ecode, 1);
! 3148: ecode += 3;
! 3149: goto REPEATNOTCHAR;
! 3150:
! 3151: case OP_NOTSTAR:
! 3152: case OP_NOTMINSTAR:
! 3153: case OP_NOTPLUS:
! 3154: case OP_NOTMINPLUS:
! 3155: case OP_NOTQUERY:
! 3156: case OP_NOTMINQUERY:
! 3157: c = *ecode++ - OP_NOTSTAR;
! 3158: minimize = (c & 1) != 0;
! 3159: min = rep_min[c]; /* Pick up values from tables; */
! 3160: max = rep_max[c]; /* zero for max => infinity */
! 3161: if (max == 0) max = INT_MAX;
! 3162:
! 3163: /* Common code for all repeated single-byte matches. */
! 3164:
! 3165: REPEATNOTCHAR:
! 3166: fc = *ecode++;
! 3167:
! 3168: /* The code is duplicated for the caseless and caseful cases, for speed,
! 3169: since matching characters is likely to be quite common. First, ensure the
! 3170: minimum number of matches are present. If min = max, continue at the same
! 3171: level without recursing. Otherwise, if minimizing, keep trying the rest of
! 3172: the expression and advancing one matching character if failing, up to the
! 3173: maximum. Alternatively, if maximizing, find the maximum number of
! 3174: characters and work backwards. */
! 3175:
! 3176: DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
! 3177: max, eptr));
! 3178:
! 3179: if ((ims & PCRE_CASELESS) != 0)
! 3180: {
! 3181: fc = md->lcc[fc];
! 3182:
! 3183: #ifdef SUPPORT_UTF8
! 3184: /* UTF-8 mode */
! 3185: if (utf8)
! 3186: {
! 3187: register unsigned int d;
! 3188: for (i = 1; i <= min; i++)
! 3189: {
! 3190: if (eptr >= md->end_subject)
! 3191: {
! 3192: SCHECK_PARTIAL();
! 3193: MRRETURN(MATCH_NOMATCH);
! 3194: }
! 3195: GETCHARINC(d, eptr);
! 3196: if (d < 256) d = md->lcc[d];
! 3197: if (fc == d) MRRETURN(MATCH_NOMATCH);
! 3198: }
! 3199: }
! 3200: else
! 3201: #endif
! 3202:
! 3203: /* Not UTF-8 mode */
! 3204: {
! 3205: for (i = 1; i <= min; i++)
! 3206: {
! 3207: if (eptr >= md->end_subject)
! 3208: {
! 3209: SCHECK_PARTIAL();
! 3210: MRRETURN(MATCH_NOMATCH);
! 3211: }
! 3212: if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 3213: }
! 3214: }
! 3215:
! 3216: if (min == max) continue;
! 3217:
! 3218: if (minimize)
! 3219: {
! 3220: #ifdef SUPPORT_UTF8
! 3221: /* UTF-8 mode */
! 3222: if (utf8)
! 3223: {
! 3224: register unsigned int d;
! 3225: for (fi = min;; fi++)
! 3226: {
! 3227: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
! 3228: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3229: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 3230: if (eptr >= md->end_subject)
! 3231: {
! 3232: SCHECK_PARTIAL();
! 3233: MRRETURN(MATCH_NOMATCH);
! 3234: }
! 3235: GETCHARINC(d, eptr);
! 3236: if (d < 256) d = md->lcc[d];
! 3237: if (fc == d) MRRETURN(MATCH_NOMATCH);
! 3238: }
! 3239: }
! 3240: else
! 3241: #endif
! 3242: /* Not UTF-8 mode */
! 3243: {
! 3244: for (fi = min;; fi++)
! 3245: {
! 3246: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
! 3247: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3248: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 3249: if (eptr >= md->end_subject)
! 3250: {
! 3251: SCHECK_PARTIAL();
! 3252: MRRETURN(MATCH_NOMATCH);
! 3253: }
! 3254: if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
! 3255: }
! 3256: }
! 3257: /* Control never gets here */
! 3258: }
! 3259:
! 3260: /* Maximize case */
! 3261:
! 3262: else
! 3263: {
! 3264: pp = eptr;
! 3265:
! 3266: #ifdef SUPPORT_UTF8
! 3267: /* UTF-8 mode */
! 3268: if (utf8)
! 3269: {
! 3270: register unsigned int d;
! 3271: for (i = min; i < max; i++)
! 3272: {
! 3273: int len = 1;
! 3274: if (eptr >= md->end_subject)
! 3275: {
! 3276: SCHECK_PARTIAL();
! 3277: break;
! 3278: }
! 3279: GETCHARLEN(d, eptr, len);
! 3280: if (d < 256) d = md->lcc[d];
! 3281: if (fc == d) break;
! 3282: eptr += len;
! 3283: }
! 3284: if (possessive) continue;
! 3285: for(;;)
! 3286: {
! 3287: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
! 3288: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3289: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 3290: BACKCHAR(eptr);
! 3291: }
! 3292: }
! 3293: else
! 3294: #endif
! 3295: /* Not UTF-8 mode */
! 3296: {
! 3297: for (i = min; i < max; i++)
! 3298: {
! 3299: if (eptr >= md->end_subject)
! 3300: {
! 3301: SCHECK_PARTIAL();
! 3302: break;
! 3303: }
! 3304: if (fc == md->lcc[*eptr]) break;
! 3305: eptr++;
! 3306: }
! 3307: if (possessive) continue;
! 3308: while (eptr >= pp)
! 3309: {
! 3310: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
! 3311: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3312: eptr--;
! 3313: }
! 3314: }
! 3315:
! 3316: MRRETURN(MATCH_NOMATCH);
! 3317: }
! 3318: /* Control never gets here */
! 3319: }
! 3320:
! 3321: /* Caseful comparisons */
! 3322:
! 3323: else
! 3324: {
! 3325: #ifdef SUPPORT_UTF8
! 3326: /* UTF-8 mode */
! 3327: if (utf8)
! 3328: {
! 3329: register unsigned int d;
! 3330: for (i = 1; i <= min; i++)
! 3331: {
! 3332: if (eptr >= md->end_subject)
! 3333: {
! 3334: SCHECK_PARTIAL();
! 3335: MRRETURN(MATCH_NOMATCH);
! 3336: }
! 3337: GETCHARINC(d, eptr);
! 3338: if (fc == d) MRRETURN(MATCH_NOMATCH);
! 3339: }
! 3340: }
! 3341: else
! 3342: #endif
! 3343: /* Not UTF-8 mode */
! 3344: {
! 3345: for (i = 1; i <= min; i++)
! 3346: {
! 3347: if (eptr >= md->end_subject)
! 3348: {
! 3349: SCHECK_PARTIAL();
! 3350: MRRETURN(MATCH_NOMATCH);
! 3351: }
! 3352: if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
! 3353: }
! 3354: }
! 3355:
! 3356: if (min == max) continue;
! 3357:
! 3358: if (minimize)
! 3359: {
! 3360: #ifdef SUPPORT_UTF8
! 3361: /* UTF-8 mode */
! 3362: if (utf8)
! 3363: {
! 3364: register unsigned int d;
! 3365: for (fi = min;; fi++)
! 3366: {
! 3367: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
! 3368: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3369: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 3370: if (eptr >= md->end_subject)
! 3371: {
! 3372: SCHECK_PARTIAL();
! 3373: MRRETURN(MATCH_NOMATCH);
! 3374: }
! 3375: GETCHARINC(d, eptr);
! 3376: if (fc == d) MRRETURN(MATCH_NOMATCH);
! 3377: }
! 3378: }
! 3379: else
! 3380: #endif
! 3381: /* Not UTF-8 mode */
! 3382: {
! 3383: for (fi = min;; fi++)
! 3384: {
! 3385: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
! 3386: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3387: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 3388: if (eptr >= md->end_subject)
! 3389: {
! 3390: SCHECK_PARTIAL();
! 3391: MRRETURN(MATCH_NOMATCH);
! 3392: }
! 3393: if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
! 3394: }
! 3395: }
! 3396: /* Control never gets here */
! 3397: }
! 3398:
! 3399: /* Maximize case */
! 3400:
! 3401: else
! 3402: {
! 3403: pp = eptr;
! 3404:
! 3405: #ifdef SUPPORT_UTF8
! 3406: /* UTF-8 mode */
! 3407: if (utf8)
! 3408: {
! 3409: register unsigned int d;
! 3410: for (i = min; i < max; i++)
! 3411: {
! 3412: int len = 1;
! 3413: if (eptr >= md->end_subject)
! 3414: {
! 3415: SCHECK_PARTIAL();
! 3416: break;
! 3417: }
! 3418: GETCHARLEN(d, eptr, len);
! 3419: if (fc == d) break;
! 3420: eptr += len;
! 3421: }
! 3422: if (possessive) continue;
! 3423: for(;;)
! 3424: {
! 3425: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
! 3426: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3427: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 3428: BACKCHAR(eptr);
! 3429: }
! 3430: }
! 3431: else
! 3432: #endif
! 3433: /* Not UTF-8 mode */
! 3434: {
! 3435: for (i = min; i < max; i++)
! 3436: {
! 3437: if (eptr >= md->end_subject)
! 3438: {
! 3439: SCHECK_PARTIAL();
! 3440: break;
! 3441: }
! 3442: if (fc == *eptr) break;
! 3443: eptr++;
! 3444: }
! 3445: if (possessive) continue;
! 3446: while (eptr >= pp)
! 3447: {
! 3448: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
! 3449: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3450: eptr--;
! 3451: }
! 3452: }
! 3453:
! 3454: MRRETURN(MATCH_NOMATCH);
! 3455: }
! 3456: }
! 3457: /* Control never gets here */
! 3458:
! 3459: /* Match a single character type repeatedly; several different opcodes
! 3460: share code. This is very similar to the code for single characters, but we
! 3461: repeat it in the interests of efficiency. */
! 3462:
! 3463: case OP_TYPEEXACT:
! 3464: min = max = GET2(ecode, 1);
! 3465: minimize = TRUE;
! 3466: ecode += 3;
! 3467: goto REPEATTYPE;
! 3468:
! 3469: case OP_TYPEUPTO:
! 3470: case OP_TYPEMINUPTO:
! 3471: min = 0;
! 3472: max = GET2(ecode, 1);
! 3473: minimize = *ecode == OP_TYPEMINUPTO;
! 3474: ecode += 3;
! 3475: goto REPEATTYPE;
! 3476:
! 3477: case OP_TYPEPOSSTAR:
! 3478: possessive = TRUE;
! 3479: min = 0;
! 3480: max = INT_MAX;
! 3481: ecode++;
! 3482: goto REPEATTYPE;
! 3483:
! 3484: case OP_TYPEPOSPLUS:
! 3485: possessive = TRUE;
! 3486: min = 1;
! 3487: max = INT_MAX;
! 3488: ecode++;
! 3489: goto REPEATTYPE;
! 3490:
! 3491: case OP_TYPEPOSQUERY:
! 3492: possessive = TRUE;
! 3493: min = 0;
! 3494: max = 1;
! 3495: ecode++;
! 3496: goto REPEATTYPE;
! 3497:
! 3498: case OP_TYPEPOSUPTO:
! 3499: possessive = TRUE;
! 3500: min = 0;
! 3501: max = GET2(ecode, 1);
! 3502: ecode += 3;
! 3503: goto REPEATTYPE;
! 3504:
! 3505: case OP_TYPESTAR:
! 3506: case OP_TYPEMINSTAR:
! 3507: case OP_TYPEPLUS:
! 3508: case OP_TYPEMINPLUS:
! 3509: case OP_TYPEQUERY:
! 3510: case OP_TYPEMINQUERY:
! 3511: c = *ecode++ - OP_TYPESTAR;
! 3512: minimize = (c & 1) != 0;
! 3513: min = rep_min[c]; /* Pick up values from tables; */
! 3514: max = rep_max[c]; /* zero for max => infinity */
! 3515: if (max == 0) max = INT_MAX;
! 3516:
! 3517: /* Common code for all repeated single character type matches. Note that
! 3518: in UTF-8 mode, '.' matches a character of any length, but for the other
! 3519: character types, the valid characters are all one-byte long. */
! 3520:
! 3521: REPEATTYPE:
! 3522: ctype = *ecode++; /* Code for the character type */
! 3523:
! 3524: #ifdef SUPPORT_UCP
! 3525: if (ctype == OP_PROP || ctype == OP_NOTPROP)
! 3526: {
! 3527: prop_fail_result = ctype == OP_NOTPROP;
! 3528: prop_type = *ecode++;
! 3529: prop_value = *ecode++;
! 3530: }
! 3531: else prop_type = -1;
! 3532: #endif
! 3533:
! 3534: /* First, ensure the minimum number of matches are present. Use inline
! 3535: code for maximizing the speed, and do the type test once at the start
! 3536: (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
! 3537: is tidier. Also separate the UCP code, which can be the same for both UTF-8
! 3538: and single-bytes. */
! 3539:
! 3540: if (min > 0)
! 3541: {
! 3542: #ifdef SUPPORT_UCP
! 3543: if (prop_type >= 0)
! 3544: {
! 3545: switch(prop_type)
! 3546: {
! 3547: case PT_ANY:
! 3548: if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
! 3549: for (i = 1; i <= min; i++)
! 3550: {
! 3551: if (eptr >= md->end_subject)
! 3552: {
! 3553: SCHECK_PARTIAL();
! 3554: MRRETURN(MATCH_NOMATCH);
! 3555: }
! 3556: GETCHARINCTEST(c, eptr);
! 3557: }
! 3558: break;
! 3559:
! 3560: case PT_LAMP:
! 3561: for (i = 1; i <= min; i++)
! 3562: {
! 3563: if (eptr >= md->end_subject)
! 3564: {
! 3565: SCHECK_PARTIAL();
! 3566: MRRETURN(MATCH_NOMATCH);
! 3567: }
! 3568: GETCHARINCTEST(c, eptr);
! 3569: prop_chartype = UCD_CHARTYPE(c);
! 3570: if ((prop_chartype == ucp_Lu ||
! 3571: prop_chartype == ucp_Ll ||
! 3572: prop_chartype == ucp_Lt) == prop_fail_result)
! 3573: MRRETURN(MATCH_NOMATCH);
! 3574: }
! 3575: break;
! 3576:
! 3577: case PT_GC:
! 3578: for (i = 1; i <= min; i++)
! 3579: {
! 3580: if (eptr >= md->end_subject)
! 3581: {
! 3582: SCHECK_PARTIAL();
! 3583: MRRETURN(MATCH_NOMATCH);
! 3584: }
! 3585: GETCHARINCTEST(c, eptr);
! 3586: prop_category = UCD_CATEGORY(c);
! 3587: if ((prop_category == prop_value) == prop_fail_result)
! 3588: MRRETURN(MATCH_NOMATCH);
! 3589: }
! 3590: break;
! 3591:
! 3592: case PT_PC:
! 3593: for (i = 1; i <= min; i++)
! 3594: {
! 3595: if (eptr >= md->end_subject)
! 3596: {
! 3597: SCHECK_PARTIAL();
! 3598: MRRETURN(MATCH_NOMATCH);
! 3599: }
! 3600: GETCHARINCTEST(c, eptr);
! 3601: prop_chartype = UCD_CHARTYPE(c);
! 3602: if ((prop_chartype == prop_value) == prop_fail_result)
! 3603: MRRETURN(MATCH_NOMATCH);
! 3604: }
! 3605: break;
! 3606:
! 3607: case PT_SC:
! 3608: for (i = 1; i <= min; i++)
! 3609: {
! 3610: if (eptr >= md->end_subject)
! 3611: {
! 3612: SCHECK_PARTIAL();
! 3613: MRRETURN(MATCH_NOMATCH);
! 3614: }
! 3615: GETCHARINCTEST(c, eptr);
! 3616: prop_script = UCD_SCRIPT(c);
! 3617: if ((prop_script == prop_value) == prop_fail_result)
! 3618: MRRETURN(MATCH_NOMATCH);
! 3619: }
! 3620: break;
! 3621:
! 3622: case PT_ALNUM:
! 3623: for (i = 1; i <= min; i++)
! 3624: {
! 3625: if (eptr >= md->end_subject)
! 3626: {
! 3627: SCHECK_PARTIAL();
! 3628: MRRETURN(MATCH_NOMATCH);
! 3629: }
! 3630: GETCHARINCTEST(c, eptr);
! 3631: prop_category = UCD_CATEGORY(c);
! 3632: if ((prop_category == ucp_L || prop_category == ucp_N)
! 3633: == prop_fail_result)
! 3634: MRRETURN(MATCH_NOMATCH);
! 3635: }
! 3636: break;
! 3637:
! 3638: case PT_SPACE: /* Perl space */
! 3639: for (i = 1; i <= min; i++)
! 3640: {
! 3641: if (eptr >= md->end_subject)
! 3642: {
! 3643: SCHECK_PARTIAL();
! 3644: MRRETURN(MATCH_NOMATCH);
! 3645: }
! 3646: GETCHARINCTEST(c, eptr);
! 3647: prop_category = UCD_CATEGORY(c);
! 3648: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 3649: c == CHAR_FF || c == CHAR_CR)
! 3650: == prop_fail_result)
! 3651: MRRETURN(MATCH_NOMATCH);
! 3652: }
! 3653: break;
! 3654:
! 3655: case PT_PXSPACE: /* POSIX space */
! 3656: for (i = 1; i <= min; i++)
! 3657: {
! 3658: if (eptr >= md->end_subject)
! 3659: {
! 3660: SCHECK_PARTIAL();
! 3661: MRRETURN(MATCH_NOMATCH);
! 3662: }
! 3663: GETCHARINCTEST(c, eptr);
! 3664: prop_category = UCD_CATEGORY(c);
! 3665: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 3666: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 3667: == prop_fail_result)
! 3668: MRRETURN(MATCH_NOMATCH);
! 3669: }
! 3670: break;
! 3671:
! 3672: case PT_WORD:
! 3673: for (i = 1; i <= min; i++)
! 3674: {
! 3675: if (eptr >= md->end_subject)
! 3676: {
! 3677: SCHECK_PARTIAL();
! 3678: MRRETURN(MATCH_NOMATCH);
! 3679: }
! 3680: GETCHARINCTEST(c, eptr);
! 3681: prop_category = UCD_CATEGORY(c);
! 3682: if ((prop_category == ucp_L || prop_category == ucp_N ||
! 3683: c == CHAR_UNDERSCORE)
! 3684: == prop_fail_result)
! 3685: MRRETURN(MATCH_NOMATCH);
! 3686: }
! 3687: break;
! 3688:
! 3689: /* This should not occur */
! 3690:
! 3691: default:
! 3692: RRETURN(PCRE_ERROR_INTERNAL);
! 3693: }
! 3694: }
! 3695:
! 3696: /* Match extended Unicode sequences. We will get here only if the
! 3697: support is in the binary; otherwise a compile-time error occurs. */
! 3698:
! 3699: else if (ctype == OP_EXTUNI)
! 3700: {
! 3701: for (i = 1; i <= min; i++)
! 3702: {
! 3703: if (eptr >= md->end_subject)
! 3704: {
! 3705: SCHECK_PARTIAL();
! 3706: MRRETURN(MATCH_NOMATCH);
! 3707: }
! 3708: GETCHARINCTEST(c, eptr);
! 3709: prop_category = UCD_CATEGORY(c);
! 3710: if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
! 3711: while (eptr < md->end_subject)
! 3712: {
! 3713: int len = 1;
! 3714: if (!utf8) c = *eptr;
! 3715: else { GETCHARLEN(c, eptr, len); }
! 3716: prop_category = UCD_CATEGORY(c);
! 3717: if (prop_category != ucp_M) break;
! 3718: eptr += len;
! 3719: }
! 3720: }
! 3721: }
! 3722:
! 3723: else
! 3724: #endif /* SUPPORT_UCP */
! 3725:
! 3726: /* Handle all other cases when the coding is UTF-8 */
! 3727:
! 3728: #ifdef SUPPORT_UTF8
! 3729: if (utf8) switch(ctype)
! 3730: {
! 3731: case OP_ANY:
! 3732: for (i = 1; i <= min; i++)
! 3733: {
! 3734: if (eptr >= md->end_subject)
! 3735: {
! 3736: SCHECK_PARTIAL();
! 3737: MRRETURN(MATCH_NOMATCH);
! 3738: }
! 3739: if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
! 3740: eptr++;
! 3741: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 3742: }
! 3743: break;
! 3744:
! 3745: case OP_ALLANY:
! 3746: for (i = 1; i <= min; i++)
! 3747: {
! 3748: if (eptr >= md->end_subject)
! 3749: {
! 3750: SCHECK_PARTIAL();
! 3751: MRRETURN(MATCH_NOMATCH);
! 3752: }
! 3753: eptr++;
! 3754: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 3755: }
! 3756: break;
! 3757:
! 3758: case OP_ANYBYTE:
! 3759: if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
! 3760: eptr += min;
! 3761: break;
! 3762:
! 3763: case OP_ANYNL:
! 3764: for (i = 1; i <= min; i++)
! 3765: {
! 3766: if (eptr >= md->end_subject)
! 3767: {
! 3768: SCHECK_PARTIAL();
! 3769: MRRETURN(MATCH_NOMATCH);
! 3770: }
! 3771: GETCHARINC(c, eptr);
! 3772: switch(c)
! 3773: {
! 3774: default: MRRETURN(MATCH_NOMATCH);
! 3775: case 0x000d:
! 3776: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 3777: break;
! 3778:
! 3779: case 0x000a:
! 3780: break;
! 3781:
! 3782: case 0x000b:
! 3783: case 0x000c:
! 3784: case 0x0085:
! 3785: case 0x2028:
! 3786: case 0x2029:
! 3787: if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
! 3788: break;
! 3789: }
! 3790: }
! 3791: break;
! 3792:
! 3793: case OP_NOT_HSPACE:
! 3794: for (i = 1; i <= min; i++)
! 3795: {
! 3796: if (eptr >= md->end_subject)
! 3797: {
! 3798: SCHECK_PARTIAL();
! 3799: MRRETURN(MATCH_NOMATCH);
! 3800: }
! 3801: GETCHARINC(c, eptr);
! 3802: switch(c)
! 3803: {
! 3804: default: break;
! 3805: case 0x09: /* HT */
! 3806: case 0x20: /* SPACE */
! 3807: case 0xa0: /* NBSP */
! 3808: case 0x1680: /* OGHAM SPACE MARK */
! 3809: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 3810: case 0x2000: /* EN QUAD */
! 3811: case 0x2001: /* EM QUAD */
! 3812: case 0x2002: /* EN SPACE */
! 3813: case 0x2003: /* EM SPACE */
! 3814: case 0x2004: /* THREE-PER-EM SPACE */
! 3815: case 0x2005: /* FOUR-PER-EM SPACE */
! 3816: case 0x2006: /* SIX-PER-EM SPACE */
! 3817: case 0x2007: /* FIGURE SPACE */
! 3818: case 0x2008: /* PUNCTUATION SPACE */
! 3819: case 0x2009: /* THIN SPACE */
! 3820: case 0x200A: /* HAIR SPACE */
! 3821: case 0x202f: /* NARROW NO-BREAK SPACE */
! 3822: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 3823: case 0x3000: /* IDEOGRAPHIC SPACE */
! 3824: MRRETURN(MATCH_NOMATCH);
! 3825: }
! 3826: }
! 3827: break;
! 3828:
! 3829: case OP_HSPACE:
! 3830: for (i = 1; i <= min; i++)
! 3831: {
! 3832: if (eptr >= md->end_subject)
! 3833: {
! 3834: SCHECK_PARTIAL();
! 3835: MRRETURN(MATCH_NOMATCH);
! 3836: }
! 3837: GETCHARINC(c, eptr);
! 3838: switch(c)
! 3839: {
! 3840: default: MRRETURN(MATCH_NOMATCH);
! 3841: case 0x09: /* HT */
! 3842: case 0x20: /* SPACE */
! 3843: case 0xa0: /* NBSP */
! 3844: case 0x1680: /* OGHAM SPACE MARK */
! 3845: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 3846: case 0x2000: /* EN QUAD */
! 3847: case 0x2001: /* EM QUAD */
! 3848: case 0x2002: /* EN SPACE */
! 3849: case 0x2003: /* EM SPACE */
! 3850: case 0x2004: /* THREE-PER-EM SPACE */
! 3851: case 0x2005: /* FOUR-PER-EM SPACE */
! 3852: case 0x2006: /* SIX-PER-EM SPACE */
! 3853: case 0x2007: /* FIGURE SPACE */
! 3854: case 0x2008: /* PUNCTUATION SPACE */
! 3855: case 0x2009: /* THIN SPACE */
! 3856: case 0x200A: /* HAIR SPACE */
! 3857: case 0x202f: /* NARROW NO-BREAK SPACE */
! 3858: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 3859: case 0x3000: /* IDEOGRAPHIC SPACE */
! 3860: break;
! 3861: }
! 3862: }
! 3863: break;
! 3864:
! 3865: case OP_NOT_VSPACE:
! 3866: for (i = 1; i <= min; i++)
! 3867: {
! 3868: if (eptr >= md->end_subject)
! 3869: {
! 3870: SCHECK_PARTIAL();
! 3871: MRRETURN(MATCH_NOMATCH);
! 3872: }
! 3873: GETCHARINC(c, eptr);
! 3874: switch(c)
! 3875: {
! 3876: default: break;
! 3877: case 0x0a: /* LF */
! 3878: case 0x0b: /* VT */
! 3879: case 0x0c: /* FF */
! 3880: case 0x0d: /* CR */
! 3881: case 0x85: /* NEL */
! 3882: case 0x2028: /* LINE SEPARATOR */
! 3883: case 0x2029: /* PARAGRAPH SEPARATOR */
! 3884: MRRETURN(MATCH_NOMATCH);
! 3885: }
! 3886: }
! 3887: break;
! 3888:
! 3889: case OP_VSPACE:
! 3890: for (i = 1; i <= min; i++)
! 3891: {
! 3892: if (eptr >= md->end_subject)
! 3893: {
! 3894: SCHECK_PARTIAL();
! 3895: MRRETURN(MATCH_NOMATCH);
! 3896: }
! 3897: GETCHARINC(c, eptr);
! 3898: switch(c)
! 3899: {
! 3900: default: MRRETURN(MATCH_NOMATCH);
! 3901: case 0x0a: /* LF */
! 3902: case 0x0b: /* VT */
! 3903: case 0x0c: /* FF */
! 3904: case 0x0d: /* CR */
! 3905: case 0x85: /* NEL */
! 3906: case 0x2028: /* LINE SEPARATOR */
! 3907: case 0x2029: /* PARAGRAPH SEPARATOR */
! 3908: break;
! 3909: }
! 3910: }
! 3911: break;
! 3912:
! 3913: case OP_NOT_DIGIT:
! 3914: for (i = 1; i <= min; i++)
! 3915: {
! 3916: if (eptr >= md->end_subject)
! 3917: {
! 3918: SCHECK_PARTIAL();
! 3919: MRRETURN(MATCH_NOMATCH);
! 3920: }
! 3921: GETCHARINC(c, eptr);
! 3922: if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
! 3923: MRRETURN(MATCH_NOMATCH);
! 3924: }
! 3925: break;
! 3926:
! 3927: case OP_DIGIT:
! 3928: for (i = 1; i <= min; i++)
! 3929: {
! 3930: if (eptr >= md->end_subject)
! 3931: {
! 3932: SCHECK_PARTIAL();
! 3933: MRRETURN(MATCH_NOMATCH);
! 3934: }
! 3935: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
! 3936: MRRETURN(MATCH_NOMATCH);
! 3937: /* No need to skip more bytes - we know it's a 1-byte character */
! 3938: }
! 3939: break;
! 3940:
! 3941: case OP_NOT_WHITESPACE:
! 3942: for (i = 1; i <= min; i++)
! 3943: {
! 3944: if (eptr >= md->end_subject)
! 3945: {
! 3946: SCHECK_PARTIAL();
! 3947: MRRETURN(MATCH_NOMATCH);
! 3948: }
! 3949: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
! 3950: MRRETURN(MATCH_NOMATCH);
! 3951: while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
! 3952: }
! 3953: break;
! 3954:
! 3955: case OP_WHITESPACE:
! 3956: for (i = 1; i <= min; i++)
! 3957: {
! 3958: if (eptr >= md->end_subject)
! 3959: {
! 3960: SCHECK_PARTIAL();
! 3961: MRRETURN(MATCH_NOMATCH);
! 3962: }
! 3963: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
! 3964: MRRETURN(MATCH_NOMATCH);
! 3965: /* No need to skip more bytes - we know it's a 1-byte character */
! 3966: }
! 3967: break;
! 3968:
! 3969: case OP_NOT_WORDCHAR:
! 3970: for (i = 1; i <= min; i++)
! 3971: {
! 3972: if (eptr >= md->end_subject)
! 3973: {
! 3974: SCHECK_PARTIAL();
! 3975: MRRETURN(MATCH_NOMATCH);
! 3976: }
! 3977: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
! 3978: MRRETURN(MATCH_NOMATCH);
! 3979: while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
! 3980: }
! 3981: break;
! 3982:
! 3983: case OP_WORDCHAR:
! 3984: for (i = 1; i <= min; i++)
! 3985: {
! 3986: if (eptr >= md->end_subject)
! 3987: {
! 3988: SCHECK_PARTIAL();
! 3989: MRRETURN(MATCH_NOMATCH);
! 3990: }
! 3991: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
! 3992: MRRETURN(MATCH_NOMATCH);
! 3993: /* No need to skip more bytes - we know it's a 1-byte character */
! 3994: }
! 3995: break;
! 3996:
! 3997: default:
! 3998: RRETURN(PCRE_ERROR_INTERNAL);
! 3999: } /* End switch(ctype) */
! 4000:
! 4001: else
! 4002: #endif /* SUPPORT_UTF8 */
! 4003:
! 4004: /* Code for the non-UTF-8 case for minimum matching of operators other
! 4005: than OP_PROP and OP_NOTPROP. */
! 4006:
! 4007: switch(ctype)
! 4008: {
! 4009: case OP_ANY:
! 4010: for (i = 1; i <= min; i++)
! 4011: {
! 4012: if (eptr >= md->end_subject)
! 4013: {
! 4014: SCHECK_PARTIAL();
! 4015: MRRETURN(MATCH_NOMATCH);
! 4016: }
! 4017: if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
! 4018: eptr++;
! 4019: }
! 4020: break;
! 4021:
! 4022: case OP_ALLANY:
! 4023: if (eptr > md->end_subject - min)
! 4024: {
! 4025: SCHECK_PARTIAL();
! 4026: MRRETURN(MATCH_NOMATCH);
! 4027: }
! 4028: eptr += min;
! 4029: break;
! 4030:
! 4031: case OP_ANYBYTE:
! 4032: if (eptr > md->end_subject - min)
! 4033: {
! 4034: SCHECK_PARTIAL();
! 4035: MRRETURN(MATCH_NOMATCH);
! 4036: }
! 4037: eptr += min;
! 4038: break;
! 4039:
! 4040: case OP_ANYNL:
! 4041: for (i = 1; i <= min; i++)
! 4042: {
! 4043: if (eptr >= md->end_subject)
! 4044: {
! 4045: SCHECK_PARTIAL();
! 4046: MRRETURN(MATCH_NOMATCH);
! 4047: }
! 4048: switch(*eptr++)
! 4049: {
! 4050: default: MRRETURN(MATCH_NOMATCH);
! 4051: case 0x000d:
! 4052: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4053: break;
! 4054: case 0x000a:
! 4055: break;
! 4056:
! 4057: case 0x000b:
! 4058: case 0x000c:
! 4059: case 0x0085:
! 4060: if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
! 4061: break;
! 4062: }
! 4063: }
! 4064: break;
! 4065:
! 4066: case OP_NOT_HSPACE:
! 4067: for (i = 1; i <= min; i++)
! 4068: {
! 4069: if (eptr >= md->end_subject)
! 4070: {
! 4071: SCHECK_PARTIAL();
! 4072: MRRETURN(MATCH_NOMATCH);
! 4073: }
! 4074: switch(*eptr++)
! 4075: {
! 4076: default: break;
! 4077: case 0x09: /* HT */
! 4078: case 0x20: /* SPACE */
! 4079: case 0xa0: /* NBSP */
! 4080: MRRETURN(MATCH_NOMATCH);
! 4081: }
! 4082: }
! 4083: break;
! 4084:
! 4085: case OP_HSPACE:
! 4086: for (i = 1; i <= min; i++)
! 4087: {
! 4088: if (eptr >= md->end_subject)
! 4089: {
! 4090: SCHECK_PARTIAL();
! 4091: MRRETURN(MATCH_NOMATCH);
! 4092: }
! 4093: switch(*eptr++)
! 4094: {
! 4095: default: MRRETURN(MATCH_NOMATCH);
! 4096: case 0x09: /* HT */
! 4097: case 0x20: /* SPACE */
! 4098: case 0xa0: /* NBSP */
! 4099: break;
! 4100: }
! 4101: }
! 4102: break;
! 4103:
! 4104: case OP_NOT_VSPACE:
! 4105: for (i = 1; i <= min; i++)
! 4106: {
! 4107: if (eptr >= md->end_subject)
! 4108: {
! 4109: SCHECK_PARTIAL();
! 4110: MRRETURN(MATCH_NOMATCH);
! 4111: }
! 4112: switch(*eptr++)
! 4113: {
! 4114: default: break;
! 4115: case 0x0a: /* LF */
! 4116: case 0x0b: /* VT */
! 4117: case 0x0c: /* FF */
! 4118: case 0x0d: /* CR */
! 4119: case 0x85: /* NEL */
! 4120: MRRETURN(MATCH_NOMATCH);
! 4121: }
! 4122: }
! 4123: break;
! 4124:
! 4125: case OP_VSPACE:
! 4126: for (i = 1; i <= min; i++)
! 4127: {
! 4128: if (eptr >= md->end_subject)
! 4129: {
! 4130: SCHECK_PARTIAL();
! 4131: MRRETURN(MATCH_NOMATCH);
! 4132: }
! 4133: switch(*eptr++)
! 4134: {
! 4135: default: MRRETURN(MATCH_NOMATCH);
! 4136: case 0x0a: /* LF */
! 4137: case 0x0b: /* VT */
! 4138: case 0x0c: /* FF */
! 4139: case 0x0d: /* CR */
! 4140: case 0x85: /* NEL */
! 4141: break;
! 4142: }
! 4143: }
! 4144: break;
! 4145:
! 4146: case OP_NOT_DIGIT:
! 4147: for (i = 1; i <= min; i++)
! 4148: {
! 4149: if (eptr >= md->end_subject)
! 4150: {
! 4151: SCHECK_PARTIAL();
! 4152: MRRETURN(MATCH_NOMATCH);
! 4153: }
! 4154: if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
! 4155: }
! 4156: break;
! 4157:
! 4158: case OP_DIGIT:
! 4159: for (i = 1; i <= min; i++)
! 4160: {
! 4161: if (eptr >= md->end_subject)
! 4162: {
! 4163: SCHECK_PARTIAL();
! 4164: MRRETURN(MATCH_NOMATCH);
! 4165: }
! 4166: if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
! 4167: }
! 4168: break;
! 4169:
! 4170: case OP_NOT_WHITESPACE:
! 4171: for (i = 1; i <= min; i++)
! 4172: {
! 4173: if (eptr >= md->end_subject)
! 4174: {
! 4175: SCHECK_PARTIAL();
! 4176: MRRETURN(MATCH_NOMATCH);
! 4177: }
! 4178: if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
! 4179: }
! 4180: break;
! 4181:
! 4182: case OP_WHITESPACE:
! 4183: for (i = 1; i <= min; i++)
! 4184: {
! 4185: if (eptr >= md->end_subject)
! 4186: {
! 4187: SCHECK_PARTIAL();
! 4188: MRRETURN(MATCH_NOMATCH);
! 4189: }
! 4190: if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
! 4191: }
! 4192: break;
! 4193:
! 4194: case OP_NOT_WORDCHAR:
! 4195: for (i = 1; i <= min; i++)
! 4196: {
! 4197: if (eptr >= md->end_subject)
! 4198: {
! 4199: SCHECK_PARTIAL();
! 4200: MRRETURN(MATCH_NOMATCH);
! 4201: }
! 4202: if ((md->ctypes[*eptr++] & ctype_word) != 0)
! 4203: MRRETURN(MATCH_NOMATCH);
! 4204: }
! 4205: break;
! 4206:
! 4207: case OP_WORDCHAR:
! 4208: for (i = 1; i <= min; i++)
! 4209: {
! 4210: if (eptr >= md->end_subject)
! 4211: {
! 4212: SCHECK_PARTIAL();
! 4213: MRRETURN(MATCH_NOMATCH);
! 4214: }
! 4215: if ((md->ctypes[*eptr++] & ctype_word) == 0)
! 4216: MRRETURN(MATCH_NOMATCH);
! 4217: }
! 4218: break;
! 4219:
! 4220: default:
! 4221: RRETURN(PCRE_ERROR_INTERNAL);
! 4222: }
! 4223: }
! 4224:
! 4225: /* If min = max, continue at the same level without recursing */
! 4226:
! 4227: if (min == max) continue;
! 4228:
! 4229: /* If minimizing, we have to test the rest of the pattern before each
! 4230: subsequent match. Again, separate the UTF-8 case for speed, and also
! 4231: separate the UCP cases. */
! 4232:
! 4233: if (minimize)
! 4234: {
! 4235: #ifdef SUPPORT_UCP
! 4236: if (prop_type >= 0)
! 4237: {
! 4238: switch(prop_type)
! 4239: {
! 4240: case PT_ANY:
! 4241: for (fi = min;; fi++)
! 4242: {
! 4243: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
! 4244: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4245: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4246: if (eptr >= md->end_subject)
! 4247: {
! 4248: SCHECK_PARTIAL();
! 4249: MRRETURN(MATCH_NOMATCH);
! 4250: }
! 4251: GETCHARINCTEST(c, eptr);
! 4252: if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
! 4253: }
! 4254: /* Control never gets here */
! 4255:
! 4256: case PT_LAMP:
! 4257: for (fi = min;; fi++)
! 4258: {
! 4259: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
! 4260: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4261: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4262: if (eptr >= md->end_subject)
! 4263: {
! 4264: SCHECK_PARTIAL();
! 4265: MRRETURN(MATCH_NOMATCH);
! 4266: }
! 4267: GETCHARINCTEST(c, eptr);
! 4268: prop_chartype = UCD_CHARTYPE(c);
! 4269: if ((prop_chartype == ucp_Lu ||
! 4270: prop_chartype == ucp_Ll ||
! 4271: prop_chartype == ucp_Lt) == prop_fail_result)
! 4272: MRRETURN(MATCH_NOMATCH);
! 4273: }
! 4274: /* Control never gets here */
! 4275:
! 4276: case PT_GC:
! 4277: for (fi = min;; fi++)
! 4278: {
! 4279: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
! 4280: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4281: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4282: if (eptr >= md->end_subject)
! 4283: {
! 4284: SCHECK_PARTIAL();
! 4285: MRRETURN(MATCH_NOMATCH);
! 4286: }
! 4287: GETCHARINCTEST(c, eptr);
! 4288: prop_category = UCD_CATEGORY(c);
! 4289: if ((prop_category == prop_value) == prop_fail_result)
! 4290: MRRETURN(MATCH_NOMATCH);
! 4291: }
! 4292: /* Control never gets here */
! 4293:
! 4294: case PT_PC:
! 4295: for (fi = min;; fi++)
! 4296: {
! 4297: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
! 4298: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4299: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4300: if (eptr >= md->end_subject)
! 4301: {
! 4302: SCHECK_PARTIAL();
! 4303: MRRETURN(MATCH_NOMATCH);
! 4304: }
! 4305: GETCHARINCTEST(c, eptr);
! 4306: prop_chartype = UCD_CHARTYPE(c);
! 4307: if ((prop_chartype == prop_value) == prop_fail_result)
! 4308: MRRETURN(MATCH_NOMATCH);
! 4309: }
! 4310: /* Control never gets here */
! 4311:
! 4312: case PT_SC:
! 4313: for (fi = min;; fi++)
! 4314: {
! 4315: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
! 4316: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4317: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4318: if (eptr >= md->end_subject)
! 4319: {
! 4320: SCHECK_PARTIAL();
! 4321: MRRETURN(MATCH_NOMATCH);
! 4322: }
! 4323: GETCHARINCTEST(c, eptr);
! 4324: prop_script = UCD_SCRIPT(c);
! 4325: if ((prop_script == prop_value) == prop_fail_result)
! 4326: MRRETURN(MATCH_NOMATCH);
! 4327: }
! 4328: /* Control never gets here */
! 4329:
! 4330: case PT_ALNUM:
! 4331: for (fi = min;; fi++)
! 4332: {
! 4333: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
! 4334: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4335: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4336: if (eptr >= md->end_subject)
! 4337: {
! 4338: SCHECK_PARTIAL();
! 4339: MRRETURN(MATCH_NOMATCH);
! 4340: }
! 4341: GETCHARINCTEST(c, eptr);
! 4342: prop_category = UCD_CATEGORY(c);
! 4343: if ((prop_category == ucp_L || prop_category == ucp_N)
! 4344: == prop_fail_result)
! 4345: MRRETURN(MATCH_NOMATCH);
! 4346: }
! 4347: /* Control never gets here */
! 4348:
! 4349: case PT_SPACE: /* Perl space */
! 4350: for (fi = min;; fi++)
! 4351: {
! 4352: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
! 4353: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4354: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4355: if (eptr >= md->end_subject)
! 4356: {
! 4357: SCHECK_PARTIAL();
! 4358: MRRETURN(MATCH_NOMATCH);
! 4359: }
! 4360: GETCHARINCTEST(c, eptr);
! 4361: prop_category = UCD_CATEGORY(c);
! 4362: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4363: c == CHAR_FF || c == CHAR_CR)
! 4364: == prop_fail_result)
! 4365: MRRETURN(MATCH_NOMATCH);
! 4366: }
! 4367: /* Control never gets here */
! 4368:
! 4369: case PT_PXSPACE: /* POSIX space */
! 4370: for (fi = min;; fi++)
! 4371: {
! 4372: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
! 4373: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4374: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4375: if (eptr >= md->end_subject)
! 4376: {
! 4377: SCHECK_PARTIAL();
! 4378: MRRETURN(MATCH_NOMATCH);
! 4379: }
! 4380: GETCHARINCTEST(c, eptr);
! 4381: prop_category = UCD_CATEGORY(c);
! 4382: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4383: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 4384: == prop_fail_result)
! 4385: MRRETURN(MATCH_NOMATCH);
! 4386: }
! 4387: /* Control never gets here */
! 4388:
! 4389: case PT_WORD:
! 4390: for (fi = min;; fi++)
! 4391: {
! 4392: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
! 4393: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4394: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4395: if (eptr >= md->end_subject)
! 4396: {
! 4397: SCHECK_PARTIAL();
! 4398: MRRETURN(MATCH_NOMATCH);
! 4399: }
! 4400: GETCHARINCTEST(c, eptr);
! 4401: prop_category = UCD_CATEGORY(c);
! 4402: if ((prop_category == ucp_L ||
! 4403: prop_category == ucp_N ||
! 4404: c == CHAR_UNDERSCORE)
! 4405: == prop_fail_result)
! 4406: MRRETURN(MATCH_NOMATCH);
! 4407: }
! 4408: /* Control never gets here */
! 4409:
! 4410: /* This should never occur */
! 4411:
! 4412: default:
! 4413: RRETURN(PCRE_ERROR_INTERNAL);
! 4414: }
! 4415: }
! 4416:
! 4417: /* Match extended Unicode sequences. We will get here only if the
! 4418: support is in the binary; otherwise a compile-time error occurs. */
! 4419:
! 4420: else if (ctype == OP_EXTUNI)
! 4421: {
! 4422: for (fi = min;; fi++)
! 4423: {
! 4424: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
! 4425: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4426: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4427: if (eptr >= md->end_subject)
! 4428: {
! 4429: SCHECK_PARTIAL();
! 4430: MRRETURN(MATCH_NOMATCH);
! 4431: }
! 4432: GETCHARINCTEST(c, eptr);
! 4433: prop_category = UCD_CATEGORY(c);
! 4434: if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
! 4435: while (eptr < md->end_subject)
! 4436: {
! 4437: int len = 1;
! 4438: if (!utf8) c = *eptr;
! 4439: else { GETCHARLEN(c, eptr, len); }
! 4440: prop_category = UCD_CATEGORY(c);
! 4441: if (prop_category != ucp_M) break;
! 4442: eptr += len;
! 4443: }
! 4444: }
! 4445: }
! 4446:
! 4447: else
! 4448: #endif /* SUPPORT_UCP */
! 4449:
! 4450: #ifdef SUPPORT_UTF8
! 4451: /* UTF-8 mode */
! 4452: if (utf8)
! 4453: {
! 4454: for (fi = min;; fi++)
! 4455: {
! 4456: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
! 4457: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4458: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4459: if (eptr >= md->end_subject)
! 4460: {
! 4461: SCHECK_PARTIAL();
! 4462: MRRETURN(MATCH_NOMATCH);
! 4463: }
! 4464: if (ctype == OP_ANY && IS_NEWLINE(eptr))
! 4465: MRRETURN(MATCH_NOMATCH);
! 4466: GETCHARINC(c, eptr);
! 4467: switch(ctype)
! 4468: {
! 4469: case OP_ANY: /* This is the non-NL case */
! 4470: case OP_ALLANY:
! 4471: case OP_ANYBYTE:
! 4472: break;
! 4473:
! 4474: case OP_ANYNL:
! 4475: switch(c)
! 4476: {
! 4477: default: MRRETURN(MATCH_NOMATCH);
! 4478: case 0x000d:
! 4479: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4480: break;
! 4481: case 0x000a:
! 4482: break;
! 4483:
! 4484: case 0x000b:
! 4485: case 0x000c:
! 4486: case 0x0085:
! 4487: case 0x2028:
! 4488: case 0x2029:
! 4489: if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
! 4490: break;
! 4491: }
! 4492: break;
! 4493:
! 4494: case OP_NOT_HSPACE:
! 4495: switch(c)
! 4496: {
! 4497: default: break;
! 4498: case 0x09: /* HT */
! 4499: case 0x20: /* SPACE */
! 4500: case 0xa0: /* NBSP */
! 4501: case 0x1680: /* OGHAM SPACE MARK */
! 4502: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4503: case 0x2000: /* EN QUAD */
! 4504: case 0x2001: /* EM QUAD */
! 4505: case 0x2002: /* EN SPACE */
! 4506: case 0x2003: /* EM SPACE */
! 4507: case 0x2004: /* THREE-PER-EM SPACE */
! 4508: case 0x2005: /* FOUR-PER-EM SPACE */
! 4509: case 0x2006: /* SIX-PER-EM SPACE */
! 4510: case 0x2007: /* FIGURE SPACE */
! 4511: case 0x2008: /* PUNCTUATION SPACE */
! 4512: case 0x2009: /* THIN SPACE */
! 4513: case 0x200A: /* HAIR SPACE */
! 4514: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4515: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4516: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4517: MRRETURN(MATCH_NOMATCH);
! 4518: }
! 4519: break;
! 4520:
! 4521: case OP_HSPACE:
! 4522: switch(c)
! 4523: {
! 4524: default: MRRETURN(MATCH_NOMATCH);
! 4525: case 0x09: /* HT */
! 4526: case 0x20: /* SPACE */
! 4527: case 0xa0: /* NBSP */
! 4528: case 0x1680: /* OGHAM SPACE MARK */
! 4529: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4530: case 0x2000: /* EN QUAD */
! 4531: case 0x2001: /* EM QUAD */
! 4532: case 0x2002: /* EN SPACE */
! 4533: case 0x2003: /* EM SPACE */
! 4534: case 0x2004: /* THREE-PER-EM SPACE */
! 4535: case 0x2005: /* FOUR-PER-EM SPACE */
! 4536: case 0x2006: /* SIX-PER-EM SPACE */
! 4537: case 0x2007: /* FIGURE SPACE */
! 4538: case 0x2008: /* PUNCTUATION SPACE */
! 4539: case 0x2009: /* THIN SPACE */
! 4540: case 0x200A: /* HAIR SPACE */
! 4541: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4542: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4543: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4544: break;
! 4545: }
! 4546: break;
! 4547:
! 4548: case OP_NOT_VSPACE:
! 4549: switch(c)
! 4550: {
! 4551: default: break;
! 4552: case 0x0a: /* LF */
! 4553: case 0x0b: /* VT */
! 4554: case 0x0c: /* FF */
! 4555: case 0x0d: /* CR */
! 4556: case 0x85: /* NEL */
! 4557: case 0x2028: /* LINE SEPARATOR */
! 4558: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4559: MRRETURN(MATCH_NOMATCH);
! 4560: }
! 4561: break;
! 4562:
! 4563: case OP_VSPACE:
! 4564: switch(c)
! 4565: {
! 4566: default: MRRETURN(MATCH_NOMATCH);
! 4567: case 0x0a: /* LF */
! 4568: case 0x0b: /* VT */
! 4569: case 0x0c: /* FF */
! 4570: case 0x0d: /* CR */
! 4571: case 0x85: /* NEL */
! 4572: case 0x2028: /* LINE SEPARATOR */
! 4573: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4574: break;
! 4575: }
! 4576: break;
! 4577:
! 4578: case OP_NOT_DIGIT:
! 4579: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
! 4580: MRRETURN(MATCH_NOMATCH);
! 4581: break;
! 4582:
! 4583: case OP_DIGIT:
! 4584: if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
! 4585: MRRETURN(MATCH_NOMATCH);
! 4586: break;
! 4587:
! 4588: case OP_NOT_WHITESPACE:
! 4589: if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
! 4590: MRRETURN(MATCH_NOMATCH);
! 4591: break;
! 4592:
! 4593: case OP_WHITESPACE:
! 4594: if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
! 4595: MRRETURN(MATCH_NOMATCH);
! 4596: break;
! 4597:
! 4598: case OP_NOT_WORDCHAR:
! 4599: if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
! 4600: MRRETURN(MATCH_NOMATCH);
! 4601: break;
! 4602:
! 4603: case OP_WORDCHAR:
! 4604: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
! 4605: MRRETURN(MATCH_NOMATCH);
! 4606: break;
! 4607:
! 4608: default:
! 4609: RRETURN(PCRE_ERROR_INTERNAL);
! 4610: }
! 4611: }
! 4612: }
! 4613: else
! 4614: #endif
! 4615: /* Not UTF-8 mode */
! 4616: {
! 4617: for (fi = min;; fi++)
! 4618: {
! 4619: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
! 4620: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4621: if (fi >= max) MRRETURN(MATCH_NOMATCH);
! 4622: if (eptr >= md->end_subject)
! 4623: {
! 4624: SCHECK_PARTIAL();
! 4625: MRRETURN(MATCH_NOMATCH);
! 4626: }
! 4627: if (ctype == OP_ANY && IS_NEWLINE(eptr))
! 4628: MRRETURN(MATCH_NOMATCH);
! 4629: c = *eptr++;
! 4630: switch(ctype)
! 4631: {
! 4632: case OP_ANY: /* This is the non-NL case */
! 4633: case OP_ALLANY:
! 4634: case OP_ANYBYTE:
! 4635: break;
! 4636:
! 4637: case OP_ANYNL:
! 4638: switch(c)
! 4639: {
! 4640: default: MRRETURN(MATCH_NOMATCH);
! 4641: case 0x000d:
! 4642: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4643: break;
! 4644:
! 4645: case 0x000a:
! 4646: break;
! 4647:
! 4648: case 0x000b:
! 4649: case 0x000c:
! 4650: case 0x0085:
! 4651: if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
! 4652: break;
! 4653: }
! 4654: break;
! 4655:
! 4656: case OP_NOT_HSPACE:
! 4657: switch(c)
! 4658: {
! 4659: default: break;
! 4660: case 0x09: /* HT */
! 4661: case 0x20: /* SPACE */
! 4662: case 0xa0: /* NBSP */
! 4663: MRRETURN(MATCH_NOMATCH);
! 4664: }
! 4665: break;
! 4666:
! 4667: case OP_HSPACE:
! 4668: switch(c)
! 4669: {
! 4670: default: MRRETURN(MATCH_NOMATCH);
! 4671: case 0x09: /* HT */
! 4672: case 0x20: /* SPACE */
! 4673: case 0xa0: /* NBSP */
! 4674: break;
! 4675: }
! 4676: break;
! 4677:
! 4678: case OP_NOT_VSPACE:
! 4679: switch(c)
! 4680: {
! 4681: default: break;
! 4682: case 0x0a: /* LF */
! 4683: case 0x0b: /* VT */
! 4684: case 0x0c: /* FF */
! 4685: case 0x0d: /* CR */
! 4686: case 0x85: /* NEL */
! 4687: MRRETURN(MATCH_NOMATCH);
! 4688: }
! 4689: break;
! 4690:
! 4691: case OP_VSPACE:
! 4692: switch(c)
! 4693: {
! 4694: default: MRRETURN(MATCH_NOMATCH);
! 4695: case 0x0a: /* LF */
! 4696: case 0x0b: /* VT */
! 4697: case 0x0c: /* FF */
! 4698: case 0x0d: /* CR */
! 4699: case 0x85: /* NEL */
! 4700: break;
! 4701: }
! 4702: break;
! 4703:
! 4704: case OP_NOT_DIGIT:
! 4705: if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
! 4706: break;
! 4707:
! 4708: case OP_DIGIT:
! 4709: if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
! 4710: break;
! 4711:
! 4712: case OP_NOT_WHITESPACE:
! 4713: if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
! 4714: break;
! 4715:
! 4716: case OP_WHITESPACE:
! 4717: if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
! 4718: break;
! 4719:
! 4720: case OP_NOT_WORDCHAR:
! 4721: if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
! 4722: break;
! 4723:
! 4724: case OP_WORDCHAR:
! 4725: if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
! 4726: break;
! 4727:
! 4728: default:
! 4729: RRETURN(PCRE_ERROR_INTERNAL);
! 4730: }
! 4731: }
! 4732: }
! 4733: /* Control never gets here */
! 4734: }
! 4735:
! 4736: /* If maximizing, it is worth using inline code for speed, doing the type
! 4737: test once at the start (i.e. keep it out of the loop). Again, keep the
! 4738: UTF-8 and UCP stuff separate. */
! 4739:
! 4740: else
! 4741: {
! 4742: pp = eptr; /* Remember where we started */
! 4743:
! 4744: #ifdef SUPPORT_UCP
! 4745: if (prop_type >= 0)
! 4746: {
! 4747: switch(prop_type)
! 4748: {
! 4749: case PT_ANY:
! 4750: for (i = min; i < max; i++)
! 4751: {
! 4752: int len = 1;
! 4753: if (eptr >= md->end_subject)
! 4754: {
! 4755: SCHECK_PARTIAL();
! 4756: break;
! 4757: }
! 4758: GETCHARLENTEST(c, eptr, len);
! 4759: if (prop_fail_result) break;
! 4760: eptr+= len;
! 4761: }
! 4762: break;
! 4763:
! 4764: case PT_LAMP:
! 4765: for (i = min; i < max; i++)
! 4766: {
! 4767: int len = 1;
! 4768: if (eptr >= md->end_subject)
! 4769: {
! 4770: SCHECK_PARTIAL();
! 4771: break;
! 4772: }
! 4773: GETCHARLENTEST(c, eptr, len);
! 4774: prop_chartype = UCD_CHARTYPE(c);
! 4775: if ((prop_chartype == ucp_Lu ||
! 4776: prop_chartype == ucp_Ll ||
! 4777: prop_chartype == ucp_Lt) == prop_fail_result)
! 4778: break;
! 4779: eptr+= len;
! 4780: }
! 4781: break;
! 4782:
! 4783: case PT_GC:
! 4784: for (i = min; i < max; i++)
! 4785: {
! 4786: int len = 1;
! 4787: if (eptr >= md->end_subject)
! 4788: {
! 4789: SCHECK_PARTIAL();
! 4790: break;
! 4791: }
! 4792: GETCHARLENTEST(c, eptr, len);
! 4793: prop_category = UCD_CATEGORY(c);
! 4794: if ((prop_category == prop_value) == prop_fail_result)
! 4795: break;
! 4796: eptr+= len;
! 4797: }
! 4798: break;
! 4799:
! 4800: case PT_PC:
! 4801: for (i = min; i < max; i++)
! 4802: {
! 4803: int len = 1;
! 4804: if (eptr >= md->end_subject)
! 4805: {
! 4806: SCHECK_PARTIAL();
! 4807: break;
! 4808: }
! 4809: GETCHARLENTEST(c, eptr, len);
! 4810: prop_chartype = UCD_CHARTYPE(c);
! 4811: if ((prop_chartype == prop_value) == prop_fail_result)
! 4812: break;
! 4813: eptr+= len;
! 4814: }
! 4815: break;
! 4816:
! 4817: case PT_SC:
! 4818: for (i = min; i < max; i++)
! 4819: {
! 4820: int len = 1;
! 4821: if (eptr >= md->end_subject)
! 4822: {
! 4823: SCHECK_PARTIAL();
! 4824: break;
! 4825: }
! 4826: GETCHARLENTEST(c, eptr, len);
! 4827: prop_script = UCD_SCRIPT(c);
! 4828: if ((prop_script == prop_value) == prop_fail_result)
! 4829: break;
! 4830: eptr+= len;
! 4831: }
! 4832: break;
! 4833:
! 4834: case PT_ALNUM:
! 4835: for (i = min; i < max; i++)
! 4836: {
! 4837: int len = 1;
! 4838: if (eptr >= md->end_subject)
! 4839: {
! 4840: SCHECK_PARTIAL();
! 4841: break;
! 4842: }
! 4843: GETCHARLENTEST(c, eptr, len);
! 4844: prop_category = UCD_CATEGORY(c);
! 4845: if ((prop_category == ucp_L || prop_category == ucp_N)
! 4846: == prop_fail_result)
! 4847: break;
! 4848: eptr+= len;
! 4849: }
! 4850: break;
! 4851:
! 4852: case PT_SPACE: /* Perl space */
! 4853: for (i = min; i < max; i++)
! 4854: {
! 4855: int len = 1;
! 4856: if (eptr >= md->end_subject)
! 4857: {
! 4858: SCHECK_PARTIAL();
! 4859: break;
! 4860: }
! 4861: GETCHARLENTEST(c, eptr, len);
! 4862: prop_category = UCD_CATEGORY(c);
! 4863: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4864: c == CHAR_FF || c == CHAR_CR)
! 4865: == prop_fail_result)
! 4866: break;
! 4867: eptr+= len;
! 4868: }
! 4869: break;
! 4870:
! 4871: case PT_PXSPACE: /* POSIX space */
! 4872: for (i = min; i < max; i++)
! 4873: {
! 4874: int len = 1;
! 4875: if (eptr >= md->end_subject)
! 4876: {
! 4877: SCHECK_PARTIAL();
! 4878: break;
! 4879: }
! 4880: GETCHARLENTEST(c, eptr, len);
! 4881: prop_category = UCD_CATEGORY(c);
! 4882: if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4883: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 4884: == prop_fail_result)
! 4885: break;
! 4886: eptr+= len;
! 4887: }
! 4888: break;
! 4889:
! 4890: case PT_WORD:
! 4891: for (i = min; i < max; i++)
! 4892: {
! 4893: int len = 1;
! 4894: if (eptr >= md->end_subject)
! 4895: {
! 4896: SCHECK_PARTIAL();
! 4897: break;
! 4898: }
! 4899: GETCHARLENTEST(c, eptr, len);
! 4900: prop_category = UCD_CATEGORY(c);
! 4901: if ((prop_category == ucp_L || prop_category == ucp_N ||
! 4902: c == CHAR_UNDERSCORE) == prop_fail_result)
! 4903: break;
! 4904: eptr+= len;
! 4905: }
! 4906: break;
! 4907:
! 4908: default:
! 4909: RRETURN(PCRE_ERROR_INTERNAL);
! 4910: }
! 4911:
! 4912: /* eptr is now past the end of the maximum run */
! 4913:
! 4914: if (possessive) continue;
! 4915: for(;;)
! 4916: {
! 4917: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
! 4918: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4919: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 4920: if (utf8) BACKCHAR(eptr);
! 4921: }
! 4922: }
! 4923:
! 4924: /* Match extended Unicode sequences. We will get here only if the
! 4925: support is in the binary; otherwise a compile-time error occurs. */
! 4926:
! 4927: else if (ctype == OP_EXTUNI)
! 4928: {
! 4929: for (i = min; i < max; i++)
! 4930: {
! 4931: if (eptr >= md->end_subject)
! 4932: {
! 4933: SCHECK_PARTIAL();
! 4934: break;
! 4935: }
! 4936: GETCHARINCTEST(c, eptr);
! 4937: prop_category = UCD_CATEGORY(c);
! 4938: if (prop_category == ucp_M) break;
! 4939: while (eptr < md->end_subject)
! 4940: {
! 4941: int len = 1;
! 4942: if (!utf8) c = *eptr; else
! 4943: {
! 4944: GETCHARLEN(c, eptr, len);
! 4945: }
! 4946: prop_category = UCD_CATEGORY(c);
! 4947: if (prop_category != ucp_M) break;
! 4948: eptr += len;
! 4949: }
! 4950: }
! 4951:
! 4952: /* eptr is now past the end of the maximum run */
! 4953:
! 4954: if (possessive) continue;
! 4955:
! 4956: for(;;)
! 4957: {
! 4958: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
! 4959: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4960: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 4961: for (;;) /* Move back over one extended */
! 4962: {
! 4963: int len = 1;
! 4964: if (!utf8) c = *eptr; else
! 4965: {
! 4966: BACKCHAR(eptr);
! 4967: GETCHARLEN(c, eptr, len);
! 4968: }
! 4969: prop_category = UCD_CATEGORY(c);
! 4970: if (prop_category != ucp_M) break;
! 4971: eptr--;
! 4972: }
! 4973: }
! 4974: }
! 4975:
! 4976: else
! 4977: #endif /* SUPPORT_UCP */
! 4978:
! 4979: #ifdef SUPPORT_UTF8
! 4980: /* UTF-8 mode */
! 4981:
! 4982: if (utf8)
! 4983: {
! 4984: switch(ctype)
! 4985: {
! 4986: case OP_ANY:
! 4987: if (max < INT_MAX)
! 4988: {
! 4989: for (i = min; i < max; i++)
! 4990: {
! 4991: if (eptr >= md->end_subject)
! 4992: {
! 4993: SCHECK_PARTIAL();
! 4994: break;
! 4995: }
! 4996: if (IS_NEWLINE(eptr)) break;
! 4997: eptr++;
! 4998: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 4999: }
! 5000: }
! 5001:
! 5002: /* Handle unlimited UTF-8 repeat */
! 5003:
! 5004: else
! 5005: {
! 5006: for (i = min; i < max; i++)
! 5007: {
! 5008: if (eptr >= md->end_subject)
! 5009: {
! 5010: SCHECK_PARTIAL();
! 5011: break;
! 5012: }
! 5013: if (IS_NEWLINE(eptr)) break;
! 5014: eptr++;
! 5015: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 5016: }
! 5017: }
! 5018: break;
! 5019:
! 5020: case OP_ALLANY:
! 5021: if (max < INT_MAX)
! 5022: {
! 5023: for (i = min; i < max; i++)
! 5024: {
! 5025: if (eptr >= md->end_subject)
! 5026: {
! 5027: SCHECK_PARTIAL();
! 5028: break;
! 5029: }
! 5030: eptr++;
! 5031: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 5032: }
! 5033: }
! 5034: else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
! 5035: break;
! 5036:
! 5037: /* The byte case is the same as non-UTF8 */
! 5038:
! 5039: case OP_ANYBYTE:
! 5040: c = max - min;
! 5041: if (c > (unsigned int)(md->end_subject - eptr))
! 5042: {
! 5043: eptr = md->end_subject;
! 5044: SCHECK_PARTIAL();
! 5045: }
! 5046: else eptr += c;
! 5047: break;
! 5048:
! 5049: case OP_ANYNL:
! 5050: for (i = min; i < max; i++)
! 5051: {
! 5052: int len = 1;
! 5053: if (eptr >= md->end_subject)
! 5054: {
! 5055: SCHECK_PARTIAL();
! 5056: break;
! 5057: }
! 5058: GETCHARLEN(c, eptr, len);
! 5059: if (c == 0x000d)
! 5060: {
! 5061: if (++eptr >= md->end_subject) break;
! 5062: if (*eptr == 0x000a) eptr++;
! 5063: }
! 5064: else
! 5065: {
! 5066: if (c != 0x000a &&
! 5067: (md->bsr_anycrlf ||
! 5068: (c != 0x000b && c != 0x000c &&
! 5069: c != 0x0085 && c != 0x2028 && c != 0x2029)))
! 5070: break;
! 5071: eptr += len;
! 5072: }
! 5073: }
! 5074: break;
! 5075:
! 5076: case OP_NOT_HSPACE:
! 5077: case OP_HSPACE:
! 5078: for (i = min; i < max; i++)
! 5079: {
! 5080: BOOL gotspace;
! 5081: int len = 1;
! 5082: if (eptr >= md->end_subject)
! 5083: {
! 5084: SCHECK_PARTIAL();
! 5085: break;
! 5086: }
! 5087: GETCHARLEN(c, eptr, len);
! 5088: switch(c)
! 5089: {
! 5090: default: gotspace = FALSE; break;
! 5091: case 0x09: /* HT */
! 5092: case 0x20: /* SPACE */
! 5093: case 0xa0: /* NBSP */
! 5094: case 0x1680: /* OGHAM SPACE MARK */
! 5095: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 5096: case 0x2000: /* EN QUAD */
! 5097: case 0x2001: /* EM QUAD */
! 5098: case 0x2002: /* EN SPACE */
! 5099: case 0x2003: /* EM SPACE */
! 5100: case 0x2004: /* THREE-PER-EM SPACE */
! 5101: case 0x2005: /* FOUR-PER-EM SPACE */
! 5102: case 0x2006: /* SIX-PER-EM SPACE */
! 5103: case 0x2007: /* FIGURE SPACE */
! 5104: case 0x2008: /* PUNCTUATION SPACE */
! 5105: case 0x2009: /* THIN SPACE */
! 5106: case 0x200A: /* HAIR SPACE */
! 5107: case 0x202f: /* NARROW NO-BREAK SPACE */
! 5108: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 5109: case 0x3000: /* IDEOGRAPHIC SPACE */
! 5110: gotspace = TRUE;
! 5111: break;
! 5112: }
! 5113: if (gotspace == (ctype == OP_NOT_HSPACE)) break;
! 5114: eptr += len;
! 5115: }
! 5116: break;
! 5117:
! 5118: case OP_NOT_VSPACE:
! 5119: case OP_VSPACE:
! 5120: for (i = min; i < max; i++)
! 5121: {
! 5122: BOOL gotspace;
! 5123: int len = 1;
! 5124: if (eptr >= md->end_subject)
! 5125: {
! 5126: SCHECK_PARTIAL();
! 5127: break;
! 5128: }
! 5129: GETCHARLEN(c, eptr, len);
! 5130: switch(c)
! 5131: {
! 5132: default: gotspace = FALSE; break;
! 5133: case 0x0a: /* LF */
! 5134: case 0x0b: /* VT */
! 5135: case 0x0c: /* FF */
! 5136: case 0x0d: /* CR */
! 5137: case 0x85: /* NEL */
! 5138: case 0x2028: /* LINE SEPARATOR */
! 5139: case 0x2029: /* PARAGRAPH SEPARATOR */
! 5140: gotspace = TRUE;
! 5141: break;
! 5142: }
! 5143: if (gotspace == (ctype == OP_NOT_VSPACE)) break;
! 5144: eptr += len;
! 5145: }
! 5146: break;
! 5147:
! 5148: case OP_NOT_DIGIT:
! 5149: for (i = min; i < max; i++)
! 5150: {
! 5151: int len = 1;
! 5152: if (eptr >= md->end_subject)
! 5153: {
! 5154: SCHECK_PARTIAL();
! 5155: break;
! 5156: }
! 5157: GETCHARLEN(c, eptr, len);
! 5158: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
! 5159: eptr+= len;
! 5160: }
! 5161: break;
! 5162:
! 5163: case OP_DIGIT:
! 5164: for (i = min; i < max; i++)
! 5165: {
! 5166: int len = 1;
! 5167: if (eptr >= md->end_subject)
! 5168: {
! 5169: SCHECK_PARTIAL();
! 5170: break;
! 5171: }
! 5172: GETCHARLEN(c, eptr, len);
! 5173: if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
! 5174: eptr+= len;
! 5175: }
! 5176: break;
! 5177:
! 5178: case OP_NOT_WHITESPACE:
! 5179: for (i = min; i < max; i++)
! 5180: {
! 5181: int len = 1;
! 5182: if (eptr >= md->end_subject)
! 5183: {
! 5184: SCHECK_PARTIAL();
! 5185: break;
! 5186: }
! 5187: GETCHARLEN(c, eptr, len);
! 5188: if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
! 5189: eptr+= len;
! 5190: }
! 5191: break;
! 5192:
! 5193: case OP_WHITESPACE:
! 5194: for (i = min; i < max; i++)
! 5195: {
! 5196: int len = 1;
! 5197: if (eptr >= md->end_subject)
! 5198: {
! 5199: SCHECK_PARTIAL();
! 5200: break;
! 5201: }
! 5202: GETCHARLEN(c, eptr, len);
! 5203: if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
! 5204: eptr+= len;
! 5205: }
! 5206: break;
! 5207:
! 5208: case OP_NOT_WORDCHAR:
! 5209: for (i = min; i < max; i++)
! 5210: {
! 5211: int len = 1;
! 5212: if (eptr >= md->end_subject)
! 5213: {
! 5214: SCHECK_PARTIAL();
! 5215: break;
! 5216: }
! 5217: GETCHARLEN(c, eptr, len);
! 5218: if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
! 5219: eptr+= len;
! 5220: }
! 5221: break;
! 5222:
! 5223: case OP_WORDCHAR:
! 5224: for (i = min; i < max; i++)
! 5225: {
! 5226: int len = 1;
! 5227: if (eptr >= md->end_subject)
! 5228: {
! 5229: SCHECK_PARTIAL();
! 5230: break;
! 5231: }
! 5232: GETCHARLEN(c, eptr, len);
! 5233: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
! 5234: eptr+= len;
! 5235: }
! 5236: break;
! 5237:
! 5238: default:
! 5239: RRETURN(PCRE_ERROR_INTERNAL);
! 5240: }
! 5241:
! 5242: /* eptr is now past the end of the maximum run */
! 5243:
! 5244: if (possessive) continue;
! 5245: for(;;)
! 5246: {
! 5247: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
! 5248: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5249: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 5250: BACKCHAR(eptr);
! 5251: }
! 5252: }
! 5253: else
! 5254: #endif /* SUPPORT_UTF8 */
! 5255:
! 5256: /* Not UTF-8 mode */
! 5257: {
! 5258: switch(ctype)
! 5259: {
! 5260: case OP_ANY:
! 5261: for (i = min; i < max; i++)
! 5262: {
! 5263: if (eptr >= md->end_subject)
! 5264: {
! 5265: SCHECK_PARTIAL();
! 5266: break;
! 5267: }
! 5268: if (IS_NEWLINE(eptr)) break;
! 5269: eptr++;
! 5270: }
! 5271: break;
! 5272:
! 5273: case OP_ALLANY:
! 5274: case OP_ANYBYTE:
! 5275: c = max - min;
! 5276: if (c > (unsigned int)(md->end_subject - eptr))
! 5277: {
! 5278: eptr = md->end_subject;
! 5279: SCHECK_PARTIAL();
! 5280: }
! 5281: else eptr += c;
! 5282: break;
! 5283:
! 5284: case OP_ANYNL:
! 5285: for (i = min; i < max; i++)
! 5286: {
! 5287: if (eptr >= md->end_subject)
! 5288: {
! 5289: SCHECK_PARTIAL();
! 5290: break;
! 5291: }
! 5292: c = *eptr;
! 5293: if (c == 0x000d)
! 5294: {
! 5295: if (++eptr >= md->end_subject) break;
! 5296: if (*eptr == 0x000a) eptr++;
! 5297: }
! 5298: else
! 5299: {
! 5300: if (c != 0x000a &&
! 5301: (md->bsr_anycrlf ||
! 5302: (c != 0x000b && c != 0x000c && c != 0x0085)))
! 5303: break;
! 5304: eptr++;
! 5305: }
! 5306: }
! 5307: break;
! 5308:
! 5309: case OP_NOT_HSPACE:
! 5310: for (i = min; i < max; i++)
! 5311: {
! 5312: if (eptr >= md->end_subject)
! 5313: {
! 5314: SCHECK_PARTIAL();
! 5315: break;
! 5316: }
! 5317: c = *eptr;
! 5318: if (c == 0x09 || c == 0x20 || c == 0xa0) break;
! 5319: eptr++;
! 5320: }
! 5321: break;
! 5322:
! 5323: case OP_HSPACE:
! 5324: for (i = min; i < max; i++)
! 5325: {
! 5326: if (eptr >= md->end_subject)
! 5327: {
! 5328: SCHECK_PARTIAL();
! 5329: break;
! 5330: }
! 5331: c = *eptr;
! 5332: if (c != 0x09 && c != 0x20 && c != 0xa0) break;
! 5333: eptr++;
! 5334: }
! 5335: break;
! 5336:
! 5337: case OP_NOT_VSPACE:
! 5338: for (i = min; i < max; i++)
! 5339: {
! 5340: if (eptr >= md->end_subject)
! 5341: {
! 5342: SCHECK_PARTIAL();
! 5343: break;
! 5344: }
! 5345: c = *eptr;
! 5346: if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
! 5347: break;
! 5348: eptr++;
! 5349: }
! 5350: break;
! 5351:
! 5352: case OP_VSPACE:
! 5353: for (i = min; i < max; i++)
! 5354: {
! 5355: if (eptr >= md->end_subject)
! 5356: {
! 5357: SCHECK_PARTIAL();
! 5358: break;
! 5359: }
! 5360: c = *eptr;
! 5361: if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
! 5362: break;
! 5363: eptr++;
! 5364: }
! 5365: break;
! 5366:
! 5367: case OP_NOT_DIGIT:
! 5368: for (i = min; i < max; i++)
! 5369: {
! 5370: if (eptr >= md->end_subject)
! 5371: {
! 5372: SCHECK_PARTIAL();
! 5373: break;
! 5374: }
! 5375: if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
! 5376: eptr++;
! 5377: }
! 5378: break;
! 5379:
! 5380: case OP_DIGIT:
! 5381: for (i = min; i < max; i++)
! 5382: {
! 5383: if (eptr >= md->end_subject)
! 5384: {
! 5385: SCHECK_PARTIAL();
! 5386: break;
! 5387: }
! 5388: if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
! 5389: eptr++;
! 5390: }
! 5391: break;
! 5392:
! 5393: case OP_NOT_WHITESPACE:
! 5394: for (i = min; i < max; i++)
! 5395: {
! 5396: if (eptr >= md->end_subject)
! 5397: {
! 5398: SCHECK_PARTIAL();
! 5399: break;
! 5400: }
! 5401: if ((md->ctypes[*eptr] & ctype_space) != 0) break;
! 5402: eptr++;
! 5403: }
! 5404: break;
! 5405:
! 5406: case OP_WHITESPACE:
! 5407: for (i = min; i < max; i++)
! 5408: {
! 5409: if (eptr >= md->end_subject)
! 5410: {
! 5411: SCHECK_PARTIAL();
! 5412: break;
! 5413: }
! 5414: if ((md->ctypes[*eptr] & ctype_space) == 0) break;
! 5415: eptr++;
! 5416: }
! 5417: break;
! 5418:
! 5419: case OP_NOT_WORDCHAR:
! 5420: for (i = min; i < max; i++)
! 5421: {
! 5422: if (eptr >= md->end_subject)
! 5423: {
! 5424: SCHECK_PARTIAL();
! 5425: break;
! 5426: }
! 5427: if ((md->ctypes[*eptr] & ctype_word) != 0) break;
! 5428: eptr++;
! 5429: }
! 5430: break;
! 5431:
! 5432: case OP_WORDCHAR:
! 5433: for (i = min; i < max; i++)
! 5434: {
! 5435: if (eptr >= md->end_subject)
! 5436: {
! 5437: SCHECK_PARTIAL();
! 5438: break;
! 5439: }
! 5440: if ((md->ctypes[*eptr] & ctype_word) == 0) break;
! 5441: eptr++;
! 5442: }
! 5443: break;
! 5444:
! 5445: default:
! 5446: RRETURN(PCRE_ERROR_INTERNAL);
! 5447: }
! 5448:
! 5449: /* eptr is now past the end of the maximum run */
! 5450:
! 5451: if (possessive) continue;
! 5452: while (eptr >= pp)
! 5453: {
! 5454: RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
! 5455: eptr--;
! 5456: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5457: }
! 5458: }
! 5459:
! 5460: /* Get here if we can't make it match with any permitted repetitions */
! 5461:
! 5462: MRRETURN(MATCH_NOMATCH);
! 5463: }
! 5464: /* Control never gets here */
! 5465:
! 5466: /* There's been some horrible disaster. Arrival here can only mean there is
! 5467: something seriously wrong in the code above or the OP_xxx definitions. */
! 5468:
! 5469: default:
! 5470: DPRINTF(("Unknown opcode %d\n", *ecode));
! 5471: RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
! 5472: }
! 5473:
! 5474: /* Do not stick any code in here without much thought; it is assumed
! 5475: that "continue" in the code above comes out to here to repeat the main
! 5476: loop. */
! 5477:
! 5478: } /* End of main loop */
! 5479: /* Control never reaches here */
! 5480:
! 5481:
! 5482: /* When compiling to use the heap rather than the stack for recursive calls to
! 5483: match(), the RRETURN() macro jumps here. The number that is saved in
! 5484: frame->Xwhere indicates which label we actually want to return to. */
! 5485:
! 5486: #ifdef NO_RECURSE
! 5487: #define LBL(val) case val: goto L_RM##val;
! 5488: HEAP_RETURN:
! 5489: switch (frame->Xwhere)
! 5490: {
! 5491: LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
! 5492: LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
! 5493: LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
! 5494: LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
! 5495: LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
! 5496: #ifdef SUPPORT_UTF8
! 5497: LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
! 5498: LBL(32) LBL(34) LBL(42) LBL(46)
! 5499: #ifdef SUPPORT_UCP
! 5500: LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
! 5501: LBL(59) LBL(60) LBL(61) LBL(62)
! 5502: #endif /* SUPPORT_UCP */
! 5503: #endif /* SUPPORT_UTF8 */
! 5504: default:
! 5505: DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
! 5506: return PCRE_ERROR_INTERNAL;
! 5507: }
! 5508: #undef LBL
! 5509: #endif /* NO_RECURSE */
! 5510: }
! 5511:
! 5512:
/***************************************************************************
****************************************************************************
                   RECURSION IN THE match() FUNCTION

The macros set up earlier in this file for use inside match() are removed
here, so that none of these short names stays defined as a macro in the code
that follows. */

/* This set of names is only undefined when NO_RECURSE is defined. */

#ifdef NO_RECURSE
#undef eptr
#undef ecode
#undef mstart
#undef offset_top
#undef ims
#undef eptrb
#undef flags

#undef callpat
#undef charptr
#undef data
#undef next
#undef pp
#undef prev
#undef saved_eptr

#undef new_recursive

#undef cur_is_word
#undef condition
#undef prev_is_word

#undef original_ims

#undef ctype
#undef length
#undef max
#undef min
#undef number
#undef offset
#undef op
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave

#undef newptrb

#endif  /* NO_RECURSE */

/* fc and fi are macros whether or not NO_RECURSE is defined, so they are
always undefined. */

#undef fc
#undef fi

/***************************************************************************
***************************************************************************/
! 5568:
! 5569:
! 5570:
! 5571: /*************************************************
! 5572: * Execute a Regular Expression *
! 5573: *************************************************/
! 5574:
! 5575: /* This function applies a compiled re to a subject string and picks out
! 5576: portions of the string if it matches. Two elements in the vector are set for
! 5577: each substring: the offsets to the start and end of the substring.
! 5578:
! 5579: Arguments:
! 5580: argument_re points to the compiled expression
! 5581: extra_data points to extra data or is NULL
! 5582: subject points to the subject string
! 5583: length length of subject string (may contain binary zeros)
! 5584: start_offset where to start in the subject string
! 5585: options option bits
! 5586: offsets points to a vector of ints to be filled in with offsets
! 5587: offsetcount the number of elements in the vector
! 5588:
! 5589: Returns: > 0 => success; value is the number of elements filled in
! 5590: = 0 => success, but offsets is not big enough
! 5591: -1 => failed to match
! 5592: < -1 => some kind of unexpected problem
! 5593: */
! 5594:
! 5595: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 5596: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
! 5597: PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
! 5598: int offsetcount)
! 5599: {
! 5600: int rc, resetcount, ocount;
! 5601: int first_byte = -1;
! 5602: int req_byte = -1;
! 5603: int req_byte2 = -1;
! 5604: int newline;
! 5605: unsigned long int ims;
! 5606: BOOL using_temporary_offsets = FALSE;
! 5607: BOOL anchored;
! 5608: BOOL startline;
! 5609: BOOL firstline;
! 5610: BOOL first_byte_caseless = FALSE;
! 5611: BOOL req_byte_caseless = FALSE;
! 5612: BOOL utf8;
! 5613: match_data match_block;
! 5614: match_data *md = &match_block;
! 5615: const uschar *tables;
! 5616: const uschar *start_bits = NULL;
! 5617: USPTR start_match = (USPTR)subject + start_offset;
! 5618: USPTR end_subject;
! 5619: USPTR start_partial = NULL;
! 5620: USPTR req_byte_ptr = start_match - 1;
! 5621:
! 5622: pcre_study_data internal_study;
! 5623: const pcre_study_data *study;
! 5624:
! 5625: real_pcre internal_re;
! 5626: const real_pcre *external_re = (const real_pcre *)argument_re;
! 5627: const real_pcre *re = external_re;
! 5628:
! 5629: /* Plausibility checks */
! 5630:
! 5631: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
! 5632: if (re == NULL || subject == NULL ||
! 5633: (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
! 5634: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
! 5635: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
! 5636:
! 5637: /* This information is for finding all the numbers associated with a given
! 5638: name, for condition testing. */
! 5639:
! 5640: md->name_table = (uschar *)re + re->name_table_offset;
! 5641: md->name_count = re->name_count;
! 5642: md->name_entry_size = re->name_entry_size;
! 5643:
! 5644: /* Fish out the optional data from the extra_data structure, first setting
! 5645: the default values. */
! 5646:
! 5647: study = NULL;
! 5648: md->match_limit = MATCH_LIMIT;
! 5649: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
! 5650: md->callout_data = NULL;
! 5651:
! 5652: /* The table pointer is always in native byte order. */
! 5653:
! 5654: tables = external_re->tables;
! 5655:
! 5656: if (extra_data != NULL)
! 5657: {
! 5658: register unsigned int flags = extra_data->flags;
! 5659: if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
! 5660: study = (const pcre_study_data *)extra_data->study_data;
! 5661: if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
! 5662: md->match_limit = extra_data->match_limit;
! 5663: if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
! 5664: md->match_limit_recursion = extra_data->match_limit_recursion;
! 5665: if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
! 5666: md->callout_data = extra_data->callout_data;
! 5667: if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
! 5668: }
! 5669:
! 5670: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
! 5671: is a feature that makes it possible to save compiled regex and re-use them
! 5672: in other programs later. */
! 5673:
! 5674: if (tables == NULL) tables = _pcre_default_tables;
! 5675:
! 5676: /* Check that the first field in the block is the magic number. If it is not,
! 5677: test for a regex that was compiled on a host of opposite endianness. If this is
! 5678: the case, flipped values are put in internal_re and internal_study if there was
! 5679: study data too. */
! 5680:
! 5681: if (re->magic_number != MAGIC_NUMBER)
! 5682: {
! 5683: re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
! 5684: if (re == NULL) return PCRE_ERROR_BADMAGIC;
! 5685: if (study != NULL) study = &internal_study;
! 5686: }
! 5687:
! 5688: /* Set up other data */
! 5689:
! 5690: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
! 5691: startline = (re->flags & PCRE_STARTLINE) != 0;
! 5692: firstline = (re->options & PCRE_FIRSTLINE) != 0;
! 5693:
! 5694: /* The code starts after the real_pcre block and the capture name table. */
! 5695:
! 5696: md->start_code = (const uschar *)external_re + re->name_table_offset +
! 5697: re->name_count * re->name_entry_size;
! 5698:
! 5699: md->start_subject = (USPTR)subject;
! 5700: md->start_offset = start_offset;
! 5701: md->end_subject = md->start_subject + length;
! 5702: end_subject = md->end_subject;
! 5703:
! 5704: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
! 5705: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
! 5706: md->use_ucp = (re->options & PCRE_UCP) != 0;
! 5707: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
! 5708:
! 5709: md->notbol = (options & PCRE_NOTBOL) != 0;
! 5710: md->noteol = (options & PCRE_NOTEOL) != 0;
! 5711: md->notempty = (options & PCRE_NOTEMPTY) != 0;
! 5712: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
! 5713: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
! 5714: ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
! 5715: md->hitend = FALSE;
! 5716: md->mark = NULL; /* In case never set */
! 5717:
! 5718: md->recursive = NULL; /* No recursion at top level */
! 5719:
! 5720: md->lcc = tables + lcc_offset;
! 5721: md->ctypes = tables + ctypes_offset;
! 5722:
! 5723: /* Handle different \R options. */
! 5724:
! 5725: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
! 5726: {
! 5727: case 0:
! 5728: if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
! 5729: md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
! 5730: else
! 5731: #ifdef BSR_ANYCRLF
! 5732: md->bsr_anycrlf = TRUE;
! 5733: #else
! 5734: md->bsr_anycrlf = FALSE;
! 5735: #endif
! 5736: break;
! 5737:
! 5738: case PCRE_BSR_ANYCRLF:
! 5739: md->bsr_anycrlf = TRUE;
! 5740: break;
! 5741:
! 5742: case PCRE_BSR_UNICODE:
! 5743: md->bsr_anycrlf = FALSE;
! 5744: break;
! 5745:
! 5746: default: return PCRE_ERROR_BADNEWLINE;
! 5747: }
! 5748:
! 5749: /* Handle different types of newline. The three bits give eight cases. If
! 5750: nothing is set at run time, whatever was used at compile time applies. */
! 5751:
! 5752: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
! 5753: (pcre_uint32)options) & PCRE_NEWLINE_BITS)
! 5754: {
! 5755: case 0: newline = NEWLINE; break; /* Compile-time default */
! 5756: case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
! 5757: case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
! 5758: case PCRE_NEWLINE_CR+
! 5759: PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
! 5760: case PCRE_NEWLINE_ANY: newline = -1; break;
! 5761: case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
! 5762: default: return PCRE_ERROR_BADNEWLINE;
! 5763: }
! 5764:
! 5765: if (newline == -2)
! 5766: {
! 5767: md->nltype = NLTYPE_ANYCRLF;
! 5768: }
! 5769: else if (newline < 0)
! 5770: {
! 5771: md->nltype = NLTYPE_ANY;
! 5772: }
! 5773: else
! 5774: {
! 5775: md->nltype = NLTYPE_FIXED;
! 5776: if (newline > 255)
! 5777: {
! 5778: md->nllen = 2;
! 5779: md->nl[0] = (newline >> 8) & 255;
! 5780: md->nl[1] = newline & 255;
! 5781: }
! 5782: else
! 5783: {
! 5784: md->nllen = 1;
! 5785: md->nl[0] = newline;
! 5786: }
! 5787: }
! 5788:
! 5789: /* Partial matching was originally supported only for a restricted set of
! 5790: regexes; from release 8.00 there are no restrictions, but the bits are still
! 5791: defined (though never set). So there's no harm in leaving this code. */
! 5792:
! 5793: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
! 5794: return PCRE_ERROR_BADPARTIAL;
! 5795:
! 5796: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
! 5797: back the character offset. */
! 5798:
! 5799: #ifdef SUPPORT_UTF8
! 5800: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
! 5801: {
! 5802: int tb;
! 5803: if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
! 5804: return (tb == length && md->partial > 1)?
! 5805: PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
! 5806: if (start_offset > 0 && start_offset < length)
! 5807: {
! 5808: tb = ((USPTR)subject)[start_offset] & 0xc0;
! 5809: if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
! 5810: }
! 5811: }
! 5812: #endif
! 5813:
! 5814: /* The ims options can vary during the matching as a result of the presence
! 5815: of (?ims) items in the pattern. They are kept in a local variable so that
! 5816: restoring at the exit of a group is easy. */
! 5817:
! 5818: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
! 5819:
! 5820: /* If the expression has got more back references than the offsets supplied can
! 5821: hold, we get a temporary chunk of working store to use during the matching.
! 5822: Otherwise, we can use the vector supplied, rounding down its size to a multiple
! 5823: of 3. */
! 5824:
! 5825: ocount = offsetcount - (offsetcount % 3);
! 5826:
! 5827: if (re->top_backref > 0 && re->top_backref >= ocount/3)
! 5828: {
! 5829: ocount = re->top_backref * 3 + 3;
! 5830: md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
! 5831: if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
! 5832: using_temporary_offsets = TRUE;
! 5833: DPRINTF(("Got memory to hold back references\n"));
! 5834: }
! 5835: else md->offset_vector = offsets;
! 5836:
! 5837: md->offset_end = ocount;
! 5838: md->offset_max = (2*ocount)/3;
! 5839: md->offset_overflow = FALSE;
! 5840: md->capture_last = -1;
! 5841:
! 5842: /* Compute the minimum number of offsets that we need to reset each time. Doing
! 5843: this makes a huge difference to execution time when there aren't many brackets
! 5844: in the pattern. */
! 5845:
! 5846: resetcount = 2 + re->top_bracket * 2;
! 5847: if (resetcount > offsetcount) resetcount = ocount;
! 5848:
! 5849: /* Reset the working variable associated with each extraction. These should
! 5850: never be used unless previously set, but they get saved and restored, and so we
! 5851: initialize them to avoid reading uninitialized locations. */
! 5852:
! 5853: if (md->offset_vector != NULL)
! 5854: {
! 5855: register int *iptr = md->offset_vector + ocount;
! 5856: register int *iend = iptr - resetcount/2 + 1;
! 5857: while (--iptr >= iend) *iptr = -1;
! 5858: }
! 5859:
! 5860: /* Set up the first character to match, if available. The first_byte value is
! 5861: never set for an anchored regular expression, but the anchoring may be forced
! 5862: at run time, so we have to test for anchoring. The first char may be unset for
! 5863: an unanchored pattern, of course. If there's no first char and the pattern was
! 5864: studied, there may be a bitmap of possible first characters. */
! 5865:
! 5866: if (!anchored)
! 5867: {
! 5868: if ((re->flags & PCRE_FIRSTSET) != 0)
! 5869: {
! 5870: first_byte = re->first_byte & 255;
! 5871: if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
! 5872: first_byte = md->lcc[first_byte];
! 5873: }
! 5874: else
! 5875: if (!startline && study != NULL &&
! 5876: (study->flags & PCRE_STUDY_MAPPED) != 0)
! 5877: start_bits = study->start_bits;
! 5878: }
! 5879:
! 5880: /* For anchored or unanchored matches, there may be a "last known required
! 5881: character" set. */
! 5882:
! 5883: if ((re->flags & PCRE_REQCHSET) != 0)
! 5884: {
! 5885: req_byte = re->req_byte & 255;
! 5886: req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
! 5887: req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
! 5888: }
! 5889:
! 5890:
! 5891: /* ==========================================================================*/
! 5892:
! 5893: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
! 5894: the loop runs just once. */
! 5895:
! 5896: for(;;)
! 5897: {
! 5898: USPTR save_end_subject = end_subject;
! 5899: USPTR new_start_match;
! 5900:
! 5901: /* Reset the maximum number of extractions we might see. */
! 5902:
! 5903: if (md->offset_vector != NULL)
! 5904: {
! 5905: register int *iptr = md->offset_vector;
! 5906: register int *iend = iptr + resetcount;
! 5907: while (iptr < iend) *iptr++ = -1;
! 5908: }
! 5909:
! 5910: /* If firstline is TRUE, the start of the match is constrained to the first
! 5911: line of a multiline string. That is, the match must be before or at the first
! 5912: newline. Implement this by temporarily adjusting end_subject so that we stop
! 5913: scanning at a newline. If the match fails at the newline, later code breaks
! 5914: this loop. */
! 5915:
! 5916: if (firstline)
! 5917: {
! 5918: USPTR t = start_match;
! 5919: #ifdef SUPPORT_UTF8
! 5920: if (utf8)
! 5921: {
! 5922: while (t < md->end_subject && !IS_NEWLINE(t))
! 5923: {
! 5924: t++;
! 5925: while (t < end_subject && (*t & 0xc0) == 0x80) t++;
! 5926: }
! 5927: }
! 5928: else
! 5929: #endif
! 5930: while (t < md->end_subject && !IS_NEWLINE(t)) t++;
! 5931: end_subject = t;
! 5932: }
! 5933:
! 5934: /* There are some optimizations that avoid running the match if a known
! 5935: starting point is not found, or if a known later character is not present.
! 5936: However, there is an option that disables these, for testing and for ensuring
! 5937: that all callouts do actually occur. The option can be set in the regex by
! 5938: (*NO_START_OPT) or passed in match-time options. */
! 5939:
! 5940: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
! 5941: {
! 5942: /* Advance to a unique first byte if there is one. */
! 5943:
! 5944: if (first_byte >= 0)
! 5945: {
! 5946: if (first_byte_caseless)
! 5947: while (start_match < end_subject && md->lcc[*start_match] != first_byte)
! 5948: start_match++;
! 5949: else
! 5950: while (start_match < end_subject && *start_match != first_byte)
! 5951: start_match++;
! 5952: }
! 5953:
! 5954: /* Or to just after a linebreak for a multiline match */
! 5955:
! 5956: else if (startline)
! 5957: {
! 5958: if (start_match > md->start_subject + start_offset)
! 5959: {
! 5960: #ifdef SUPPORT_UTF8
! 5961: if (utf8)
! 5962: {
! 5963: while (start_match < end_subject && !WAS_NEWLINE(start_match))
! 5964: {
! 5965: start_match++;
! 5966: while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
! 5967: start_match++;
! 5968: }
! 5969: }
! 5970: else
! 5971: #endif
! 5972: while (start_match < end_subject && !WAS_NEWLINE(start_match))
! 5973: start_match++;
! 5974:
! 5975: /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
! 5976: and we are now at a LF, advance the match position by one more character.
! 5977: */
! 5978:
! 5979: if (start_match[-1] == CHAR_CR &&
! 5980: (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
! 5981: start_match < end_subject &&
! 5982: *start_match == CHAR_NL)
! 5983: start_match++;
! 5984: }
! 5985: }
! 5986:
! 5987: /* Or to a non-unique first byte after study */
! 5988:
! 5989: else if (start_bits != NULL)
! 5990: {
! 5991: while (start_match < end_subject)
! 5992: {
! 5993: register unsigned int c = *start_match;
! 5994: if ((start_bits[c/8] & (1 << (c&7))) == 0)
! 5995: {
! 5996: start_match++;
! 5997: #ifdef SUPPORT_UTF8
! 5998: if (utf8)
! 5999: while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
! 6000: start_match++;
! 6001: #endif
! 6002: }
! 6003: else break;
! 6004: }
! 6005: }
! 6006: } /* Starting optimizations */
! 6007:
! 6008: /* Restore fudged end_subject */
! 6009:
! 6010: end_subject = save_end_subject;
! 6011:
! 6012: /* The following two optimizations are disabled for partial matching or if
! 6013: disabling is explicitly requested. */
! 6014:
! 6015: if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
! 6016: {
! 6017: /* If the pattern was studied, a minimum subject length may be set. This is
! 6018: a lower bound; no actual string of that length may actually match the
! 6019: pattern. Although the value is, strictly, in characters, we treat it as
! 6020: bytes to avoid spending too much time in this optimization. */
! 6021:
! 6022: if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
! 6023: (pcre_uint32)(end_subject - start_match) < study->minlength)
! 6024: {
! 6025: rc = MATCH_NOMATCH;
! 6026: break;
! 6027: }
! 6028:
! 6029: /* If req_byte is set, we know that that character must appear in the
! 6030: subject for the match to succeed. If the first character is set, req_byte
! 6031: must be later in the subject; otherwise the test starts at the match point.
! 6032: This optimization can save a huge amount of backtracking in patterns with
! 6033: nested unlimited repeats that aren't going to match. Writing separate code
! 6034: for cased/caseless versions makes it go faster, as does using an
! 6035: autoincrement and backing off on a match.
! 6036:
! 6037: HOWEVER: when the subject string is very, very long, searching to its end
! 6038: can take a long time, and give bad performance on quite ordinary patterns.
! 6039: This showed up when somebody was matching something like /^\d+C/ on a
! 6040: 32-megabyte string... so we don't do this when the string is sufficiently
! 6041: long. */
! 6042:
! 6043: if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
! 6044: {
! 6045: register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
! 6046:
! 6047: /* We don't need to repeat the search if we haven't yet reached the
! 6048: place we found it at last time. */
! 6049:
! 6050: if (p > req_byte_ptr)
! 6051: {
! 6052: if (req_byte_caseless)
! 6053: {
! 6054: while (p < end_subject)
! 6055: {
! 6056: register int pp = *p++;
! 6057: if (pp == req_byte || pp == req_byte2) { p--; break; }
! 6058: }
! 6059: }
! 6060: else
! 6061: {
! 6062: while (p < end_subject)
! 6063: {
! 6064: if (*p++ == req_byte) { p--; break; }
! 6065: }
! 6066: }
! 6067:
! 6068: /* If we can't find the required character, break the matching loop,
! 6069: forcing a match failure. */
! 6070:
! 6071: if (p >= end_subject)
! 6072: {
! 6073: rc = MATCH_NOMATCH;
! 6074: break;
! 6075: }
! 6076:
! 6077: /* If we have found the required character, save the point where we
! 6078: found it, so that we don't search again next time round the loop if
! 6079: the start hasn't passed this character yet. */
! 6080:
! 6081: req_byte_ptr = p;
! 6082: }
! 6083: }
! 6084: }
! 6085:
! 6086: #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
! 6087: printf(">>>> Match against: ");
! 6088: pchars(start_match, end_subject - start_match, TRUE, md);
! 6089: printf("\n");
! 6090: #endif
! 6091:
! 6092: /* OK, we can now run the match. If "hitend" is set afterwards, remember the
! 6093: first starting point for which a partial match was found. */
! 6094:
! 6095: md->start_match_ptr = start_match;
! 6096: md->start_used_ptr = start_match;
! 6097: md->match_call_count = 0;
! 6098: rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
! 6099: 0, 0);
! 6100: if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
! 6101:
! 6102: switch(rc)
! 6103: {
! 6104: /* SKIP passes back the next starting point explicitly, but if it is the
! 6105: same as the match we have just done, treat it as NOMATCH. */
! 6106:
! 6107: case MATCH_SKIP:
! 6108: if (md->start_match_ptr != start_match)
! 6109: {
! 6110: new_start_match = md->start_match_ptr;
! 6111: break;
! 6112: }
! 6113: /* Fall through */
! 6114:
! 6115: /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
! 6116: the SKIP's arg was not found. We also treat this as NOMATCH. */
! 6117:
! 6118: case MATCH_SKIP_ARG:
! 6119: /* Fall through */
! 6120:
! 6121: /* NOMATCH and PRUNE advance by one character. THEN at this level acts
! 6122: exactly like PRUNE. */
! 6123:
! 6124: case MATCH_NOMATCH:
! 6125: case MATCH_PRUNE:
! 6126: case MATCH_THEN:
! 6127: new_start_match = start_match + 1;
! 6128: #ifdef SUPPORT_UTF8
! 6129: if (utf8)
! 6130: while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
! 6131: new_start_match++;
! 6132: #endif
! 6133: break;
! 6134:
! 6135: /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
! 6136:
! 6137: case MATCH_COMMIT:
! 6138: rc = MATCH_NOMATCH;
! 6139: goto ENDLOOP;
! 6140:
! 6141: /* Any other return is either a match, or some kind of error. */
! 6142:
! 6143: default:
! 6144: goto ENDLOOP;
! 6145: }
! 6146:
! 6147: /* Control reaches here for the various types of "no match at this point"
! 6148: result. Reset the code to MATCH_NOMATCH for subsequent checking. */
! 6149:
! 6150: rc = MATCH_NOMATCH;
! 6151:
! 6152: /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
! 6153: newline in the subject (though it may continue over the newline). Therefore,
! 6154: if we have just failed to match, starting at a newline, do not continue. */
! 6155:
! 6156: if (firstline && IS_NEWLINE(start_match)) break;
! 6157:
! 6158: /* Advance to new matching position */
! 6159:
! 6160: start_match = new_start_match;
! 6161:
! 6162: /* Break the loop if the pattern is anchored or if we have passed the end of
! 6163: the subject. */
! 6164:
! 6165: if (anchored || start_match > end_subject) break;
! 6166:
! 6167: /* If we have just passed a CR and we are now at a LF, and the pattern does
! 6168: not contain any explicit matches for \r or \n, and the newline option is CRLF
! 6169: or ANY or ANYCRLF, advance the match position by one more character. */
! 6170:
! 6171: if (start_match[-1] == CHAR_CR &&
! 6172: start_match < end_subject &&
! 6173: *start_match == CHAR_NL &&
! 6174: (re->flags & PCRE_HASCRORLF) == 0 &&
! 6175: (md->nltype == NLTYPE_ANY ||
! 6176: md->nltype == NLTYPE_ANYCRLF ||
! 6177: md->nllen == 2))
! 6178: start_match++;
! 6179:
! 6180: md->mark = NULL; /* Reset for start of next match attempt */
! 6181: } /* End of for(;;) "bumpalong" loop */
! 6182:
! 6183: /* ==========================================================================*/
! 6184:
! 6185: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
! 6186: conditions is true:
! 6187:
! 6188: (1) The pattern is anchored or the match was failed by (*COMMIT);
! 6189:
! 6190: (2) We are past the end of the subject;
! 6191:
! 6192: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
! 6193: this option requests that a match occur at or before the first newline in
! 6194: the subject.
! 6195:
! 6196: When we have a match and the offset vector is big enough to deal with any
! 6197: backreferences, captured substring offsets will already be set up. In the case
! 6198: where we had to get some local store to hold offsets for backreference
! 6199: processing, copy those that we can. In this case there need not be overflow if
! 6200: certain parts of the pattern were not used, even though there are more
! 6201: capturing parentheses than vector slots. */
! 6202:
! 6203: ENDLOOP:
! 6204:
! 6205: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
! 6206: {
! 6207: if (using_temporary_offsets)
! 6208: {
! 6209: if (offsetcount >= 4)
! 6210: {
! 6211: memcpy(offsets + 2, md->offset_vector + 2,
! 6212: (offsetcount - 2) * sizeof(int));
! 6213: DPRINTF(("Copied offsets from temporary memory\n"));
! 6214: }
! 6215: if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
! 6216: DPRINTF(("Freeing temporary memory\n"));
! 6217: (pcre_free)(md->offset_vector);
! 6218: }
! 6219:
! 6220: /* Set the return code to the number of captured strings, or 0 if there are
! 6221: too many to fit into the vector. */
! 6222:
! 6223: rc = md->offset_overflow? 0 : md->end_offset_top/2;
! 6224:
! 6225: /* If there is space, set up the whole thing as substring 0. The value of
! 6226: md->start_match_ptr might be modified if \K was encountered on the success
! 6227: matching path. */
! 6228:
! 6229: if (offsetcount < 2) rc = 0; else
! 6230: {
! 6231: offsets[0] = (int)(md->start_match_ptr - md->start_subject);
! 6232: offsets[1] = (int)(md->end_match_ptr - md->start_subject);
! 6233: }
! 6234:
! 6235: DPRINTF((">>>> returning %d\n", rc));
! 6236: goto RETURN_MARK;
! 6237: }
! 6238:
! 6239: /* Control gets here if there has been an error, or if the overall match
! 6240: attempt has failed at all permitted starting positions. */
! 6241:
! 6242: if (using_temporary_offsets)
! 6243: {
! 6244: DPRINTF(("Freeing temporary memory\n"));
! 6245: (pcre_free)(md->offset_vector);
! 6246: }
! 6247:
! 6248: /* For anything other than nomatch or partial match, just return the code. */
! 6249:
! 6250: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
! 6251: {
! 6252: DPRINTF((">>>> error: returning %d\n", rc));
! 6253: return rc;
! 6254: }
! 6255:
! 6256: /* Handle partial matches - disable any mark data */
! 6257:
! 6258: if (start_partial != NULL)
! 6259: {
! 6260: DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
! 6261: md->mark = NULL;
! 6262: if (offsetcount > 1)
! 6263: {
! 6264: offsets[0] = (int)(start_partial - (USPTR)subject);
! 6265: offsets[1] = (int)(end_subject - (USPTR)subject);
! 6266: }
! 6267: rc = PCRE_ERROR_PARTIAL;
! 6268: }
! 6269:
! 6270: /* This is the classic nomatch case */
! 6271:
! 6272: else
! 6273: {
! 6274: DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
! 6275: rc = PCRE_ERROR_NOMATCH;
! 6276: }
! 6277:
! 6278: /* Return the MARK data if it has been requested. */
! 6279:
! 6280: RETURN_MARK:
! 6281:
! 6282: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
! 6283: *(extra_data->mark) = (unsigned char *)(md->mark);
! 6284: return rc;
! 6285: }
! 6286:
! 6287: /* End of pcre_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>