Annotation of embedaddon/pcre/pcre_exec.c, revision 1.1
1.1 ! misho 1: /*************************************************
! 2: * Perl-Compatible Regular Expressions *
! 3: *************************************************/
! 4:
! 5: /* PCRE is a library of functions to support regular expressions whose syntax
! 6: and semantics are as close as possible to those of the Perl 5 language.
! 7:
! 8: Written by Philip Hazel
! 9: Copyright (c) 1997-2011 University of Cambridge
! 10:
! 11: -----------------------------------------------------------------------------
! 12: Redistribution and use in source and binary forms, with or without
! 13: modification, are permitted provided that the following conditions are met:
! 14:
! 15: * Redistributions of source code must retain the above copyright notice,
! 16: this list of conditions and the following disclaimer.
! 17:
! 18: * Redistributions in binary form must reproduce the above copyright
! 19: notice, this list of conditions and the following disclaimer in the
! 20: documentation and/or other materials provided with the distribution.
! 21:
! 22: * Neither the name of the University of Cambridge nor the names of its
! 23: contributors may be used to endorse or promote products derived from
! 24: this software without specific prior written permission.
! 25:
! 26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! 27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
! 30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 36: POSSIBILITY OF SUCH DAMAGE.
! 37: -----------------------------------------------------------------------------
! 38: */
! 39:
! 40:
! 41: /* This module contains pcre_exec(), the externally visible function that does
! 42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
! 43: possible. There are also some static supporting functions. */
! 44:
! 45: #ifdef HAVE_CONFIG_H
! 46: #include "config.h"
! 47: #endif
! 48:
! 49: #define NLBLOCK md /* Block containing newline information */
! 50: #define PSSTART start_subject /* Field containing processed string start */
! 51: #define PSEND end_subject /* Field containing processed string end */
! 52:
! 53: #include "pcre_internal.h"
! 54:
! 55: /* Undefine some potentially clashing cpp symbols */
! 56:
! 57: #undef min
! 58: #undef max
! 59:
! 60: /* Values for setting in md->match_function_type to indicate two special types
! 61: of call to match(). We do it this way to save on using another stack variable,
! 62: as stack usage is to be discouraged. */
! 63:
! 64: #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
! 65: #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
! 66:
! 67: /* Non-error returns from the match() function. Error returns are externally
! 68: defined PCRE_ERROR_xxx codes, which are all negative. */
! 69:
! 70: #define MATCH_MATCH 1
! 71: #define MATCH_NOMATCH 0
! 72:
! 73: /* Special internal returns from the match() function. Make them sufficiently
! 74: negative to avoid the external error codes. */
! 75:
! 76: #define MATCH_ACCEPT (-999)
! 77: #define MATCH_COMMIT (-998)
! 78: #define MATCH_KETRPOS (-997)
! 79: #define MATCH_ONCE (-996)
! 80: #define MATCH_PRUNE (-995)
! 81: #define MATCH_SKIP (-994)
! 82: #define MATCH_SKIP_ARG (-993)
! 83: #define MATCH_THEN (-992)
! 84:
! 85: /* Maximum number of ints of offset to save on the stack for recursive calls.
! 86: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
! 87: because the offset vector is always a multiple of 3 long. */
! 88:
! 89: #define REC_STACK_SAVE_MAX 30
! 90:
! 91: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
! 92:
/* Lower and upper bounds for the six common repeat kinds. The two tables are
indexed in parallel; an upper bound of 0 stands for "no limit". */

static const char rep_min[] = {
  0, 0,     /* entries 0-1: lower bound zero */
  1, 1,     /* entries 2-3: lower bound one */
  0, 0 };   /* entries 4-5: lower bound zero */

static const char rep_max[] = {
  0, 0,     /* entries 0-1: unlimited */
  0, 0,     /* entries 2-3: unlimited */
  1, 1 };   /* entries 4-5: upper bound one */
! 95:
! 96:
! 97:
! 98: #ifdef PCRE_DEBUG
! 99: /*************************************************
! 100: * Debugging function to print chars *
! 101: *************************************************/
! 102:
! 103: /* Print a sequence of chars in printable format, stopping at the end of the
! 104: subject if requested.
! 105:
! 106: Arguments:
! 107: p points to characters
! 108: length number to print
! 109: is_subject TRUE if printing from within md->start_subject
! 110: md pointer to matching data block, if is_subject is TRUE
! 111:
! 112: Returns: nothing
! 113: */
! 114:
! 115: static void
! 116: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
! 117: {
! 118: unsigned int c;
! 119: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
! 120: while (length-- > 0)
! 121: if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
! 122: }
! 123: #endif
! 124:
! 125:
! 126:
! 127: /*************************************************
! 128: * Match a back-reference *
! 129: *************************************************/
! 130:
! 131: /* Normally, if a back reference hasn't been set, the length that is passed is
! 132: negative, so the match always fails. However, in JavaScript compatibility mode,
! 133: the length passed is zero. Note that in caseless UTF-8 mode, the number of
! 134: subject bytes matched may be different to the number of reference bytes.
! 135:
! 136: Arguments:
! 137: offset index into the offset vector
! 138: eptr pointer into the subject
! 139: length length of reference to be matched (number of bytes)
! 140: md points to match data block
! 141: caseless TRUE if caseless
! 142:
! 143: Returns: < 0 if not matched, otherwise the number of subject bytes matched
! 144: */
! 145:
! 146: static int
! 147: match_ref(int offset, register USPTR eptr, int length, match_data *md,
! 148: BOOL caseless)
! 149: {
! 150: USPTR eptr_start = eptr;
! 151: register USPTR p = md->start_subject + md->offset_vector[offset];
! 152:
! 153: #ifdef PCRE_DEBUG
! 154: if (eptr >= md->end_subject)
! 155: printf("matching subject <null>");
! 156: else
! 157: {
! 158: printf("matching subject ");
! 159: pchars(eptr, length, TRUE, md);
! 160: }
! 161: printf(" against backref ");
! 162: pchars(p, length, FALSE, md);
! 163: printf("\n");
! 164: #endif
! 165:
! 166: /* Always fail if reference not set (and not JavaScript compatible). */
! 167:
! 168: if (length < 0) return -1;
! 169:
! 170: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
! 171: properly if Unicode properties are supported. Otherwise, we can check only
! 172: ASCII characters. */
! 173:
! 174: if (caseless)
! 175: {
! 176: #ifdef SUPPORT_UTF8
! 177: #ifdef SUPPORT_UCP
! 178: if (md->utf8)
! 179: {
! 180: /* Match characters up to the end of the reference. NOTE: the number of
! 181: bytes matched may differ, because there are some characters whose upper and
! 182: lower case versions code as different numbers of bytes. For example, U+023A
! 183: (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
! 184: a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
! 185: the latter. It is important, therefore, to check the length along the
! 186: reference, not along the subject (earlier code did this wrong). */
! 187:
! 188: USPTR endptr = p + length;
! 189: while (p < endptr)
! 190: {
! 191: int c, d;
! 192: if (eptr >= md->end_subject) return -1;
! 193: GETCHARINC(c, eptr);
! 194: GETCHARINC(d, p);
! 195: if (c != d && c != UCD_OTHERCASE(d)) return -1;
! 196: }
! 197: }
! 198: else
! 199: #endif
! 200: #endif
! 201:
! 202: /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
! 203: is no UCP support. */
! 204: {
! 205: if (eptr + length > md->end_subject) return -1;
! 206: while (length-- > 0)
! 207: { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
! 208: }
! 209: }
! 210:
! 211: /* In the caseful case, we can just compare the bytes, whether or not we
! 212: are in UTF-8 mode. */
! 213:
! 214: else
! 215: {
! 216: if (eptr + length > md->end_subject) return -1;
! 217: while (length-- > 0) if (*p++ != *eptr++) return -1;
! 218: }
! 219:
! 220: return (int)(eptr - eptr_start);
! 221: }
! 222:
! 223:
! 224:
! 225: /***************************************************************************
! 226: ****************************************************************************
! 227: RECURSION IN THE match() FUNCTION
! 228:
! 229: The match() function is highly recursive, though not every recursive call
! 230: increases the recursive depth. Nevertheless, some regular expressions can cause
! 231: it to recurse to a great depth. I was writing for Unix, so I just let it call
! 232: itself recursively. This uses the stack for saving everything that has to be
! 233: saved for a recursive call. On Unix, the stack can be large, and this works
! 234: fine.
! 235:
! 236: It turns out that on some non-Unix-like systems there are problems with
! 237: programs that use a lot of stack. (This despite the fact that every last chip
! 238: has oodles of memory these days, and techniques for extending the stack have
! 239: been known for decades.) So....
! 240:
! 241: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
! 242: calls by keeping local variables that need to be preserved in blocks of memory
! 243: obtained from malloc() instead of on the stack. Macros are used to
! 244: achieve this so that the actual code doesn't look very different to what it
! 245: always used to.
! 246:
! 247: The original heap-recursive code used longjmp(). However, it seems that this
! 248: can be very slow on some operating systems. Following a suggestion from Stan
! 249: Switzer, the use of longjmp() has been abolished, at the cost of having to
! 250: provide a unique number for each call to RMATCH. There is no way of generating
! 251: a sequence of numbers at compile time in C. I have given them names, to make
! 252: them stand out more clearly.
! 253:
! 254: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
! 255: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
! 256: tests. Furthermore, not using longjmp() means that local dynamic variables
! 257: don't have indeterminate values; this has meant that the frame size can be
! 258: reduced because the result can be "passed back" by straight setting of the
! 259: variable instead of being passed in the frame.
! 260: ****************************************************************************
! 261: ***************************************************************************/
! 262:
! 263: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
! 264: below must be updated in sync. */
! 265:
/* Identifiers for the individual RMATCH call sites, numbered consecutively
from 1. Each call site passes its own identifier as the last RMATCH argument. */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,     RM8,  RM9,  RM10, RM11, RM12,
  RM13,    RM14, RM15, RM16, RM17, RM18,
  RM19,    RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30,
  RM31,    RM32, RM33, RM34, RM35, RM36,
  RM37,    RM38, RM39, RM40, RM41, RM42,
  RM43,    RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54,
  RM55,    RM56, RM57, RM58, RM59, RM60,
  RM61,    RM62, RM63, RM64, RM65, RM66
};
! 273:
! 274: /* These versions of the macros use the stack, as normal. There are debugging
! 275: versions and production versions. Note that the "rw" argument of RMATCH isn't
! 276: actually used in this definition. */
! 277:
! 278: #ifndef NO_RECURSE
! 279: #define REGISTER register
! 280:
! 281: #ifdef PCRE_DEBUG
/* Debugging form: a real recursive call to match() bracketed by trace output.
The result is left in the caller's rrc. mstart and rdepth are taken from the
enclosing scope rather than from the macro arguments; rw is ignored. */
! 282: #define RMATCH(ra,rb,rc,rd,re,rw) \
! 283: { \
! 284: printf("match() called in line %d\n", __LINE__); \
! 285: rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
! 286: printf("to line %d\n", __LINE__); \
! 287: }
/* Debugging form: trace the value being returned, then return it. Note that
ra appears twice in the expansion, so it is evaluated twice. */
! 288: #define RRETURN(ra) \
! 289: { \
! 290: printf("match() returned %d from line %d ", ra, __LINE__); \
! 291: return ra; \
! 292: }
! 293: #else
/* Production form: a plain recursive call one level deeper, result in rrc.
The expansion is an expression without a trailing semicolon. */
! 294: #define RMATCH(ra,rb,rc,rd,re,rw) \
! 295: rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
/* Production form: an ordinary return statement (no trailing semicolon). */
! 296: #define RRETURN(ra) return ra
! 297: #endif
! 298:
! 299: #else
! 300:
! 301:
! 302: /* These versions of the macros manage a private stack on the heap. Note that
! 303: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
! 304: argument of match(), which never changes. */
! 305:
! 306: #define REGISTER
! 307:
/* Heap form of a "recursive call". A new heapframe is obtained from
pcre_stack_malloc; if that fails, PCRE_ERROR_NOMEMORY is returned through
RRETURN. Otherwise the resume identifier rw is stored in the current frame's
Xwhere, the new frame is filled in from ra, rb, mstart, rc and re with the
depth increased by one, it is linked to the current frame and made current,
and control jumps to HEAP_RECURSE. The label L_<rw> generated by this macro is
the point where execution continues afterwards (presumably reached from the
HEAP_RETURN code via Xwhere - that code is not part of this macro). rd is not
used. */
! 308: #define RMATCH(ra,rb,rc,rd,re,rw)\
! 309: {\
! 310: heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
! 311: if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
! 312: frame->Xwhere = rw; \
! 313: newframe->Xeptr = ra;\
! 314: newframe->Xecode = rb;\
! 315: newframe->Xmstart = mstart;\
! 316: newframe->Xoffset_top = rc;\
! 317: newframe->Xeptrb = re;\
! 318: newframe->Xrdepth = frame->Xrdepth + 1;\
! 319: newframe->Xprevframe = frame;\
! 320: frame = newframe;\
! 321: DPRINTF(("restarting from line %d\n", __LINE__));\
! 322: goto HEAP_RECURSE;\
! 323: L_##rw:\
! 324: DPRINTF(("jumped back to line %d\n", __LINE__));\
! 325: }
! 326:
/* Heap form of "return". The current frame is unlinked and released with
pcre_stack_free. If a previous frame exists, ra is stored in rrc and control
jumps to HEAP_RETURN; otherwise this is the outermost level and ra is returned
to the real caller.

NOTE(review): ra is evaluated only after the current frame has been popped and
freed, so an argument that names a frame-resident variable would be read from
the previous frame. Also, "frame" is dereferenced unconditionally, so this
macro must not be invoked while frame is NULL. */
! 327: #define RRETURN(ra)\
! 328: {\
! 329: heapframe *oldframe = frame;\
! 330: frame = oldframe->Xprevframe;\
! 331: (pcre_stack_free)(oldframe);\
! 332: if (frame != NULL)\
! 333: {\
! 334: rrc = ra;\
! 335: goto HEAP_RETURN;\
! 336: }\
! 337: return ra;\
! 338: }
! 339:
! 340:
! 341: /* Structure for remembering the local variables in a private frame */
! 342:
! 343: typedef struct heapframe {
! 344: struct heapframe *Xprevframe;
! 345:
! 346: /* Function arguments that may change */
! 347:
! 348: USPTR Xeptr;
! 349: const uschar *Xecode;
! 350: USPTR Xmstart;
! 351: int Xoffset_top;
! 352: eptrblock *Xeptrb;
! 353: unsigned int Xrdepth;
! 354:
! 355: /* Function local variables */
! 356:
! 357: USPTR Xcallpat;
! 358: #ifdef SUPPORT_UTF8
! 359: USPTR Xcharptr;
! 360: #endif
! 361: USPTR Xdata;
! 362: USPTR Xnext;
! 363: USPTR Xpp;
! 364: USPTR Xprev;
! 365: USPTR Xsaved_eptr;
! 366:
! 367: recursion_info Xnew_recursive;
! 368:
! 369: BOOL Xcur_is_word;
! 370: BOOL Xcondition;
! 371: BOOL Xprev_is_word;
! 372:
! 373: #ifdef SUPPORT_UCP
! 374: int Xprop_type;
! 375: int Xprop_value;
! 376: int Xprop_fail_result;
! 377: int Xoclength;
! 378: uschar Xocchars[8];
! 379: #endif
! 380:
! 381: int Xcodelink;
! 382: int Xctype;
! 383: unsigned int Xfc;
! 384: int Xfi;
! 385: int Xlength;
! 386: int Xmax;
! 387: int Xmin;
! 388: int Xnumber;
! 389: int Xoffset;
! 390: int Xop;
! 391: int Xsave_capture_last;
! 392: int Xsave_offset1, Xsave_offset2, Xsave_offset3;
! 393: int Xstacksave[REC_STACK_SAVE_MAX];
! 394:
! 395: eptrblock Xnewptrb;
! 396:
! 397: /* Where to jump back to */
! 398:
! 399: int Xwhere;
! 400:
! 401: } heapframe;
! 402:
! 403: #endif
! 404:
! 405:
! 406: /***************************************************************************
! 407: ***************************************************************************/
! 408:
! 409:
! 410:
! 411: /*************************************************
! 412: * Match from current position *
! 413: *************************************************/
! 414:
! 415: /* This function is called recursively in many circumstances. Whenever it
! 416: returns a negative (error) response, the outer incarnation must also return the
! 417: same response. */
! 418:
! 419: /* These macros pack up tests that are used for partial matching, and which
! 420: appear several times in the code. We set the "hit end" flag if the pointer is
! 421: at the end of the subject and also past the start of the subject (i.e.
! 422: something has been matched). For hard partial matching, we then return
! 423: immediately. The second one is used when we already know we are past the end of
! 424: the subject. */
! 425:
/* Partial-match check for use where the subject pointer may or may not be at
the end. When md->partial is non-zero, eptr has reached md->end_subject and
eptr is beyond md->start_used_ptr, md->hitend is set; if md->partial is
greater than 1, PCRE_ERROR_PARTIAL is returned immediately through RRETURN.
The expansion is a bare "if" statement, so an "else" written directly after an
invocation would attach to it. */
! 426: #define CHECK_PARTIAL()\
! 427: if (md->partial != 0 && eptr >= md->end_subject && \
! 428: eptr > md->start_used_ptr) \
! 429: { \
! 430: md->hitend = TRUE; \
! 431: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
! 432: }
! 433:
/* As CHECK_PARTIAL, but without the comparison against md->end_subject: the
caller is expected to have established that already. The same bare-"if"
caveat applies. */
! 434: #define SCHECK_PARTIAL()\
! 435: if (md->partial != 0 && eptr > md->start_used_ptr) \
! 436: { \
! 437: md->hitend = TRUE; \
! 438: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
! 439: }
! 440:
! 441:
! 442: /* Performance note: It might be tempting to extract commonly used fields from
! 443: the md structure (e.g. utf8, end_subject) into individual variables to improve
! 444: performance. Tests using gcc on a SPARC disproved this; in the first case, it
! 445: made performance worse.
! 446:
! 447: Arguments:
! 448: eptr pointer to current character in subject
! 449: ecode pointer to current position in compiled code
! 450: mstart pointer to the current match start position (can be modified
! 451: by encountering \K)
! 452: offset_top current top pointer
! 453: md pointer to "static" info for the match
! 454: eptrb pointer to chain of blocks containing eptr at start of
! 455: brackets - for testing for empty matches
! 456: rdepth the recursion depth
! 457:
! 458: Returns: MATCH_MATCH if matched ) these values are >= 0
! 459: MATCH_NOMATCH if failed to match )
! 460: a negative MATCH_xxx value for PRUNE, SKIP, etc
! 461: a negative PCRE_ERROR_xxx value if aborted by an error condition
! 462: (e.g. stopped by repeated call or recursion limit)
! 463: */
! 464:
! 465: static int
! 466: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
! 467: int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
! 468: {
! 469: /* These variables do not need to be preserved over recursion in this function,
! 470: so they can be ordinary variables in all cases. Mark some of them with
! 471: "register" because they are used a lot in loops. */
! 472:
! 473: register int rrc; /* Returns from recursive calls */
! 474: register int i; /* Used for loops not involving calls to RMATCH() */
! 475: register unsigned int c; /* Character values not kept over RMATCH() calls */
! 476: register BOOL utf8; /* Local copy of UTF-8 flag for speed */
! 477:
! 478: BOOL minimize, possessive; /* Quantifier options */
! 479: BOOL caseless;
! 480: int condcode;
! 481:
! 482: /* When recursion is not being used, all "local" variables that have to be
! 483: preserved over calls to RMATCH() are part of a "frame" which is obtained from
! 484: heap storage. Set up the top-level frame here; others are obtained from the
! 485: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
! 486:
! 487: #ifdef NO_RECURSE
! 488: heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
! 489: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
! 490: frame->Xprevframe = NULL; /* Marks the top level */
! 491:
! 492: /* Copy in the original argument variables */
! 493:
! 494: frame->Xeptr = eptr;
! 495: frame->Xecode = ecode;
! 496: frame->Xmstart = mstart;
! 497: frame->Xoffset_top = offset_top;
! 498: frame->Xeptrb = eptrb;
! 499: frame->Xrdepth = rdepth;
! 500:
! 501: /* This is where control jumps back to in order to effect "recursion" */
! 502:
! 503: HEAP_RECURSE:
! 504:
! 505: /* Macros make the argument variables come from the current frame */
! 506:
! 507: #define eptr frame->Xeptr
! 508: #define ecode frame->Xecode
! 509: #define mstart frame->Xmstart
! 510: #define offset_top frame->Xoffset_top
! 511: #define eptrb frame->Xeptrb
! 512: #define rdepth frame->Xrdepth
! 513:
! 514: /* Ditto for the local variables */
! 515:
! 516: #ifdef SUPPORT_UTF8
! 517: #define charptr frame->Xcharptr
! 518: #endif
! 519: #define callpat frame->Xcallpat
! 520: #define codelink frame->Xcodelink
! 521: #define data frame->Xdata
! 522: #define next frame->Xnext
! 523: #define pp frame->Xpp
! 524: #define prev frame->Xprev
! 525: #define saved_eptr frame->Xsaved_eptr
! 526:
! 527: #define new_recursive frame->Xnew_recursive
! 528:
! 529: #define cur_is_word frame->Xcur_is_word
! 530: #define condition frame->Xcondition
! 531: #define prev_is_word frame->Xprev_is_word
! 532:
! 533: #ifdef SUPPORT_UCP
! 534: #define prop_type frame->Xprop_type
! 535: #define prop_value frame->Xprop_value
! 536: #define prop_fail_result frame->Xprop_fail_result
! 537: #define oclength frame->Xoclength
! 538: #define occhars frame->Xocchars
! 539: #endif
! 540:
! 541: #define ctype frame->Xctype
! 542: #define fc frame->Xfc
! 543: #define fi frame->Xfi
! 544: #define length frame->Xlength
! 545: #define max frame->Xmax
! 546: #define min frame->Xmin
! 547: #define number frame->Xnumber
! 548: #define offset frame->Xoffset
! 549: #define op frame->Xop
! 550: #define save_capture_last frame->Xsave_capture_last
! 551: #define save_offset1 frame->Xsave_offset1
! 552: #define save_offset2 frame->Xsave_offset2
! 553: #define save_offset3 frame->Xsave_offset3
! 554: #define stacksave frame->Xstacksave
! 555:
! 556: #define newptrb frame->Xnewptrb
! 557:
! 558: /* When recursion is being used, local variables are allocated on the stack and
! 559: get preserved during recursion in the normal way. In this environment, fi and
! 560: i, and fc and c, can be the same variables. */
! 561:
! 562: #else /* NO_RECURSE not defined */
! 563: #define fi i
! 564: #define fc c
! 565:
! 566: /* Many of the following variables are used only in small blocks of the code.
! 567: My normal style of coding would have declared them within each of those blocks.
! 568: However, in order to accommodate the version of this code that uses an external
! 569: "stack" implemented on the heap, it is easier to declare them all here, so the
! 570: declarations can be cut out in a block. The only declarations within blocks
! 571: below are for variables that do not have to be preserved over a recursive call
! 572: to RMATCH(). */
! 573:
! 574: #ifdef SUPPORT_UTF8
! 575: const uschar *charptr;
! 576: #endif
! 577: const uschar *callpat;
! 578: const uschar *data;
! 579: const uschar *next;
! 580: USPTR pp;
! 581: const uschar *prev;
! 582: USPTR saved_eptr;
! 583:
! 584: recursion_info new_recursive;
! 585:
! 586: BOOL cur_is_word;
! 587: BOOL condition;
! 588: BOOL prev_is_word;
! 589:
! 590: #ifdef SUPPORT_UCP
! 591: int prop_type;
! 592: int prop_value;
! 593: int prop_fail_result;
! 594: int oclength;
! 595: uschar occhars[8];
! 596: #endif
! 597:
! 598: int codelink;
! 599: int ctype;
! 600: int length;
! 601: int max;
! 602: int min;
! 603: int number;
! 604: int offset;
! 605: int op;
! 606: int save_capture_last;
! 607: int save_offset1, save_offset2, save_offset3;
! 608: int stacksave[REC_STACK_SAVE_MAX];
! 609:
! 610: eptrblock newptrb;
! 611: #endif /* NO_RECURSE */
! 612:
! 613: /* To save space on the stack and in the heap frame, I have doubled up on some
! 614: of the local variables that are used only in localised parts of the code, but
! 615: still need to be preserved over recursive calls of match(). These macros define
! 616: the alternative names that are used. */
! 617:
! 618: #define allow_zero cur_is_word
! 619: #define cbegroup condition
! 620: #define code_offset codelink
! 621: #define condassert condition
! 622: #define matched_once prev_is_word
! 623:
! 624: /* These statements are here to stop the compiler complaining about uninitialized
! 625: variables. */
! 626:
! 627: #ifdef SUPPORT_UCP
! 628: prop_value = 0;
! 629: prop_fail_result = 0;
! 630: #endif
! 631:
! 632:
! 633: /* This label is used for tail recursion, which is used in a few cases even
! 634: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
! 635: used. Thanks to Ian Taylor for noticing this possibility and sending the
! 636: original patch. */
! 637:
! 638: TAIL_RECURSE:
! 639:
! 640: /* OK, now we can get on with the real code of the function. Recursive calls
! 641: are specified by the macro RMATCH and RRETURN is used to return. When
! 642: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
! 643: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
! 644: defined). However, RMATCH isn't like a function call because it's quite a
! 645: complicated macro. It has to be used in one particular way. This shouldn't,
! 646: however, impact performance when true recursion is being used. */
! 647:
! 648: #ifdef SUPPORT_UTF8
! 649: utf8 = md->utf8; /* Local copy of the flag */
! 650: #else
! 651: utf8 = FALSE;
! 652: #endif
! 653:
! 654: /* First check that we haven't called match() too many times, or that we
! 655: haven't exceeded the recursive call limit. */
! 656:
! 657: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
! 658: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
! 659:
! 660: /* At the start of a group with an unlimited repeat that may match an empty
! 661: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
! 662: done this way to save having to use another function argument, which would take
! 663: up space on the stack. See also MATCH_CONDASSERT below.
! 664:
! 665: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
! 666: such remembered pointers, to be checked when we hit the closing ket, in order
! 667: to break infinite loops that match no characters. When match() is called in
! 668: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
! 669: NOT be used with tail recursion, because the memory block that is used is on
! 670: the stack, so a new one may be required for each match(). */
! 671:
! 672: if (md->match_function_type == MATCH_CBEGROUP)
! 673: {
! 674: newptrb.epb_saved_eptr = eptr;
! 675: newptrb.epb_prev = eptrb;
! 676: eptrb = &newptrb;
! 677: md->match_function_type = 0;
! 678: }
! 679:
! 680: /* Now start processing the opcodes. */
! 681:
! 682: for (;;)
! 683: {
! 684: minimize = possessive = FALSE;
! 685: op = *ecode;
! 686:
! 687: switch(op)
! 688: {
! 689: case OP_MARK:
! 690: md->nomatch_mark = ecode + 2;
! 691: md->mark = NULL; /* In case previously set by assertion */
! 692: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 693: eptrb, RM55);
! 694: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 695: md->mark == NULL) md->mark = ecode + 2;
! 696:
! 697: /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
! 698: argument, and we must check whether that argument matches this MARK's
! 699: argument. It is passed back in md->start_match_ptr (an overloading of that
! 700: variable). If it does match, we reset that variable to the current subject
! 701: position and return MATCH_SKIP. Otherwise, pass back the return code
! 702: unaltered. */
! 703:
! 704: else if (rrc == MATCH_SKIP_ARG &&
! 705: strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)
! 706: {
! 707: md->start_match_ptr = eptr;
! 708: RRETURN(MATCH_SKIP);
! 709: }
! 710: RRETURN(rrc);
! 711:
! 712: case OP_FAIL:
! 713: RRETURN(MATCH_NOMATCH);
! 714:
! 715: /* COMMIT overrides PRUNE, SKIP, and THEN */
! 716:
! 717: case OP_COMMIT:
! 718: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 719: eptrb, RM52);
! 720: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
! 721: rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
! 722: rrc != MATCH_THEN)
! 723: RRETURN(rrc);
! 724: RRETURN(MATCH_COMMIT);
! 725:
! 726: /* PRUNE overrides THEN */
! 727:
! 728: case OP_PRUNE:
! 729: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 730: eptrb, RM51);
! 731: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 732: RRETURN(MATCH_PRUNE);
! 733:
! 734: case OP_PRUNE_ARG:
! 735: md->nomatch_mark = ecode + 2;
! 736: md->mark = NULL; /* In case previously set by assertion */
! 737: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 738: eptrb, RM56);
! 739: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 740: md->mark == NULL) md->mark = ecode + 2;
! 741: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 742: RRETURN(MATCH_PRUNE);
! 743:
! 744: /* SKIP overrides PRUNE and THEN */
! 745:
! 746: case OP_SKIP:
! 747: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 748: eptrb, RM53);
! 749: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
! 750: RRETURN(rrc);
! 751: md->start_match_ptr = eptr; /* Pass back current position */
! 752: RRETURN(MATCH_SKIP);
! 753:
! 754: /* Note that, for Perl compatibility, SKIP with an argument does NOT set
! 755: nomatch_mark. There is a flag that disables this opcode when re-matching a
! 756: pattern that ended with a SKIP for which there was not a matching MARK. */
! 757:
! 758: case OP_SKIP_ARG:
! 759: if (md->ignore_skip_arg)
! 760: {
! 761: ecode += _pcre_OP_lengths[*ecode] + ecode[1];
! 762: break;
! 763: }
! 764: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
! 765: eptrb, RM57);
! 766: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
! 767: RRETURN(rrc);
! 768:
! 769: /* Pass back the current skip name by overloading md->start_match_ptr and
! 770: returning the special MATCH_SKIP_ARG return code. This will either be
! 771: caught by a matching MARK, or get to the top, where it causes a rematch
! 772: with the md->ignore_skip_arg flag set. */
! 773:
! 774: md->start_match_ptr = ecode + 2;
! 775: RRETURN(MATCH_SKIP_ARG);
! 776:
! 777: /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
! 778: the branch in which it occurs can be determined. Overload the start of
! 779: match pointer to do this. */
! 780:
! 781: case OP_THEN:
! 782: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 783: eptrb, RM54);
! 784: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 785: md->start_match_ptr = ecode;
! 786: RRETURN(MATCH_THEN);
! 787:
! 788: case OP_THEN_ARG:
! 789: md->nomatch_mark = ecode + 2;
! 790: md->mark = NULL; /* In case previously set by assertion */
! 791: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
! 792: md, eptrb, RM58);
! 793: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 794: md->mark == NULL) md->mark = ecode + 2;
! 795: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 796: md->start_match_ptr = ecode;
! 797: RRETURN(MATCH_THEN);
! 798:
! 799: /* Handle an atomic group that does not contain any capturing parentheses.
! 800: This can be handled like an assertion. Prior to 8.13, all atomic groups
! 801: were handled this way. In 8.13, the code was changed as below for ONCE, so
! 802: that backups pass through the group and thereby reset captured values.
! 803: However, this uses a lot more stack, so in 8.20, atomic groups that do not
! 804: contain any captures generate OP_ONCE_NC, which can be handled in the old,
! 805: less stack intensive way.
! 806:
! 807: Check the alternative branches in turn - the matching won't pass the KET
! 808: for this kind of subpattern. If any one branch matches, we carry on as at
! 809: the end of a normal bracket, leaving the subject pointer, but resetting
! 810: the start-of-match value in case it was changed by \K. */
! 811:
! 812: case OP_ONCE_NC:
! 813: prev = ecode;
! 814: saved_eptr = eptr;
! 815: do
! 816: {
! 817: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
! 818: if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
! 819: {
! 820: mstart = md->start_match_ptr;
! 821: break;
! 822: }
! 823: if (rrc == MATCH_THEN)
! 824: {
! 825: next = ecode + GET(ecode,1);
! 826: if (md->start_match_ptr < next &&
! 827: (*ecode == OP_ALT || *next == OP_ALT))
! 828: rrc = MATCH_NOMATCH;
! 829: }
! 830:
! 831: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 832: ecode += GET(ecode,1);
! 833: }
! 834: while (*ecode == OP_ALT);
! 835:
! 836: /* If we hit the end of the group (which could be repeated), fail */
! 837:
! 838: if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
! 839:
! 840: /* Continue as from after the group, updating the offsets high water
! 841: mark, since extracts may have been taken. */
! 842:
! 843: do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
! 844:
! 845: offset_top = md->end_offset_top;
! 846: eptr = md->end_match_ptr;
! 847:
! 848: /* For a non-repeating ket, just continue at this level. This also
! 849: happens for a repeating ket if no characters were matched in the group.
! 850: This is the forcible breaking of infinite loops as implemented in Perl
! 851: 5.005. */
! 852:
! 853: if (*ecode == OP_KET || eptr == saved_eptr)
! 854: {
! 855: ecode += 1+LINK_SIZE;
! 856: break;
! 857: }
! 858:
! 859: /* The repeating kets try the rest of the pattern or restart from the
! 860: preceding bracket, in the appropriate order. The second "call" of match()
! 861: uses tail recursion, to avoid using another stack frame. */
! 862:
! 863: if (*ecode == OP_KETRMIN)
! 864: {
! 865: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
! 866: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 867: ecode = prev;
! 868: goto TAIL_RECURSE;
! 869: }
! 870: else /* OP_KETRMAX */
! 871: {
! 872: md->match_function_type = MATCH_CBEGROUP;
! 873: RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
! 874: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 875: ecode += 1 + LINK_SIZE;
! 876: goto TAIL_RECURSE;
! 877: }
! 878: /* Control never gets here */
! 879:
! 880: /* Handle a capturing bracket, other than those that are possessive with an
! 881: unlimited repeat. If there is space in the offset vector, save the current
! 882: subject position in the working slot at the top of the vector. We mustn't
! 883: change the current values of the data slot, because they may be set from a
! 884: previous iteration of this group, and be referred to by a reference inside
! 885: the group. A failure to match might occur after the group has succeeded,
! 886: if something later on doesn't match. For this reason, we need to restore
! 887: the working value and also the values of the final offsets, in case they
! 888: were set by a previous iteration of the same bracket.
! 889:
! 890: If there isn't enough space in the offset vector, treat this as if it were
! 891: a non-capturing bracket. Don't worry about setting the flag for the error
! 892: case here; that is handled in the code for KET. */
! 893:
! 894: case OP_CBRA:
! 895: case OP_SCBRA:
! 896: number = GET2(ecode, 1+LINK_SIZE);
! 897: offset = number << 1;
! 898:
! 899: #ifdef PCRE_DEBUG
! 900: printf("start bracket %d\n", number);
! 901: printf("subject=");
! 902: pchars(eptr, 16, TRUE, md);
! 903: printf("\n");
! 904: #endif
! 905:
! 906: if (offset < md->offset_max)
! 907: {
! 908: save_offset1 = md->offset_vector[offset];
! 909: save_offset2 = md->offset_vector[offset+1];
! 910: save_offset3 = md->offset_vector[md->offset_end - number];
! 911: save_capture_last = md->capture_last;
! 912:
! 913: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
! 914: md->offset_vector[md->offset_end - number] =
! 915: (int)(eptr - md->start_subject);
! 916:
! 917: for (;;)
! 918: {
! 919: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 920: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 921: eptrb, RM1);
! 922: if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
! 923:
! 924: /* If we backed up to a THEN, check whether it is within the current
! 925: branch by comparing the address of the THEN that is passed back with
! 926: the end of the branch. If it is within the current branch, and the
! 927: branch is one of two or more alternatives (it either starts or ends
! 928: with OP_ALT), we have reached the limit of THEN's action, so convert
! 929: the return code to NOMATCH, which will cause normal backtracking to
! 930: happen from now on. Otherwise, THEN is passed back to an outer
! 931: alternative. This implements Perl's treatment of parenthesized groups,
! 932: where a group not containing | does not affect the current alternative,
! 933: that is, (X) is NOT the same as (X|(*F)). */
! 934:
! 935: if (rrc == MATCH_THEN)
! 936: {
! 937: next = ecode + GET(ecode,1);
! 938: if (md->start_match_ptr < next &&
! 939: (*ecode == OP_ALT || *next == OP_ALT))
! 940: rrc = MATCH_NOMATCH;
! 941: }
! 942:
! 943: /* Anything other than NOMATCH is passed back. */
! 944:
! 945: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 946: md->capture_last = save_capture_last;
! 947: ecode += GET(ecode, 1);
! 948: if (*ecode != OP_ALT) break;
! 949: }
! 950:
! 951: DPRINTF(("bracket %d failed\n", number));
! 952: md->offset_vector[offset] = save_offset1;
! 953: md->offset_vector[offset+1] = save_offset2;
! 954: md->offset_vector[md->offset_end - number] = save_offset3;
! 955:
! 956: /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
! 957:
! 958: RRETURN(rrc);
! 959: }
! 960:
! 961: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
! 962: as a non-capturing bracket. */
! 963:
! 964: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 965: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 966:
! 967: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
! 968:
! 969: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 970: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 971:
! 972: /* Non-capturing or atomic group, except for possessive with unlimited
! 973: repeat and ONCE group with no captures. Loop for all the alternatives.
! 974:
! 975: When we get to the final alternative within the brackets, we used to return
! 976: the result of a recursive call to match() whatever happened so it was
! 977: possible to reduce stack usage by turning this into a tail recursion,
! 978: except in the case of a possibly empty group. However, now that there is
! 979: the possibility of (*THEN) occurring in the final alternative, this
! 980: optimization is no longer always possible.
! 981:
! 982: We can optimize if we know there are no (*THEN)s in the pattern; at present
! 983: this is the best that can be done.
! 984:
! 985: MATCH_ONCE is returned when the end of an atomic group is successfully
! 986: reached, but subsequent matching fails. It passes back up the tree (causing
! 987: captured values to be reset) until the original atomic group level is
! 988: reached. This is tested by comparing md->once_target with the start of the
! 989: group. At this point, the return is converted into MATCH_NOMATCH so that
! 990: previous backup points can be taken. */
! 991:
! 992: case OP_ONCE:
! 993: case OP_BRA:
! 994: case OP_SBRA:
! 995: DPRINTF(("start non-capturing bracket\n"));
! 996:
! 997: for (;;)
! 998: {
! 999: if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
! 1000:
! 1001: /* If this is not a possibly empty group, and there are no (*THEN)s in
! 1002: the pattern, and this is the final alternative, optimize as described
! 1003: above. */
! 1004:
! 1005: else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
! 1006: {
! 1007: ecode += _pcre_OP_lengths[*ecode];
! 1008: goto TAIL_RECURSE;
! 1009: }
! 1010:
! 1011: /* In all other cases, we have to make another call to match(). */
! 1012:
! 1013: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
! 1014: RM2);
! 1015:
! 1016: /* See comment in the code for capturing groups above about handling
! 1017: THEN. */
! 1018:
! 1019: if (rrc == MATCH_THEN)
! 1020: {
! 1021: next = ecode + GET(ecode,1);
! 1022: if (md->start_match_ptr < next &&
! 1023: (*ecode == OP_ALT || *next == OP_ALT))
! 1024: rrc = MATCH_NOMATCH;
! 1025: }
! 1026:
! 1027: if (rrc != MATCH_NOMATCH)
! 1028: {
! 1029: if (rrc == MATCH_ONCE)
! 1030: {
! 1031: const uschar *scode = ecode;
! 1032: if (*scode != OP_ONCE) /* If not at start, find it */
! 1033: {
! 1034: while (*scode == OP_ALT) scode += GET(scode, 1);
! 1035: scode -= GET(scode, 1);
! 1036: }
! 1037: if (md->once_target == scode) rrc = MATCH_NOMATCH;
! 1038: }
! 1039: RRETURN(rrc);
! 1040: }
! 1041: ecode += GET(ecode, 1);
! 1042: if (*ecode != OP_ALT) break;
! 1043: }
! 1044:
! 1045: RRETURN(MATCH_NOMATCH);
! 1046:
! 1047: /* Handle possessive capturing brackets with an unlimited repeat. We come
! 1048: here from BRAZERO with allow_zero set TRUE. The offset_vector values are
! 1049: handled similarly to the normal case above. However, the matching is
! 1050: different. The end of these brackets will always be OP_KETRPOS, which
! 1051: returns MATCH_KETRPOS without going further in the pattern. By this means
! 1052: we can handle the group by iteration rather than recursion, thereby
! 1053: reducing the amount of stack needed. */
! 1054:
! 1055: case OP_CBRAPOS:
! 1056: case OP_SCBRAPOS:
! 1057: allow_zero = FALSE;
! 1058:
! 1059: POSSESSIVE_CAPTURE:
! 1060: number = GET2(ecode, 1+LINK_SIZE);
! 1061: offset = number << 1;
! 1062:
! 1063: #ifdef PCRE_DEBUG
! 1064: printf("start possessive bracket %d\n", number);
! 1065: printf("subject=");
! 1066: pchars(eptr, 16, TRUE, md);
! 1067: printf("\n");
! 1068: #endif
! 1069:
! 1070: if (offset < md->offset_max)
! 1071: {
! 1072: matched_once = FALSE;
! 1073: code_offset = (int)(ecode - md->start_code);
! 1074:
! 1075: save_offset1 = md->offset_vector[offset];
! 1076: save_offset2 = md->offset_vector[offset+1];
! 1077: save_offset3 = md->offset_vector[md->offset_end - number];
! 1078: save_capture_last = md->capture_last;
! 1079:
! 1080: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
! 1081:
! 1082: /* Each time round the loop, save the current subject position for use
! 1083: when the group matches. For MATCH_MATCH, the group has matched, so we
! 1084: restart it with a new subject starting position, remembering that we had
! 1085: at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
! 1086: usual. If we haven't matched any alternatives in any iteration, check to
! 1087: see if a previous iteration matched. If so, the group has matched;
! 1088: continue from afterwards. Otherwise it has failed; restore the previous
! 1089: capture values before returning NOMATCH. */
! 1090:
! 1091: for (;;)
! 1092: {
! 1093: md->offset_vector[md->offset_end - number] =
! 1094: (int)(eptr - md->start_subject);
! 1095: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 1096: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 1097: eptrb, RM63);
! 1098: if (rrc == MATCH_KETRPOS)
! 1099: {
! 1100: offset_top = md->end_offset_top;
! 1101: eptr = md->end_match_ptr;
! 1102: ecode = md->start_code + code_offset;
! 1103: save_capture_last = md->capture_last;
! 1104: matched_once = TRUE;
! 1105: continue;
! 1106: }
! 1107:
! 1108: /* See comment in the code for capturing groups above about handling
! 1109: THEN. */
! 1110:
! 1111: if (rrc == MATCH_THEN)
! 1112: {
! 1113: next = ecode + GET(ecode,1);
! 1114: if (md->start_match_ptr < next &&
! 1115: (*ecode == OP_ALT || *next == OP_ALT))
! 1116: rrc = MATCH_NOMATCH;
! 1117: }
! 1118:
! 1119: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1120: md->capture_last = save_capture_last;
! 1121: ecode += GET(ecode, 1);
! 1122: if (*ecode != OP_ALT) break;
! 1123: }
! 1124:
! 1125: if (!matched_once)
! 1126: {
! 1127: md->offset_vector[offset] = save_offset1;
! 1128: md->offset_vector[offset+1] = save_offset2;
! 1129: md->offset_vector[md->offset_end - number] = save_offset3;
! 1130: }
! 1131:
! 1132: if (allow_zero || matched_once)
! 1133: {
! 1134: ecode += 1 + LINK_SIZE;
! 1135: break;
! 1136: }
! 1137:
! 1138: RRETURN(MATCH_NOMATCH);
! 1139: }
! 1140:
! 1141: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
! 1142: as a non-capturing bracket. */
! 1143:
! 1144: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1145: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1146:
! 1147: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
! 1148:
! 1149: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1150: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1151:
! 1152: /* Non-capturing possessive bracket with unlimited repeat. We come here
! 1153: from BRAZERO with allow_zero = TRUE. The code is similar to the above,
! 1154: without the capturing complication. It is written out separately for speed
! 1155: and cleanliness. */
! 1156:
! 1157: case OP_BRAPOS:
! 1158: case OP_SBRAPOS:
! 1159: allow_zero = FALSE;
! 1160:
! 1161: POSSESSIVE_NON_CAPTURE:
! 1162: matched_once = FALSE;
! 1163: code_offset = (int)(ecode - md->start_code);
! 1164:
! 1165: for (;;)
! 1166: {
! 1167: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 1168: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
! 1169: eptrb, RM48);
! 1170: if (rrc == MATCH_KETRPOS)
! 1171: {
! 1172: offset_top = md->end_offset_top;
! 1173: eptr = md->end_match_ptr;
! 1174: ecode = md->start_code + code_offset;
! 1175: matched_once = TRUE;
! 1176: continue;
! 1177: }
! 1178:
! 1179: /* See comment in the code for capturing groups above about handling
! 1180: THEN. */
! 1181:
! 1182: if (rrc == MATCH_THEN)
! 1183: {
! 1184: next = ecode + GET(ecode,1);
! 1185: if (md->start_match_ptr < next &&
! 1186: (*ecode == OP_ALT || *next == OP_ALT))
! 1187: rrc = MATCH_NOMATCH;
! 1188: }
! 1189:
! 1190: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1191: ecode += GET(ecode, 1);
! 1192: if (*ecode != OP_ALT) break;
! 1193: }
! 1194:
! 1195: if (matched_once || allow_zero)
! 1196: {
! 1197: ecode += 1 + LINK_SIZE;
! 1198: break;
! 1199: }
! 1200: RRETURN(MATCH_NOMATCH);
! 1201:
! 1202: /* Control never reaches here. */
! 1203:
! 1204: /* Conditional group: compilation checked that there are no more than
! 1205: two branches. If the condition is false, skipping the first branch takes us
! 1206: past the end if there is only one branch, but that's OK because that is
! 1207: exactly what going to the ket would do. */
! 1208:
! 1209: case OP_COND:
! 1210: case OP_SCOND:
! 1211: codelink = GET(ecode, 1);
! 1212:
! 1213: /* Because of the way auto-callout works during compile, a callout item is
! 1214: inserted between OP_COND and an assertion condition. */
! 1215:
! 1216: if (ecode[LINK_SIZE+1] == OP_CALLOUT)
! 1217: {
! 1218: if (pcre_callout != NULL)
! 1219: {
! 1220: pcre_callout_block cb;
! 1221: cb.version = 2; /* Version 2 of the callout block */
! 1222: cb.callout_number = ecode[LINK_SIZE+2];
! 1223: cb.offset_vector = md->offset_vector;
! 1224: cb.subject = (PCRE_SPTR)md->start_subject;
! 1225: cb.subject_length = (int)(md->end_subject - md->start_subject);
! 1226: cb.start_match = (int)(mstart - md->start_subject);
! 1227: cb.current_position = (int)(eptr - md->start_subject);
! 1228: cb.pattern_position = GET(ecode, LINK_SIZE + 3);
! 1229: cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
! 1230: cb.capture_top = offset_top/2;
! 1231: cb.capture_last = md->capture_last;
! 1232: cb.callout_data = md->callout_data;
! 1233: cb.mark = md->nomatch_mark;
! 1234: if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
! 1235: if (rrc < 0) RRETURN(rrc);
! 1236: }
! 1237: ecode += _pcre_OP_lengths[OP_CALLOUT];
! 1238: }
! 1239:
! 1240: condcode = ecode[LINK_SIZE+1];
! 1241:
! 1242: /* Now see what the actual condition is */
! 1243:
! 1244: if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
! 1245: {
! 1246: if (md->recursive == NULL) /* Not recursing => FALSE */
! 1247: {
! 1248: condition = FALSE;
! 1249: ecode += GET(ecode, 1);
! 1250: }
! 1251: else
! 1252: {
! 1253: int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
! 1254: condition = (recno == RREF_ANY || recno == md->recursive->group_num);
! 1255:
! 1256: /* If the test is for recursion into a specific subpattern, and it is
! 1257: false, but the test was set up by name, scan the table to see if the
! 1258: name refers to any other numbers, and test them. The condition is true
! 1259: if any one is set. */
! 1260:
! 1261: if (!condition && condcode == OP_NRREF)
! 1262: {
! 1263: uschar *slotA = md->name_table;
! 1264: for (i = 0; i < md->name_count; i++)
! 1265: {
! 1266: if (GET2(slotA, 0) == recno) break;
! 1267: slotA += md->name_entry_size;
! 1268: }
! 1269:
! 1270: /* Found a name for the number - there can be only one; duplicate
! 1271: names for different numbers are allowed, but not vice versa. First
! 1272: scan down for duplicates. */
! 1273:
! 1274: if (i < md->name_count)
! 1275: {
! 1276: uschar *slotB = slotA;
! 1277: while (slotB > md->name_table)
! 1278: {
! 1279: slotB -= md->name_entry_size;
! 1280: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1281: {
! 1282: condition = GET2(slotB, 0) == md->recursive->group_num;
! 1283: if (condition) break;
! 1284: }
! 1285: else break;
! 1286: }
! 1287:
! 1288: /* Scan up for duplicates */
! 1289:
! 1290: if (!condition)
! 1291: {
! 1292: slotB = slotA;
! 1293: for (i++; i < md->name_count; i++)
! 1294: {
! 1295: slotB += md->name_entry_size;
! 1296: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1297: {
! 1298: condition = GET2(slotB, 0) == md->recursive->group_num;
! 1299: if (condition) break;
! 1300: }
! 1301: else break;
! 1302: }
! 1303: }
! 1304: }
! 1305: }
! 1306:
! 1307: /* Choose branch according to the condition */
! 1308:
! 1309: ecode += condition? 3 : GET(ecode, 1);
! 1310: }
! 1311: }
! 1312:
! 1313: else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
! 1314: {
! 1315: offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
! 1316: condition = offset < offset_top && md->offset_vector[offset] >= 0;
! 1317:
! 1318: /* If the numbered capture is unset, but the reference was by name,
! 1319: scan the table to see if the name refers to any other numbers, and test
! 1320: them. The condition is true if any one is set. This is tediously similar
! 1321: to the code above, but not close enough to try to amalgamate. */
! 1322:
! 1323: if (!condition && condcode == OP_NCREF)
! 1324: {
! 1325: int refno = offset >> 1;
! 1326: uschar *slotA = md->name_table;
! 1327:
! 1328: for (i = 0; i < md->name_count; i++)
! 1329: {
! 1330: if (GET2(slotA, 0) == refno) break;
! 1331: slotA += md->name_entry_size;
! 1332: }
! 1333:
! 1334: /* Found a name for the number - there can be only one; duplicate names
! 1335: for different numbers are allowed, but not vice versa. First scan down
! 1336: for duplicates. */
! 1337:
! 1338: if (i < md->name_count)
! 1339: {
! 1340: uschar *slotB = slotA;
! 1341: while (slotB > md->name_table)
! 1342: {
! 1343: slotB -= md->name_entry_size;
! 1344: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1345: {
! 1346: offset = GET2(slotB, 0) << 1;
! 1347: condition = offset < offset_top &&
! 1348: md->offset_vector[offset] >= 0;
! 1349: if (condition) break;
! 1350: }
! 1351: else break;
! 1352: }
! 1353:
! 1354: /* Scan up for duplicates */
! 1355:
! 1356: if (!condition)
! 1357: {
! 1358: slotB = slotA;
! 1359: for (i++; i < md->name_count; i++)
! 1360: {
! 1361: slotB += md->name_entry_size;
! 1362: if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
! 1363: {
! 1364: offset = GET2(slotB, 0) << 1;
! 1365: condition = offset < offset_top &&
! 1366: md->offset_vector[offset] >= 0;
! 1367: if (condition) break;
! 1368: }
! 1369: else break;
! 1370: }
! 1371: }
! 1372: }
! 1373: }
! 1374:
! 1375: /* Choose branch according to the condition */
! 1376:
! 1377: ecode += condition? 3 : GET(ecode, 1);
! 1378: }
! 1379:
! 1380: else if (condcode == OP_DEF) /* DEFINE - always false */
! 1381: {
! 1382: condition = FALSE;
! 1383: ecode += GET(ecode, 1);
! 1384: }
! 1385:
! 1386: /* The condition is an assertion. Call match() to evaluate it - setting
! 1387: md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
! 1388: an assertion. */
! 1389:
! 1390: else
! 1391: {
! 1392: md->match_function_type = MATCH_CONDASSERT;
! 1393: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
! 1394: if (rrc == MATCH_MATCH)
! 1395: {
! 1396: if (md->end_offset_top > offset_top)
! 1397: offset_top = md->end_offset_top; /* Captures may have happened */
! 1398: condition = TRUE;
! 1399: ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
! 1400: while (*ecode == OP_ALT) ecode += GET(ecode, 1);
! 1401: }
! 1402:
! 1403: /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
! 1404: assertion; it is therefore treated as NOMATCH. */
! 1405:
! 1406: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
! 1407: {
! 1408: RRETURN(rrc); /* Need braces because of following else */
! 1409: }
! 1410: else
! 1411: {
! 1412: condition = FALSE;
! 1413: ecode += codelink;
! 1414: }
! 1415: }
! 1416:
! 1417: /* We are now at the branch that is to be obeyed. As there is only one, can
! 1418: use tail recursion to avoid using another stack frame, except when there is
! 1419: unlimited repeat of a possibly empty group. In the latter case, a recursive
! 1420: call to match() is always required, unless the second alternative doesn't
! 1421: exist, in which case we can just plough on. Note that, for compatibility
! 1422: with Perl, the | in a conditional group is NOT treated as creating two
! 1423: alternatives. If a THEN is encountered in the branch, it propagates out to
! 1424: the enclosing alternative (unless nested in a deeper set of alternatives,
! 1425: of course). */
! 1426:
! 1427: if (condition || *ecode == OP_ALT)
! 1428: {
! 1429: if (op != OP_SCOND)
! 1430: {
! 1431: ecode += 1 + LINK_SIZE;
! 1432: goto TAIL_RECURSE;
! 1433: }
! 1434:
! 1435: md->match_function_type = MATCH_CBEGROUP;
! 1436: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
! 1437: RRETURN(rrc);
! 1438: }
! 1439:
! 1440: /* Condition false & no alternative; continue after the group. */
! 1441:
! 1442: else
! 1443: {
! 1444: ecode += 1 + LINK_SIZE;
! 1445: }
! 1446: break;
! 1447:
! 1448:
! 1449: /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
! 1450: to close any currently open capturing brackets. */
! 1451:
! 1452: case OP_CLOSE:
! 1453: number = GET2(ecode, 1);
! 1454: offset = number << 1;
! 1455:
! 1456: #ifdef PCRE_DEBUG
! 1457: printf("end bracket %d at *ACCEPT", number);
! 1458: printf("\n");
! 1459: #endif
! 1460:
! 1461: md->capture_last = number;
! 1462: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
! 1463: {
! 1464: md->offset_vector[offset] =
! 1465: md->offset_vector[md->offset_end - number];
! 1466: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
! 1467: if (offset_top <= offset) offset_top = offset + 2;
! 1468: }
! 1469: ecode += 3;
! 1470: break;
! 1471:
! 1472:
! 1473: /* End of the pattern, either real or forced. */
! 1474:
! 1475: case OP_END:
! 1476: case OP_ACCEPT:
! 1477: case OP_ASSERT_ACCEPT:
! 1478:
! 1479: /* If we have matched an empty string, fail if not in an assertion and not
! 1480: in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
! 1481: is set and we have matched at the start of the subject. In both cases,
! 1482: backtracking will then try other alternatives, if any. */
! 1483:
! 1484: if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
! 1485: md->recursive == NULL &&
! 1486: (md->notempty ||
! 1487: (md->notempty_atstart &&
! 1488: mstart == md->start_subject + md->start_offset)))
! 1489: RRETURN(MATCH_NOMATCH);
! 1490:
! 1491: /* Otherwise, we have a match. */
! 1492:
! 1493: md->end_match_ptr = eptr; /* Record where we ended */
! 1494: md->end_offset_top = offset_top; /* and how many extracts were taken */
! 1495: md->start_match_ptr = mstart; /* and the start (\K can modify) */
! 1496:
! 1497: /* For some reason, the macros don't work properly if an expression is
! 1498: given as the argument to RRETURN when the heap is in use. */
! 1499:
! 1500: rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
! 1501: RRETURN(rrc);
! 1502:
! 1503: /* Assertion brackets. Check the alternative branches in turn - the
! 1504: matching won't pass the KET for an assertion. If any one branch matches,
! 1505: the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
! 1506: start of each branch to move the current point backwards, so the code at
! 1507: this level is identical to the lookahead case. When the assertion is part
! 1508: of a condition, we want to return immediately afterwards. The caller of
! 1509: this incarnation of the match() function will have set MATCH_CONDASSERT in
! 1510: md->match_function_type, and one of these opcodes will be the first opcode
! 1511: that is processed. We use a local variable that is preserved over calls to
! 1512: match() to remember this case. */
! 1513:
! 1514: case OP_ASSERT:
! 1515: case OP_ASSERTBACK:
! 1516: if (md->match_function_type == MATCH_CONDASSERT)
! 1517: {
! 1518: condassert = TRUE;
! 1519: md->match_function_type = 0;
! 1520: }
! 1521: else condassert = FALSE;
! 1522:
! 1523: do
! 1524: {
! 1525: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
! 1526: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
! 1527: {
! 1528: mstart = md->start_match_ptr; /* In case \K reset it */
! 1529: break;
! 1530: }
! 1531:
! 1532: /* PCRE does not allow THEN to escape beyond an assertion; it is treated
! 1533: as NOMATCH. */
! 1534:
! 1535: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 1536: ecode += GET(ecode, 1);
! 1537: }
! 1538: while (*ecode == OP_ALT);
! 1539:
! 1540: if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
! 1541:
! 1542: /* If checking an assertion for a condition, return MATCH_MATCH. */
! 1543:
! 1544: if (condassert) RRETURN(MATCH_MATCH);
! 1545:
! 1546: /* Continue from after the assertion, updating the offsets high water
! 1547: mark, since extracts may have been taken during the assertion. */
! 1548:
! 1549: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1550: ecode += 1 + LINK_SIZE;
! 1551: offset_top = md->end_offset_top;
! 1552: continue;
! 1553:
! 1554: /* Negative assertion: all branches must fail to match. Encountering SKIP,
! 1555: PRUNE, or COMMIT means we must assume failure without checking subsequent
! 1556: branches. */
! 1557:
! 1558: case OP_ASSERT_NOT:
! 1559: case OP_ASSERTBACK_NOT:
! 1560: if (md->match_function_type == MATCH_CONDASSERT)
! 1561: {
! 1562: condassert = TRUE;
! 1563: md->match_function_type = 0;
! 1564: }
! 1565: else condassert = FALSE;
! 1566:
! 1567: do
! 1568: {
! 1569: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
! 1570: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
! 1571: if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
! 1572: {
! 1573: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1574: break;
! 1575: }
! 1576:
! 1577: /* PCRE does not allow THEN to escape beyond an assertion; it is treated
! 1578: as NOMATCH. */
! 1579:
! 1580: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
! 1581: ecode += GET(ecode,1);
! 1582: }
! 1583: while (*ecode == OP_ALT);
! 1584:
! 1585: if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
! 1586:
! 1587: ecode += 1 + LINK_SIZE;
! 1588: continue;
! 1589:
! 1590: /* Move the subject pointer back. This occurs only at the start of
! 1591: each branch of a lookbehind assertion. If we are too close to the start to
! 1592: move back, this match function fails. When working with UTF-8 we move
! 1593: back a number of characters, not bytes. */
! 1594:
! 1595: case OP_REVERSE:
! 1596: #ifdef SUPPORT_UTF8
! 1597: if (utf8)
! 1598: {
! 1599: i = GET(ecode, 1);
! 1600: while (i-- > 0)
! 1601: {
! 1602: eptr--;
! 1603: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
! 1604: BACKCHAR(eptr);
! 1605: }
! 1606: }
! 1607: else
! 1608: #endif
! 1609:
! 1610: /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
! 1611:
! 1612: {
! 1613: eptr -= GET(ecode, 1);
! 1614: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
! 1615: }
! 1616:
! 1617: /* Save the earliest consulted character, then skip to next op code */
! 1618:
! 1619: if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
! 1620: ecode += 1 + LINK_SIZE;
! 1621: break;
! 1622:
! 1623: /* The callout item calls an external function, if one is provided, passing
! 1624: details of the match so far. This is mainly for debugging, though the
! 1625: function is able to force a failure. */
! 1626:
! 1627: case OP_CALLOUT:
! 1628: if (pcre_callout != NULL)
! 1629: {
! 1630: pcre_callout_block cb;
! 1631: cb.version = 2; /* Version 2 of the callout block */
! 1632: cb.callout_number = ecode[1];
! 1633: cb.offset_vector = md->offset_vector;
! 1634: cb.subject = (PCRE_SPTR)md->start_subject;
! 1635: cb.subject_length = (int)(md->end_subject - md->start_subject);
! 1636: cb.start_match = (int)(mstart - md->start_subject);
! 1637: cb.current_position = (int)(eptr - md->start_subject);
! 1638: cb.pattern_position = GET(ecode, 2);
! 1639: cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
! 1640: cb.capture_top = offset_top/2;
! 1641: cb.capture_last = md->capture_last;
! 1642: cb.callout_data = md->callout_data;
! 1643: cb.mark = md->nomatch_mark;
! 1644: if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
! 1645: if (rrc < 0) RRETURN(rrc);
! 1646: }
! 1647: ecode += 2 + 2*LINK_SIZE;
! 1648: break;
! 1649:
! 1650: /* Recursion either matches the current regex, or some subexpression. The
! 1651: offset data is the offset to the starting bracket from the start of the
! 1652: whole pattern. (This is so that it works from duplicated subpatterns.)
! 1653:
! 1654: The state of the capturing groups is preserved over recursion, and
! 1655: re-instated afterwards. We don't know how many are started and not yet
! 1656: finished (offset_top records the completed total) so we just have to save
! 1657: all the potential data. There may be up to 65535 such values, which is too
! 1658: large to put on the stack, but using malloc for small numbers seems
! 1659: expensive. As a compromise, the stack is used when there are no more than
! 1660: REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
! 1661:
! 1662: There are also other values that have to be saved. We use a chained
! 1663: sequence of blocks that actually live on the stack. Thanks to Robin Houston
! 1664: for the original version of this logic. It has, however, been hacked around
! 1665: a lot, so he is not to blame for the current way it works. */
! 1666:
! 1667: case OP_RECURSE:
! 1668: {
! 1669: recursion_info *ri;
! 1670: int recno;
! 1671:
! 1672: callpat = md->start_code + GET(ecode, 1);
! 1673: recno = (callpat == md->start_code)? 0 :
! 1674: GET2(callpat, 1 + LINK_SIZE);
! 1675:
! 1676: /* Check for repeating a recursion without advancing the subject pointer.
! 1677: This should catch convoluted mutual recursions. (Some simple cases are
! 1678: caught at compile time.) */
! 1679:
! 1680: for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
! 1681: if (recno == ri->group_num && eptr == ri->subject_position)
! 1682: RRETURN(PCRE_ERROR_RECURSELOOP);
! 1683:
! 1684: /* Add to "recursing stack" */
! 1685:
! 1686: new_recursive.group_num = recno;
! 1687: new_recursive.subject_position = eptr;
! 1688: new_recursive.prevrec = md->recursive;
! 1689: md->recursive = &new_recursive;
! 1690:
! 1691: /* Where to continue from afterwards */
! 1692:
! 1693: ecode += 1 + LINK_SIZE;
! 1694:
! 1695: /* Now save the offset data */
! 1696:
! 1697: new_recursive.saved_max = md->offset_end;
! 1698: if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
! 1699: new_recursive.offset_save = stacksave;
! 1700: else
! 1701: {
! 1702: new_recursive.offset_save =
! 1703: (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
! 1704: if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
! 1705: }
! 1706: memcpy(new_recursive.offset_save, md->offset_vector,
! 1707: new_recursive.saved_max * sizeof(int));
! 1708:
! 1709: /* OK, now we can do the recursion. After processing each alternative,
! 1710: restore the offset data. If there were nested recursions, md->recursive
! 1711: might be changed, so reset it before looping. */
! 1712:
! 1713: DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
! 1714: cbegroup = (*callpat >= OP_SBRA);
! 1715: do
! 1716: {
! 1717: if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
! 1718: RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
! 1719: md, eptrb, RM6);
! 1720: memcpy(md->offset_vector, new_recursive.offset_save,
! 1721: new_recursive.saved_max * sizeof(int));
! 1722: md->recursive = new_recursive.prevrec;
! 1723: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
! 1724: {
! 1725: DPRINTF(("Recursion matched\n"));
! 1726: if (new_recursive.offset_save != stacksave)
! 1727: (pcre_free)(new_recursive.offset_save);
! 1728:
! 1729: /* Set where we got to in the subject, and reset the start in case
! 1730: it was changed by \K. This *is* propagated back out of a recursion,
! 1731: for Perl compatibility. */
! 1732:
! 1733: eptr = md->end_match_ptr;
! 1734: mstart = md->start_match_ptr;
! 1735: goto RECURSION_MATCHED; /* Exit loop; end processing */
! 1736: }
! 1737:
! 1738: /* PCRE does not allow THEN to escape beyond a recursion; it is treated
! 1739: as NOMATCH. */
! 1740:
! 1741: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
! 1742: {
! 1743: DPRINTF(("Recursion gave error %d\n", rrc));
! 1744: if (new_recursive.offset_save != stacksave)
! 1745: (pcre_free)(new_recursive.offset_save);
! 1746: RRETURN(rrc);
! 1747: }
! 1748:
! 1749: md->recursive = &new_recursive;
! 1750: callpat += GET(callpat, 1);
! 1751: }
! 1752: while (*callpat == OP_ALT);
! 1753:
! 1754: DPRINTF(("Recursion didn't match\n"));
! 1755: md->recursive = new_recursive.prevrec;
! 1756: if (new_recursive.offset_save != stacksave)
! 1757: (pcre_free)(new_recursive.offset_save);
! 1758: RRETURN(MATCH_NOMATCH);
! 1759: }
! 1760:
! 1761: RECURSION_MATCHED:
! 1762: break;
! 1763:
! 1764: /* An alternation is the end of a branch; scan along to find the end of the
! 1765: bracketed group and go to there. */
! 1766:
! 1767: case OP_ALT:
! 1768: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
! 1769: break;
! 1770:
! 1771: /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
! 1772: indicating that it may occur zero times. It may repeat infinitely, or not
! 1773: at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
! 1774: with fixed upper repeat limits are compiled as a number of copies, with the
! 1775: optional ones preceded by BRAZERO or BRAMINZERO. */
! 1776:
! 1777: case OP_BRAZERO:
! 1778: next = ecode + 1;
! 1779: RMATCH(eptr, next, offset_top, md, eptrb, RM10);
! 1780: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1781: do next += GET(next, 1); while (*next == OP_ALT);
! 1782: ecode = next + 1 + LINK_SIZE;
! 1783: break;
! 1784:
! 1785: case OP_BRAMINZERO:
! 1786: next = ecode + 1;
! 1787: do next += GET(next, 1); while (*next == OP_ALT);
! 1788: RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
! 1789: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1790: ecode++;
! 1791: break;
! 1792:
! 1793: case OP_SKIPZERO:
! 1794: next = ecode+1;
! 1795: do next += GET(next,1); while (*next == OP_ALT);
! 1796: ecode = next + 1 + LINK_SIZE;
! 1797: break;
! 1798:
! 1799: /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
! 1800: here; just jump to the group, with allow_zero set TRUE. */
! 1801:
! 1802: case OP_BRAPOSZERO:
! 1803: op = *(++ecode);
! 1804: allow_zero = TRUE;
! 1805: if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
! 1806: goto POSSESSIVE_NON_CAPTURE;
! 1807:
! 1808: /* End of a group, repeated or non-repeating. */
! 1809:
! 1810: case OP_KET:
! 1811: case OP_KETRMIN:
! 1812: case OP_KETRMAX:
! 1813: case OP_KETRPOS:
! 1814: prev = ecode - GET(ecode, 1);
! 1815:
! 1816: /* If this was a group that remembered the subject start, in order to break
! 1817: infinite repeats of empty string matches, retrieve the subject start from
! 1818: the chain. Otherwise, set it NULL. */
! 1819:
! 1820: if (*prev >= OP_SBRA || *prev == OP_ONCE)
! 1821: {
! 1822: saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
! 1823: eptrb = eptrb->epb_prev; /* Backup to previous group */
! 1824: }
! 1825: else saved_eptr = NULL;
! 1826:
! 1827: /* If we are at the end of an assertion group or a non-capturing atomic
! 1828: group, stop matching and return MATCH_MATCH, but record the current high
! 1829: water mark for use by positive assertions. We also need to record the match
! 1830: start in case it was changed by \K. */
! 1831:
! 1832: if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
! 1833: *prev == OP_ONCE_NC)
! 1834: {
! 1835: md->end_match_ptr = eptr; /* For ONCE_NC */
! 1836: md->end_offset_top = offset_top;
! 1837: md->start_match_ptr = mstart;
! 1838: RRETURN(MATCH_MATCH); /* Sets md->mark */
! 1839: }
! 1840:
! 1841: /* For capturing groups we have to check the group number back at the start
! 1842: and if necessary complete handling an extraction by setting the offsets and
! 1843: bumping the high water mark. Whole-pattern recursion is coded as a recurse
! 1844: into group 0, so it won't be picked up here. Instead, we catch it when the
! 1845: OP_END is reached. Other recursion is handled here. We just have to record
! 1846: the current subject position and start match pointer and give a MATCH
! 1847: return. */
! 1848:
! 1849: if (*prev == OP_CBRA || *prev == OP_SCBRA ||
! 1850: *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
! 1851: {
! 1852: number = GET2(prev, 1+LINK_SIZE);
! 1853: offset = number << 1;
! 1854:
! 1855: #ifdef PCRE_DEBUG
! 1856: printf("end bracket %d", number);
! 1857: printf("\n");
! 1858: #endif
! 1859:
! 1860: /* Handle a recursively called group. */
! 1861:
! 1862: if (md->recursive != NULL && md->recursive->group_num == number)
! 1863: {
! 1864: md->end_match_ptr = eptr;
! 1865: md->start_match_ptr = mstart;
! 1866: RRETURN(MATCH_MATCH);
! 1867: }
! 1868:
! 1869: /* Deal with capturing */
! 1870:
! 1871: md->capture_last = number;
! 1872: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
! 1873: {
! 1874: /* If offset is greater than offset_top, it means that we are
! 1875: "skipping" a capturing group, and that group's offsets must be marked
! 1876: unset. In earlier versions of PCRE, all the offsets were unset at the
! 1877: start of matching, but this doesn't work because atomic groups and
! 1878: assertions can cause a value to be set that should later be unset.
! 1879: Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
! 1880: part of the atomic group, but this is not on the final matching path,
! 1881: so must be unset when 2 is set. (If there is no group 2, there is no
! 1882: problem, because offset_top will then be 2, indicating no capture.) */
! 1883:
! 1884: if (offset > offset_top)
! 1885: {
! 1886: register int *iptr = md->offset_vector + offset_top;
! 1887: register int *iend = md->offset_vector + offset;
! 1888: while (iptr < iend) *iptr++ = -1;
! 1889: }
! 1890:
! 1891: /* Now make the extraction */
! 1892:
! 1893: md->offset_vector[offset] =
! 1894: md->offset_vector[md->offset_end - number];
! 1895: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
! 1896: if (offset_top <= offset) offset_top = offset + 2;
! 1897: }
! 1898: }
! 1899:
! 1900: /* For an ordinary non-repeating ket, just continue at this level. This
! 1901: also happens for a repeating ket if no characters were matched in the
! 1902: group. This is the forcible breaking of infinite loops as implemented in
! 1903: Perl 5.005. For a non-repeating atomic group that includes captures,
! 1904: establish a backup point by processing the rest of the pattern at a lower
! 1905: level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
! 1906: original OP_ONCE level, thereby bypassing intermediate backup points, but
! 1907: resetting any captures that happened along the way. */
! 1908:
! 1909: if (*ecode == OP_KET || eptr == saved_eptr)
! 1910: {
! 1911: if (*prev == OP_ONCE)
! 1912: {
! 1913: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
! 1914: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1915: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
! 1916: RRETURN(MATCH_ONCE);
! 1917: }
! 1918: ecode += 1 + LINK_SIZE; /* Carry on at this level */
! 1919: break;
! 1920: }
! 1921:
! 1922: /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
! 1923: and return the MATCH_KETRPOS. This makes it possible to do the repeats one
! 1924: at a time from the outer level, thus saving stack. */
! 1925:
! 1926: if (*ecode == OP_KETRPOS)
! 1927: {
! 1928: md->end_match_ptr = eptr;
! 1929: md->end_offset_top = offset_top;
! 1930: RRETURN(MATCH_KETRPOS);
! 1931: }
! 1932:
! 1933: /* The normal repeating kets try the rest of the pattern or restart from
! 1934: the preceding bracket, in the appropriate order. In the second case, we can
! 1935: use tail recursion to avoid using another stack frame, unless we have an
! 1936: atomic group or an unlimited repeat of a group that can match an empty
! 1937: string. */
! 1938:
! 1939: if (*ecode == OP_KETRMIN)
! 1940: {
! 1941: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
! 1942: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1943: if (*prev == OP_ONCE)
! 1944: {
! 1945: RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
! 1946: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1947: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
! 1948: RRETURN(MATCH_ONCE);
! 1949: }
! 1950: if (*prev >= OP_SBRA) /* Could match an empty string */
! 1951: {
! 1952: md->match_function_type = MATCH_CBEGROUP;
! 1953: RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
! 1954: RRETURN(rrc);
! 1955: }
! 1956: ecode = prev;
! 1957: goto TAIL_RECURSE;
! 1958: }
! 1959: else /* OP_KETRMAX */
! 1960: {
! 1961: if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 1962: RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
! 1963: if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
! 1964: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1965: if (*prev == OP_ONCE)
! 1966: {
! 1967: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
! 1968: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1969: md->once_target = prev;
! 1970: RRETURN(MATCH_ONCE);
! 1971: }
! 1972: ecode += 1 + LINK_SIZE;
! 1973: goto TAIL_RECURSE;
! 1974: }
! 1975: /* Control never gets here */
! 1976:
! 1977: /* Not multiline mode: start of subject assertion, unless notbol. */
! 1978:
! 1979: case OP_CIRC:
! 1980: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
! 1981:
! 1982: /* Start of subject assertion */
! 1983:
! 1984: case OP_SOD:
! 1985: if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
! 1986: ecode++;
! 1987: break;
! 1988:
! 1989: /* Multiline mode: start of subject unless notbol, or after any newline. */
! 1990:
! 1991: case OP_CIRCM:
! 1992: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
! 1993: if (eptr != md->start_subject &&
! 1994: (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
! 1995: RRETURN(MATCH_NOMATCH);
! 1996: ecode++;
! 1997: break;
! 1998:
! 1999: /* Start of match assertion */
! 2000:
! 2001: case OP_SOM:
! 2002: if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
! 2003: ecode++;
! 2004: break;
! 2005:
! 2006: /* Reset the start of match point */
! 2007:
! 2008: case OP_SET_SOM:
! 2009: mstart = eptr;
! 2010: ecode++;
! 2011: break;
! 2012:
! 2013: /* Multiline mode: assert before any newline, or before end of subject
! 2014: unless noteol is set. */
! 2015:
! 2016: case OP_DOLLM:
! 2017: if (eptr < md->end_subject)
! 2018: { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
! 2019: else
! 2020: {
! 2021: if (md->noteol) RRETURN(MATCH_NOMATCH);
! 2022: SCHECK_PARTIAL();
! 2023: }
! 2024: ecode++;
! 2025: break;
! 2026:
! 2027: /* Not multiline mode: assert before a terminating newline or before end of
! 2028: subject unless noteol is set. */
! 2029:
! 2030: case OP_DOLL:
! 2031: if (md->noteol) RRETURN(MATCH_NOMATCH);
! 2032: if (!md->endonly) goto ASSERT_NL_OR_EOS;
! 2033:
! 2034: /* ... else fall through for endonly */
! 2035:
! 2036: /* End of subject assertion (\z) */
! 2037:
! 2038: case OP_EOD:
! 2039: if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
! 2040: SCHECK_PARTIAL();
! 2041: ecode++;
! 2042: break;
! 2043:
! 2044: /* End of subject or ending \n assertion (\Z) */
! 2045:
! 2046: case OP_EODN:
! 2047: ASSERT_NL_OR_EOS:
! 2048: if (eptr < md->end_subject &&
! 2049: (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
! 2050: RRETURN(MATCH_NOMATCH);
! 2051:
! 2052: /* Either at end of string or \n before end. */
! 2053:
! 2054: SCHECK_PARTIAL();
! 2055: ecode++;
! 2056: break;
! 2057:
! 2058: /* Word boundary assertions */
! 2059:
! 2060: case OP_NOT_WORD_BOUNDARY:
! 2061: case OP_WORD_BOUNDARY:
! 2062: {
! 2063:
! 2064: /* Find out if the previous and current characters are "word" characters.
! 2065: It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
! 2066: be "non-word" characters. Remember the earliest consulted character for
! 2067: partial matching. */
! 2068:
! 2069: #ifdef SUPPORT_UTF8
! 2070: if (utf8)
! 2071: {
! 2072: /* Get status of previous character */
! 2073:
! 2074: if (eptr == md->start_subject) prev_is_word = FALSE; else
! 2075: {
! 2076: USPTR lastptr = eptr - 1;
! 2077: while((*lastptr & 0xc0) == 0x80) lastptr--;
! 2078: if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
! 2079: GETCHAR(c, lastptr);
! 2080: #ifdef SUPPORT_UCP
! 2081: if (md->use_ucp)
! 2082: {
! 2083: if (c == '_') prev_is_word = TRUE; else
! 2084: {
! 2085: int cat = UCD_CATEGORY(c);
! 2086: prev_is_word = (cat == ucp_L || cat == ucp_N);
! 2087: }
! 2088: }
! 2089: else
! 2090: #endif
! 2091: prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
! 2092: }
! 2093:
! 2094: /* Get status of next character */
! 2095:
! 2096: if (eptr >= md->end_subject)
! 2097: {
! 2098: SCHECK_PARTIAL();
! 2099: cur_is_word = FALSE;
! 2100: }
! 2101: else
! 2102: {
! 2103: GETCHAR(c, eptr);
! 2104: #ifdef SUPPORT_UCP
! 2105: if (md->use_ucp)
! 2106: {
! 2107: if (c == '_') cur_is_word = TRUE; else
! 2108: {
! 2109: int cat = UCD_CATEGORY(c);
! 2110: cur_is_word = (cat == ucp_L || cat == ucp_N);
! 2111: }
! 2112: }
! 2113: else
! 2114: #endif
! 2115: cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
! 2116: }
! 2117: }
! 2118: else
! 2119: #endif
! 2120:
! 2121: /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
! 2122: consistency with the behaviour of \w we do use it in this case. */
! 2123:
! 2124: {
! 2125: /* Get status of previous character */
! 2126:
! 2127: if (eptr == md->start_subject) prev_is_word = FALSE; else
! 2128: {
! 2129: if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
! 2130: #ifdef SUPPORT_UCP
! 2131: if (md->use_ucp)
! 2132: {
! 2133: c = eptr[-1];
! 2134: if (c == '_') prev_is_word = TRUE; else
! 2135: {
! 2136: int cat = UCD_CATEGORY(c);
! 2137: prev_is_word = (cat == ucp_L || cat == ucp_N);
! 2138: }
! 2139: }
! 2140: else
! 2141: #endif
! 2142: prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
! 2143: }
! 2144:
! 2145: /* Get status of next character */
! 2146:
! 2147: if (eptr >= md->end_subject)
! 2148: {
! 2149: SCHECK_PARTIAL();
! 2150: cur_is_word = FALSE;
! 2151: }
! 2152: else
! 2153: #ifdef SUPPORT_UCP
! 2154: if (md->use_ucp)
! 2155: {
! 2156: c = *eptr;
! 2157: if (c == '_') cur_is_word = TRUE; else
! 2158: {
! 2159: int cat = UCD_CATEGORY(c);
! 2160: cur_is_word = (cat == ucp_L || cat == ucp_N);
! 2161: }
! 2162: }
! 2163: else
! 2164: #endif
! 2165: cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
! 2166: }
! 2167:
! 2168: /* Now see if the situation is what we want */
! 2169:
! 2170: if ((*ecode++ == OP_WORD_BOUNDARY)?
! 2171: cur_is_word == prev_is_word : cur_is_word != prev_is_word)
! 2172: RRETURN(MATCH_NOMATCH);
! 2173: }
! 2174: break;
! 2175:
! 2176: /* Match a single character type; inline for speed */
! 2177:
! 2178: case OP_ANY:
! 2179: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 2180: /* Fall through */
! 2181:
! 2182: case OP_ALLANY:
! 2183: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
! 2184: { /* not be updated before SCHECK_PARTIAL. */
! 2185: SCHECK_PARTIAL();
! 2186: RRETURN(MATCH_NOMATCH);
! 2187: }
! 2188: eptr++;
! 2189: if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 2190: ecode++;
! 2191: break;
! 2192:
! 2193: /* Match a single byte, even in UTF-8 mode. This opcode really does match
! 2194: any byte, even newline, independent of the setting of PCRE_DOTALL. */
! 2195:
! 2196: case OP_ANYBYTE:
! 2197: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
! 2198: { /* not be updated before SCHECK_PARTIAL. */
! 2199: SCHECK_PARTIAL();
! 2200: RRETURN(MATCH_NOMATCH);
! 2201: }
! 2202: eptr++;
! 2203: ecode++;
! 2204: break;
! 2205:
! 2206: case OP_NOT_DIGIT:
! 2207: if (eptr >= md->end_subject)
! 2208: {
! 2209: SCHECK_PARTIAL();
! 2210: RRETURN(MATCH_NOMATCH);
! 2211: }
! 2212: GETCHARINCTEST(c, eptr);
! 2213: if (
! 2214: #ifdef SUPPORT_UTF8
! 2215: c < 256 &&
! 2216: #endif
! 2217: (md->ctypes[c] & ctype_digit) != 0
! 2218: )
! 2219: RRETURN(MATCH_NOMATCH);
! 2220: ecode++;
! 2221: break;
! 2222:
! 2223: case OP_DIGIT:
! 2224: if (eptr >= md->end_subject)
! 2225: {
! 2226: SCHECK_PARTIAL();
! 2227: RRETURN(MATCH_NOMATCH);
! 2228: }
! 2229: GETCHARINCTEST(c, eptr);
! 2230: if (
! 2231: #ifdef SUPPORT_UTF8
! 2232: c >= 256 ||
! 2233: #endif
! 2234: (md->ctypes[c] & ctype_digit) == 0
! 2235: )
! 2236: RRETURN(MATCH_NOMATCH);
! 2237: ecode++;
! 2238: break;
! 2239:
! 2240: case OP_NOT_WHITESPACE:
! 2241: if (eptr >= md->end_subject)
! 2242: {
! 2243: SCHECK_PARTIAL();
! 2244: RRETURN(MATCH_NOMATCH);
! 2245: }
! 2246: GETCHARINCTEST(c, eptr);
! 2247: if (
! 2248: #ifdef SUPPORT_UTF8
! 2249: c < 256 &&
! 2250: #endif
! 2251: (md->ctypes[c] & ctype_space) != 0
! 2252: )
! 2253: RRETURN(MATCH_NOMATCH);
! 2254: ecode++;
! 2255: break;
! 2256:
! 2257: case OP_WHITESPACE:
! 2258: if (eptr >= md->end_subject)
! 2259: {
! 2260: SCHECK_PARTIAL();
! 2261: RRETURN(MATCH_NOMATCH);
! 2262: }
! 2263: GETCHARINCTEST(c, eptr);
! 2264: if (
! 2265: #ifdef SUPPORT_UTF8
! 2266: c >= 256 ||
! 2267: #endif
! 2268: (md->ctypes[c] & ctype_space) == 0
! 2269: )
! 2270: RRETURN(MATCH_NOMATCH);
! 2271: ecode++;
! 2272: break;
! 2273:
! 2274: case OP_NOT_WORDCHAR:
! 2275: if (eptr >= md->end_subject)
! 2276: {
! 2277: SCHECK_PARTIAL();
! 2278: RRETURN(MATCH_NOMATCH);
! 2279: }
! 2280: GETCHARINCTEST(c, eptr);
! 2281: if (
! 2282: #ifdef SUPPORT_UTF8
! 2283: c < 256 &&
! 2284: #endif
! 2285: (md->ctypes[c] & ctype_word) != 0
! 2286: )
! 2287: RRETURN(MATCH_NOMATCH);
! 2288: ecode++;
! 2289: break;
! 2290:
! 2291: case OP_WORDCHAR:
! 2292: if (eptr >= md->end_subject)
! 2293: {
! 2294: SCHECK_PARTIAL();
! 2295: RRETURN(MATCH_NOMATCH);
! 2296: }
! 2297: GETCHARINCTEST(c, eptr);
! 2298: if (
! 2299: #ifdef SUPPORT_UTF8
! 2300: c >= 256 ||
! 2301: #endif
! 2302: (md->ctypes[c] & ctype_word) == 0
! 2303: )
! 2304: RRETURN(MATCH_NOMATCH);
! 2305: ecode++;
! 2306: break;
! 2307:
! 2308: case OP_ANYNL:
! 2309: if (eptr >= md->end_subject)
! 2310: {
! 2311: SCHECK_PARTIAL();
! 2312: RRETURN(MATCH_NOMATCH);
! 2313: }
! 2314: GETCHARINCTEST(c, eptr);
! 2315: switch(c)
! 2316: {
! 2317: default: RRETURN(MATCH_NOMATCH);
! 2318:
! 2319: case 0x000d:
! 2320: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 2321: break;
! 2322:
! 2323: case 0x000a:
! 2324: break;
! 2325:
! 2326: case 0x000b:
! 2327: case 0x000c:
! 2328: case 0x0085:
! 2329: case 0x2028:
! 2330: case 0x2029:
! 2331: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
! 2332: break;
! 2333: }
! 2334: ecode++;
! 2335: break;
! 2336:
! 2337: case OP_NOT_HSPACE:
! 2338: if (eptr >= md->end_subject)
! 2339: {
! 2340: SCHECK_PARTIAL();
! 2341: RRETURN(MATCH_NOMATCH);
! 2342: }
! 2343: GETCHARINCTEST(c, eptr);
! 2344: switch(c)
! 2345: {
! 2346: default: break;
! 2347: case 0x09: /* HT */
! 2348: case 0x20: /* SPACE */
! 2349: case 0xa0: /* NBSP */
! 2350: case 0x1680: /* OGHAM SPACE MARK */
! 2351: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 2352: case 0x2000: /* EN QUAD */
! 2353: case 0x2001: /* EM QUAD */
! 2354: case 0x2002: /* EN SPACE */
! 2355: case 0x2003: /* EM SPACE */
! 2356: case 0x2004: /* THREE-PER-EM SPACE */
! 2357: case 0x2005: /* FOUR-PER-EM SPACE */
! 2358: case 0x2006: /* SIX-PER-EM SPACE */
! 2359: case 0x2007: /* FIGURE SPACE */
! 2360: case 0x2008: /* PUNCTUATION SPACE */
! 2361: case 0x2009: /* THIN SPACE */
! 2362: case 0x200A: /* HAIR SPACE */
! 2363: case 0x202f: /* NARROW NO-BREAK SPACE */
! 2364: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 2365: case 0x3000: /* IDEOGRAPHIC SPACE */
! 2366: RRETURN(MATCH_NOMATCH);
! 2367: }
! 2368: ecode++;
! 2369: break;
! 2370:
! 2371: case OP_HSPACE:
! 2372: if (eptr >= md->end_subject)
! 2373: {
! 2374: SCHECK_PARTIAL();
! 2375: RRETURN(MATCH_NOMATCH);
! 2376: }
! 2377: GETCHARINCTEST(c, eptr);
! 2378: switch(c)
! 2379: {
! 2380: default: RRETURN(MATCH_NOMATCH);
! 2381: case 0x09: /* HT */
! 2382: case 0x20: /* SPACE */
! 2383: case 0xa0: /* NBSP */
! 2384: case 0x1680: /* OGHAM SPACE MARK */
! 2385: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 2386: case 0x2000: /* EN QUAD */
! 2387: case 0x2001: /* EM QUAD */
! 2388: case 0x2002: /* EN SPACE */
! 2389: case 0x2003: /* EM SPACE */
! 2390: case 0x2004: /* THREE-PER-EM SPACE */
! 2391: case 0x2005: /* FOUR-PER-EM SPACE */
! 2392: case 0x2006: /* SIX-PER-EM SPACE */
! 2393: case 0x2007: /* FIGURE SPACE */
! 2394: case 0x2008: /* PUNCTUATION SPACE */
! 2395: case 0x2009: /* THIN SPACE */
! 2396: case 0x200A: /* HAIR SPACE */
! 2397: case 0x202f: /* NARROW NO-BREAK SPACE */
! 2398: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 2399: case 0x3000: /* IDEOGRAPHIC SPACE */
! 2400: break;
! 2401: }
! 2402: ecode++;
! 2403: break;
! 2404:
! 2405: case OP_NOT_VSPACE:
! 2406: if (eptr >= md->end_subject)
! 2407: {
! 2408: SCHECK_PARTIAL();
! 2409: RRETURN(MATCH_NOMATCH);
! 2410: }
! 2411: GETCHARINCTEST(c, eptr);
! 2412: switch(c)
! 2413: {
! 2414: default: break;
! 2415: case 0x0a: /* LF */
! 2416: case 0x0b: /* VT */
! 2417: case 0x0c: /* FF */
! 2418: case 0x0d: /* CR */
! 2419: case 0x85: /* NEL */
! 2420: case 0x2028: /* LINE SEPARATOR */
! 2421: case 0x2029: /* PARAGRAPH SEPARATOR */
! 2422: RRETURN(MATCH_NOMATCH);
! 2423: }
! 2424: ecode++;
! 2425: break;
! 2426:
! 2427: case OP_VSPACE:
! 2428: if (eptr >= md->end_subject)
! 2429: {
! 2430: SCHECK_PARTIAL();
! 2431: RRETURN(MATCH_NOMATCH);
! 2432: }
! 2433: GETCHARINCTEST(c, eptr);
! 2434: switch(c)
! 2435: {
! 2436: default: RRETURN(MATCH_NOMATCH);
! 2437: case 0x0a: /* LF */
! 2438: case 0x0b: /* VT */
! 2439: case 0x0c: /* FF */
! 2440: case 0x0d: /* CR */
! 2441: case 0x85: /* NEL */
! 2442: case 0x2028: /* LINE SEPARATOR */
! 2443: case 0x2029: /* PARAGRAPH SEPARATOR */
! 2444: break;
! 2445: }
! 2446: ecode++;
! 2447: break;
! 2448:
! 2449: #ifdef SUPPORT_UCP
! 2450: /* Check the next character by Unicode property. We will get here only
! 2451: if the support is in the binary; otherwise a compile-time error occurs. */
! 2452:
! 2453: case OP_PROP:
! 2454: case OP_NOTPROP:
! 2455: if (eptr >= md->end_subject)
! 2456: {
! 2457: SCHECK_PARTIAL();
! 2458: RRETURN(MATCH_NOMATCH);
! 2459: }
! 2460: GETCHARINCTEST(c, eptr);
! 2461: {
! 2462: const ucd_record *prop = GET_UCD(c);
! 2463:
! 2464: switch(ecode[1])
! 2465: {
! 2466: case PT_ANY:
! 2467: if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
! 2468: break;
! 2469:
! 2470: case PT_LAMP:
! 2471: if ((prop->chartype == ucp_Lu ||
! 2472: prop->chartype == ucp_Ll ||
! 2473: prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
! 2474: RRETURN(MATCH_NOMATCH);
! 2475: break;
! 2476:
! 2477: case PT_GC:
! 2478: if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
! 2479: RRETURN(MATCH_NOMATCH);
! 2480: break;
! 2481:
! 2482: case PT_PC:
! 2483: if ((ecode[2] != prop->chartype) == (op == OP_PROP))
! 2484: RRETURN(MATCH_NOMATCH);
! 2485: break;
! 2486:
! 2487: case PT_SC:
! 2488: if ((ecode[2] != prop->script) == (op == OP_PROP))
! 2489: RRETURN(MATCH_NOMATCH);
! 2490: break;
! 2491:
! 2492: /* These are specials */
! 2493:
! 2494: case PT_ALNUM:
! 2495: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
! 2496: _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
! 2497: RRETURN(MATCH_NOMATCH);
! 2498: break;
! 2499:
! 2500: case PT_SPACE: /* Perl space */
! 2501: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
! 2502: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
! 2503: == (op == OP_NOTPROP))
! 2504: RRETURN(MATCH_NOMATCH);
! 2505: break;
! 2506:
! 2507: case PT_PXSPACE: /* POSIX space */
! 2508: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
! 2509: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
! 2510: c == CHAR_FF || c == CHAR_CR)
! 2511: == (op == OP_NOTPROP))
! 2512: RRETURN(MATCH_NOMATCH);
! 2513: break;
! 2514:
! 2515: case PT_WORD:
! 2516: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
! 2517: _pcre_ucp_gentype[prop->chartype] == ucp_N ||
! 2518: c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
! 2519: RRETURN(MATCH_NOMATCH);
! 2520: break;
! 2521:
! 2522: /* This should never occur */
! 2523:
! 2524: default:
! 2525: RRETURN(PCRE_ERROR_INTERNAL);
! 2526: }
! 2527:
! 2528: ecode += 3;
! 2529: }
! 2530: break;
! 2531:
! 2532: /* Match an extended Unicode sequence. We will get here only if the support
! 2533: is in the binary; otherwise a compile-time error occurs. */
! 2534:
! 2535: case OP_EXTUNI:
! 2536: if (eptr >= md->end_subject)
! 2537: {
! 2538: SCHECK_PARTIAL();
! 2539: RRETURN(MATCH_NOMATCH);
! 2540: }
! 2541: GETCHARINCTEST(c, eptr);
! 2542: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
! 2543: while (eptr < md->end_subject)
! 2544: {
! 2545: int len = 1;
! 2546: if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 2547: if (UCD_CATEGORY(c) != ucp_M) break;
! 2548: eptr += len;
! 2549: }
! 2550: ecode++;
! 2551: break;
! 2552: #endif
! 2553:
! 2554:
! 2555: /* Match a back reference, possibly repeatedly. Look past the end of the
! 2556: item to see if there is repeat information following. The code is similar
! 2557: to that for character classes, but repeated for efficiency. Then obey
! 2558: similar code to character type repeats - written out again for speed.
! 2559: However, if the referenced string is the empty string, always treat
! 2560: it as matched, any number of times (otherwise there could be infinite
! 2561: loops). */
! 2562:
! 2563: case OP_REF:
! 2564: case OP_REFI:
! 2565: caseless = op == OP_REFI;
! 2566: offset = GET2(ecode, 1) << 1; /* Doubled ref number */
! 2567: ecode += 3;
! 2568:
! 2569: /* If the reference is unset, there are two possibilities:
! 2570:
! 2571: (a) In the default, Perl-compatible state, set the length negative;
! 2572: this ensures that every attempt at a match fails. We can't just fail
! 2573: here, because of the possibility of quantifiers with zero minima.
! 2574:
! 2575: (b) If the JavaScript compatibility flag is set, set the length to zero
! 2576: so that the back reference matches an empty string.
! 2577:
! 2578: Otherwise, set the length to the length of what was matched by the
! 2579: referenced subpattern. */
! 2580:
! 2581: if (offset >= offset_top || md->offset_vector[offset] < 0)
! 2582: length = (md->jscript_compat)? 0 : -1;
! 2583: else
! 2584: length = md->offset_vector[offset+1] - md->offset_vector[offset];
! 2585:
! 2586: /* Set up for repetition, or handle the non-repeated case */
! 2587:
! 2588: switch (*ecode)
! 2589: {
! 2590: case OP_CRSTAR:
! 2591: case OP_CRMINSTAR:
! 2592: case OP_CRPLUS:
! 2593: case OP_CRMINPLUS:
! 2594: case OP_CRQUERY:
! 2595: case OP_CRMINQUERY:
! 2596: c = *ecode++ - OP_CRSTAR;
! 2597: minimize = (c & 1) != 0;
! 2598: min = rep_min[c]; /* Pick up values from tables; */
! 2599: max = rep_max[c]; /* zero for max => infinity */
! 2600: if (max == 0) max = INT_MAX;
! 2601: break;
! 2602:
! 2603: case OP_CRRANGE:
! 2604: case OP_CRMINRANGE:
! 2605: minimize = (*ecode == OP_CRMINRANGE);
! 2606: min = GET2(ecode, 1);
! 2607: max = GET2(ecode, 3);
! 2608: if (max == 0) max = INT_MAX;
! 2609: ecode += 5;
! 2610: break;
! 2611:
! 2612: default: /* No repeat follows */
! 2613: if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
! 2614: {
! 2615: CHECK_PARTIAL();
! 2616: RRETURN(MATCH_NOMATCH);
! 2617: }
! 2618: eptr += length;
! 2619: continue; /* With the main loop */
! 2620: }
! 2621:
! 2622: /* Handle repeated back references. If the length of the reference is
! 2623: zero, just continue with the main loop. */
! 2624:
! 2625: if (length == 0) continue;
! 2626:
! 2627: /* First, ensure the minimum number of matches are present. We get back
! 2628: the length of the reference string explicitly rather than passing the
! 2629: address of eptr, so that eptr can be a register variable. */
! 2630:
! 2631: for (i = 1; i <= min; i++)
! 2632: {
! 2633: int slength;
! 2634: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
! 2635: {
! 2636: CHECK_PARTIAL();
! 2637: RRETURN(MATCH_NOMATCH);
! 2638: }
! 2639: eptr += slength;
! 2640: }
! 2641:
! 2642: /* If min = max, continue at the same level without recursion.
! 2643: They are not both allowed to be zero. */
! 2644:
! 2645: if (min == max) continue;
! 2646:
! 2647: /* If minimizing, keep trying and advancing the pointer */
! 2648:
! 2649: if (minimize)
! 2650: {
! 2651: for (fi = min;; fi++)
! 2652: {
! 2653: int slength;
! 2654: RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
! 2655: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2656: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 2657: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
! 2658: {
! 2659: CHECK_PARTIAL();
! 2660: RRETURN(MATCH_NOMATCH);
! 2661: }
! 2662: eptr += slength;
! 2663: }
! 2664: /* Control never gets here */
! 2665: }
! 2666:
! 2667: /* If maximizing, find the longest string and work backwards */
! 2668:
! 2669: else
! 2670: {
! 2671: pp = eptr;
! 2672: for (i = min; i < max; i++)
! 2673: {
! 2674: int slength;
! 2675: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
! 2676: {
! 2677: CHECK_PARTIAL();
! 2678: break;
! 2679: }
! 2680: eptr += slength;
! 2681: }
! 2682: while (eptr >= pp)
! 2683: {
! 2684: RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
! 2685: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2686: eptr -= length;
! 2687: }
! 2688: RRETURN(MATCH_NOMATCH);
! 2689: }
! 2690: /* Control never gets here */
! 2691:
! 2692: /* Match a bit-mapped character class, possibly repeatedly. This op code is
! 2693: used when all the characters in the class have values in the range 0-255,
! 2694: and either the matching is caseful, or the characters are in the range
! 2695: 0-127 when UTF-8 processing is enabled. The only difference between
! 2696: OP_CLASS and OP_NCLASS occurs when a data character outside the range is
! 2697: encountered.
! 2698:
! 2699: First, look past the end of the item to see if there is repeat information
! 2700: following. Then obey similar code to character type repeats - written out
! 2701: again for speed. */
! 2702:
! 2703: case OP_NCLASS:
! 2704: case OP_CLASS:
! 2705: {
! 2706: data = ecode + 1; /* Save for matching */
! 2707: ecode += 33; /* Advance past the item */
! 2708:
! 2709: switch (*ecode)
! 2710: {
! 2711: case OP_CRSTAR:
! 2712: case OP_CRMINSTAR:
! 2713: case OP_CRPLUS:
! 2714: case OP_CRMINPLUS:
! 2715: case OP_CRQUERY:
! 2716: case OP_CRMINQUERY:
! 2717: c = *ecode++ - OP_CRSTAR;
! 2718: minimize = (c & 1) != 0;
! 2719: min = rep_min[c]; /* Pick up values from tables; */
! 2720: max = rep_max[c]; /* zero for max => infinity */
! 2721: if (max == 0) max = INT_MAX;
! 2722: break;
! 2723:
! 2724: case OP_CRRANGE:
! 2725: case OP_CRMINRANGE:
! 2726: minimize = (*ecode == OP_CRMINRANGE);
! 2727: min = GET2(ecode, 1);
! 2728: max = GET2(ecode, 3);
! 2729: if (max == 0) max = INT_MAX;
! 2730: ecode += 5;
! 2731: break;
! 2732:
! 2733: default: /* No repeat follows */
! 2734: min = max = 1;
! 2735: break;
! 2736: }
! 2737:
! 2738: /* First, ensure the minimum number of matches are present. */
! 2739:
! 2740: #ifdef SUPPORT_UTF8
! 2741: /* UTF-8 mode */
! 2742: if (utf8)
! 2743: {
! 2744: for (i = 1; i <= min; i++)
! 2745: {
! 2746: if (eptr >= md->end_subject)
! 2747: {
! 2748: SCHECK_PARTIAL();
! 2749: RRETURN(MATCH_NOMATCH);
! 2750: }
! 2751: GETCHARINC(c, eptr);
! 2752: if (c > 255)
! 2753: {
! 2754: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2755: }
! 2756: else
! 2757: {
! 2758: if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
! 2759: }
! 2760: }
! 2761: }
! 2762: else
! 2763: #endif
! 2764: /* Not UTF-8 mode */
! 2765: {
! 2766: for (i = 1; i <= min; i++)
! 2767: {
! 2768: if (eptr >= md->end_subject)
! 2769: {
! 2770: SCHECK_PARTIAL();
! 2771: RRETURN(MATCH_NOMATCH);
! 2772: }
! 2773: c = *eptr++;
! 2774: if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
! 2775: }
! 2776: }
! 2777:
! 2778: /* If max == min we can continue with the main loop without the
! 2779: need to recurse. */
! 2780:
! 2781: if (min == max) continue;
! 2782:
! 2783: /* If minimizing, keep testing the rest of the expression and advancing
! 2784: the pointer while it matches the class. */
! 2785:
! 2786: if (minimize)
! 2787: {
! 2788: #ifdef SUPPORT_UTF8
! 2789: /* UTF-8 mode */
! 2790: if (utf8)
! 2791: {
! 2792: for (fi = min;; fi++)
! 2793: {
! 2794: RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
! 2795: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2796: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 2797: if (eptr >= md->end_subject)
! 2798: {
! 2799: SCHECK_PARTIAL();
! 2800: RRETURN(MATCH_NOMATCH);
! 2801: }
! 2802: GETCHARINC(c, eptr);
! 2803: if (c > 255)
! 2804: {
! 2805: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2806: }
! 2807: else
! 2808: {
! 2809: if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
! 2810: }
! 2811: }
! 2812: }
! 2813: else
! 2814: #endif
! 2815: /* Not UTF-8 mode */
! 2816: {
! 2817: for (fi = min;; fi++)
! 2818: {
! 2819: RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
! 2820: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2821: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 2822: if (eptr >= md->end_subject)
! 2823: {
! 2824: SCHECK_PARTIAL();
! 2825: RRETURN(MATCH_NOMATCH);
! 2826: }
! 2827: c = *eptr++;
! 2828: if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
! 2829: }
! 2830: }
! 2831: /* Control never gets here */
! 2832: }
! 2833:
! 2834: /* If maximizing, find the longest possible run, then work backwards. */
! 2835:
! 2836: else
! 2837: {
! 2838: pp = eptr;
! 2839:
! 2840: #ifdef SUPPORT_UTF8
! 2841: /* UTF-8 mode */
! 2842: if (utf8)
! 2843: {
! 2844: for (i = min; i < max; i++)
! 2845: {
! 2846: int len = 1;
! 2847: if (eptr >= md->end_subject)
! 2848: {
! 2849: SCHECK_PARTIAL();
! 2850: break;
! 2851: }
! 2852: GETCHARLEN(c, eptr, len);
! 2853: if (c > 255)
! 2854: {
! 2855: if (op == OP_CLASS) break;
! 2856: }
! 2857: else
! 2858: {
! 2859: if ((data[c/8] & (1 << (c&7))) == 0) break;
! 2860: }
! 2861: eptr += len;
! 2862: }
! 2863: for (;;)
! 2864: {
! 2865: RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
! 2866: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2867: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 2868: BACKCHAR(eptr);
! 2869: }
! 2870: }
! 2871: else
! 2872: #endif
! 2873: /* Not UTF-8 mode */
! 2874: {
! 2875: for (i = min; i < max; i++)
! 2876: {
! 2877: if (eptr >= md->end_subject)
! 2878: {
! 2879: SCHECK_PARTIAL();
! 2880: break;
! 2881: }
! 2882: c = *eptr;
! 2883: if ((data[c/8] & (1 << (c&7))) == 0) break;
! 2884: eptr++;
! 2885: }
! 2886: while (eptr >= pp)
! 2887: {
! 2888: RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
! 2889: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2890: eptr--;
! 2891: }
! 2892: }
! 2893:
! 2894: RRETURN(MATCH_NOMATCH);
! 2895: }
! 2896: }
! 2897: /* Control never gets here */
! 2898:
! 2899:
! 2900: /* Match an extended character class. This opcode is encountered only
! 2901: when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
! 2902: mode, because Unicode properties are supported in non-UTF-8 mode. */
! 2903:
! 2904: #ifdef SUPPORT_UTF8
! 2905: case OP_XCLASS:
! 2906: {
! 2907: data = ecode + 1 + LINK_SIZE; /* Save for matching */
! 2908: ecode += GET(ecode, 1); /* Advance past the item */
! 2909:
! 2910: switch (*ecode)
! 2911: {
! 2912: case OP_CRSTAR:
! 2913: case OP_CRMINSTAR:
! 2914: case OP_CRPLUS:
! 2915: case OP_CRMINPLUS:
! 2916: case OP_CRQUERY:
! 2917: case OP_CRMINQUERY:
! 2918: c = *ecode++ - OP_CRSTAR;
! 2919: minimize = (c & 1) != 0;
! 2920: min = rep_min[c]; /* Pick up values from tables; */
! 2921: max = rep_max[c]; /* zero for max => infinity */
! 2922: if (max == 0) max = INT_MAX;
! 2923: break;
! 2924:
! 2925: case OP_CRRANGE:
! 2926: case OP_CRMINRANGE:
! 2927: minimize = (*ecode == OP_CRMINRANGE);
! 2928: min = GET2(ecode, 1);
! 2929: max = GET2(ecode, 3);
! 2930: if (max == 0) max = INT_MAX;
! 2931: ecode += 5;
! 2932: break;
! 2933:
! 2934: default: /* No repeat follows */
! 2935: min = max = 1;
! 2936: break;
! 2937: }
! 2938:
! 2939: /* First, ensure the minimum number of matches are present. */
! 2940:
! 2941: for (i = 1; i <= min; i++)
! 2942: {
! 2943: if (eptr >= md->end_subject)
! 2944: {
! 2945: SCHECK_PARTIAL();
! 2946: RRETURN(MATCH_NOMATCH);
! 2947: }
! 2948: GETCHARINCTEST(c, eptr);
! 2949: if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
! 2950: }
! 2951:
! 2952: /* If max == min we can continue with the main loop without the
! 2953: need to recurse. */
! 2954:
! 2955: if (min == max) continue;
! 2956:
! 2957: /* If minimizing, keep testing the rest of the expression and advancing
! 2958: the pointer while it matches the class. */
! 2959:
! 2960: if (minimize)
! 2961: {
! 2962: for (fi = min;; fi++)
! 2963: {
! 2964: RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
! 2965: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2966: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 2967: if (eptr >= md->end_subject)
! 2968: {
! 2969: SCHECK_PARTIAL();
! 2970: RRETURN(MATCH_NOMATCH);
! 2971: }
! 2972: GETCHARINCTEST(c, eptr);
! 2973: if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
! 2974: }
! 2975: /* Control never gets here */
! 2976: }
! 2977:
! 2978: /* If maximizing, find the longest possible run, then work backwards. */
! 2979:
! 2980: else
! 2981: {
! 2982: pp = eptr;
! 2983: for (i = min; i < max; i++)
! 2984: {
! 2985: int len = 1;
! 2986: if (eptr >= md->end_subject)
! 2987: {
! 2988: SCHECK_PARTIAL();
! 2989: break;
! 2990: }
! 2991: GETCHARLENTEST(c, eptr, len);
! 2992: if (!_pcre_xclass(c, data)) break;
! 2993: eptr += len;
! 2994: }
! 2995: for(;;)
! 2996: {
! 2997: RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
! 2998: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2999: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 3000: if (utf8) BACKCHAR(eptr);
! 3001: }
! 3002: RRETURN(MATCH_NOMATCH);
! 3003: }
! 3004:
! 3005: /* Control never gets here */
! 3006: }
! 3007: #endif /* End of XCLASS */
! 3008:
! 3009: /* Match a single character, casefully */
! 3010:
! 3011: case OP_CHAR:
! 3012: #ifdef SUPPORT_UTF8
! 3013: if (utf8)
! 3014: {
! 3015: length = 1;
! 3016: ecode++;
! 3017: GETCHARLEN(fc, ecode, length);
! 3018: if (length > md->end_subject - eptr)
! 3019: {
! 3020: CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
! 3021: RRETURN(MATCH_NOMATCH);
! 3022: }
! 3023: while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
! 3024: }
! 3025: else
! 3026: #endif
! 3027:
! 3028: /* Non-UTF-8 mode */
! 3029: {
! 3030: if (md->end_subject - eptr < 1)
! 3031: {
! 3032: SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
! 3033: RRETURN(MATCH_NOMATCH);
! 3034: }
! 3035: if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
! 3036: ecode += 2;
! 3037: }
! 3038: break;
! 3039:
! 3040: /* Match a single character, caselessly. If we are at the end of the
! 3041: subject, give up immediately. */
! 3042:
! 3043: case OP_CHARI:
! 3044: if (eptr >= md->end_subject)
! 3045: {
! 3046: SCHECK_PARTIAL();
! 3047: RRETURN(MATCH_NOMATCH);
! 3048: }
! 3049:
! 3050: #ifdef SUPPORT_UTF8
! 3051: if (utf8)
! 3052: {
! 3053: length = 1;
! 3054: ecode++;
! 3055: GETCHARLEN(fc, ecode, length);
! 3056:
! 3057: /* If the pattern character's value is < 128, we have only one byte, and
! 3058: we know that its other case must also be one byte long, so we can use the
! 3059: fast lookup table. We know that there is at least one byte left in the
! 3060: subject. */
! 3061:
! 3062: if (fc < 128)
! 3063: {
! 3064: if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3065: }
! 3066:
! 3067: /* Otherwise we must pick up the subject character. Note that we cannot
! 3068: use the value of "length" to check for sufficient bytes left, because the
! 3069: other case of the character may have more or fewer bytes. */
! 3070:
! 3071: else
! 3072: {
! 3073: unsigned int dc;
! 3074: GETCHARINC(dc, eptr);
! 3075: ecode += length;
! 3076:
! 3077: /* If we have Unicode property support, we can use it to test the other
! 3078: case of the character, if there is one. */
! 3079:
! 3080: if (fc != dc)
! 3081: {
! 3082: #ifdef SUPPORT_UCP
! 3083: if (dc != UCD_OTHERCASE(fc))
! 3084: #endif
! 3085: RRETURN(MATCH_NOMATCH);
! 3086: }
! 3087: }
! 3088: }
! 3089: else
! 3090: #endif /* SUPPORT_UTF8 */
! 3091:
! 3092: /* Non-UTF-8 mode */
! 3093: {
! 3094: if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3095: ecode += 2;
! 3096: }
! 3097: break;
! 3098:
! 3099: /* Match a single character repeatedly. */
! 3100:
! 3101: case OP_EXACT:
! 3102: case OP_EXACTI:
! 3103: min = max = GET2(ecode, 1);
! 3104: ecode += 3;
! 3105: goto REPEATCHAR;
! 3106:
! 3107: case OP_POSUPTO:
! 3108: case OP_POSUPTOI:
! 3109: possessive = TRUE;
! 3110: /* Fall through */
! 3111:
! 3112: case OP_UPTO:
! 3113: case OP_UPTOI:
! 3114: case OP_MINUPTO:
! 3115: case OP_MINUPTOI:
! 3116: min = 0;
! 3117: max = GET2(ecode, 1);
! 3118: minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
! 3119: ecode += 3;
! 3120: goto REPEATCHAR;
! 3121:
! 3122: case OP_POSSTAR:
! 3123: case OP_POSSTARI:
! 3124: possessive = TRUE;
! 3125: min = 0;
! 3126: max = INT_MAX;
! 3127: ecode++;
! 3128: goto REPEATCHAR;
! 3129:
! 3130: case OP_POSPLUS:
! 3131: case OP_POSPLUSI:
! 3132: possessive = TRUE;
! 3133: min = 1;
! 3134: max = INT_MAX;
! 3135: ecode++;
! 3136: goto REPEATCHAR;
! 3137:
! 3138: case OP_POSQUERY:
! 3139: case OP_POSQUERYI:
! 3140: possessive = TRUE;
! 3141: min = 0;
! 3142: max = 1;
! 3143: ecode++;
! 3144: goto REPEATCHAR;
! 3145:
! 3146: case OP_STAR:
! 3147: case OP_STARI:
! 3148: case OP_MINSTAR:
! 3149: case OP_MINSTARI:
! 3150: case OP_PLUS:
! 3151: case OP_PLUSI:
! 3152: case OP_MINPLUS:
! 3153: case OP_MINPLUSI:
! 3154: case OP_QUERY:
! 3155: case OP_QUERYI:
! 3156: case OP_MINQUERY:
! 3157: case OP_MINQUERYI:
! 3158: c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
! 3159: minimize = (c & 1) != 0;
! 3160: min = rep_min[c]; /* Pick up values from tables; */
! 3161: max = rep_max[c]; /* zero for max => infinity */
! 3162: if (max == 0) max = INT_MAX;
! 3163:
! 3164: /* Common code for all repeated single-character matches. */
! 3165:
! 3166: REPEATCHAR:
! 3167: #ifdef SUPPORT_UTF8
! 3168: if (utf8)
! 3169: {
! 3170: length = 1;
! 3171: charptr = ecode;
! 3172: GETCHARLEN(fc, ecode, length);
! 3173: ecode += length;
! 3174:
! 3175: /* Handle multibyte character matching specially here. There is
! 3176: support for caseless matching if UCP support is present. */
! 3177:
! 3178: if (length > 1)
! 3179: {
! 3180: #ifdef SUPPORT_UCP
! 3181: unsigned int othercase;
! 3182: if (op >= OP_STARI && /* Caseless */
! 3183: (othercase = UCD_OTHERCASE(fc)) != fc)
! 3184: oclength = _pcre_ord2utf8(othercase, occhars);
! 3185: else oclength = 0;
! 3186: #endif /* SUPPORT_UCP */
! 3187:
! 3188: for (i = 1; i <= min; i++)
! 3189: {
! 3190: if (eptr <= md->end_subject - length &&
! 3191: memcmp(eptr, charptr, length) == 0) eptr += length;
! 3192: #ifdef SUPPORT_UCP
! 3193: else if (oclength > 0 &&
! 3194: eptr <= md->end_subject - oclength &&
! 3195: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 3196: #endif /* SUPPORT_UCP */
! 3197: else
! 3198: {
! 3199: CHECK_PARTIAL();
! 3200: RRETURN(MATCH_NOMATCH);
! 3201: }
! 3202: }
! 3203:
! 3204: if (min == max) continue;
! 3205:
! 3206: if (minimize)
! 3207: {
! 3208: for (fi = min;; fi++)
! 3209: {
! 3210: RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
! 3211: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3212: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3213: if (eptr <= md->end_subject - length &&
! 3214: memcmp(eptr, charptr, length) == 0) eptr += length;
! 3215: #ifdef SUPPORT_UCP
! 3216: else if (oclength > 0 &&
! 3217: eptr <= md->end_subject - oclength &&
! 3218: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 3219: #endif /* SUPPORT_UCP */
! 3220: else
! 3221: {
! 3222: CHECK_PARTIAL();
! 3223: RRETURN(MATCH_NOMATCH);
! 3224: }
! 3225: }
! 3226: /* Control never gets here */
! 3227: }
! 3228:
! 3229: else /* Maximize */
! 3230: {
! 3231: pp = eptr;
! 3232: for (i = min; i < max; i++)
! 3233: {
! 3234: if (eptr <= md->end_subject - length &&
! 3235: memcmp(eptr, charptr, length) == 0) eptr += length;
! 3236: #ifdef SUPPORT_UCP
! 3237: else if (oclength > 0 &&
! 3238: eptr <= md->end_subject - oclength &&
! 3239: memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
! 3240: #endif /* SUPPORT_UCP */
! 3241: else
! 3242: {
! 3243: CHECK_PARTIAL();
! 3244: break;
! 3245: }
! 3246: }
! 3247:
! 3248: if (possessive) continue;
! 3249:
! 3250: for(;;)
! 3251: {
! 3252: RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
! 3253: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3254: if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
! 3255: #ifdef SUPPORT_UCP
! 3256: eptr--;
! 3257: BACKCHAR(eptr);
! 3258: #else /* without SUPPORT_UCP */
! 3259: eptr -= length;
! 3260: #endif /* SUPPORT_UCP */
! 3261: }
! 3262: }
! 3263: /* Control never gets here */
! 3264: }
! 3265:
! 3266: /* If the length of a UTF-8 character is 1, we fall through here, and
! 3267: obey the code as for non-UTF-8 characters below, though in this case the
! 3268: value of fc will always be < 128. */
! 3269: }
! 3270: else
! 3271: #endif /* SUPPORT_UTF8 */
! 3272:
! 3273: /* When not in UTF-8 mode, load a single-byte character. */
! 3274:
! 3275: fc = *ecode++;
! 3276:
! 3277: /* The value of fc at this point is always less than 256, though we may or
! 3278: may not be in UTF-8 mode. The code is duplicated for the caseless and
! 3279: caseful cases, for speed, since matching characters is likely to be quite
! 3280: common. First, ensure the minimum number of matches are present. If min =
! 3281: max, continue at the same level without recursing. Otherwise, if
! 3282: minimizing, keep trying the rest of the expression and advancing one
! 3283: matching character if failing, up to the maximum. Alternatively, if
! 3284: maximizing, find the maximum number of characters and work backwards. */
! 3285:
! 3286: DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
! 3287: max, eptr));
! 3288:
! 3289: if (op >= OP_STARI) /* Caseless */
! 3290: {
! 3291: fc = md->lcc[fc];
! 3292: for (i = 1; i <= min; i++)
! 3293: {
! 3294: if (eptr >= md->end_subject)
! 3295: {
! 3296: SCHECK_PARTIAL();
! 3297: RRETURN(MATCH_NOMATCH);
! 3298: }
! 3299: if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3300: }
! 3301: if (min == max) continue;
! 3302: if (minimize)
! 3303: {
! 3304: for (fi = min;; fi++)
! 3305: {
! 3306: RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
! 3307: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3308: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3309: if (eptr >= md->end_subject)
! 3310: {
! 3311: SCHECK_PARTIAL();
! 3312: RRETURN(MATCH_NOMATCH);
! 3313: }
! 3314: if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3315: }
! 3316: /* Control never gets here */
! 3317: }
! 3318: else /* Maximize */
! 3319: {
! 3320: pp = eptr;
! 3321: for (i = min; i < max; i++)
! 3322: {
! 3323: if (eptr >= md->end_subject)
! 3324: {
! 3325: SCHECK_PARTIAL();
! 3326: break;
! 3327: }
! 3328: if (fc != md->lcc[*eptr]) break;
! 3329: eptr++;
! 3330: }
! 3331:
! 3332: if (possessive) continue;
! 3333:
! 3334: while (eptr >= pp)
! 3335: {
! 3336: RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
! 3337: eptr--;
! 3338: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3339: }
! 3340: RRETURN(MATCH_NOMATCH);
! 3341: }
! 3342: /* Control never gets here */
! 3343: }
! 3344:
! 3345: /* Caseful comparisons (includes all multi-byte characters) */
! 3346:
! 3347: else
! 3348: {
! 3349: for (i = 1; i <= min; i++)
! 3350: {
! 3351: if (eptr >= md->end_subject)
! 3352: {
! 3353: SCHECK_PARTIAL();
! 3354: RRETURN(MATCH_NOMATCH);
! 3355: }
! 3356: if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
! 3357: }
! 3358:
! 3359: if (min == max) continue;
! 3360:
! 3361: if (minimize)
! 3362: {
! 3363: for (fi = min;; fi++)
! 3364: {
! 3365: RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
! 3366: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3367: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3368: if (eptr >= md->end_subject)
! 3369: {
! 3370: SCHECK_PARTIAL();
! 3371: RRETURN(MATCH_NOMATCH);
! 3372: }
! 3373: if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
! 3374: }
! 3375: /* Control never gets here */
! 3376: }
! 3377: else /* Maximize */
! 3378: {
! 3379: pp = eptr;
! 3380: for (i = min; i < max; i++)
! 3381: {
! 3382: if (eptr >= md->end_subject)
! 3383: {
! 3384: SCHECK_PARTIAL();
! 3385: break;
! 3386: }
! 3387: if (fc != *eptr) break;
! 3388: eptr++;
! 3389: }
! 3390: if (possessive) continue;
! 3391:
! 3392: while (eptr >= pp)
! 3393: {
! 3394: RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
! 3395: eptr--;
! 3396: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3397: }
! 3398: RRETURN(MATCH_NOMATCH);
! 3399: }
! 3400: }
! 3401: /* Control never gets here */
! 3402:
! 3403: /* Match a negated single one-byte character. The character we are
! 3404: checking can be multibyte. */
! 3405:
! 3406: case OP_NOT:
! 3407: case OP_NOTI:
! 3408: if (eptr >= md->end_subject)
! 3409: {
! 3410: SCHECK_PARTIAL();
! 3411: RRETURN(MATCH_NOMATCH);
! 3412: }
! 3413: ecode++;
! 3414: GETCHARINCTEST(c, eptr);
! 3415: if (op == OP_NOTI) /* The caseless case */
! 3416: {
! 3417: #ifdef SUPPORT_UTF8
! 3418: if (c < 256)
! 3419: #endif
! 3420: c = md->lcc[c];
! 3421: if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
! 3422: }
! 3423: else /* Caseful */
! 3424: {
! 3425: if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
! 3426: }
! 3427: break;
! 3428:
! 3429: /* Match a negated single one-byte character repeatedly. This is almost a
! 3430: repeat of the code for a repeated single character, but I haven't found a
! 3431: nice way of commoning these up that doesn't require a test of the
! 3432: positive/negative option for each character match. Maybe that wouldn't add
! 3433: very much to the time taken, but character matching *is* what this is all
! 3434: about... */
! 3435:
! 3436: case OP_NOTEXACT:
! 3437: case OP_NOTEXACTI:
! 3438: min = max = GET2(ecode, 1);
! 3439: ecode += 3;
! 3440: goto REPEATNOTCHAR;
! 3441:
! 3442: case OP_NOTUPTO:
! 3443: case OP_NOTUPTOI:
! 3444: case OP_NOTMINUPTO:
! 3445: case OP_NOTMINUPTOI:
! 3446: min = 0;
! 3447: max = GET2(ecode, 1);
! 3448: minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
! 3449: ecode += 3;
! 3450: goto REPEATNOTCHAR;
! 3451:
! 3452: case OP_NOTPOSSTAR:
! 3453: case OP_NOTPOSSTARI:
! 3454: possessive = TRUE;
! 3455: min = 0;
! 3456: max = INT_MAX;
! 3457: ecode++;
! 3458: goto REPEATNOTCHAR;
! 3459:
! 3460: case OP_NOTPOSPLUS:
! 3461: case OP_NOTPOSPLUSI:
! 3462: possessive = TRUE;
! 3463: min = 1;
! 3464: max = INT_MAX;
! 3465: ecode++;
! 3466: goto REPEATNOTCHAR;
! 3467:
! 3468: case OP_NOTPOSQUERY:
! 3469: case OP_NOTPOSQUERYI:
! 3470: possessive = TRUE;
! 3471: min = 0;
! 3472: max = 1;
! 3473: ecode++;
! 3474: goto REPEATNOTCHAR;
! 3475:
! 3476: case OP_NOTPOSUPTO:
! 3477: case OP_NOTPOSUPTOI:
! 3478: possessive = TRUE;
! 3479: min = 0;
! 3480: max = GET2(ecode, 1);
! 3481: ecode += 3;
! 3482: goto REPEATNOTCHAR;
! 3483:
! 3484: case OP_NOTSTAR:
! 3485: case OP_NOTSTARI:
! 3486: case OP_NOTMINSTAR:
! 3487: case OP_NOTMINSTARI:
! 3488: case OP_NOTPLUS:
! 3489: case OP_NOTPLUSI:
! 3490: case OP_NOTMINPLUS:
! 3491: case OP_NOTMINPLUSI:
! 3492: case OP_NOTQUERY:
! 3493: case OP_NOTQUERYI:
! 3494: case OP_NOTMINQUERY:
! 3495: case OP_NOTMINQUERYI:
! 3496: c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
! 3497: minimize = (c & 1) != 0;
! 3498: min = rep_min[c]; /* Pick up values from tables; */
! 3499: max = rep_max[c]; /* zero for max => infinity */
! 3500: if (max == 0) max = INT_MAX;
! 3501:
! 3502: /* Common code for all repeated single-byte matches. */
! 3503:
! 3504: REPEATNOTCHAR:
! 3505: fc = *ecode++;
! 3506:
! 3507: /* The code is duplicated for the caseless and caseful cases, for speed,
! 3508: since matching characters is likely to be quite common. First, ensure the
! 3509: minimum number of matches are present. If min = max, continue at the same
! 3510: level without recursing. Otherwise, if minimizing, keep trying the rest of
! 3511: the expression and advancing one matching character if failing, up to the
! 3512: maximum. Alternatively, if maximizing, find the maximum number of
! 3513: characters and work backwards. */
! 3514:
! 3515: DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
! 3516: max, eptr));
! 3517:
! 3518: if (op >= OP_NOTSTARI) /* Caseless */
! 3519: {
! 3520: fc = md->lcc[fc];
! 3521:
! 3522: #ifdef SUPPORT_UTF8
! 3523: /* UTF-8 mode */
! 3524: if (utf8)
! 3525: {
! 3526: register unsigned int d;
! 3527: for (i = 1; i <= min; i++)
! 3528: {
! 3529: if (eptr >= md->end_subject)
! 3530: {
! 3531: SCHECK_PARTIAL();
! 3532: RRETURN(MATCH_NOMATCH);
! 3533: }
! 3534: GETCHARINC(d, eptr);
! 3535: if (d < 256) d = md->lcc[d];
! 3536: if (fc == d) RRETURN(MATCH_NOMATCH);
! 3537: }
! 3538: }
! 3539: else
! 3540: #endif
! 3541:
! 3542: /* Not UTF-8 mode */
! 3543: {
! 3544: for (i = 1; i <= min; i++)
! 3545: {
! 3546: if (eptr >= md->end_subject)
! 3547: {
! 3548: SCHECK_PARTIAL();
! 3549: RRETURN(MATCH_NOMATCH);
! 3550: }
! 3551: if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3552: }
! 3553: }
! 3554:
! 3555: if (min == max) continue;
! 3556:
! 3557: if (minimize)
! 3558: {
! 3559: #ifdef SUPPORT_UTF8
! 3560: /* UTF-8 mode */
! 3561: if (utf8)
! 3562: {
! 3563: register unsigned int d;
! 3564: for (fi = min;; fi++)
! 3565: {
! 3566: RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
! 3567: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3568: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3569: if (eptr >= md->end_subject)
! 3570: {
! 3571: SCHECK_PARTIAL();
! 3572: RRETURN(MATCH_NOMATCH);
! 3573: }
! 3574: GETCHARINC(d, eptr);
! 3575: if (d < 256) d = md->lcc[d];
! 3576: if (fc == d) RRETURN(MATCH_NOMATCH);
! 3577: }
! 3578: }
! 3579: else
! 3580: #endif
! 3581: /* Not UTF-8 mode */
! 3582: {
! 3583: for (fi = min;; fi++)
! 3584: {
! 3585: RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
! 3586: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3587: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3588: if (eptr >= md->end_subject)
! 3589: {
! 3590: SCHECK_PARTIAL();
! 3591: RRETURN(MATCH_NOMATCH);
! 3592: }
! 3593: if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
! 3594: }
! 3595: }
! 3596: /* Control never gets here */
! 3597: }
! 3598:
! 3599: /* Maximize case */
! 3600:
! 3601: else
! 3602: {
! 3603: pp = eptr;
! 3604:
! 3605: #ifdef SUPPORT_UTF8
! 3606: /* UTF-8 mode */
! 3607: if (utf8)
! 3608: {
! 3609: register unsigned int d;
! 3610: for (i = min; i < max; i++)
! 3611: {
! 3612: int len = 1;
! 3613: if (eptr >= md->end_subject)
! 3614: {
! 3615: SCHECK_PARTIAL();
! 3616: break;
! 3617: }
! 3618: GETCHARLEN(d, eptr, len);
! 3619: if (d < 256) d = md->lcc[d];
! 3620: if (fc == d) break;
! 3621: eptr += len;
! 3622: }
! 3623: if (possessive) continue;
! 3624: for(;;)
! 3625: {
! 3626: RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
! 3627: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3628: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 3629: BACKCHAR(eptr);
! 3630: }
! 3631: }
! 3632: else
! 3633: #endif
! 3634: /* Not UTF-8 mode */
! 3635: {
! 3636: for (i = min; i < max; i++)
! 3637: {
! 3638: if (eptr >= md->end_subject)
! 3639: {
! 3640: SCHECK_PARTIAL();
! 3641: break;
! 3642: }
! 3643: if (fc == md->lcc[*eptr]) break;
! 3644: eptr++;
! 3645: }
! 3646: if (possessive) continue;
! 3647: while (eptr >= pp)
! 3648: {
! 3649: RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
! 3650: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3651: eptr--;
! 3652: }
! 3653: }
! 3654:
! 3655: RRETURN(MATCH_NOMATCH);
! 3656: }
! 3657: /* Control never gets here */
! 3658: }
! 3659:
! 3660: /* Caseful comparisons */
! 3661:
! 3662: else
! 3663: {
! 3664: #ifdef SUPPORT_UTF8
! 3665: /* UTF-8 mode */
! 3666: if (utf8)
! 3667: {
! 3668: register unsigned int d;
! 3669: for (i = 1; i <= min; i++)
! 3670: {
! 3671: if (eptr >= md->end_subject)
! 3672: {
! 3673: SCHECK_PARTIAL();
! 3674: RRETURN(MATCH_NOMATCH);
! 3675: }
! 3676: GETCHARINC(d, eptr);
! 3677: if (fc == d) RRETURN(MATCH_NOMATCH);
! 3678: }
! 3679: }
! 3680: else
! 3681: #endif
! 3682: /* Not UTF-8 mode */
! 3683: {
! 3684: for (i = 1; i <= min; i++)
! 3685: {
! 3686: if (eptr >= md->end_subject)
! 3687: {
! 3688: SCHECK_PARTIAL();
! 3689: RRETURN(MATCH_NOMATCH);
! 3690: }
! 3691: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
! 3692: }
! 3693: }
! 3694:
! 3695: if (min == max) continue;
! 3696:
! 3697: if (minimize)
! 3698: {
! 3699: #ifdef SUPPORT_UTF8
! 3700: /* UTF-8 mode */
! 3701: if (utf8)
! 3702: {
! 3703: register unsigned int d;
! 3704: for (fi = min;; fi++)
! 3705: {
! 3706: RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
! 3707: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3708: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3709: if (eptr >= md->end_subject)
! 3710: {
! 3711: SCHECK_PARTIAL();
! 3712: RRETURN(MATCH_NOMATCH);
! 3713: }
! 3714: GETCHARINC(d, eptr);
! 3715: if (fc == d) RRETURN(MATCH_NOMATCH);
! 3716: }
! 3717: }
! 3718: else
! 3719: #endif
! 3720: /* Not UTF-8 mode */
! 3721: {
! 3722: for (fi = min;; fi++)
! 3723: {
! 3724: RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
! 3725: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3726: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 3727: if (eptr >= md->end_subject)
! 3728: {
! 3729: SCHECK_PARTIAL();
! 3730: RRETURN(MATCH_NOMATCH);
! 3731: }
! 3732: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
! 3733: }
! 3734: }
! 3735: /* Control never gets here */
! 3736: }
! 3737:
! 3738: /* Maximize case */
! 3739:
! 3740: else
! 3741: {
! 3742: pp = eptr;
! 3743:
! 3744: #ifdef SUPPORT_UTF8
! 3745: /* UTF-8 mode */
! 3746: if (utf8)
! 3747: {
! 3748: register unsigned int d;
! 3749: for (i = min; i < max; i++)
! 3750: {
! 3751: int len = 1;
! 3752: if (eptr >= md->end_subject)
! 3753: {
! 3754: SCHECK_PARTIAL();
! 3755: break;
! 3756: }
! 3757: GETCHARLEN(d, eptr, len);
! 3758: if (fc == d) break;
! 3759: eptr += len;
! 3760: }
! 3761: if (possessive) continue;
! 3762: for(;;)
! 3763: {
! 3764: RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
! 3765: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3766: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 3767: BACKCHAR(eptr);
! 3768: }
! 3769: }
! 3770: else
! 3771: #endif
! 3772: /* Not UTF-8 mode */
! 3773: {
! 3774: for (i = min; i < max; i++)
! 3775: {
! 3776: if (eptr >= md->end_subject)
! 3777: {
! 3778: SCHECK_PARTIAL();
! 3779: break;
! 3780: }
! 3781: if (fc == *eptr) break;
! 3782: eptr++;
! 3783: }
! 3784: if (possessive) continue;
! 3785: while (eptr >= pp)
! 3786: {
! 3787: RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
! 3788: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 3789: eptr--;
! 3790: }
! 3791: }
! 3792:
! 3793: RRETURN(MATCH_NOMATCH);
! 3794: }
! 3795: }
! 3796: /* Control never gets here */
! 3797:
! 3798: /* Match a single character type repeatedly; several different opcodes
! 3799: share code. This is very similar to the code for single characters, but we
! 3800: repeat it in the interests of efficiency. */
! 3801:
! 3802: case OP_TYPEEXACT:
! 3803: min = max = GET2(ecode, 1);
! 3804: minimize = TRUE;
! 3805: ecode += 3;
! 3806: goto REPEATTYPE;
! 3807:
! 3808: case OP_TYPEUPTO:
! 3809: case OP_TYPEMINUPTO:
! 3810: min = 0;
! 3811: max = GET2(ecode, 1);
! 3812: minimize = *ecode == OP_TYPEMINUPTO;
! 3813: ecode += 3;
! 3814: goto REPEATTYPE;
! 3815:
! 3816: case OP_TYPEPOSSTAR:
! 3817: possessive = TRUE;
! 3818: min = 0;
! 3819: max = INT_MAX;
! 3820: ecode++;
! 3821: goto REPEATTYPE;
! 3822:
! 3823: case OP_TYPEPOSPLUS:
! 3824: possessive = TRUE;
! 3825: min = 1;
! 3826: max = INT_MAX;
! 3827: ecode++;
! 3828: goto REPEATTYPE;
! 3829:
! 3830: case OP_TYPEPOSQUERY:
! 3831: possessive = TRUE;
! 3832: min = 0;
! 3833: max = 1;
! 3834: ecode++;
! 3835: goto REPEATTYPE;
! 3836:
! 3837: case OP_TYPEPOSUPTO:
! 3838: possessive = TRUE;
! 3839: min = 0;
! 3840: max = GET2(ecode, 1);
! 3841: ecode += 3;
! 3842: goto REPEATTYPE;
! 3843:
! 3844: case OP_TYPESTAR:
! 3845: case OP_TYPEMINSTAR:
! 3846: case OP_TYPEPLUS:
! 3847: case OP_TYPEMINPLUS:
! 3848: case OP_TYPEQUERY:
! 3849: case OP_TYPEMINQUERY:
! 3850: c = *ecode++ - OP_TYPESTAR;
! 3851: minimize = (c & 1) != 0;
! 3852: min = rep_min[c]; /* Pick up values from tables; */
! 3853: max = rep_max[c]; /* zero for max => infinity */
! 3854: if (max == 0) max = INT_MAX;
! 3855:
! 3856: /* Common code for all repeated single character type matches. Note that
! 3857: in UTF-8 mode, '.' matches a character of any length, but for the other
! 3858: character types, the valid characters are all one-byte long. */
! 3859:
! 3860: REPEATTYPE:
! 3861: ctype = *ecode++; /* Code for the character type */
! 3862:
! 3863: #ifdef SUPPORT_UCP
! 3864: if (ctype == OP_PROP || ctype == OP_NOTPROP)
! 3865: {
! 3866: prop_fail_result = ctype == OP_NOTPROP;
! 3867: prop_type = *ecode++;
! 3868: prop_value = *ecode++;
! 3869: }
! 3870: else prop_type = -1;
! 3871: #endif
! 3872:
! 3873: /* First, ensure the minimum number of matches are present. Use inline
! 3874: code for maximizing the speed, and do the type test once at the start
! 3875: (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
! 3876: is tidier. Also separate the UCP code, which can be the same for both UTF-8
! 3877: and single-bytes. */
! 3878:
! 3879: if (min > 0)
! 3880: {
! 3881: #ifdef SUPPORT_UCP
! 3882: if (prop_type >= 0)
! 3883: {
! 3884: switch(prop_type)
! 3885: {
! 3886: case PT_ANY:
! 3887: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
! 3888: for (i = 1; i <= min; i++)
! 3889: {
! 3890: if (eptr >= md->end_subject)
! 3891: {
! 3892: SCHECK_PARTIAL();
! 3893: RRETURN(MATCH_NOMATCH);
! 3894: }
! 3895: GETCHARINCTEST(c, eptr);
! 3896: }
! 3897: break;
! 3898:
! 3899: case PT_LAMP:
! 3900: for (i = 1; i <= min; i++)
! 3901: {
! 3902: int chartype;
! 3903: if (eptr >= md->end_subject)
! 3904: {
! 3905: SCHECK_PARTIAL();
! 3906: RRETURN(MATCH_NOMATCH);
! 3907: }
! 3908: GETCHARINCTEST(c, eptr);
! 3909: chartype = UCD_CHARTYPE(c);
! 3910: if ((chartype == ucp_Lu ||
! 3911: chartype == ucp_Ll ||
! 3912: chartype == ucp_Lt) == prop_fail_result)
! 3913: RRETURN(MATCH_NOMATCH);
! 3914: }
! 3915: break;
! 3916:
! 3917: case PT_GC:
! 3918: for (i = 1; i <= min; i++)
! 3919: {
! 3920: if (eptr >= md->end_subject)
! 3921: {
! 3922: SCHECK_PARTIAL();
! 3923: RRETURN(MATCH_NOMATCH);
! 3924: }
! 3925: GETCHARINCTEST(c, eptr);
! 3926: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
! 3927: RRETURN(MATCH_NOMATCH);
! 3928: }
! 3929: break;
! 3930:
! 3931: case PT_PC:
! 3932: for (i = 1; i <= min; i++)
! 3933: {
! 3934: if (eptr >= md->end_subject)
! 3935: {
! 3936: SCHECK_PARTIAL();
! 3937: RRETURN(MATCH_NOMATCH);
! 3938: }
! 3939: GETCHARINCTEST(c, eptr);
! 3940: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
! 3941: RRETURN(MATCH_NOMATCH);
! 3942: }
! 3943: break;
! 3944:
! 3945: case PT_SC:
! 3946: for (i = 1; i <= min; i++)
! 3947: {
! 3948: if (eptr >= md->end_subject)
! 3949: {
! 3950: SCHECK_PARTIAL();
! 3951: RRETURN(MATCH_NOMATCH);
! 3952: }
! 3953: GETCHARINCTEST(c, eptr);
! 3954: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
! 3955: RRETURN(MATCH_NOMATCH);
! 3956: }
! 3957: break;
! 3958:
! 3959: case PT_ALNUM:
! 3960: for (i = 1; i <= min; i++)
! 3961: {
! 3962: int category;
! 3963: if (eptr >= md->end_subject)
! 3964: {
! 3965: SCHECK_PARTIAL();
! 3966: RRETURN(MATCH_NOMATCH);
! 3967: }
! 3968: GETCHARINCTEST(c, eptr);
! 3969: category = UCD_CATEGORY(c);
! 3970: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
! 3971: RRETURN(MATCH_NOMATCH);
! 3972: }
! 3973: break;
! 3974:
! 3975: case PT_SPACE: /* Perl space */
! 3976: for (i = 1; i <= min; i++)
! 3977: {
! 3978: if (eptr >= md->end_subject)
! 3979: {
! 3980: SCHECK_PARTIAL();
! 3981: RRETURN(MATCH_NOMATCH);
! 3982: }
! 3983: GETCHARINCTEST(c, eptr);
! 3984: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 3985: c == CHAR_FF || c == CHAR_CR)
! 3986: == prop_fail_result)
! 3987: RRETURN(MATCH_NOMATCH);
! 3988: }
! 3989: break;
! 3990:
! 3991: case PT_PXSPACE: /* POSIX space */
! 3992: for (i = 1; i <= min; i++)
! 3993: {
! 3994: if (eptr >= md->end_subject)
! 3995: {
! 3996: SCHECK_PARTIAL();
! 3997: RRETURN(MATCH_NOMATCH);
! 3998: }
! 3999: GETCHARINCTEST(c, eptr);
! 4000: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4001: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 4002: == prop_fail_result)
! 4003: RRETURN(MATCH_NOMATCH);
! 4004: }
! 4005: break;
! 4006:
! 4007: case PT_WORD:
! 4008: for (i = 1; i <= min; i++)
! 4009: {
! 4010: int category;
! 4011: if (eptr >= md->end_subject)
! 4012: {
! 4013: SCHECK_PARTIAL();
! 4014: RRETURN(MATCH_NOMATCH);
! 4015: }
! 4016: GETCHARINCTEST(c, eptr);
! 4017: category = UCD_CATEGORY(c);
! 4018: if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
! 4019: == prop_fail_result)
! 4020: RRETURN(MATCH_NOMATCH);
! 4021: }
! 4022: break;
! 4023:
! 4024: /* This should not occur */
! 4025:
! 4026: default:
! 4027: RRETURN(PCRE_ERROR_INTERNAL);
! 4028: }
! 4029: }
! 4030:
! 4031: /* Match extended Unicode sequences. We will get here only if the
! 4032: support is in the binary; otherwise a compile-time error occurs. */
! 4033:
! 4034: else if (ctype == OP_EXTUNI)
! 4035: {
! 4036: for (i = 1; i <= min; i++)
! 4037: {
! 4038: if (eptr >= md->end_subject)
! 4039: {
! 4040: SCHECK_PARTIAL();
! 4041: RRETURN(MATCH_NOMATCH);
! 4042: }
! 4043: GETCHARINCTEST(c, eptr);
! 4044: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
! 4045: while (eptr < md->end_subject)
! 4046: {
! 4047: int len = 1;
! 4048: if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 4049: if (UCD_CATEGORY(c) != ucp_M) break;
! 4050: eptr += len;
! 4051: }
! 4052: }
! 4053: }
! 4054:
! 4055: else
! 4056: #endif /* SUPPORT_UCP */
! 4057:
! 4058: /* Handle all other cases when the coding is UTF-8 */
! 4059:
! 4060: #ifdef SUPPORT_UTF8
! 4061: if (utf8) switch(ctype)
! 4062: {
! 4063: case OP_ANY:
! 4064: for (i = 1; i <= min; i++)
! 4065: {
! 4066: if (eptr >= md->end_subject)
! 4067: {
! 4068: SCHECK_PARTIAL();
! 4069: RRETURN(MATCH_NOMATCH);
! 4070: }
! 4071: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 4072: eptr++;
! 4073: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 4074: }
! 4075: break;
! 4076:
! 4077: case OP_ALLANY:
! 4078: for (i = 1; i <= min; i++)
! 4079: {
! 4080: if (eptr >= md->end_subject)
! 4081: {
! 4082: SCHECK_PARTIAL();
! 4083: RRETURN(MATCH_NOMATCH);
! 4084: }
! 4085: eptr++;
! 4086: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 4087: }
! 4088: break;
! 4089:
! 4090: case OP_ANYBYTE:
! 4091: if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
! 4092: eptr += min;
! 4093: break;
! 4094:
! 4095: case OP_ANYNL:
! 4096: for (i = 1; i <= min; i++)
! 4097: {
! 4098: if (eptr >= md->end_subject)
! 4099: {
! 4100: SCHECK_PARTIAL();
! 4101: RRETURN(MATCH_NOMATCH);
! 4102: }
! 4103: GETCHARINC(c, eptr);
! 4104: switch(c)
! 4105: {
! 4106: default: RRETURN(MATCH_NOMATCH);
! 4107:
! 4108: case 0x000d:
! 4109: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4110: break;
! 4111:
! 4112: case 0x000a:
! 4113: break;
! 4114:
! 4115: case 0x000b:
! 4116: case 0x000c:
! 4117: case 0x0085:
! 4118: case 0x2028:
! 4119: case 0x2029:
! 4120: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
! 4121: break;
! 4122: }
! 4123: }
! 4124: break;
! 4125:
! 4126: case OP_NOT_HSPACE:
! 4127: for (i = 1; i <= min; i++)
! 4128: {
! 4129: if (eptr >= md->end_subject)
! 4130: {
! 4131: SCHECK_PARTIAL();
! 4132: RRETURN(MATCH_NOMATCH);
! 4133: }
! 4134: GETCHARINC(c, eptr);
! 4135: switch(c)
! 4136: {
! 4137: default: break;
! 4138: case 0x09: /* HT */
! 4139: case 0x20: /* SPACE */
! 4140: case 0xa0: /* NBSP */
! 4141: case 0x1680: /* OGHAM SPACE MARK */
! 4142: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4143: case 0x2000: /* EN QUAD */
! 4144: case 0x2001: /* EM QUAD */
! 4145: case 0x2002: /* EN SPACE */
! 4146: case 0x2003: /* EM SPACE */
! 4147: case 0x2004: /* THREE-PER-EM SPACE */
! 4148: case 0x2005: /* FOUR-PER-EM SPACE */
! 4149: case 0x2006: /* SIX-PER-EM SPACE */
! 4150: case 0x2007: /* FIGURE SPACE */
! 4151: case 0x2008: /* PUNCTUATION SPACE */
! 4152: case 0x2009: /* THIN SPACE */
! 4153: case 0x200A: /* HAIR SPACE */
! 4154: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4155: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4156: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4157: RRETURN(MATCH_NOMATCH);
! 4158: }
! 4159: }
! 4160: break;
! 4161:
! 4162: case OP_HSPACE:
! 4163: for (i = 1; i <= min; i++)
! 4164: {
! 4165: if (eptr >= md->end_subject)
! 4166: {
! 4167: SCHECK_PARTIAL();
! 4168: RRETURN(MATCH_NOMATCH);
! 4169: }
! 4170: GETCHARINC(c, eptr);
! 4171: switch(c)
! 4172: {
! 4173: default: RRETURN(MATCH_NOMATCH);
! 4174: case 0x09: /* HT */
! 4175: case 0x20: /* SPACE */
! 4176: case 0xa0: /* NBSP */
! 4177: case 0x1680: /* OGHAM SPACE MARK */
! 4178: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4179: case 0x2000: /* EN QUAD */
! 4180: case 0x2001: /* EM QUAD */
! 4181: case 0x2002: /* EN SPACE */
! 4182: case 0x2003: /* EM SPACE */
! 4183: case 0x2004: /* THREE-PER-EM SPACE */
! 4184: case 0x2005: /* FOUR-PER-EM SPACE */
! 4185: case 0x2006: /* SIX-PER-EM SPACE */
! 4186: case 0x2007: /* FIGURE SPACE */
! 4187: case 0x2008: /* PUNCTUATION SPACE */
! 4188: case 0x2009: /* THIN SPACE */
! 4189: case 0x200A: /* HAIR SPACE */
! 4190: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4191: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4192: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4193: break;
! 4194: }
! 4195: }
! 4196: break;
! 4197:
! 4198: case OP_NOT_VSPACE:
! 4199: for (i = 1; i <= min; i++)
! 4200: {
! 4201: if (eptr >= md->end_subject)
! 4202: {
! 4203: SCHECK_PARTIAL();
! 4204: RRETURN(MATCH_NOMATCH);
! 4205: }
! 4206: GETCHARINC(c, eptr);
! 4207: switch(c)
! 4208: {
! 4209: default: break;
! 4210: case 0x0a: /* LF */
! 4211: case 0x0b: /* VT */
! 4212: case 0x0c: /* FF */
! 4213: case 0x0d: /* CR */
! 4214: case 0x85: /* NEL */
! 4215: case 0x2028: /* LINE SEPARATOR */
! 4216: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4217: RRETURN(MATCH_NOMATCH);
! 4218: }
! 4219: }
! 4220: break;
! 4221:
! 4222: case OP_VSPACE:
! 4223: for (i = 1; i <= min; i++)
! 4224: {
! 4225: if (eptr >= md->end_subject)
! 4226: {
! 4227: SCHECK_PARTIAL();
! 4228: RRETURN(MATCH_NOMATCH);
! 4229: }
! 4230: GETCHARINC(c, eptr);
! 4231: switch(c)
! 4232: {
! 4233: default: RRETURN(MATCH_NOMATCH);
! 4234: case 0x0a: /* LF */
! 4235: case 0x0b: /* VT */
! 4236: case 0x0c: /* FF */
! 4237: case 0x0d: /* CR */
! 4238: case 0x85: /* NEL */
! 4239: case 0x2028: /* LINE SEPARATOR */
! 4240: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4241: break;
! 4242: }
! 4243: }
! 4244: break;
! 4245:
! 4246: case OP_NOT_DIGIT:
! 4247: for (i = 1; i <= min; i++)
! 4248: {
! 4249: if (eptr >= md->end_subject)
! 4250: {
! 4251: SCHECK_PARTIAL();
! 4252: RRETURN(MATCH_NOMATCH);
! 4253: }
! 4254: GETCHARINC(c, eptr);
! 4255: if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
! 4256: RRETURN(MATCH_NOMATCH);
! 4257: }
! 4258: break;
! 4259:
! 4260: case OP_DIGIT:
! 4261: for (i = 1; i <= min; i++)
! 4262: {
! 4263: if (eptr >= md->end_subject)
! 4264: {
! 4265: SCHECK_PARTIAL();
! 4266: RRETURN(MATCH_NOMATCH);
! 4267: }
! 4268: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
! 4269: RRETURN(MATCH_NOMATCH);
! 4270: /* No need to skip more bytes - we know it's a 1-byte character */
! 4271: }
! 4272: break;
! 4273:
! 4274: case OP_NOT_WHITESPACE:
! 4275: for (i = 1; i <= min; i++)
! 4276: {
! 4277: if (eptr >= md->end_subject)
! 4278: {
! 4279: SCHECK_PARTIAL();
! 4280: RRETURN(MATCH_NOMATCH);
! 4281: }
! 4282: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
! 4283: RRETURN(MATCH_NOMATCH);
! 4284: while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
! 4285: }
! 4286: break;
! 4287:
! 4288: case OP_WHITESPACE:
! 4289: for (i = 1; i <= min; i++)
! 4290: {
! 4291: if (eptr >= md->end_subject)
! 4292: {
! 4293: SCHECK_PARTIAL();
! 4294: RRETURN(MATCH_NOMATCH);
! 4295: }
! 4296: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
! 4297: RRETURN(MATCH_NOMATCH);
! 4298: /* No need to skip more bytes - we know it's a 1-byte character */
! 4299: }
! 4300: break;
! 4301:
! 4302: case OP_NOT_WORDCHAR:
! 4303: for (i = 1; i <= min; i++)
! 4304: {
! 4305: if (eptr >= md->end_subject)
! 4306: {
! 4307: SCHECK_PARTIAL();
! 4308: RRETURN(MATCH_NOMATCH);
! 4309: }
! 4310: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
! 4311: RRETURN(MATCH_NOMATCH);
! 4312: while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
! 4313: }
! 4314: break;
! 4315:
! 4316: case OP_WORDCHAR:
! 4317: for (i = 1; i <= min; i++)
! 4318: {
! 4319: if (eptr >= md->end_subject)
! 4320: {
! 4321: SCHECK_PARTIAL();
! 4322: RRETURN(MATCH_NOMATCH);
! 4323: }
! 4324: if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
! 4325: RRETURN(MATCH_NOMATCH);
! 4326: /* No need to skip more bytes - we know it's a 1-byte character */
! 4327: }
! 4328: break;
! 4329:
! 4330: default:
! 4331: RRETURN(PCRE_ERROR_INTERNAL);
! 4332: } /* End switch(ctype) */
! 4333:
! 4334: else
! 4335: #endif /* SUPPORT_UTF8 */
! 4336:
! 4337: /* Code for the non-UTF-8 case for minimum matching of operators other
! 4338: than OP_PROP and OP_NOTPROP. */
! 4339:
! 4340: switch(ctype)
! 4341: {
! 4342: case OP_ANY:
! 4343: for (i = 1; i <= min; i++)
! 4344: {
! 4345: if (eptr >= md->end_subject)
! 4346: {
! 4347: SCHECK_PARTIAL();
! 4348: RRETURN(MATCH_NOMATCH);
! 4349: }
! 4350: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 4351: eptr++;
! 4352: }
! 4353: break;
! 4354:
! 4355: case OP_ALLANY:
! 4356: if (eptr > md->end_subject - min)
! 4357: {
! 4358: SCHECK_PARTIAL();
! 4359: RRETURN(MATCH_NOMATCH);
! 4360: }
! 4361: eptr += min;
! 4362: break;
! 4363:
! 4364: case OP_ANYBYTE:
! 4365: if (eptr > md->end_subject - min)
! 4366: {
! 4367: SCHECK_PARTIAL();
! 4368: RRETURN(MATCH_NOMATCH);
! 4369: }
! 4370: eptr += min;
! 4371: break;
! 4372:
! 4373: case OP_ANYNL:
! 4374: for (i = 1; i <= min; i++)
! 4375: {
! 4376: if (eptr >= md->end_subject)
! 4377: {
! 4378: SCHECK_PARTIAL();
! 4379: RRETURN(MATCH_NOMATCH);
! 4380: }
! 4381: switch(*eptr++)
! 4382: {
! 4383: default: RRETURN(MATCH_NOMATCH);
! 4384:
! 4385: case 0x000d:
! 4386: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4387: break;
! 4388:
! 4389: case 0x000a:
! 4390: break;
! 4391:
! 4392: case 0x000b:
! 4393: case 0x000c:
! 4394: case 0x0085:
! 4395: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
! 4396: break;
! 4397: }
! 4398: }
! 4399: break;
! 4400:
! 4401: case OP_NOT_HSPACE:
! 4402: for (i = 1; i <= min; i++)
! 4403: {
! 4404: if (eptr >= md->end_subject)
! 4405: {
! 4406: SCHECK_PARTIAL();
! 4407: RRETURN(MATCH_NOMATCH);
! 4408: }
! 4409: switch(*eptr++)
! 4410: {
! 4411: default: break;
! 4412: case 0x09: /* HT */
! 4413: case 0x20: /* SPACE */
! 4414: case 0xa0: /* NBSP */
! 4415: RRETURN(MATCH_NOMATCH);
! 4416: }
! 4417: }
! 4418: break;
! 4419:
! 4420: case OP_HSPACE:
! 4421: for (i = 1; i <= min; i++)
! 4422: {
! 4423: if (eptr >= md->end_subject)
! 4424: {
! 4425: SCHECK_PARTIAL();
! 4426: RRETURN(MATCH_NOMATCH);
! 4427: }
! 4428: switch(*eptr++)
! 4429: {
! 4430: default: RRETURN(MATCH_NOMATCH);
! 4431: case 0x09: /* HT */
! 4432: case 0x20: /* SPACE */
! 4433: case 0xa0: /* NBSP */
! 4434: break;
! 4435: }
! 4436: }
! 4437: break;
! 4438:
! 4439: case OP_NOT_VSPACE:
! 4440: for (i = 1; i <= min; i++)
! 4441: {
! 4442: if (eptr >= md->end_subject)
! 4443: {
! 4444: SCHECK_PARTIAL();
! 4445: RRETURN(MATCH_NOMATCH);
! 4446: }
! 4447: switch(*eptr++)
! 4448: {
! 4449: default: break;
! 4450: case 0x0a: /* LF */
! 4451: case 0x0b: /* VT */
! 4452: case 0x0c: /* FF */
! 4453: case 0x0d: /* CR */
! 4454: case 0x85: /* NEL */
! 4455: RRETURN(MATCH_NOMATCH);
! 4456: }
! 4457: }
! 4458: break;
! 4459:
! 4460: case OP_VSPACE:
! 4461: for (i = 1; i <= min; i++)
! 4462: {
! 4463: if (eptr >= md->end_subject)
! 4464: {
! 4465: SCHECK_PARTIAL();
! 4466: RRETURN(MATCH_NOMATCH);
! 4467: }
! 4468: switch(*eptr++)
! 4469: {
! 4470: default: RRETURN(MATCH_NOMATCH);
! 4471: case 0x0a: /* LF */
! 4472: case 0x0b: /* VT */
! 4473: case 0x0c: /* FF */
! 4474: case 0x0d: /* CR */
! 4475: case 0x85: /* NEL */
! 4476: break;
! 4477: }
! 4478: }
! 4479: break;
! 4480:
! 4481: case OP_NOT_DIGIT:
! 4482: for (i = 1; i <= min; i++)
! 4483: {
! 4484: if (eptr >= md->end_subject)
! 4485: {
! 4486: SCHECK_PARTIAL();
! 4487: RRETURN(MATCH_NOMATCH);
! 4488: }
! 4489: if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
! 4490: }
! 4491: break;
! 4492:
! 4493: case OP_DIGIT:
! 4494: for (i = 1; i <= min; i++)
! 4495: {
! 4496: if (eptr >= md->end_subject)
! 4497: {
! 4498: SCHECK_PARTIAL();
! 4499: RRETURN(MATCH_NOMATCH);
! 4500: }
! 4501: if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
! 4502: }
! 4503: break;
! 4504:
! 4505: case OP_NOT_WHITESPACE:
! 4506: for (i = 1; i <= min; i++)
! 4507: {
! 4508: if (eptr >= md->end_subject)
! 4509: {
! 4510: SCHECK_PARTIAL();
! 4511: RRETURN(MATCH_NOMATCH);
! 4512: }
! 4513: if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
! 4514: }
! 4515: break;
! 4516:
! 4517: case OP_WHITESPACE:
! 4518: for (i = 1; i <= min; i++)
! 4519: {
! 4520: if (eptr >= md->end_subject)
! 4521: {
! 4522: SCHECK_PARTIAL();
! 4523: RRETURN(MATCH_NOMATCH);
! 4524: }
! 4525: if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
! 4526: }
! 4527: break;
! 4528:
! 4529: case OP_NOT_WORDCHAR:
! 4530: for (i = 1; i <= min; i++)
! 4531: {
! 4532: if (eptr >= md->end_subject)
! 4533: {
! 4534: SCHECK_PARTIAL();
! 4535: RRETURN(MATCH_NOMATCH);
! 4536: }
! 4537: if ((md->ctypes[*eptr++] & ctype_word) != 0)
! 4538: RRETURN(MATCH_NOMATCH);
! 4539: }
! 4540: break;
! 4541:
! 4542: case OP_WORDCHAR:
! 4543: for (i = 1; i <= min; i++)
! 4544: {
! 4545: if (eptr >= md->end_subject)
! 4546: {
! 4547: SCHECK_PARTIAL();
! 4548: RRETURN(MATCH_NOMATCH);
! 4549: }
! 4550: if ((md->ctypes[*eptr++] & ctype_word) == 0)
! 4551: RRETURN(MATCH_NOMATCH);
! 4552: }
! 4553: break;
! 4554:
! 4555: default:
! 4556: RRETURN(PCRE_ERROR_INTERNAL);
! 4557: }
! 4558: }
! 4559:
! 4560: /* If min = max, continue at the same level without recursing */
! 4561:
! 4562: if (min == max) continue;
! 4563:
! 4564: /* If minimizing, we have to test the rest of the pattern before each
! 4565: subsequent match. Again, separate the UTF-8 case for speed, and also
! 4566: separate the UCP cases. */
! 4567:
! 4568: if (minimize)
! 4569: {
! 4570: #ifdef SUPPORT_UCP
! 4571: if (prop_type >= 0)
! 4572: {
! 4573: switch(prop_type)
! 4574: {
! 4575: case PT_ANY:
! 4576: for (fi = min;; fi++)
! 4577: {
! 4578: RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
! 4579: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4580: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4581: if (eptr >= md->end_subject)
! 4582: {
! 4583: SCHECK_PARTIAL();
! 4584: RRETURN(MATCH_NOMATCH);
! 4585: }
! 4586: GETCHARINCTEST(c, eptr);
! 4587: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
! 4588: }
! 4589: /* Control never gets here */
! 4590:
! 4591: case PT_LAMP:
! 4592: for (fi = min;; fi++)
! 4593: {
! 4594: int chartype;
! 4595: RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
! 4596: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4597: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4598: if (eptr >= md->end_subject)
! 4599: {
! 4600: SCHECK_PARTIAL();
! 4601: RRETURN(MATCH_NOMATCH);
! 4602: }
! 4603: GETCHARINCTEST(c, eptr);
! 4604: chartype = UCD_CHARTYPE(c);
! 4605: if ((chartype == ucp_Lu ||
! 4606: chartype == ucp_Ll ||
! 4607: chartype == ucp_Lt) == prop_fail_result)
! 4608: RRETURN(MATCH_NOMATCH);
! 4609: }
! 4610: /* Control never gets here */
! 4611:
! 4612: case PT_GC:
! 4613: for (fi = min;; fi++)
! 4614: {
! 4615: RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
! 4616: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4617: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4618: if (eptr >= md->end_subject)
! 4619: {
! 4620: SCHECK_PARTIAL();
! 4621: RRETURN(MATCH_NOMATCH);
! 4622: }
! 4623: GETCHARINCTEST(c, eptr);
! 4624: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
! 4625: RRETURN(MATCH_NOMATCH);
! 4626: }
! 4627: /* Control never gets here */
! 4628:
! 4629: case PT_PC:
! 4630: for (fi = min;; fi++)
! 4631: {
! 4632: RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
! 4633: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4634: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4635: if (eptr >= md->end_subject)
! 4636: {
! 4637: SCHECK_PARTIAL();
! 4638: RRETURN(MATCH_NOMATCH);
! 4639: }
! 4640: GETCHARINCTEST(c, eptr);
! 4641: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
! 4642: RRETURN(MATCH_NOMATCH);
! 4643: }
! 4644: /* Control never gets here */
! 4645:
! 4646: case PT_SC:
! 4647: for (fi = min;; fi++)
! 4648: {
! 4649: RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
! 4650: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4651: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4652: if (eptr >= md->end_subject)
! 4653: {
! 4654: SCHECK_PARTIAL();
! 4655: RRETURN(MATCH_NOMATCH);
! 4656: }
! 4657: GETCHARINCTEST(c, eptr);
! 4658: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
! 4659: RRETURN(MATCH_NOMATCH);
! 4660: }
! 4661: /* Control never gets here */
! 4662:
! 4663: case PT_ALNUM:
! 4664: for (fi = min;; fi++)
! 4665: {
! 4666: int category;
! 4667: RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
! 4668: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4669: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4670: if (eptr >= md->end_subject)
! 4671: {
! 4672: SCHECK_PARTIAL();
! 4673: RRETURN(MATCH_NOMATCH);
! 4674: }
! 4675: GETCHARINCTEST(c, eptr);
! 4676: category = UCD_CATEGORY(c);
! 4677: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
! 4678: RRETURN(MATCH_NOMATCH);
! 4679: }
! 4680: /* Control never gets here */
! 4681:
! 4682: case PT_SPACE: /* Perl space */
! 4683: for (fi = min;; fi++)
! 4684: {
! 4685: RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
! 4686: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4687: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4688: if (eptr >= md->end_subject)
! 4689: {
! 4690: SCHECK_PARTIAL();
! 4691: RRETURN(MATCH_NOMATCH);
! 4692: }
! 4693: GETCHARINCTEST(c, eptr);
! 4694: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4695: c == CHAR_FF || c == CHAR_CR)
! 4696: == prop_fail_result)
! 4697: RRETURN(MATCH_NOMATCH);
! 4698: }
! 4699: /* Control never gets here */
! 4700:
! 4701: case PT_PXSPACE: /* POSIX space */
! 4702: for (fi = min;; fi++)
! 4703: {
! 4704: RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
! 4705: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4706: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4707: if (eptr >= md->end_subject)
! 4708: {
! 4709: SCHECK_PARTIAL();
! 4710: RRETURN(MATCH_NOMATCH);
! 4711: }
! 4712: GETCHARINCTEST(c, eptr);
! 4713: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 4714: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 4715: == prop_fail_result)
! 4716: RRETURN(MATCH_NOMATCH);
! 4717: }
! 4718: /* Control never gets here */
! 4719:
! 4720: case PT_WORD:
! 4721: for (fi = min;; fi++)
! 4722: {
! 4723: int category;
! 4724: RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
! 4725: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4726: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4727: if (eptr >= md->end_subject)
! 4728: {
! 4729: SCHECK_PARTIAL();
! 4730: RRETURN(MATCH_NOMATCH);
! 4731: }
! 4732: GETCHARINCTEST(c, eptr);
! 4733: category = UCD_CATEGORY(c);
! 4734: if ((category == ucp_L ||
! 4735: category == ucp_N ||
! 4736: c == CHAR_UNDERSCORE)
! 4737: == prop_fail_result)
! 4738: RRETURN(MATCH_NOMATCH);
! 4739: }
! 4740: /* Control never gets here */
! 4741:
! 4742: /* This should never occur */
! 4743:
! 4744: default:
! 4745: RRETURN(PCRE_ERROR_INTERNAL);
! 4746: }
! 4747: }
! 4748:
! 4749: /* Match extended Unicode sequences. We will get here only if the
! 4750: support is in the binary; otherwise a compile-time error occurs. */
! 4751:
! 4752: else if (ctype == OP_EXTUNI)
! 4753: {
! 4754: for (fi = min;; fi++)
! 4755: {
! 4756: RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
! 4757: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4758: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4759: if (eptr >= md->end_subject)
! 4760: {
! 4761: SCHECK_PARTIAL();
! 4762: RRETURN(MATCH_NOMATCH);
! 4763: }
! 4764: GETCHARINCTEST(c, eptr);
! 4765: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
! 4766: while (eptr < md->end_subject)
! 4767: {
! 4768: int len = 1;
! 4769: if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 4770: if (UCD_CATEGORY(c) != ucp_M) break;
! 4771: eptr += len;
! 4772: }
! 4773: }
! 4774: }
! 4775: else
! 4776: #endif /* SUPPORT_UCP */
! 4777:
! 4778: #ifdef SUPPORT_UTF8
! 4779: /* UTF-8 mode */
! 4780: if (utf8)
! 4781: {
! 4782: for (fi = min;; fi++)
! 4783: {
! 4784: RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
! 4785: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4786: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4787: if (eptr >= md->end_subject)
! 4788: {
! 4789: SCHECK_PARTIAL();
! 4790: RRETURN(MATCH_NOMATCH);
! 4791: }
! 4792: if (ctype == OP_ANY && IS_NEWLINE(eptr))
! 4793: RRETURN(MATCH_NOMATCH);
! 4794: GETCHARINC(c, eptr);
! 4795: switch(ctype)
! 4796: {
! 4797: case OP_ANY: /* This is the non-NL case */
! 4798: case OP_ALLANY:
! 4799: case OP_ANYBYTE:
! 4800: break;
! 4801:
! 4802: case OP_ANYNL:
! 4803: switch(c)
! 4804: {
! 4805: default: RRETURN(MATCH_NOMATCH);
! 4806: case 0x000d:
! 4807: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4808: break;
! 4809: case 0x000a:
! 4810: break;
! 4811:
! 4812: case 0x000b:
! 4813: case 0x000c:
! 4814: case 0x0085:
! 4815: case 0x2028:
! 4816: case 0x2029:
! 4817: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
! 4818: break;
! 4819: }
! 4820: break;
! 4821:
! 4822: case OP_NOT_HSPACE:
! 4823: switch(c)
! 4824: {
! 4825: default: break;
! 4826: case 0x09: /* HT */
! 4827: case 0x20: /* SPACE */
! 4828: case 0xa0: /* NBSP */
! 4829: case 0x1680: /* OGHAM SPACE MARK */
! 4830: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4831: case 0x2000: /* EN QUAD */
! 4832: case 0x2001: /* EM QUAD */
! 4833: case 0x2002: /* EN SPACE */
! 4834: case 0x2003: /* EM SPACE */
! 4835: case 0x2004: /* THREE-PER-EM SPACE */
! 4836: case 0x2005: /* FOUR-PER-EM SPACE */
! 4837: case 0x2006: /* SIX-PER-EM SPACE */
! 4838: case 0x2007: /* FIGURE SPACE */
! 4839: case 0x2008: /* PUNCTUATION SPACE */
! 4840: case 0x2009: /* THIN SPACE */
! 4841: case 0x200A: /* HAIR SPACE */
! 4842: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4843: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4844: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4845: RRETURN(MATCH_NOMATCH);
! 4846: }
! 4847: break;
! 4848:
! 4849: case OP_HSPACE:
! 4850: switch(c)
! 4851: {
! 4852: default: RRETURN(MATCH_NOMATCH);
! 4853: case 0x09: /* HT */
! 4854: case 0x20: /* SPACE */
! 4855: case 0xa0: /* NBSP */
! 4856: case 0x1680: /* OGHAM SPACE MARK */
! 4857: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4858: case 0x2000: /* EN QUAD */
! 4859: case 0x2001: /* EM QUAD */
! 4860: case 0x2002: /* EN SPACE */
! 4861: case 0x2003: /* EM SPACE */
! 4862: case 0x2004: /* THREE-PER-EM SPACE */
! 4863: case 0x2005: /* FOUR-PER-EM SPACE */
! 4864: case 0x2006: /* SIX-PER-EM SPACE */
! 4865: case 0x2007: /* FIGURE SPACE */
! 4866: case 0x2008: /* PUNCTUATION SPACE */
! 4867: case 0x2009: /* THIN SPACE */
! 4868: case 0x200A: /* HAIR SPACE */
! 4869: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4870: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4871: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4872: break;
! 4873: }
! 4874: break;
! 4875:
! 4876: case OP_NOT_VSPACE:
! 4877: switch(c)
! 4878: {
! 4879: default: break;
! 4880: case 0x0a: /* LF */
! 4881: case 0x0b: /* VT */
! 4882: case 0x0c: /* FF */
! 4883: case 0x0d: /* CR */
! 4884: case 0x85: /* NEL */
! 4885: case 0x2028: /* LINE SEPARATOR */
! 4886: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4887: RRETURN(MATCH_NOMATCH);
! 4888: }
! 4889: break;
! 4890:
! 4891: case OP_VSPACE:
! 4892: switch(c)
! 4893: {
! 4894: default: RRETURN(MATCH_NOMATCH);
! 4895: case 0x0a: /* LF */
! 4896: case 0x0b: /* VT */
! 4897: case 0x0c: /* FF */
! 4898: case 0x0d: /* CR */
! 4899: case 0x85: /* NEL */
! 4900: case 0x2028: /* LINE SEPARATOR */
! 4901: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4902: break;
! 4903: }
! 4904: break;
! 4905:
! 4906: case OP_NOT_DIGIT:
! 4907: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
! 4908: RRETURN(MATCH_NOMATCH);
! 4909: break;
! 4910:
! 4911: case OP_DIGIT:
! 4912: if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
! 4913: RRETURN(MATCH_NOMATCH);
! 4914: break;
! 4915:
! 4916: case OP_NOT_WHITESPACE:
! 4917: if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
! 4918: RRETURN(MATCH_NOMATCH);
! 4919: break;
! 4920:
! 4921: case OP_WHITESPACE:
! 4922: if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
! 4923: RRETURN(MATCH_NOMATCH);
! 4924: break;
! 4925:
! 4926: case OP_NOT_WORDCHAR:
! 4927: if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
! 4928: RRETURN(MATCH_NOMATCH);
! 4929: break;
! 4930:
! 4931: case OP_WORDCHAR:
! 4932: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
! 4933: RRETURN(MATCH_NOMATCH);
! 4934: break;
! 4935:
! 4936: default:
! 4937: RRETURN(PCRE_ERROR_INTERNAL);
! 4938: }
! 4939: }
! 4940: }
! 4941: else
! 4942: #endif
! 4943: /* Not UTF-8 mode */
! 4944: {
! 4945: for (fi = min;; fi++)
! 4946: {
! 4947: RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
! 4948: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4949: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4950: if (eptr >= md->end_subject)
! 4951: {
! 4952: SCHECK_PARTIAL();
! 4953: RRETURN(MATCH_NOMATCH);
! 4954: }
! 4955: if (ctype == OP_ANY && IS_NEWLINE(eptr))
! 4956: RRETURN(MATCH_NOMATCH);
! 4957: c = *eptr++;
! 4958: switch(ctype)
! 4959: {
! 4960: case OP_ANY: /* This is the non-NL case */
! 4961: case OP_ALLANY:
! 4962: case OP_ANYBYTE:
! 4963: break;
! 4964:
! 4965: case OP_ANYNL:
! 4966: switch(c)
! 4967: {
! 4968: default: RRETURN(MATCH_NOMATCH);
! 4969: case 0x000d:
! 4970: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
! 4971: break;
! 4972:
! 4973: case 0x000a:
! 4974: break;
! 4975:
! 4976: case 0x000b:
! 4977: case 0x000c:
! 4978: case 0x0085:
! 4979: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
! 4980: break;
! 4981: }
! 4982: break;
! 4983:
! 4984: case OP_NOT_HSPACE:
! 4985: switch(c)
! 4986: {
! 4987: default: break;
! 4988: case 0x09: /* HT */
! 4989: case 0x20: /* SPACE */
! 4990: case 0xa0: /* NBSP */
! 4991: RRETURN(MATCH_NOMATCH);
! 4992: }
! 4993: break;
! 4994:
! 4995: case OP_HSPACE:
! 4996: switch(c)
! 4997: {
! 4998: default: RRETURN(MATCH_NOMATCH);
! 4999: case 0x09: /* HT */
! 5000: case 0x20: /* SPACE */
! 5001: case 0xa0: /* NBSP */
! 5002: break;
! 5003: }
! 5004: break;
! 5005:
! 5006: case OP_NOT_VSPACE:
! 5007: switch(c)
! 5008: {
! 5009: default: break;
! 5010: case 0x0a: /* LF */
! 5011: case 0x0b: /* VT */
! 5012: case 0x0c: /* FF */
! 5013: case 0x0d: /* CR */
! 5014: case 0x85: /* NEL */
! 5015: RRETURN(MATCH_NOMATCH);
! 5016: }
! 5017: break;
! 5018:
! 5019: case OP_VSPACE:
! 5020: switch(c)
! 5021: {
! 5022: default: RRETURN(MATCH_NOMATCH);
! 5023: case 0x0a: /* LF */
! 5024: case 0x0b: /* VT */
! 5025: case 0x0c: /* FF */
! 5026: case 0x0d: /* CR */
! 5027: case 0x85: /* NEL */
! 5028: break;
! 5029: }
! 5030: break;
! 5031:
! 5032: case OP_NOT_DIGIT:
! 5033: if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
! 5034: break;
! 5035:
! 5036: case OP_DIGIT:
! 5037: if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
! 5038: break;
! 5039:
! 5040: case OP_NOT_WHITESPACE:
! 5041: if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
! 5042: break;
! 5043:
! 5044: case OP_WHITESPACE:
! 5045: if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
! 5046: break;
! 5047:
! 5048: case OP_NOT_WORDCHAR:
! 5049: if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
! 5050: break;
! 5051:
! 5052: case OP_WORDCHAR:
! 5053: if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
! 5054: break;
! 5055:
! 5056: default:
! 5057: RRETURN(PCRE_ERROR_INTERNAL);
! 5058: }
! 5059: }
! 5060: }
! 5061: /* Control never gets here */
! 5062: }
! 5063:
! 5064: /* If maximizing, it is worth using inline code for speed, doing the type
! 5065: test once at the start (i.e. keep it out of the loop). Again, keep the
! 5066: UTF-8 and UCP stuff separate. */
! 5067:
! 5068: else
! 5069: {
! 5070: pp = eptr; /* Remember where we started */
! 5071:
! 5072: #ifdef SUPPORT_UCP
! 5073: if (prop_type >= 0)
! 5074: {
! 5075: switch(prop_type)
! 5076: {
! 5077: case PT_ANY:
! 5078: for (i = min; i < max; i++)
! 5079: {
! 5080: int len = 1;
! 5081: if (eptr >= md->end_subject)
! 5082: {
! 5083: SCHECK_PARTIAL();
! 5084: break;
! 5085: }
! 5086: GETCHARLENTEST(c, eptr, len);
! 5087: if (prop_fail_result) break;
! 5088: eptr+= len;
! 5089: }
! 5090: break;
! 5091:
! 5092: case PT_LAMP:
! 5093: for (i = min; i < max; i++)
! 5094: {
! 5095: int chartype;
! 5096: int len = 1;
! 5097: if (eptr >= md->end_subject)
! 5098: {
! 5099: SCHECK_PARTIAL();
! 5100: break;
! 5101: }
! 5102: GETCHARLENTEST(c, eptr, len);
! 5103: chartype = UCD_CHARTYPE(c);
! 5104: if ((chartype == ucp_Lu ||
! 5105: chartype == ucp_Ll ||
! 5106: chartype == ucp_Lt) == prop_fail_result)
! 5107: break;
! 5108: eptr+= len;
! 5109: }
! 5110: break;
! 5111:
! 5112: case PT_GC:
! 5113: for (i = min; i < max; i++)
! 5114: {
! 5115: int len = 1;
! 5116: if (eptr >= md->end_subject)
! 5117: {
! 5118: SCHECK_PARTIAL();
! 5119: break;
! 5120: }
! 5121: GETCHARLENTEST(c, eptr, len);
! 5122: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
! 5123: eptr+= len;
! 5124: }
! 5125: break;
! 5126:
! 5127: case PT_PC:
! 5128: for (i = min; i < max; i++)
! 5129: {
! 5130: int len = 1;
! 5131: if (eptr >= md->end_subject)
! 5132: {
! 5133: SCHECK_PARTIAL();
! 5134: break;
! 5135: }
! 5136: GETCHARLENTEST(c, eptr, len);
! 5137: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
! 5138: eptr+= len;
! 5139: }
! 5140: break;
! 5141:
! 5142: case PT_SC:
! 5143: for (i = min; i < max; i++)
! 5144: {
! 5145: int len = 1;
! 5146: if (eptr >= md->end_subject)
! 5147: {
! 5148: SCHECK_PARTIAL();
! 5149: break;
! 5150: }
! 5151: GETCHARLENTEST(c, eptr, len);
! 5152: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
! 5153: eptr+= len;
! 5154: }
! 5155: break;
! 5156:
! 5157: case PT_ALNUM:
! 5158: for (i = min; i < max; i++)
! 5159: {
! 5160: int category;
! 5161: int len = 1;
! 5162: if (eptr >= md->end_subject)
! 5163: {
! 5164: SCHECK_PARTIAL();
! 5165: break;
! 5166: }
! 5167: GETCHARLENTEST(c, eptr, len);
! 5168: category = UCD_CATEGORY(c);
! 5169: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
! 5170: break;
! 5171: eptr+= len;
! 5172: }
! 5173: break;
! 5174:
! 5175: case PT_SPACE: /* Perl space */
! 5176: for (i = min; i < max; i++)
! 5177: {
! 5178: int len = 1;
! 5179: if (eptr >= md->end_subject)
! 5180: {
! 5181: SCHECK_PARTIAL();
! 5182: break;
! 5183: }
! 5184: GETCHARLENTEST(c, eptr, len);
! 5185: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 5186: c == CHAR_FF || c == CHAR_CR)
! 5187: == prop_fail_result)
! 5188: break;
! 5189: eptr+= len;
! 5190: }
! 5191: break;
! 5192:
! 5193: case PT_PXSPACE: /* POSIX space */
! 5194: for (i = min; i < max; i++)
! 5195: {
! 5196: int len = 1;
! 5197: if (eptr >= md->end_subject)
! 5198: {
! 5199: SCHECK_PARTIAL();
! 5200: break;
! 5201: }
! 5202: GETCHARLENTEST(c, eptr, len);
! 5203: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
! 5204: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
! 5205: == prop_fail_result)
! 5206: break;
! 5207: eptr+= len;
! 5208: }
! 5209: break;
! 5210:
! 5211: case PT_WORD:
! 5212: for (i = min; i < max; i++)
! 5213: {
! 5214: int category;
! 5215: int len = 1;
! 5216: if (eptr >= md->end_subject)
! 5217: {
! 5218: SCHECK_PARTIAL();
! 5219: break;
! 5220: }
! 5221: GETCHARLENTEST(c, eptr, len);
! 5222: category = UCD_CATEGORY(c);
! 5223: if ((category == ucp_L || category == ucp_N ||
! 5224: c == CHAR_UNDERSCORE) == prop_fail_result)
! 5225: break;
! 5226: eptr+= len;
! 5227: }
! 5228: break;
! 5229:
! 5230: default:
! 5231: RRETURN(PCRE_ERROR_INTERNAL);
! 5232: }
! 5233:
! 5234: /* eptr is now past the end of the maximum run */
! 5235:
! 5236: if (possessive) continue;
! 5237: for(;;)
! 5238: {
! 5239: RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
! 5240: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5241: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 5242: if (utf8) BACKCHAR(eptr);
! 5243: }
! 5244: }
! 5245:
! 5246: /* Match extended Unicode sequences. We will get here only if the
! 5247: support is in the binary; otherwise a compile-time error occurs. */
! 5248:
! 5249: else if (ctype == OP_EXTUNI)
! 5250: {
! 5251: for (i = min; i < max; i++)
! 5252: {
! 5253: int len = 1;
! 5254: if (eptr >= md->end_subject)
! 5255: {
! 5256: SCHECK_PARTIAL();
! 5257: break;
! 5258: }
! 5259: if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 5260: if (UCD_CATEGORY(c) == ucp_M) break;
! 5261: eptr += len;
! 5262: while (eptr < md->end_subject)
! 5263: {
! 5264: len = 1;
! 5265: if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 5266: if (UCD_CATEGORY(c) != ucp_M) break;
! 5267: eptr += len;
! 5268: }
! 5269: }
! 5270:
! 5271: /* eptr is now past the end of the maximum run */
! 5272:
! 5273: if (possessive) continue;
! 5274:
! 5275: for(;;)
! 5276: {
! 5277: RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
! 5278: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5279: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 5280: for (;;) /* Move back over one extended */
! 5281: {
! 5282: if (!utf8) c = *eptr; else
! 5283: {
! 5284: BACKCHAR(eptr);
! 5285: GETCHAR(c, eptr);
! 5286: }
! 5287: if (UCD_CATEGORY(c) != ucp_M) break;
! 5288: eptr--;
! 5289: }
! 5290: }
! 5291: }
! 5292:
! 5293: else
! 5294: #endif /* SUPPORT_UCP */
! 5295:
! 5296: #ifdef SUPPORT_UTF8
! 5297: /* UTF-8 mode */
! 5298:
! 5299: if (utf8)
! 5300: {
! 5301: switch(ctype)
! 5302: {
! 5303: case OP_ANY:
! 5304: if (max < INT_MAX)
! 5305: {
! 5306: for (i = min; i < max; i++)
! 5307: {
! 5308: if (eptr >= md->end_subject)
! 5309: {
! 5310: SCHECK_PARTIAL();
! 5311: break;
! 5312: }
! 5313: if (IS_NEWLINE(eptr)) break;
! 5314: eptr++;
! 5315: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 5316: }
! 5317: }
! 5318:
! 5319: /* Handle unlimited UTF-8 repeat */
! 5320:
! 5321: else
! 5322: {
! 5323: for (i = min; i < max; i++)
! 5324: {
! 5325: if (eptr >= md->end_subject)
! 5326: {
! 5327: SCHECK_PARTIAL();
! 5328: break;
! 5329: }
! 5330: if (IS_NEWLINE(eptr)) break;
! 5331: eptr++;
! 5332: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 5333: }
! 5334: }
! 5335: break;
! 5336:
! 5337: case OP_ALLANY:
! 5338: if (max < INT_MAX)
! 5339: {
! 5340: for (i = min; i < max; i++)
! 5341: {
! 5342: if (eptr >= md->end_subject)
! 5343: {
! 5344: SCHECK_PARTIAL();
! 5345: break;
! 5346: }
! 5347: eptr++;
! 5348: while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
! 5349: }
! 5350: }
! 5351: else
! 5352: {
! 5353: eptr = md->end_subject; /* Unlimited UTF-8 repeat */
! 5354: SCHECK_PARTIAL();
! 5355: }
! 5356: break;
! 5357:
! 5358: /* The byte case is the same as non-UTF8 */
! 5359:
! 5360: case OP_ANYBYTE:
! 5361: c = max - min;
! 5362: if (c > (unsigned int)(md->end_subject - eptr))
! 5363: {
! 5364: eptr = md->end_subject;
! 5365: SCHECK_PARTIAL();
! 5366: }
! 5367: else eptr += c;
! 5368: break;
! 5369:
! 5370: case OP_ANYNL:
! 5371: for (i = min; i < max; i++)
! 5372: {
! 5373: int len = 1;
! 5374: if (eptr >= md->end_subject)
! 5375: {
! 5376: SCHECK_PARTIAL();
! 5377: break;
! 5378: }
! 5379: GETCHARLEN(c, eptr, len);
! 5380: if (c == 0x000d)
! 5381: {
! 5382: if (++eptr >= md->end_subject) break;
! 5383: if (*eptr == 0x000a) eptr++;
! 5384: }
! 5385: else
! 5386: {
! 5387: if (c != 0x000a &&
! 5388: (md->bsr_anycrlf ||
! 5389: (c != 0x000b && c != 0x000c &&
! 5390: c != 0x0085 && c != 0x2028 && c != 0x2029)))
! 5391: break;
! 5392: eptr += len;
! 5393: }
! 5394: }
! 5395: break;
! 5396:
! 5397: case OP_NOT_HSPACE:
! 5398: case OP_HSPACE:
! 5399: for (i = min; i < max; i++)
! 5400: {
! 5401: BOOL gotspace;
! 5402: int len = 1;
! 5403: if (eptr >= md->end_subject)
! 5404: {
! 5405: SCHECK_PARTIAL();
! 5406: break;
! 5407: }
! 5408: GETCHARLEN(c, eptr, len);
! 5409: switch(c)
! 5410: {
! 5411: default: gotspace = FALSE; break;
! 5412: case 0x09: /* HT */
! 5413: case 0x20: /* SPACE */
! 5414: case 0xa0: /* NBSP */
! 5415: case 0x1680: /* OGHAM SPACE MARK */
! 5416: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 5417: case 0x2000: /* EN QUAD */
! 5418: case 0x2001: /* EM QUAD */
! 5419: case 0x2002: /* EN SPACE */
! 5420: case 0x2003: /* EM SPACE */
! 5421: case 0x2004: /* THREE-PER-EM SPACE */
! 5422: case 0x2005: /* FOUR-PER-EM SPACE */
! 5423: case 0x2006: /* SIX-PER-EM SPACE */
! 5424: case 0x2007: /* FIGURE SPACE */
! 5425: case 0x2008: /* PUNCTUATION SPACE */
! 5426: case 0x2009: /* THIN SPACE */
! 5427: case 0x200A: /* HAIR SPACE */
! 5428: case 0x202f: /* NARROW NO-BREAK SPACE */
! 5429: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 5430: case 0x3000: /* IDEOGRAPHIC SPACE */
! 5431: gotspace = TRUE;
! 5432: break;
! 5433: }
! 5434: if (gotspace == (ctype == OP_NOT_HSPACE)) break;
! 5435: eptr += len;
! 5436: }
! 5437: break;
! 5438:
! 5439: case OP_NOT_VSPACE:
! 5440: case OP_VSPACE:
! 5441: for (i = min; i < max; i++)
! 5442: {
! 5443: BOOL gotspace;
! 5444: int len = 1;
! 5445: if (eptr >= md->end_subject)
! 5446: {
! 5447: SCHECK_PARTIAL();
! 5448: break;
! 5449: }
! 5450: GETCHARLEN(c, eptr, len);
! 5451: switch(c)
! 5452: {
! 5453: default: gotspace = FALSE; break;
! 5454: case 0x0a: /* LF */
! 5455: case 0x0b: /* VT */
! 5456: case 0x0c: /* FF */
! 5457: case 0x0d: /* CR */
! 5458: case 0x85: /* NEL */
! 5459: case 0x2028: /* LINE SEPARATOR */
! 5460: case 0x2029: /* PARAGRAPH SEPARATOR */
! 5461: gotspace = TRUE;
! 5462: break;
! 5463: }
! 5464: if (gotspace == (ctype == OP_NOT_VSPACE)) break;
! 5465: eptr += len;
! 5466: }
! 5467: break;
! 5468:
! 5469: case OP_NOT_DIGIT:
! 5470: for (i = min; i < max; i++)
! 5471: {
! 5472: int len = 1;
! 5473: if (eptr >= md->end_subject)
! 5474: {
! 5475: SCHECK_PARTIAL();
! 5476: break;
! 5477: }
! 5478: GETCHARLEN(c, eptr, len);
! 5479: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
! 5480: eptr+= len;
! 5481: }
! 5482: break;
! 5483:
! 5484: case OP_DIGIT:
! 5485: for (i = min; i < max; i++)
! 5486: {
! 5487: int len = 1;
! 5488: if (eptr >= md->end_subject)
! 5489: {
! 5490: SCHECK_PARTIAL();
! 5491: break;
! 5492: }
! 5493: GETCHARLEN(c, eptr, len);
! 5494: if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
! 5495: eptr+= len;
! 5496: }
! 5497: break;
! 5498:
! 5499: case OP_NOT_WHITESPACE:
! 5500: for (i = min; i < max; i++)
! 5501: {
! 5502: int len = 1;
! 5503: if (eptr >= md->end_subject)
! 5504: {
! 5505: SCHECK_PARTIAL();
! 5506: break;
! 5507: }
! 5508: GETCHARLEN(c, eptr, len);
! 5509: if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
! 5510: eptr+= len;
! 5511: }
! 5512: break;
! 5513:
! 5514: case OP_WHITESPACE:
! 5515: for (i = min; i < max; i++)
! 5516: {
! 5517: int len = 1;
! 5518: if (eptr >= md->end_subject)
! 5519: {
! 5520: SCHECK_PARTIAL();
! 5521: break;
! 5522: }
! 5523: GETCHARLEN(c, eptr, len);
! 5524: if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
! 5525: eptr+= len;
! 5526: }
! 5527: break;
! 5528:
! 5529: case OP_NOT_WORDCHAR:
! 5530: for (i = min; i < max; i++)
! 5531: {
! 5532: int len = 1;
! 5533: if (eptr >= md->end_subject)
! 5534: {
! 5535: SCHECK_PARTIAL();
! 5536: break;
! 5537: }
! 5538: GETCHARLEN(c, eptr, len);
! 5539: if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
! 5540: eptr+= len;
! 5541: }
! 5542: break;
! 5543:
! 5544: case OP_WORDCHAR:
! 5545: for (i = min; i < max; i++)
! 5546: {
! 5547: int len = 1;
! 5548: if (eptr >= md->end_subject)
! 5549: {
! 5550: SCHECK_PARTIAL();
! 5551: break;
! 5552: }
! 5553: GETCHARLEN(c, eptr, len);
! 5554: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
! 5555: eptr+= len;
! 5556: }
! 5557: break;
! 5558:
! 5559: default:
! 5560: RRETURN(PCRE_ERROR_INTERNAL);
! 5561: }
! 5562:
! 5563: /* eptr is now past the end of the maximum run. If possessive, we are
! 5564: done (no backing up). Otherwise, match at this position; anything other
! 5565: than no match is immediately returned. For nomatch, back up one
! 5566: character, unless we are matching \R and the last thing matched was
! 5567: \r\n, in which case, back up two bytes. */
! 5568:
! 5569: if (possessive) continue;
! 5570: for(;;)
! 5571: {
! 5572: RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
! 5573: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5574: if (eptr-- == pp) break; /* Stop if tried at original pos */
! 5575: BACKCHAR(eptr);
! 5576: if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
! 5577: eptr[-1] == '\r') eptr--;
! 5578: }
! 5579: }
! 5580: else
! 5581: #endif /* SUPPORT_UTF8 */
! 5582:
! 5583: /* Not UTF-8 mode */
! 5584: {
! 5585: switch(ctype)
! 5586: {
! 5587: case OP_ANY:
! 5588: for (i = min; i < max; i++)
! 5589: {
! 5590: if (eptr >= md->end_subject)
! 5591: {
! 5592: SCHECK_PARTIAL();
! 5593: break;
! 5594: }
! 5595: if (IS_NEWLINE(eptr)) break;
! 5596: eptr++;
! 5597: }
! 5598: break;
! 5599:
! 5600: case OP_ALLANY:
! 5601: case OP_ANYBYTE:
! 5602: c = max - min;
! 5603: if (c > (unsigned int)(md->end_subject - eptr))
! 5604: {
! 5605: eptr = md->end_subject;
! 5606: SCHECK_PARTIAL();
! 5607: }
! 5608: else eptr += c;
! 5609: break;
! 5610:
! 5611: case OP_ANYNL:
! 5612: for (i = min; i < max; i++)
! 5613: {
! 5614: if (eptr >= md->end_subject)
! 5615: {
! 5616: SCHECK_PARTIAL();
! 5617: break;
! 5618: }
! 5619: c = *eptr;
! 5620: if (c == 0x000d)
! 5621: {
! 5622: if (++eptr >= md->end_subject) break;
! 5623: if (*eptr == 0x000a) eptr++;
! 5624: }
! 5625: else
! 5626: {
! 5627: if (c != 0x000a &&
! 5628: (md->bsr_anycrlf ||
! 5629: (c != 0x000b && c != 0x000c && c != 0x0085)))
! 5630: break;
! 5631: eptr++;
! 5632: }
! 5633: }
! 5634: break;
! 5635:
! 5636: case OP_NOT_HSPACE:
! 5637: for (i = min; i < max; i++)
! 5638: {
! 5639: if (eptr >= md->end_subject)
! 5640: {
! 5641: SCHECK_PARTIAL();
! 5642: break;
! 5643: }
! 5644: c = *eptr;
! 5645: if (c == 0x09 || c == 0x20 || c == 0xa0) break;
! 5646: eptr++;
! 5647: }
! 5648: break;
! 5649:
! 5650: case OP_HSPACE:
! 5651: for (i = min; i < max; i++)
! 5652: {
! 5653: if (eptr >= md->end_subject)
! 5654: {
! 5655: SCHECK_PARTIAL();
! 5656: break;
! 5657: }
! 5658: c = *eptr;
! 5659: if (c != 0x09 && c != 0x20 && c != 0xa0) break;
! 5660: eptr++;
! 5661: }
! 5662: break;
! 5663:
! 5664: case OP_NOT_VSPACE:
! 5665: for (i = min; i < max; i++)
! 5666: {
! 5667: if (eptr >= md->end_subject)
! 5668: {
! 5669: SCHECK_PARTIAL();
! 5670: break;
! 5671: }
! 5672: c = *eptr;
! 5673: if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
! 5674: break;
! 5675: eptr++;
! 5676: }
! 5677: break;
! 5678:
! 5679: case OP_VSPACE:
! 5680: for (i = min; i < max; i++)
! 5681: {
! 5682: if (eptr >= md->end_subject)
! 5683: {
! 5684: SCHECK_PARTIAL();
! 5685: break;
! 5686: }
! 5687: c = *eptr;
! 5688: if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
! 5689: break;
! 5690: eptr++;
! 5691: }
! 5692: break;
! 5693:
! 5694: case OP_NOT_DIGIT:
! 5695: for (i = min; i < max; i++)
! 5696: {
! 5697: if (eptr >= md->end_subject)
! 5698: {
! 5699: SCHECK_PARTIAL();
! 5700: break;
! 5701: }
! 5702: if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
! 5703: eptr++;
! 5704: }
! 5705: break;
! 5706:
! 5707: case OP_DIGIT:
! 5708: for (i = min; i < max; i++)
! 5709: {
! 5710: if (eptr >= md->end_subject)
! 5711: {
! 5712: SCHECK_PARTIAL();
! 5713: break;
! 5714: }
! 5715: if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
! 5716: eptr++;
! 5717: }
! 5718: break;
! 5719:
! 5720: case OP_NOT_WHITESPACE:
! 5721: for (i = min; i < max; i++)
! 5722: {
! 5723: if (eptr >= md->end_subject)
! 5724: {
! 5725: SCHECK_PARTIAL();
! 5726: break;
! 5727: }
! 5728: if ((md->ctypes[*eptr] & ctype_space) != 0) break;
! 5729: eptr++;
! 5730: }
! 5731: break;
! 5732:
! 5733: case OP_WHITESPACE:
! 5734: for (i = min; i < max; i++)
! 5735: {
! 5736: if (eptr >= md->end_subject)
! 5737: {
! 5738: SCHECK_PARTIAL();
! 5739: break;
! 5740: }
! 5741: if ((md->ctypes[*eptr] & ctype_space) == 0) break;
! 5742: eptr++;
! 5743: }
! 5744: break;
! 5745:
! 5746: case OP_NOT_WORDCHAR:
! 5747: for (i = min; i < max; i++)
! 5748: {
! 5749: if (eptr >= md->end_subject)
! 5750: {
! 5751: SCHECK_PARTIAL();
! 5752: break;
! 5753: }
! 5754: if ((md->ctypes[*eptr] & ctype_word) != 0) break;
! 5755: eptr++;
! 5756: }
! 5757: break;
! 5758:
! 5759: case OP_WORDCHAR:
! 5760: for (i = min; i < max; i++)
! 5761: {
! 5762: if (eptr >= md->end_subject)
! 5763: {
! 5764: SCHECK_PARTIAL();
! 5765: break;
! 5766: }
! 5767: if ((md->ctypes[*eptr] & ctype_word) == 0) break;
! 5768: eptr++;
! 5769: }
! 5770: break;
! 5771:
! 5772: default:
! 5773: RRETURN(PCRE_ERROR_INTERNAL);
! 5774: }
! 5775:
! 5776: /* eptr is now past the end of the maximum run. If possessive, we are
! 5777: done (no backing up). Otherwise, match at this position; anything other
! 5778: than no match is immediately returned. For nomatch, back up one
! 5779: character (byte), unless we are matching \R and the last thing matched
! 5780: was \r\n, in which case, back up two bytes. */
! 5781:
! 5782: if (possessive) continue;
! 5783: while (eptr >= pp)
! 5784: {
! 5785: RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
! 5786: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 5787: eptr--;
! 5788: if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
! 5789: eptr[-1] == '\r') eptr--;
! 5790: }
! 5791: }
! 5792:
! 5793: /* Get here if we can't make it match with any permitted repetitions */
! 5794:
! 5795: RRETURN(MATCH_NOMATCH);
! 5796: }
! 5797: /* Control never gets here */
! 5798:
! 5799: /* There's been some horrible disaster. Arrival here can only mean there is
! 5800: something seriously wrong in the code above or the OP_xxx definitions. */
! 5801:
! 5802: default:
! 5803: DPRINTF(("Unknown opcode %d\n", *ecode));
! 5804: RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
! 5805: }
! 5806:
! 5807: /* Do not stick any code in here without much thought; it is assumed
! 5808: that "continue" in the code above comes out to here to repeat the main
! 5809: loop. */
! 5810:
! 5811: } /* End of main loop */
! 5812: /* Control never reaches here */
! 5813:
! 5814:
! 5815: /* When compiling to use the heap rather than the stack for recursive calls to
! 5816: match(), the RRETURN() macro jumps here. The number that is saved in
! 5817: frame->Xwhere indicates which label we actually want to return to. */
! 5818:
! 5819: #ifdef NO_RECURSE
! 5820: #define LBL(val) case val: goto L_RM##val;
! 5821: HEAP_RETURN:
! 5822: switch (frame->Xwhere)
! 5823: {
! 5824: LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
! 5825: LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
! 5826: LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
! 5827: LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
! 5828: LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
! 5829: LBL(65) LBL(66)
! 5830: #ifdef SUPPORT_UTF8
! 5831: LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
! 5832: LBL(32) LBL(34) LBL(42) LBL(46)
! 5833: #ifdef SUPPORT_UCP
! 5834: LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
! 5835: LBL(59) LBL(60) LBL(61) LBL(62)
! 5836: #endif /* SUPPORT_UCP */
! 5837: #endif /* SUPPORT_UTF8 */
! 5838: default:
! 5839: DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
! 5840: return PCRE_ERROR_INTERNAL;
! 5841: }
! 5842: #undef LBL
! 5843: #endif /* NO_RECURSE */
! 5844: }
! 5845:
! 5846:
! 5847: /***************************************************************************
! 5848: ****************************************************************************
! 5849: RECURSION IN THE match() FUNCTION
! 5850:
! 5851: Undefine all the macros that were defined above to handle this. */
! 5852: /* pcre_exec() below reuses names such as length and flags as ordinary identifiers, so the match() macros must not remain defined past this point. */
! 5853: #ifdef NO_RECURSE
! 5854: #undef eptr
! 5855: #undef ecode
! 5856: #undef mstart
! 5857: #undef offset_top
! 5858: #undef eptrb
! 5859: #undef flags
! 5860:
! 5861: #undef callpat
! 5862: #undef charptr
! 5863: #undef data
! 5864: #undef next
! 5865: #undef pp
! 5866: #undef prev
! 5867: #undef saved_eptr
! 5868:
! 5869: #undef new_recursive
! 5870:
! 5871: #undef cur_is_word
! 5872: #undef condition
! 5873: #undef prev_is_word
! 5874:
! 5875: #undef ctype
! 5876: #undef length
! 5877: #undef max
! 5878: #undef min
! 5879: #undef number
! 5880: #undef offset
! 5881: #undef op
! 5882: #undef save_capture_last
! 5883: #undef save_offset1
! 5884: #undef save_offset2
! 5885: #undef save_offset3
! 5886: #undef stacksave
! 5887:
! 5888: #undef newptrb
! 5889:
! 5890: #endif /* NO_RECURSE */
! 5891:
! 5892: /* fc and fi are defined as macros in both cases (presumably with and without NO_RECURSE), so they are undefined unconditionally, outside the #ifdef above. */
! 5893:
! 5894: #undef fc
! 5895: #undef fi
! 5896:
! 5897: /***************************************************************************
! 5898: ***************************************************************************/
! 5899:
! 5900:
! 5901:
! 5902: /*************************************************
! 5903: * Execute a Regular Expression *
! 5904: *************************************************/
! 5905:
! 5906: /* This function applies a compiled re to a subject string and picks out
! 5907: portions of the string if it matches. Two elements in the vector are set for
! 5908: each substring: the offsets to the start and end of the substring.
! 5909:
! 5910: Arguments:
! 5911: argument_re points to the compiled expression
! 5912: extra_data points to extra data or is NULL
! 5913: subject points to the subject string
! 5914: length length of subject string (may contain binary zeros)
! 5915: start_offset where to start in the subject string
! 5916: options option bits
! 5917: offsets points to a vector of ints to be filled in with offsets
! 5918: offsetcount the number of elements in the vector
! 5919:
! 5920: Returns: > 0 => success; value is the number of elements filled in
! 5921: = 0 => success, but offsets is not big enough
! 5922: -1 => failed to match
! 5923: < -1 => some kind of unexpected problem
! 5924: */
! 5925:
! 5926: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 5927: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
! 5928: PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
! 5929: int offsetcount)
! 5930: {
! 5931: int rc, ocount, arg_offset_max;
! 5932: int first_byte = -1;
! 5933: int req_byte = -1;
! 5934: int req_byte2 = -1;
! 5935: int newline;
! 5936: BOOL using_temporary_offsets = FALSE;
! 5937: BOOL anchored;
! 5938: BOOL startline;
! 5939: BOOL firstline;
! 5940: BOOL first_byte_caseless = FALSE;
! 5941: BOOL req_byte_caseless = FALSE;
! 5942: BOOL utf8;
! 5943: match_data match_block;
! 5944: match_data *md = &match_block;
! 5945: const uschar *tables;
! 5946: const uschar *start_bits = NULL;
! 5947: USPTR start_match = (USPTR)subject + start_offset;
! 5948: USPTR end_subject;
! 5949: USPTR start_partial = NULL;
! 5950: USPTR req_byte_ptr = start_match - 1;
! 5951:
! 5952: pcre_study_data internal_study;
! 5953: const pcre_study_data *study;
! 5954:
! 5955: real_pcre internal_re;
! 5956: const real_pcre *external_re = (const real_pcre *)argument_re;
! 5957: const real_pcre *re = external_re;
! 5958:
! 5959: /* Plausibility checks */
! 5960:
! 5961: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
! 5962: if (re == NULL || subject == NULL ||
! 5963: (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
! 5964: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
! 5965: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
! 5966:
! 5967: /* These two settings are used in the code for checking a UTF-8 string that
! 5968: follows immediately afterwards. Other values in the md block are used only
! 5969: during "normal" pcre_exec() processing, not when the JIT support is in use,
! 5970: so they are set up later. */
! 5971:
! 5972: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
! 5973: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
! 5974: ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
! 5975:
! 5976: /* Check a UTF-8 string if required. Pass back the character offset and error
! 5977: code for an invalid string if a results vector is available. */
! 5978:
! 5979: #ifdef SUPPORT_UTF8
! 5980: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
! 5981: {
! 5982: int erroroffset;
! 5983: int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
! 5984: if (errorcode != 0)
! 5985: {
! 5986: if (offsetcount >= 2)
! 5987: {
! 5988: offsets[0] = erroroffset;
! 5989: offsets[1] = errorcode;
! 5990: }
! 5991: return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
! 5992: PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
! 5993: }
! 5994:
! 5995: /* Check that a start_offset points to the start of a UTF-8 character. */
! 5996: if (start_offset > 0 && start_offset < length &&
! 5997: (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
! 5998: return PCRE_ERROR_BADUTF8_OFFSET;
! 5999: }
! 6000: #endif
! 6001:
! 6002: /* If the pattern was successfully studied with JIT support, run the JIT
! 6003: executable instead of the rest of this function. Most options must be set at
! 6004: compile time for the JIT code to be usable. Fall back to the normal code path if
! 6005: an unsupported flag is set. In particular, JIT does not support partial
! 6006: matching. */
! 6007:
! 6008: #ifdef SUPPORT_JIT
! 6009: if (extra_data != NULL
! 6010: && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
! 6011: && extra_data->executable_jit != NULL
! 6012: && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
! 6013: && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
! 6014: PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
! 6015: return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
! 6016: start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
! 6017: ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
! 6018: #endif
! 6019:
! 6020: /* Carry on with non-JIT matching. This information is for finding all the
! 6021: numbers associated with a given name, for condition testing. */
! 6022:
! 6023: md->name_table = (uschar *)re + re->name_table_offset;
! 6024: md->name_count = re->name_count;
! 6025: md->name_entry_size = re->name_entry_size;
! 6026:
! 6027: /* Fish out the optional data from the extra_data structure, first setting
! 6028: the default values. */
! 6029:
! 6030: study = NULL;
! 6031: md->match_limit = MATCH_LIMIT;
! 6032: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
! 6033: md->callout_data = NULL;
! 6034:
! 6035: /* The table pointer is always in native byte order. */
! 6036:
! 6037: tables = external_re->tables;
! 6038:
! 6039: if (extra_data != NULL)
! 6040: {
! 6041: register unsigned int flags = extra_data->flags;
! 6042: if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
! 6043: study = (const pcre_study_data *)extra_data->study_data;
! 6044: if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
! 6045: md->match_limit = extra_data->match_limit;
! 6046: if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
! 6047: md->match_limit_recursion = extra_data->match_limit_recursion;
! 6048: if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
! 6049: md->callout_data = extra_data->callout_data;
! 6050: if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
! 6051: }
! 6052:
! 6053: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
! 6054: is a feature that makes it possible to save compiled regexes and re-use them
! 6055: in other programs later. */
! 6056:
! 6057: if (tables == NULL) tables = _pcre_default_tables;
! 6058:
! 6059: /* Check that the first field in the block is the magic number. If it is not,
! 6060: test for a regex that was compiled on a host of opposite endianness. If this is
! 6061: the case, flipped values are put in internal_re and internal_study if there was
! 6062: study data too. */
! 6063:
! 6064: if (re->magic_number != MAGIC_NUMBER)
! 6065: {
! 6066: re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
! 6067: if (re == NULL) return PCRE_ERROR_BADMAGIC;
! 6068: if (study != NULL) study = &internal_study;
! 6069: }
! 6070:
! 6071: /* Set up other data */
! 6072:
! 6073: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
! 6074: startline = (re->flags & PCRE_STARTLINE) != 0;
! 6075: firstline = (re->options & PCRE_FIRSTLINE) != 0;
! 6076:
! 6077: /* The code starts after the real_pcre block and the capture name table. */
! 6078:
! 6079: md->start_code = (const uschar *)external_re + re->name_table_offset +
! 6080: re->name_count * re->name_entry_size;
! 6081:
! 6082: md->start_subject = (USPTR)subject;
! 6083: md->start_offset = start_offset;
! 6084: md->end_subject = md->start_subject + length;
! 6085: end_subject = md->end_subject;
! 6086:
! 6087: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
! 6088: md->use_ucp = (re->options & PCRE_UCP) != 0;
! 6089: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
! 6090: md->ignore_skip_arg = FALSE;
! 6091:
! 6092: /* Some options are unpacked into BOOL variables in the hope that testing
! 6093: them will be faster than individual option bits. */
! 6094:
! 6095: md->notbol = (options & PCRE_NOTBOL) != 0;
! 6096: md->noteol = (options & PCRE_NOTEOL) != 0;
! 6097: md->notempty = (options & PCRE_NOTEMPTY) != 0;
! 6098: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
! 6099:
! 6100: md->hitend = FALSE;
! 6101: md->mark = md->nomatch_mark = NULL; /* In case never set */
! 6102:
! 6103: md->recursive = NULL; /* No recursion at top level */
! 6104: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
! 6105:
! 6106: md->lcc = tables + lcc_offset;
! 6107: md->ctypes = tables + ctypes_offset;
! 6108:
! 6109: /* Handle different \R options. */
! 6110:
! 6111: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
! 6112: {
! 6113: case 0:
! 6114: if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
! 6115: md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
! 6116: else
! 6117: #ifdef BSR_ANYCRLF
! 6118: md->bsr_anycrlf = TRUE;
! 6119: #else
! 6120: md->bsr_anycrlf = FALSE;
! 6121: #endif
! 6122: break;
! 6123:
! 6124: case PCRE_BSR_ANYCRLF:
! 6125: md->bsr_anycrlf = TRUE;
! 6126: break;
! 6127:
! 6128: case PCRE_BSR_UNICODE:
! 6129: md->bsr_anycrlf = FALSE;
! 6130: break;
! 6131:
! 6132: default: return PCRE_ERROR_BADNEWLINE;
! 6133: }
! 6134:
! 6135: /* Handle different types of newline. The three bits give eight cases. If
! 6136: nothing is set at run time, whatever was used at compile time applies. */
! 6137:
! 6138: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
! 6139: (pcre_uint32)options) & PCRE_NEWLINE_BITS)
! 6140: {
! 6141: case 0: newline = NEWLINE; break; /* Compile-time default */
! 6142: case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
! 6143: case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
! 6144: case PCRE_NEWLINE_CR+
! 6145: PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
! 6146: case PCRE_NEWLINE_ANY: newline = -1; break;
! 6147: case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
! 6148: default: return PCRE_ERROR_BADNEWLINE;
! 6149: }
! 6150:
! 6151: if (newline == -2)
! 6152: {
! 6153: md->nltype = NLTYPE_ANYCRLF;
! 6154: }
! 6155: else if (newline < 0)
! 6156: {
! 6157: md->nltype = NLTYPE_ANY;
! 6158: }
! 6159: else
! 6160: {
! 6161: md->nltype = NLTYPE_FIXED;
! 6162: if (newline > 255)
! 6163: {
! 6164: md->nllen = 2;
! 6165: md->nl[0] = (newline >> 8) & 255;
! 6166: md->nl[1] = newline & 255;
! 6167: }
! 6168: else
! 6169: {
! 6170: md->nllen = 1;
! 6171: md->nl[0] = newline;
! 6172: }
! 6173: }
! 6174:
! 6175: /* Partial matching was originally supported only for a restricted set of
! 6176: regexes; from release 8.00 there are no restrictions, but the bits are still
! 6177: defined (though never set). So there's no harm in leaving this code. */
! 6178:
! 6179: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
! 6180: return PCRE_ERROR_BADPARTIAL;
! 6181:
! 6182: /* If the expression has got more back references than the offsets supplied can
! 6183: hold, we get a temporary chunk of working store to use during the matching.
! 6184: Otherwise, we can use the vector supplied, rounding down its size to a multiple
! 6185: of 3. */
! 6186:
! 6187: ocount = offsetcount - (offsetcount % 3);
! 6188: arg_offset_max = (2*ocount)/3;
! 6189:
! 6190: if (re->top_backref > 0 && re->top_backref >= ocount/3)
! 6191: {
! 6192: ocount = re->top_backref * 3 + 3;
! 6193: md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
! 6194: if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
! 6195: using_temporary_offsets = TRUE;
! 6196: DPRINTF(("Got memory to hold back references\n"));
! 6197: }
! 6198: else md->offset_vector = offsets;
! 6199:
! 6200: md->offset_end = ocount;
! 6201: md->offset_max = (2*ocount)/3;
! 6202: md->offset_overflow = FALSE;
! 6203: md->capture_last = -1;
! 6204:
! 6205: /* Reset the working variable associated with each extraction. These should
! 6206: never be used unless previously set, but they get saved and restored, and so we
! 6207: initialize them to avoid reading uninitialized locations. Also, unset the
! 6208: offsets for the matched string. This is really just for tidiness with callouts,
! 6209: in case they inspect these fields. */
! 6210:
! 6211: if (md->offset_vector != NULL)
! 6212: {
! 6213: register int *iptr = md->offset_vector + ocount;
! 6214: register int *iend = iptr - re->top_bracket;
! 6215: if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
! 6216: while (--iptr >= iend) *iptr = -1;
! 6217: md->offset_vector[0] = md->offset_vector[1] = -1;
! 6218: }
! 6219:
! 6220: /* Set up the first character to match, if available. The first_byte value is
! 6221: never set for an anchored regular expression, but the anchoring may be forced
! 6222: at run time, so we have to test for anchoring. The first char may be unset for
! 6223: an unanchored pattern, of course. If there's no first char and the pattern was
! 6224: studied, there may be a bitmap of possible first characters. */
! 6225:
! 6226: if (!anchored)
! 6227: {
! 6228: if ((re->flags & PCRE_FIRSTSET) != 0)
! 6229: {
! 6230: first_byte = re->first_byte & 255;
! 6231: if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
! 6232: first_byte = md->lcc[first_byte];
! 6233: }
! 6234: else
! 6235: if (!startline && study != NULL &&
! 6236: (study->flags & PCRE_STUDY_MAPPED) != 0)
! 6237: start_bits = study->start_bits;
! 6238: }
! 6239:
! 6240: /* For anchored or unanchored matches, there may be a "last known required
! 6241: character" set. */
! 6242:
! 6243: if ((re->flags & PCRE_REQCHSET) != 0)
! 6244: {
! 6245: req_byte = re->req_byte & 255;
! 6246: req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
! 6247: req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
! 6248: }
! 6249:
! 6250:
! 6251:
! 6252:
! 6253: /* ==========================================================================*/
! 6254:
! 6255: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
! 6256: the loop runs just once. */
! 6257:
! 6258: for(;;)
! 6259: {
! 6260: USPTR save_end_subject = end_subject;
! 6261: USPTR new_start_match;
! 6262:
! 6263: /* If firstline is TRUE, the start of the match is constrained to the first
! 6264: line of a multiline string. That is, the match must be before or at the first
! 6265: newline. Implement this by temporarily adjusting end_subject so that we stop
! 6266: scanning at a newline. If the match fails at the newline, later code breaks
! 6267: this loop. */
! 6268:
! 6269: if (firstline)
! 6270: {
! 6271: USPTR t = start_match;
! 6272: #ifdef SUPPORT_UTF8
! 6273: if (utf8)
! 6274: {
! 6275: while (t < md->end_subject && !IS_NEWLINE(t))
! 6276: {
! 6277: t++;
! 6278: while (t < end_subject && (*t & 0xc0) == 0x80) t++;
! 6279: }
! 6280: }
! 6281: else
! 6282: #endif
! 6283: while (t < md->end_subject && !IS_NEWLINE(t)) t++;
! 6284: end_subject = t;
! 6285: }
! 6286:
! 6287: /* There are some optimizations that avoid running the match if a known
! 6288: starting point is not found, or if a known later character is not present.
! 6289: However, there is an option that disables these, for testing and for ensuring
! 6290: that all callouts do actually occur. The option can be set in the regex by
! 6291: (*NO_START_OPT) or passed in match-time options. */
! 6292:
! 6293: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
! 6294: {
! 6295: /* Advance to a unique first byte if there is one. */
! 6296:
! 6297: if (first_byte >= 0)
! 6298: {
! 6299: if (first_byte_caseless)
! 6300: while (start_match < end_subject && md->lcc[*start_match] != first_byte)
! 6301: start_match++;
! 6302: else
! 6303: while (start_match < end_subject && *start_match != first_byte)
! 6304: start_match++;
! 6305: }
! 6306:
! 6307: /* Or to just after a linebreak for a multiline match */
! 6308:
! 6309: else if (startline)
! 6310: {
! 6311: if (start_match > md->start_subject + start_offset)
! 6312: {
! 6313: #ifdef SUPPORT_UTF8
! 6314: if (utf8)
! 6315: {
! 6316: while (start_match < end_subject && !WAS_NEWLINE(start_match))
! 6317: {
! 6318: start_match++;
! 6319: while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
! 6320: start_match++;
! 6321: }
! 6322: }
! 6323: else
! 6324: #endif
! 6325: while (start_match < end_subject && !WAS_NEWLINE(start_match))
! 6326: start_match++;
! 6327:
! 6328: /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
! 6329: and we are now at a LF, advance the match position by one more character.
! 6330: */
! 6331:
! 6332: if (start_match[-1] == CHAR_CR &&
! 6333: (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
! 6334: start_match < end_subject &&
! 6335: *start_match == CHAR_NL)
! 6336: start_match++;
! 6337: }
! 6338: }
! 6339:
! 6340: /* Or to a non-unique first byte after study */
! 6341:
! 6342: else if (start_bits != NULL)
! 6343: {
! 6344: while (start_match < end_subject)
! 6345: {
! 6346: register unsigned int c = *start_match;
! 6347: if ((start_bits[c/8] & (1 << (c&7))) == 0)
! 6348: {
! 6349: start_match++;
! 6350: #ifdef SUPPORT_UTF8
! 6351: if (utf8)
! 6352: while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
! 6353: start_match++;
! 6354: #endif
! 6355: }
! 6356: else break;
! 6357: }
! 6358: }
! 6359: } /* Starting optimizations */
! 6360:
! 6361: /* Restore fudged end_subject */
! 6362:
! 6363: end_subject = save_end_subject;
! 6364:
! 6365: /* The following two optimizations are disabled for partial matching or if
! 6366: disabling is explicitly requested. */
! 6367:
! 6368: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
! 6369: {
! 6370: /* If the pattern was studied, a minimum subject length may be set. This is
! 6371: a lower bound; there need not be any string of exactly that length that
! 6372: matches the pattern. Although the value is, strictly, in characters, we treat
! 6373: it as bytes to avoid spending too much time in this optimization. */
! 6374:
! 6375: if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
! 6376: (pcre_uint32)(end_subject - start_match) < study->minlength)
! 6377: {
! 6378: rc = MATCH_NOMATCH;
! 6379: break;
! 6380: }
! 6381:
! 6382: /* If req_byte is set, we know that that character must appear in the
! 6383: subject for the match to succeed. If the first character is set, req_byte
! 6384: must be later in the subject; otherwise the test starts at the match point.
! 6385: This optimization can save a huge amount of backtracking in patterns with
! 6386: nested unlimited repeats that aren't going to match. Writing separate code
! 6387: for cased/caseless versions makes it go faster, as does using an
! 6388: autoincrement and backing off on a match.
! 6389:
! 6390: HOWEVER: when the subject string is very, very long, searching to its end
! 6391: can take a long time, and give bad performance on quite ordinary patterns.
! 6392: This showed up when somebody was matching something like /^\d+C/ on a
! 6393: 32-megabyte string... so we don't do this when the string is sufficiently
! 6394: long. */
! 6395:
! 6396: if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
! 6397: {
! 6398: register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
! 6399:
! 6400: /* We don't need to repeat the search if we haven't yet reached the
! 6401: place we found it at last time. */
! 6402:
! 6403: if (p > req_byte_ptr)
! 6404: {
! 6405: if (req_byte_caseless)
! 6406: {
! 6407: while (p < end_subject)
! 6408: {
! 6409: register int pp = *p++;
! 6410: if (pp == req_byte || pp == req_byte2) { p--; break; }
! 6411: }
! 6412: }
! 6413: else
! 6414: {
! 6415: while (p < end_subject)
! 6416: {
! 6417: if (*p++ == req_byte) { p--; break; }
! 6418: }
! 6419: }
! 6420:
! 6421: /* If we can't find the required character, break the matching loop,
! 6422: forcing a match failure. */
! 6423:
! 6424: if (p >= end_subject)
! 6425: {
! 6426: rc = MATCH_NOMATCH;
! 6427: break;
! 6428: }
! 6429:
! 6430: /* If we have found the required character, save the point where we
! 6431: found it, so that we don't search again next time round the loop if
! 6432: the start hasn't passed this character yet. */
! 6433:
! 6434: req_byte_ptr = p;
! 6435: }
! 6436: }
! 6437: }
! 6438:
! 6439: #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
! 6440: printf(">>>> Match against: ");
! 6441: pchars(start_match, end_subject - start_match, TRUE, md);
! 6442: printf("\n");
! 6443: #endif
! 6444:
! 6445: /* OK, we can now run the match. If "hitend" is set afterwards, remember the
! 6446: first starting point for which a partial match was found. */
! 6447:
! 6448: md->start_match_ptr = start_match;
! 6449: md->start_used_ptr = start_match;
! 6450: md->match_call_count = 0;
! 6451: md->match_function_type = 0;
! 6452: md->end_offset_top = 0;
! 6453: rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
! 6454: if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
! 6455:
! 6456: switch(rc)
! 6457: {
! 6458: /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
! 6459: the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
! 6460: entirely. The only way we can do that is to re-do the match at the same
! 6461: point, with a flag to force SKIP with an argument to be ignored. Just
! 6462: treating this case as NOMATCH does not work because it does not check other
! 6463: alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
! 6464:
! 6465: case MATCH_SKIP_ARG:
! 6466: new_start_match = start_match;
! 6467: md->ignore_skip_arg = TRUE;
! 6468: break;
! 6469:
! 6470: /* SKIP passes back the next starting point explicitly, but if it is the
! 6471: same as the match we have just done, treat it as NOMATCH. */
! 6472:
! 6473: case MATCH_SKIP:
! 6474: if (md->start_match_ptr != start_match)
! 6475: {
! 6476: new_start_match = md->start_match_ptr;
! 6477: break;
! 6478: }
! 6479: /* Fall through */
! 6480:
! 6481: /* NOMATCH and PRUNE advance by one character. THEN at this level acts
! 6482: exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
! 6483:
! 6484: case MATCH_NOMATCH:
! 6485: case MATCH_PRUNE:
! 6486: case MATCH_THEN:
! 6487: md->ignore_skip_arg = FALSE;
! 6488: new_start_match = start_match + 1;
! 6489: #ifdef SUPPORT_UTF8
! 6490: if (utf8)
! 6491: while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
! 6492: new_start_match++;
! 6493: #endif
! 6494: break;
! 6495:
! 6496: /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
! 6497:
! 6498: case MATCH_COMMIT:
! 6499: rc = MATCH_NOMATCH;
! 6500: goto ENDLOOP;
! 6501:
! 6502: /* Any other return is either a match, or some kind of error. */
! 6503:
! 6504: default:
! 6505: goto ENDLOOP;
! 6506: }
! 6507:
! 6508: /* Control reaches here for the various types of "no match at this point"
! 6509: result. Reset the code to MATCH_NOMATCH for subsequent checking. */
! 6510:
! 6511: rc = MATCH_NOMATCH;
! 6512:
! 6513: /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
! 6514: newline in the subject (though it may continue over the newline). Therefore,
! 6515: if we have just failed to match, starting at a newline, do not continue. */
! 6516:
! 6517: if (firstline && IS_NEWLINE(start_match)) break;
! 6518:
! 6519: /* Advance to new matching position */
! 6520:
! 6521: start_match = new_start_match;
! 6522:
! 6523: /* Break the loop if the pattern is anchored or if we have passed the end of
! 6524: the subject. */
! 6525:
! 6526: if (anchored || start_match > end_subject) break;
! 6527:
! 6528: /* If we have just passed a CR and we are now at a LF, and the pattern does
! 6529: not contain any explicit matches for \r or \n, and the newline option is CRLF
! 6530: or ANY or ANYCRLF, advance the match position by one more character. */
! 6531:
! 6532: if (start_match[-1] == CHAR_CR &&
! 6533: start_match < end_subject &&
! 6534: *start_match == CHAR_NL &&
! 6535: (re->flags & PCRE_HASCRORLF) == 0 &&
! 6536: (md->nltype == NLTYPE_ANY ||
! 6537: md->nltype == NLTYPE_ANYCRLF ||
! 6538: md->nllen == 2))
! 6539: start_match++;
! 6540:
! 6541: md->mark = NULL; /* Reset for start of next match attempt */
! 6542: } /* End of for(;;) "bumpalong" loop */
! 6543:
! 6544: /* ==========================================================================*/
! 6545:
! 6546: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
! 6547: conditions is true:
! 6548:
! 6549: (1) The pattern is anchored or the match was failed by (*COMMIT);
! 6550:
! 6551: (2) We are past the end of the subject;
! 6552:
! 6553: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
! 6554: this option requests that a match occur at or before the first newline in
! 6555: the subject.
! 6556:
! 6557: When we have a match and the offset vector is big enough to deal with any
! 6558: backreferences, captured substring offsets will already be set up. In the case
! 6559: where we had to get some local store to hold offsets for backreference
! 6560: processing, copy those that we can. In this case there need not be overflow if
! 6561: certain parts of the pattern were not used, even though there are more
! 6562: capturing parentheses than vector slots. */
! 6563:
! 6564: ENDLOOP:
! 6565:
! 6566: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
! 6567: {
! 6568: if (using_temporary_offsets)
! 6569: {
! 6570: if (arg_offset_max >= 4)
! 6571: {
! 6572: memcpy(offsets + 2, md->offset_vector + 2,
! 6573: (arg_offset_max - 2) * sizeof(int));
! 6574: DPRINTF(("Copied offsets from temporary memory\n"));
! 6575: }
! 6576: if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
! 6577: DPRINTF(("Freeing temporary memory\n"));
! 6578: (pcre_free)(md->offset_vector);
! 6579: }
! 6580:
! 6581: /* Set the return code to the number of captured strings, or 0 if there were
! 6582: too many to fit into the vector. */
! 6583:
! 6584: rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
! 6585: 0 : md->end_offset_top/2;
! 6586:
! 6587: /* If there is space in the offset vector, set any unused pairs at the end of
! 6588: the pattern to -1 for backwards compatibility. It is documented that this
! 6589: happens. In earlier versions, the whole set of potential capturing offsets
! 6590: was set to -1 each time round the loop, but this is handled differently now.
! 6591: "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
! 6592: those at the end that need unsetting here. We can't just unset them all at
! 6593: the start of the whole thing because they may get set in one branch that is
! 6594: not the final matching branch. */
! 6595:
! 6596: if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
! 6597: {
! 6598: register int *iptr, *iend;
! 6599: int resetcount = 2 + re->top_bracket * 2;
! 6600: if (resetcount > offsetcount) resetcount = ocount;
! 6601: iptr = offsets + md->end_offset_top;
! 6602: iend = offsets + resetcount;
! 6603: while (iptr < iend) *iptr++ = -1;
! 6604: }
! 6605:
! 6606: /* If there is space, set up the whole thing as substring 0. The value of
! 6607: md->start_match_ptr might be modified if \K was encountered on the successful
! 6608: matching path. */
! 6609:
! 6610: if (offsetcount < 2) rc = 0; else
! 6611: {
! 6612: offsets[0] = (int)(md->start_match_ptr - md->start_subject);
! 6613: offsets[1] = (int)(md->end_match_ptr - md->start_subject);
! 6614: }
! 6615:
! 6616: /* Return MARK data if requested */
! 6617:
! 6618: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
! 6619: *(extra_data->mark) = (unsigned char *)(md->mark);
! 6620: DPRINTF((">>>> returning %d\n", rc));
! 6621: return rc;
! 6622: }
! 6623:
! 6624: /* Control gets here if there has been an error, or if the overall match
! 6625: attempt has failed at all permitted starting positions. */
! 6626:
! 6627: if (using_temporary_offsets)
! 6628: {
! 6629: DPRINTF(("Freeing temporary memory\n"));
! 6630: (pcre_free)(md->offset_vector);
! 6631: }
! 6632:
! 6633: /* For anything other than nomatch or partial match, just return the code. */
! 6634:
! 6635: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
! 6636: {
! 6637: DPRINTF((">>>> error: returning %d\n", rc));
! 6638: return rc;
! 6639: }
! 6640:
! 6641: /* Handle partial matches - disable any mark data */
! 6642:
! 6643: if (start_partial != NULL)
! 6644: {
! 6645: DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
! 6646: md->mark = NULL;
! 6647: if (offsetcount > 1)
! 6648: {
! 6649: offsets[0] = (int)(start_partial - (USPTR)subject);
! 6650: offsets[1] = (int)(end_subject - (USPTR)subject);
! 6651: }
! 6652: rc = PCRE_ERROR_PARTIAL;
! 6653: }
! 6654:
! 6655: /* This is the classic nomatch case */
! 6656:
! 6657: else
! 6658: {
! 6659: DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
! 6660: rc = PCRE_ERROR_NOMATCH;
! 6661: }
! 6662:
! 6663: /* Return the MARK data if it has been requested. */
! 6664:
! 6665: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
! 6666: *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);
! 6667: return rc;
! 6668: }
! 6669:
! 6670: /* End of pcre_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>