Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_exec.c, revision 1.1.1.2
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
1.1.1.2 ! misho 9: Copyright (c) 1997-2012 University of Cambridge
1.1 misho 10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40: /* This module contains pcre_exec(), the externally visible function that does
41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42: possible. There are also some static supporting functions. */
43:
1.1.1.2 ! misho 44: #ifdef HAVE_CONFIG_H
1.1 misho 45: #include "config.h"
1.1.1.2 ! misho 46: #endif
1.1 misho 47:
48: #define NLBLOCK md /* Block containing newline information */
49: #define PSSTART start_subject /* Field containing processed string start */
50: #define PSEND end_subject /* Field containing processed string end */
51:
52: #include "pcre_internal.h"
53:
54: /* Undefine some potentially clashing cpp symbols */
55:
56: #undef min
57: #undef max
58:
1.1.1.2 ! misho 59: /* Values for setting in md->match_function_type to indicate two special types
! 60: of call to match(). We do it this way to save on using another stack variable,
! 61: as stack usage is to be discouraged. */
1.1 misho 62:
1.1.1.2 ! misho 63: #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
! 64: #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
1.1 misho 65:
66: /* Non-error returns from the match() function. Error returns are externally
67: defined PCRE_ERROR_xxx codes, which are all negative. */
68:
69: #define MATCH_MATCH 1
70: #define MATCH_NOMATCH 0
71:
72: /* Special internal returns from the match() function. Make them sufficiently
73: negative to avoid the external error codes. */
74:
75: #define MATCH_ACCEPT (-999)
76: #define MATCH_COMMIT (-998)
1.1.1.2 ! misho 77: #define MATCH_KETRPOS (-997)
! 78: #define MATCH_ONCE (-996)
! 79: #define MATCH_PRUNE (-995)
! 80: #define MATCH_SKIP (-994)
! 81: #define MATCH_SKIP_ARG (-993)
! 82: #define MATCH_THEN (-992)
1.1 misho 83:
84: /* Maximum number of ints of offset to save on the stack for recursive calls.
85: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
86: because the offset vector is always a multiple of 3 long. */
87:
88: #define REC_STACK_SAVE_MAX 30
89:
90: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
91:
92: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
93: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
94:
95: #ifdef PCRE_DEBUG
96: /*************************************************
97: * Debugging function to print chars *
98: *************************************************/
99:
100: /* Print a sequence of chars in printable format, stopping at the end of the
101: subject if the requested.
102:
103: Arguments:
104: p points to characters
105: length number to print
106: is_subject TRUE if printing from within md->start_subject
107: md pointer to matching data block, if is_subject is TRUE
108:
109: Returns: nothing
110: */
111:
112: static void
1.1.1.2 ! misho 113: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1 misho 114: {
1.1.1.2 ! misho 115: pcre_uint32 c;
! 116: BOOL utf = md->utf;
1.1 misho 117: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
118: while (length-- > 0)
1.1.1.2 ! misho 119: if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
1.1 misho 120: }
121: #endif
122:
123:
124:
125: /*************************************************
126: * Match a back-reference *
127: *************************************************/
128:
1.1.1.2 ! misho 129: /* Normally, if a back reference hasn't been set, the length that is passed is
! 130: negative, so the match always fails. However, in JavaScript compatibility mode,
! 131: the length passed is zero. Note that in caseless UTF-8 mode, the number of
! 132: subject bytes matched may be different to the number of reference bytes.
1.1 misho 133:
134: Arguments:
135: offset index into the offset vector
1.1.1.2 ! misho 136: eptr pointer into the subject
! 137: length length of reference to be matched (number of bytes)
1.1 misho 138: md points to match data block
1.1.1.2 ! misho 139: caseless TRUE if caseless
1.1 misho 140:
1.1.1.2 ! misho 141: Returns: >= 0 the number of subject bytes matched
! 142: -1 no match
! 143: -2 partial match; always given if at end subject
1.1 misho 144: */
145:
1.1.1.2 ! misho 146: static int
! 147: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
! 148: BOOL caseless)
1.1 misho 149: {
1.1.1.2 ! misho 150: PCRE_PUCHAR eptr_start = eptr;
! 151: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
! 152: #ifdef SUPPORT_UTF
! 153: BOOL utf = md->utf;
! 154: #endif
1.1 misho 155:
156: #ifdef PCRE_DEBUG
157: if (eptr >= md->end_subject)
158: printf("matching subject <null>");
159: else
160: {
161: printf("matching subject ");
162: pchars(eptr, length, TRUE, md);
163: }
164: printf(" against backref ");
165: pchars(p, length, FALSE, md);
166: printf("\n");
167: #endif
168:
1.1.1.2 ! misho 169: /* Always fail if reference not set (and not JavaScript compatible - in that
! 170: case the length is passed as zero). */
1.1 misho 171:
1.1.1.2 ! misho 172: if (length < 0) return -1;
1.1 misho 173:
174: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175: properly if Unicode properties are supported. Otherwise, we can check only
176: ASCII characters. */
177:
1.1.1.2 ! misho 178: if (caseless)
1.1 misho 179: {
1.1.1.2 ! misho 180: #ifdef SUPPORT_UTF
1.1 misho 181: #ifdef SUPPORT_UCP
1.1.1.2 ! misho 182: if (utf)
1.1 misho 183: {
1.1.1.2 ! misho 184: /* Match characters up to the end of the reference. NOTE: the number of
! 185: data units matched may differ, because in UTF-8 there are some characters
! 186: whose upper and lower case versions code have different numbers of bytes.
! 187: For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
! 188: (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
! 189: sequence of two of the latter. It is important, therefore, to check the
! 190: length along the reference, not along the subject (earlier code did this
! 191: wrong). */
! 192:
! 193: PCRE_PUCHAR endptr = p + length;
! 194: while (p < endptr)
! 195: {
! 196: pcre_uint32 c, d;
! 197: const ucd_record *ur;
! 198: if (eptr >= md->end_subject) return -2; /* Partial match */
1.1 misho 199: GETCHARINC(c, eptr);
200: GETCHARINC(d, p);
1.1.1.2 ! misho 201: ur = GET_UCD(d);
! 202: if (c != d && c != d + ur->other_case)
! 203: {
! 204: const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
! 205: for (;;)
! 206: {
! 207: if (c < *pp) return -1;
! 208: if (c == *pp++) break;
! 209: }
! 210: }
1.1 misho 211: }
212: }
213: else
214: #endif
215: #endif
216:
217: /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
218: is no UCP support. */
1.1.1.2 ! misho 219: {
! 220: while (length-- > 0)
! 221: {
! 222: pcre_uchar cc, cp;
! 223: if (eptr >= md->end_subject) return -2; /* Partial match */
! 224: cc = RAWUCHARTEST(eptr);
! 225: cp = RAWUCHARTEST(p);
! 226: if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
! 227: p++;
! 228: eptr++;
! 229: }
! 230: }
1.1 misho 231: }
232:
233: /* In the caseful case, we can just compare the bytes, whether or not we
234: are in UTF-8 mode. */
235:
236: else
1.1.1.2 ! misho 237: {
! 238: while (length-- > 0)
! 239: {
! 240: if (eptr >= md->end_subject) return -2; /* Partial match */
! 241: if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
! 242: }
! 243: }
1.1 misho 244:
1.1.1.2 ! misho 245: return (int)(eptr - eptr_start);
1.1 misho 246: }
247:
248:
249:
250: /***************************************************************************
251: ****************************************************************************
252: RECURSION IN THE match() FUNCTION
253:
254: The match() function is highly recursive, though not every recursive call
255: increases the recursive depth. Nevertheless, some regular expressions can cause
256: it to recurse to a great depth. I was writing for Unix, so I just let it call
257: itself recursively. This uses the stack for saving everything that has to be
258: saved for a recursive call. On Unix, the stack can be large, and this works
259: fine.
260:
261: It turns out that on some non-Unix-like systems there are problems with
262: programs that use a lot of stack. (This despite the fact that every last chip
263: has oodles of memory these days, and techniques for extending the stack have
264: been known for decades.) So....
265:
266: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
267: calls by keeping local variables that need to be preserved in blocks of memory
268: obtained from malloc() instead of on the stack. Macros are used to
269: achieve this so that the actual code doesn't look very different to what it
270: always used to.
271:
272: The original heap-recursive code used longjmp(). However, it seems that this
273: can be very slow on some operating systems. Following a suggestion from Stan
274: Switzer, the use of longjmp() has been abolished, at the cost of having to
275: provide a unique number for each call to RMATCH. There is no way of generating
276: a sequence of numbers at compile time in C. I have given them names, to make
277: them stand out more clearly.
278:
279: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
280: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
281: tests. Furthermore, not using longjmp() means that local dynamic variables
282: don't have indeterminate values; this has meant that the frame size can be
283: reduced because the result can be "passed back" by straight setting of the
284: variable instead of being passed in the frame.
285: ****************************************************************************
286: ***************************************************************************/
287:
288: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
289: below must be updated in sync. */
290:
291: enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
292: RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
293: RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
294: RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
295: RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
296: RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
1.1.1.2 ! misho 297: RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
1.1 misho 298:
299: /* These versions of the macros use the stack, as normal. There are debugging
300: versions and production versions. Note that the "rw" argument of RMATCH isn't
301: actually used in this definition. */
302:
303: #ifndef NO_RECURSE
304: #define REGISTER register
305:
/* Stack-recursion forms. RMATCH leaves the result of a nested match() call in
rrc, passing on the caller's mstart and a depth of rdepth+1; its final argument
(rw) is accepted but does not appear in the expansion. RRETURN is an ordinary
return. The PCRE_DEBUG variants additionally trace the call and the return,
with the source line number, on stdout. */

#ifndef PCRE_DEBUG

#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match((ra),(rb),mstart,(rc),(rd),(re),rdepth+1)
#define RRETURN(ra) return (ra)

#else

#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match((ra),(rb),mstart,(rc),(rd),(re),rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", (ra), __LINE__); \
  return (ra); \
  }

#endif
323:
324: #else
325:
326:
327: /* These versions of the macros manage a private stack on the heap. Note that
328: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
329: argument of match(), which never changes. */
330:
331: #define REGISTER
332:
/* Heap-frame form of RMATCH. The frame following the current one is reused if
it already exists; otherwise one is obtained from the stack_malloc hook and
appended to the chain. The resume label number (rw) is recorded in the current
frame, the callee's arguments are stored in the next frame, and control jumps
to HEAP_RECURSE. The label L_<rw> marks the point to which control is expected
to come back afterwards. The names newframe and oldframe are deliberately
long-winded: they live in the caller's scope while the macro arguments are
being evaluated. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof *newframe);\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  newframe->Xprevframe = frame;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xeptr = (ra);\
  newframe->Xecode = (rb);\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = (rc);\
  newframe->Xeptrb = (re);\
  frame->Xwhere = (rw);\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* Heap-frame form of RRETURN. Step back to the previous frame. If there is
none, this was the outermost activation and the value is really returned;
otherwise the value is left in rrc and control goes to HEAP_RETURN. Note that
the argument is evaluated only after frame has been stepped back. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame == NULL) return (ra);\
  rrc = (ra);\
  goto HEAP_RETURN;\
  }
369:
370:
371: /* Structure for remembering the local variables in a private frame */
372:
373: typedef struct heapframe {
374: struct heapframe *Xprevframe;
1.1.1.2 ! misho 375: struct heapframe *Xnextframe;
1.1 misho 376:
377: /* Function arguments that may change */
378:
1.1.1.2 ! misho 379: PCRE_PUCHAR Xeptr;
! 380: const pcre_uchar *Xecode;
! 381: PCRE_PUCHAR Xmstart;
1.1 misho 382: int Xoffset_top;
383: eptrblock *Xeptrb;
384: unsigned int Xrdepth;
385:
386: /* Function local variables */
387:
1.1.1.2 ! misho 388: PCRE_PUCHAR Xcallpat;
! 389: #ifdef SUPPORT_UTF
! 390: PCRE_PUCHAR Xcharptr;
! 391: #endif
! 392: PCRE_PUCHAR Xdata;
! 393: PCRE_PUCHAR Xnext;
! 394: PCRE_PUCHAR Xpp;
! 395: PCRE_PUCHAR Xprev;
! 396: PCRE_PUCHAR Xsaved_eptr;
1.1 misho 397:
398: recursion_info Xnew_recursive;
399:
400: BOOL Xcur_is_word;
401: BOOL Xcondition;
402: BOOL Xprev_is_word;
403:
404: #ifdef SUPPORT_UCP
405: int Xprop_type;
1.1.1.2 ! misho 406: unsigned int Xprop_value;
1.1 misho 407: int Xprop_fail_result;
408: int Xoclength;
1.1.1.2 ! misho 409: pcre_uchar Xocchars[6];
1.1 misho 410: #endif
411:
412: int Xcodelink;
413: int Xctype;
414: unsigned int Xfc;
415: int Xfi;
416: int Xlength;
417: int Xmax;
418: int Xmin;
419: int Xnumber;
420: int Xoffset;
421: int Xop;
422: int Xsave_capture_last;
423: int Xsave_offset1, Xsave_offset2, Xsave_offset3;
424: int Xstacksave[REC_STACK_SAVE_MAX];
425:
426: eptrblock Xnewptrb;
427:
428: /* Where to jump back to */
429:
430: int Xwhere;
431:
432: } heapframe;
433:
434: #endif
435:
436:
437: /***************************************************************************
438: ***************************************************************************/
439:
440:
441:
442: /*************************************************
443: * Match from current position *
444: *************************************************/
445:
446: /* This function is called recursively in many circumstances. Whenever it
447: returns a negative (error) response, the outer incarnation must also return the
448: same response. */
449:
450: /* These macros pack up tests that are used for partial matching, and which
1.1.1.2 ! misho 451: appear several times in the code. We set the "hit end" flag if the pointer is
1.1 misho 452: at the end of the subject and also past the start of the subject (i.e.
453: something has been matched). For hard partial matching, we then return
454: immediately. The second one is used when we already know we are past the end of
455: the subject. */
456:
/* CHECK_PARTIAL: when partial matching is enabled and the subject pointer is
both at (or beyond) the end of the subject and beyond md->start_used_ptr, note
that the end was hit; if md->partial is greater than 1, give up at once with
PCRE_ERROR_PARTIAL. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && md->end_subject <= eptr && \
      md->start_used_ptr < eptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

/* SCHECK_PARTIAL: the same, minus the end-of-subject test, for places where
the caller has already established that the end has been reached. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && md->start_used_ptr < eptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
471:
472:
473: /* Performance note: It might be tempting to extract commonly used fields from
1.1.1.2 ! misho 474: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1 misho 475: performance. Tests using gcc on a SPARC disproved this; in the first case, it
476: made performance worse.
477:
478: Arguments:
479: eptr pointer to current character in subject
480: ecode pointer to current position in compiled code
481: mstart pointer to the current match start position (can be modified
482: by encountering \K)
483: offset_top current top pointer
484: md pointer to "static" info for the match
485: eptrb pointer to chain of blocks containing eptr at start of
486: brackets - for testing for empty matches
487: rdepth the recursion depth
488:
489: Returns: MATCH_MATCH if matched ) these values are >= 0
490: MATCH_NOMATCH if failed to match )
491: a negative MATCH_xxx value for PRUNE, SKIP, etc
492: a negative PCRE_ERROR_xxx value if aborted by an error condition
493: (e.g. stopped by repeated call or recursion limit)
494: */
495:
496: static int
1.1.1.2 ! misho 497: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
! 498: PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
! 499: unsigned int rdepth)
1.1 misho 500: {
501: /* These variables do not need to be preserved over recursion in this function,
502: so they can be ordinary variables in all cases. Mark some of them with
503: "register" because they are used a lot in loops. */
504:
505: register int rrc; /* Returns from recursive calls */
506: register int i; /* Used for loops not involving calls to RMATCH() */
1.1.1.2 ! misho 507: register pcre_uint32 c; /* Character values not kept over RMATCH() calls */
! 508: register BOOL utf; /* Local copy of UTF flag for speed */
1.1 misho 509:
510: BOOL minimize, possessive; /* Quantifier options */
1.1.1.2 ! misho 511: BOOL caseless;
1.1 misho 512: int condcode;
513:
514: /* When recursion is not being used, all "local" variables that have to be
1.1.1.2 ! misho 515: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
! 516: frame on the stack here; subsequent instantiations are obtained from the heap
! 517: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
! 518: the top-level on the stack rather than malloc-ing them all gives a performance
! 519: boost in many cases where there is not much "recursion". */
1.1 misho 520:
521: #ifdef NO_RECURSE
1.1.1.2 ! misho 522: heapframe *frame = (heapframe *)md->match_frames_base;
1.1 misho 523:
524: /* Copy in the original argument variables */
525:
526: frame->Xeptr = eptr;
527: frame->Xecode = ecode;
528: frame->Xmstart = mstart;
529: frame->Xoffset_top = offset_top;
530: frame->Xeptrb = eptrb;
531: frame->Xrdepth = rdepth;
532:
533: /* This is the point to which control jumps back in order to effect "recursion" */
534:
535: HEAP_RECURSE:
536:
537: /* Macros make the argument variables come from the current frame */
538:
539: #define eptr frame->Xeptr
540: #define ecode frame->Xecode
541: #define mstart frame->Xmstart
542: #define offset_top frame->Xoffset_top
543: #define eptrb frame->Xeptrb
544: #define rdepth frame->Xrdepth
545:
546: /* Ditto for the local variables */
547:
1.1.1.2 ! misho 548: #ifdef SUPPORT_UTF
1.1 misho 549: #define charptr frame->Xcharptr
550: #endif
551: #define callpat frame->Xcallpat
552: #define codelink frame->Xcodelink
553: #define data frame->Xdata
554: #define next frame->Xnext
555: #define pp frame->Xpp
556: #define prev frame->Xprev
557: #define saved_eptr frame->Xsaved_eptr
558:
559: #define new_recursive frame->Xnew_recursive
560:
561: #define cur_is_word frame->Xcur_is_word
562: #define condition frame->Xcondition
563: #define prev_is_word frame->Xprev_is_word
564:
565: #ifdef SUPPORT_UCP
566: #define prop_type frame->Xprop_type
567: #define prop_value frame->Xprop_value
568: #define prop_fail_result frame->Xprop_fail_result
569: #define oclength frame->Xoclength
570: #define occhars frame->Xocchars
571: #endif
572:
573: #define ctype frame->Xctype
574: #define fc frame->Xfc
575: #define fi frame->Xfi
576: #define length frame->Xlength
577: #define max frame->Xmax
578: #define min frame->Xmin
579: #define number frame->Xnumber
580: #define offset frame->Xoffset
581: #define op frame->Xop
582: #define save_capture_last frame->Xsave_capture_last
583: #define save_offset1 frame->Xsave_offset1
584: #define save_offset2 frame->Xsave_offset2
585: #define save_offset3 frame->Xsave_offset3
586: #define stacksave frame->Xstacksave
587:
588: #define newptrb frame->Xnewptrb
589:
590: /* When recursion is being used, local variables are allocated on the stack and
591: get preserved during recursion in the normal way. In this environment, fi and
592: i, and fc and c, can be the same variables. */
593:
594: #else /* NO_RECURSE not defined */
595: #define fi i
596: #define fc c
597:
1.1.1.2 ! misho 598: /* Many of the following variables are used only in small blocks of the code.
! 599: My normal style of coding would have declared them within each of those blocks.
! 600: However, in order to accommodate the version of this code that uses an external
! 601: "stack" implemented on the heap, it is easier to declare them all here, so the
! 602: declarations can be cut out in a block. The only declarations within blocks
! 603: below are for variables that do not have to be preserved over a recursive call
! 604: to RMATCH(). */
! 605:
! 606: #ifdef SUPPORT_UTF
! 607: const pcre_uchar *charptr;
! 608: #endif
! 609: const pcre_uchar *callpat;
! 610: const pcre_uchar *data;
! 611: const pcre_uchar *next;
! 612: PCRE_PUCHAR pp;
! 613: const pcre_uchar *prev;
! 614: PCRE_PUCHAR saved_eptr;
! 615:
! 616: recursion_info new_recursive;
1.1 misho 617:
1.1.1.2 ! misho 618: BOOL cur_is_word;
1.1 misho 619: BOOL condition;
620: BOOL prev_is_word;
621:
622: #ifdef SUPPORT_UCP
623: int prop_type;
1.1.1.2 ! misho 624: unsigned int prop_value;
1.1 misho 625: int prop_fail_result;
626: int oclength;
1.1.1.2 ! misho 627: pcre_uchar occhars[6];
1.1 misho 628: #endif
629:
630: int codelink;
631: int ctype;
632: int length;
633: int max;
634: int min;
1.1.1.2 ! misho 635: unsigned int number;
1.1 misho 636: int offset;
1.1.1.2 ! misho 637: pcre_uchar op;
1.1 misho 638: int save_capture_last;
639: int save_offset1, save_offset2, save_offset3;
640: int stacksave[REC_STACK_SAVE_MAX];
641:
642: eptrblock newptrb;
1.1.1.2 ! misho 643:
! 644: /* There is a special fudge for calling match() in a way that causes it to
! 645: measure the size of its basic stack frame when the stack is being used for
! 646: recursion. The second argument (ecode) being NULL triggers this behaviour. It
! 647: cannot normally ever be NULL. The return is the negated value of the frame
! 648: size. */
! 649:
! 650: if (ecode == NULL)
! 651: {
! 652: if (rdepth == 0)
! 653: return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
! 654: else
! 655: {
! 656: int len = (char *)&rdepth - (char *)eptr;
! 657: return (len > 0)? -len : len;
! 658: }
! 659: }
1.1 misho 660: #endif /* NO_RECURSE */
661:
1.1.1.2 ! misho 662: /* To save space on the stack and in the heap frame, I have doubled up on some
! 663: of the local variables that are used only in localised parts of the code, but
! 664: still need to be preserved over recursive calls of match(). These macros define
! 665: the alternative names that are used. */
! 666:
! 667: #define allow_zero cur_is_word
! 668: #define cbegroup condition
! 669: #define code_offset codelink
! 670: #define condassert condition
! 671: #define matched_once prev_is_word
! 672: #define foc number
! 673: #define save_mark data
! 674:
1.1 misho 675: /* These statements are here to stop the compiler complaining about uninitialized
676: variables. */
677:
678: #ifdef SUPPORT_UCP
679: prop_value = 0;
680: prop_fail_result = 0;
681: #endif
682:
683:
684: /* This label is used for tail recursion, which is used in a few cases even
685: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
686: used. Thanks to Ian Taylor for noticing this possibility and sending the
687: original patch. */
688:
689: TAIL_RECURSE:
690:
691: /* OK, now we can get on with the real code of the function. Recursive calls
692: are specified by the macro RMATCH and RRETURN is used to return. When
693: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
694: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
695: defined). However, RMATCH isn't like a function call because it's quite a
696: complicated macro. It has to be used in one particular way. This shouldn't,
697: however, impact performance when true recursion is being used. */
698:
1.1.1.2 ! misho 699: #ifdef SUPPORT_UTF
! 700: utf = md->utf; /* Local copy of the flag */
1.1 misho 701: #else
1.1.1.2 ! misho 702: utf = FALSE;
1.1 misho 703: #endif
704:
705: /* First check that we haven't called match() too many times, or that we
706: haven't exceeded the recursive call limit. */
707:
708: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
709: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
710:
711: /* At the start of a group with an unlimited repeat that may match an empty
1.1.1.2 ! misho 712: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
! 713: done this way to save having to use another function argument, which would take
! 714: up space on the stack. See also MATCH_CONDASSERT below.
! 715:
! 716: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
! 717: such remembered pointers, to be checked when we hit the closing ket, in order
! 718: to break infinite loops that match no characters. When match() is called in
! 719: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
! 720: NOT be used with tail recursion, because the memory block that is used is on
! 721: the stack, so a new one may be required for each match(). */
1.1 misho 722:
1.1.1.2 ! misho 723: if (md->match_function_type == MATCH_CBEGROUP)
1.1 misho 724: {
725: newptrb.epb_saved_eptr = eptr;
726: newptrb.epb_prev = eptrb;
727: eptrb = &newptrb;
1.1.1.2 ! misho 728: md->match_function_type = 0;
1.1 misho 729: }
730:
731: /* Now start processing the opcodes. */
732:
733: for (;;)
734: {
735: minimize = possessive = FALSE;
736: op = *ecode;
737:
738: switch(op)
739: {
740: case OP_MARK:
1.1.1.2 ! misho 741: md->nomatch_mark = ecode + 2;
! 742: md->mark = NULL; /* In case previously set by assertion */
! 743: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
! 744: eptrb, RM55);
! 745: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 746: md->mark == NULL) md->mark = ecode + 2;
1.1 misho 747:
748: /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
749: argument, and we must check whether that argument matches this MARK's
750: argument. It is passed back in md->start_match_ptr (an overloading of that
751: variable). If it does match, we reset that variable to the current subject
752: position and return MATCH_SKIP. Otherwise, pass back the return code
753: unaltered. */
754:
1.1.1.2 ! misho 755: else if (rrc == MATCH_SKIP_ARG &&
! 756: STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
1.1 misho 757: {
758: md->start_match_ptr = eptr;
759: RRETURN(MATCH_SKIP);
760: }
761: RRETURN(rrc);
762:
763: case OP_FAIL:
1.1.1.2 ! misho 764: RRETURN(MATCH_NOMATCH);
1.1 misho 765:
766: /* COMMIT overrides PRUNE, SKIP, and THEN */
767:
768: case OP_COMMIT:
1.1.1.2 ! misho 769: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 770: eptrb, RM52);
1.1 misho 771: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
772: rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
773: rrc != MATCH_THEN)
774: RRETURN(rrc);
1.1.1.2 ! misho 775: RRETURN(MATCH_COMMIT);
1.1 misho 776:
777: /* PRUNE overrides THEN */
778:
779: case OP_PRUNE:
1.1.1.2 ! misho 780: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 781: eptrb, RM51);
1.1 misho 782: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1.1.2 ! misho 783: RRETURN(MATCH_PRUNE);
1.1 misho 784:
785: case OP_PRUNE_ARG:
1.1.1.2 ! misho 786: md->nomatch_mark = ecode + 2;
! 787: md->mark = NULL; /* In case previously set by assertion */
! 788: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
! 789: eptrb, RM56);
! 790: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 791: md->mark == NULL) md->mark = ecode + 2;
1.1 misho 792: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
793: RRETURN(MATCH_PRUNE);
794:
795: /* SKIP overrides PRUNE and THEN */
796:
797: case OP_SKIP:
1.1.1.2 ! misho 798: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 799: eptrb, RM53);
1.1 misho 800: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
801: RRETURN(rrc);
802: md->start_match_ptr = eptr; /* Pass back current position */
1.1.1.2 ! misho 803: RRETURN(MATCH_SKIP);
! 804:
! 805: /* Note that, for Perl compatibility, SKIP with an argument does NOT set
! 806: nomatch_mark. There is a flag that disables this opcode when re-matching a
! 807: pattern that ended with a SKIP for which there was not a matching MARK. */
1.1 misho 808:
809: case OP_SKIP_ARG:
1.1.1.2 ! misho 810: if (md->ignore_skip_arg)
! 811: {
! 812: ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
! 813: break;
! 814: }
! 815: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
! 816: eptrb, RM57);
1.1 misho 817: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
818: RRETURN(rrc);
819:
820: /* Pass back the current skip name by overloading md->start_match_ptr and
821: returning the special MATCH_SKIP_ARG return code. This will either be
1.1.1.2 ! misho 822: caught by a matching MARK, or get to the top, where it causes a rematch
! 823: with the md->ignore_skip_arg flag set. */
1.1 misho 824:
825: md->start_match_ptr = ecode + 2;
826: RRETURN(MATCH_SKIP_ARG);
827:
1.1.1.2 ! misho 828: /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
! 829: the branch in which it occurs can be determined. Overload the start of
! 830: match pointer to do this. */
1.1 misho 831:
832: case OP_THEN:
1.1.1.2 ! misho 833: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 834: eptrb, RM54);
1.1 misho 835: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 836: md->start_match_ptr = ecode;
! 837: RRETURN(MATCH_THEN);
1.1 misho 838:
839: case OP_THEN_ARG:
1.1.1.2 ! misho 840: md->nomatch_mark = ecode + 2;
! 841: md->mark = NULL; /* In case previously set by assertion */
! 842: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
! 843: md, eptrb, RM58);
! 844: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
! 845: md->mark == NULL) md->mark = ecode + 2;
1.1 misho 846: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 847: md->start_match_ptr = ecode;
1.1 misho 848: RRETURN(MATCH_THEN);
849:
1.1.1.2 ! misho 850: /* Handle an atomic group that does not contain any capturing parentheses.
! 851: This can be handled like an assertion. Prior to 8.13, all atomic groups
! 852: were handled this way. In 8.13, the code was changed as below for ONCE, so
! 853: that backups pass through the group and thereby reset captured values.
! 854: However, this uses a lot more stack, so in 8.20, atomic groups that do not
! 855: contain any captures generate OP_ONCE_NC, which can be handled in the old,
! 856: less stack intensive way.
! 857:
! 858: Check the alternative branches in turn - the matching won't pass the KET
! 859: for this kind of subpattern. If any one branch matches, we carry on as at
! 860: the end of a normal bracket, leaving the subject pointer, but resetting
! 861: the start-of-match value in case it was changed by \K. */
! 862:
! 863: case OP_ONCE_NC:
! 864: prev = ecode;
! 865: saved_eptr = eptr;
! 866: save_mark = md->mark;
! 867: do
! 868: {
! 869: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
! 870: if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
! 871: {
! 872: mstart = md->start_match_ptr;
! 873: break;
! 874: }
! 875: if (rrc == MATCH_THEN)
! 876: {
! 877: next = ecode + GET(ecode,1);
! 878: if (md->start_match_ptr < next &&
! 879: (*ecode == OP_ALT || *next == OP_ALT))
! 880: rrc = MATCH_NOMATCH;
! 881: }
! 882:
! 883: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 884: ecode += GET(ecode,1);
! 885: md->mark = save_mark;
! 886: }
! 887: while (*ecode == OP_ALT);
! 888:
! 889: /* If hit the end of the group (which could be repeated), fail */
! 890:
! 891: if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
! 892:
! 893: /* Continue as from after the group, updating the offsets high water
! 894: mark, since extracts may have been taken. */
! 895:
! 896: do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
! 897:
! 898: offset_top = md->end_offset_top;
! 899: eptr = md->end_match_ptr;
! 900:
! 901: /* For a non-repeating ket, just continue at this level. This also
! 902: happens for a repeating ket if no characters were matched in the group.
! 903: This is the forcible breaking of infinite loops as implemented in Perl
! 904: 5.005. */
! 905:
! 906: if (*ecode == OP_KET || eptr == saved_eptr)
! 907: {
! 908: ecode += 1+LINK_SIZE;
! 909: break;
! 910: }
! 911:
! 912: /* The repeating kets try the rest of the pattern or restart from the
! 913: preceding bracket, in the appropriate order. The second "call" of match()
! 914: uses tail recursion, to avoid using another stack frame. */
! 915:
! 916: if (*ecode == OP_KETRMIN)
! 917: {
! 918: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
! 919: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 920: ecode = prev;
! 921: goto TAIL_RECURSE;
! 922: }
! 923: else /* OP_KETRMAX */
! 924: {
! 925: RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
! 926: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 927: ecode += 1 + LINK_SIZE;
! 928: goto TAIL_RECURSE;
! 929: }
! 930: /* Control never gets here */
! 931:
! 932: /* Handle a capturing bracket, other than those that are possessive with an
! 933: unlimited repeat. If there is space in the offset vector, save the current
! 934: subject position in the working slot at the top of the vector. We mustn't
! 935: change the current values of the data slot, because they may be set from a
! 936: previous iteration of this group, and be referred to by a reference inside
! 937: the group. A failure to match might occur after the group has succeeded,
! 938: if something later on doesn't match. For this reason, we need to restore
! 939: the working value and also the values of the final offsets, in case they
! 940: were set by a previous iteration of the same bracket.
1.1 misho 941:
942: If there isn't enough space in the offset vector, treat this as if it were
943: a non-capturing bracket. Don't worry about setting the flag for the error
944: case here; that is handled in the code for KET. */
945:
946: case OP_CBRA:
947: case OP_SCBRA:
948: number = GET2(ecode, 1+LINK_SIZE);
949: offset = number << 1;
950:
951: #ifdef PCRE_DEBUG
952: printf("start bracket %d\n", number);
953: printf("subject=");
954: pchars(eptr, 16, TRUE, md);
955: printf("\n");
956: #endif
957:
958: if (offset < md->offset_max)
959: {
960: save_offset1 = md->offset_vector[offset];
961: save_offset2 = md->offset_vector[offset+1];
962: save_offset3 = md->offset_vector[md->offset_end - number];
963: save_capture_last = md->capture_last;
1.1.1.2 ! misho 964: save_mark = md->mark;
1.1 misho 965:
966: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
967: md->offset_vector[md->offset_end - number] =
968: (int)(eptr - md->start_subject);
969:
1.1.1.2 ! misho 970: for (;;)
1.1 misho 971: {
1.1.1.2 ! misho 972: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 973: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 974: eptrb, RM1);
! 975: if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
! 976:
! 977: /* If we backed up to a THEN, check whether it is within the current
! 978: branch by comparing the address of the THEN that is passed back with
! 979: the end of the branch. If it is within the current branch, and the
! 980: branch is one of two or more alternatives (it either starts or ends
! 981: with OP_ALT), we have reached the limit of THEN's action, so convert
! 982: the return code to NOMATCH, which will cause normal backtracking to
! 983: happen from now on. Otherwise, THEN is passed back to an outer
! 984: alternative. This implements Perl's treatment of parenthesized groups,
! 985: where a group not containing | does not affect the current alternative,
! 986: that is, (X) is NOT the same as (X|(*F)). */
! 987:
! 988: if (rrc == MATCH_THEN)
! 989: {
! 990: next = ecode + GET(ecode,1);
! 991: if (md->start_match_ptr < next &&
! 992: (*ecode == OP_ALT || *next == OP_ALT))
! 993: rrc = MATCH_NOMATCH;
! 994: }
! 995:
! 996: /* Anything other than NOMATCH is passed back. */
! 997:
! 998: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1 misho 999: md->capture_last = save_capture_last;
1000: ecode += GET(ecode, 1);
1.1.1.2 ! misho 1001: md->mark = save_mark;
! 1002: if (*ecode != OP_ALT) break;
1.1 misho 1003: }
1004:
1005: DPRINTF(("bracket %d failed\n", number));
1006: md->offset_vector[offset] = save_offset1;
1007: md->offset_vector[offset+1] = save_offset2;
1008: md->offset_vector[md->offset_end - number] = save_offset3;
1009:
1.1.1.2 ! misho 1010: /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
! 1011:
! 1012: RRETURN(rrc);
1.1 misho 1013: }
1014:
1015: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1016: as a non-capturing bracket. */
1017:
1018: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1019: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1020:
1021: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1022:
1023: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1024: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1025:
1.1.1.2 ! misho 1026: /* Non-capturing or atomic group, except for possessive with unlimited
! 1027: repeat and ONCE group with no captures. Loop for all the alternatives.
! 1028:
! 1029: When we get to the final alternative within the brackets, we used to return
! 1030: the result of a recursive call to match() whatever happened so it was
! 1031: possible to reduce stack usage by turning this into a tail recursion,
! 1032: except in the case of a possibly empty group. However, now that there is
! 1033: the possibility of (*THEN) occurring in the final alternative, this
! 1034: optimization is no longer always possible.
! 1035:
! 1036: We can optimize if we know there are no (*THEN)s in the pattern; at present
! 1037: this is the best that can be done.
! 1038:
! 1039: MATCH_ONCE is returned when the end of an atomic group is successfully
! 1040: reached, but subsequent matching fails. It passes back up the tree (causing
! 1041: captured values to be reset) until the original atomic group level is
! 1042: reached. This is tested by comparing md->once_target with the start of the
! 1043: group. At this point, the return is converted into MATCH_NOMATCH so that
! 1044: previous backup points can be taken. */
1.1 misho 1045:
1.1.1.2 ! misho 1046: case OP_ONCE:
1.1 misho 1047: case OP_BRA:
1048: case OP_SBRA:
1049: DPRINTF(("start non-capturing bracket\n"));
1.1.1.2 ! misho 1050:
1.1 misho 1051: for (;;)
1052: {
1.1.1.2 ! misho 1053: if (op >= OP_SBRA || op == OP_ONCE)
! 1054: md->match_function_type = MATCH_CBEGROUP;
! 1055:
! 1056: /* If this is not a possibly empty group, and there are no (*THEN)s in
! 1057: the pattern, and this is the final alternative, optimize as described
! 1058: above. */
! 1059:
! 1060: else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1.1 misho 1061: {
1.1.1.2 ! misho 1062: ecode += PRIV(OP_lengths)[*ecode];
! 1063: goto TAIL_RECURSE;
! 1064: }
! 1065:
! 1066: /* In all other cases, we have to make another call to match(). */
! 1067:
! 1068: save_mark = md->mark;
! 1069: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
! 1070: RM2);
! 1071:
! 1072: /* See comment in the code for capturing groups above about handling
! 1073: THEN. */
! 1074:
! 1075: if (rrc == MATCH_THEN)
! 1076: {
! 1077: next = ecode + GET(ecode,1);
! 1078: if (md->start_match_ptr < next &&
! 1079: (*ecode == OP_ALT || *next == OP_ALT))
! 1080: rrc = MATCH_NOMATCH;
! 1081: }
! 1082:
! 1083: if (rrc != MATCH_NOMATCH)
! 1084: {
! 1085: if (rrc == MATCH_ONCE)
1.1 misho 1086: {
1.1.1.2 ! misho 1087: const pcre_uchar *scode = ecode;
! 1088: if (*scode != OP_ONCE) /* If not at start, find it */
! 1089: {
! 1090: while (*scode == OP_ALT) scode += GET(scode, 1);
! 1091: scode -= GET(scode, 1);
! 1092: }
! 1093: if (md->once_target == scode) rrc = MATCH_NOMATCH;
1.1 misho 1094: }
1.1.1.2 ! misho 1095: RRETURN(rrc);
! 1096: }
! 1097: ecode += GET(ecode, 1);
! 1098: md->mark = save_mark;
! 1099: if (*ecode != OP_ALT) break;
! 1100: }
1.1 misho 1101:
1.1.1.2 ! misho 1102: RRETURN(MATCH_NOMATCH);
1.1 misho 1103:
1.1.1.2 ! misho 1104: /* Handle possessive capturing brackets with an unlimited repeat. We come
! 1105: here from BRAZERO with allow_zero set TRUE. The offset_vector values are
! 1106: handled similarly to the normal case above. However, the matching is
! 1107: different. The end of these brackets will always be OP_KETRPOS, which
! 1108: returns MATCH_KETRPOS without going further in the pattern. By this means
! 1109: we can handle the group by iteration rather than recursion, thereby
! 1110: reducing the amount of stack needed. */
! 1111:
! 1112: case OP_CBRAPOS:
! 1113: case OP_SCBRAPOS:
! 1114: allow_zero = FALSE;
! 1115:
! 1116: POSSESSIVE_CAPTURE:
! 1117: number = GET2(ecode, 1+LINK_SIZE);
! 1118: offset = number << 1;
! 1119:
! 1120: #ifdef PCRE_DEBUG
! 1121: printf("start possessive bracket %d\n", number);
! 1122: printf("subject=");
! 1123: pchars(eptr, 16, TRUE, md);
! 1124: printf("\n");
! 1125: #endif
! 1126:
! 1127: if (offset < md->offset_max)
! 1128: {
! 1129: matched_once = FALSE;
! 1130: code_offset = (int)(ecode - md->start_code);
! 1131:
! 1132: save_offset1 = md->offset_vector[offset];
! 1133: save_offset2 = md->offset_vector[offset+1];
! 1134: save_offset3 = md->offset_vector[md->offset_end - number];
! 1135: save_capture_last = md->capture_last;
! 1136:
! 1137: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
! 1138:
! 1139: /* Each time round the loop, save the current subject position for use
! 1140: when the group matches. For MATCH_MATCH, the group has matched, so we
! 1141: restart it with a new subject starting position, remembering that we had
! 1142: at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
! 1143: usual. If we haven't matched any alternatives in any iteration, check to
! 1144: see if a previous iteration matched. If so, the group has matched;
! 1145: continue from afterwards. Otherwise it has failed; restore the previous
! 1146: capture values before returning NOMATCH. */
! 1147:
! 1148: for (;;)
! 1149: {
! 1150: md->offset_vector[md->offset_end - number] =
! 1151: (int)(eptr - md->start_subject);
! 1152: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 1153: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 1154: eptrb, RM63);
! 1155: if (rrc == MATCH_KETRPOS)
! 1156: {
! 1157: offset_top = md->end_offset_top;
! 1158: eptr = md->end_match_ptr;
! 1159: ecode = md->start_code + code_offset;
! 1160: save_capture_last = md->capture_last;
! 1161: matched_once = TRUE;
! 1162: continue;
! 1163: }
! 1164:
! 1165: /* See comment in the code for capturing groups above about handling
! 1166: THEN. */
! 1167:
! 1168: if (rrc == MATCH_THEN)
! 1169: {
! 1170: next = ecode + GET(ecode,1);
! 1171: if (md->start_match_ptr < next &&
! 1172: (*ecode == OP_ALT || *next == OP_ALT))
! 1173: rrc = MATCH_NOMATCH;
! 1174: }
! 1175:
! 1176: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1177: md->capture_last = save_capture_last;
! 1178: ecode += GET(ecode, 1);
! 1179: if (*ecode != OP_ALT) break;
1.1 misho 1180: }
1181:
1.1.1.2 ! misho 1182: if (!matched_once)
! 1183: {
! 1184: md->offset_vector[offset] = save_offset1;
! 1185: md->offset_vector[offset+1] = save_offset2;
! 1186: md->offset_vector[md->offset_end - number] = save_offset3;
! 1187: }
1.1 misho 1188:
1.1.1.2 ! misho 1189: if (allow_zero || matched_once)
! 1190: {
! 1191: ecode += 1 + LINK_SIZE;
! 1192: break;
! 1193: }
! 1194:
! 1195: RRETURN(MATCH_NOMATCH);
! 1196: }
! 1197:
! 1198: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
! 1199: as a non-capturing bracket. */
! 1200:
! 1201: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1202: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1203:
! 1204: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
! 1205:
! 1206: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1207: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
! 1208:
! 1209: /* Non-capturing possessive bracket with unlimited repeat. We come here
! 1210: from BRAZERO with allow_zero = TRUE. The code is similar to the above,
! 1211: without the capturing complication. It is written out separately for speed
! 1212: and cleanliness. */
! 1213:
! 1214: case OP_BRAPOS:
! 1215: case OP_SBRAPOS:
! 1216: allow_zero = FALSE;
! 1217:
! 1218: POSSESSIVE_NON_CAPTURE:
! 1219: matched_once = FALSE;
! 1220: code_offset = (int)(ecode - md->start_code);
! 1221:
! 1222: for (;;)
! 1223: {
! 1224: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
! 1225: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
! 1226: eptrb, RM48);
! 1227: if (rrc == MATCH_KETRPOS)
! 1228: {
! 1229: offset_top = md->end_offset_top;
! 1230: eptr = md->end_match_ptr;
! 1231: ecode = md->start_code + code_offset;
! 1232: matched_once = TRUE;
! 1233: continue;
! 1234: }
! 1235:
! 1236: /* See comment in the code for capturing groups above about handling
! 1237: THEN. */
! 1238:
! 1239: if (rrc == MATCH_THEN)
! 1240: {
! 1241: next = ecode + GET(ecode,1);
! 1242: if (md->start_match_ptr < next &&
! 1243: (*ecode == OP_ALT || *next == OP_ALT))
! 1244: rrc = MATCH_NOMATCH;
! 1245: }
! 1246:
! 1247: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1 misho 1248: ecode += GET(ecode, 1);
1.1.1.2 ! misho 1249: if (*ecode != OP_ALT) break;
! 1250: }
! 1251:
! 1252: if (matched_once || allow_zero)
! 1253: {
! 1254: ecode += 1 + LINK_SIZE;
! 1255: break;
1.1 misho 1256: }
1.1.1.2 ! misho 1257: RRETURN(MATCH_NOMATCH);
! 1258:
1.1 misho 1259: /* Control never reaches here. */
1260:
1261: /* Conditional group: compilation checked that there are no more than
1262: two branches. If the condition is false, skipping the first branch takes us
1263: past the end if there is only one branch, but that's OK because that is
1.1.1.2 ! misho 1264: exactly what going to the ket would do. */
1.1 misho 1265:
1266: case OP_COND:
1267: case OP_SCOND:
1.1.1.2 ! misho 1268: codelink = GET(ecode, 1);
1.1 misho 1269:
1270: /* Because of the way auto-callout works during compile, a callout item is
1271: inserted between OP_COND and an assertion condition. */
1272:
1273: if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1274: {
1.1.1.2 ! misho 1275: if (PUBL(callout) != NULL)
1.1 misho 1276: {
1.1.1.2 ! misho 1277: PUBL(callout_block) cb;
! 1278: cb.version = 2; /* Version 1 of the callout block */
1.1 misho 1279: cb.callout_number = ecode[LINK_SIZE+2];
1280: cb.offset_vector = md->offset_vector;
1.1.1.2 ! misho 1281: #if defined COMPILE_PCRE8
1.1 misho 1282: cb.subject = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho 1283: #elif defined COMPILE_PCRE16
! 1284: cb.subject = (PCRE_SPTR16)md->start_subject;
! 1285: #elif defined COMPILE_PCRE32
! 1286: cb.subject = (PCRE_SPTR32)md->start_subject;
! 1287: #endif
1.1 misho 1288: cb.subject_length = (int)(md->end_subject - md->start_subject);
1289: cb.start_match = (int)(mstart - md->start_subject);
1290: cb.current_position = (int)(eptr - md->start_subject);
1291: cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1292: cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1293: cb.capture_top = offset_top/2;
1294: cb.capture_last = md->capture_last;
1295: cb.callout_data = md->callout_data;
1.1.1.2 ! misho 1296: cb.mark = md->nomatch_mark;
! 1297: if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1 misho 1298: if (rrc < 0) RRETURN(rrc);
1299: }
1.1.1.2 ! misho 1300: ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.1 misho 1301: }
1302:
1303: condcode = ecode[LINK_SIZE+1];
1304:
1305: /* Now see what the actual condition is */
1306:
1307: if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
1308: {
1309: if (md->recursive == NULL) /* Not recursing => FALSE */
1310: {
1311: condition = FALSE;
1312: ecode += GET(ecode, 1);
1313: }
1314: else
1315: {
1.1.1.2 ! misho 1316: unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
! 1317: condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1.1 misho 1318:
1319: /* If the test is for recursion into a specific subpattern, and it is
1320: false, but the test was set up by name, scan the table to see if the
1321: name refers to any other numbers, and test them. The condition is true
1322: if any one is set. */
1323:
1.1.1.2 ! misho 1324: if (!condition && condcode == OP_NRREF)
1.1 misho 1325: {
1.1.1.2 ! misho 1326: pcre_uchar *slotA = md->name_table;
1.1 misho 1327: for (i = 0; i < md->name_count; i++)
1328: {
1329: if (GET2(slotA, 0) == recno) break;
1330: slotA += md->name_entry_size;
1331: }
1332:
1333: /* Found a name for the number - there can be only one; duplicate
1334: names for different numbers are allowed, but not vice versa. First
1335: scan down for duplicates. */
1336:
1337: if (i < md->name_count)
1338: {
1.1.1.2 ! misho 1339: pcre_uchar *slotB = slotA;
1.1 misho 1340: while (slotB > md->name_table)
1341: {
1342: slotB -= md->name_entry_size;
1.1.1.2 ! misho 1343: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1344: {
1345: condition = GET2(slotB, 0) == md->recursive->group_num;
1346: if (condition) break;
1347: }
1348: else break;
1349: }
1350:
1351: /* Scan up for duplicates */
1352:
1353: if (!condition)
1354: {
1355: slotB = slotA;
1356: for (i++; i < md->name_count; i++)
1357: {
1358: slotB += md->name_entry_size;
1.1.1.2 ! misho 1359: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1360: {
1361: condition = GET2(slotB, 0) == md->recursive->group_num;
1362: if (condition) break;
1363: }
1364: else break;
1365: }
1366: }
1367: }
1368: }
1369:
1370: /* Choose branch according to the condition */
1371:
1.1.1.2 ! misho 1372: ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1 misho 1373: }
1374: }
1375:
1376: else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1377: {
1378: offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1379: condition = offset < offset_top && md->offset_vector[offset] >= 0;
1380:
1381: /* If the numbered capture is unset, but the reference was by name,
1382: scan the table to see if the name refers to any other numbers, and test
1383: them. The condition is true if any one is set. This is tediously similar
1384: to the code above, but not close enough to try to amalgamate. */
1385:
1386: if (!condition && condcode == OP_NCREF)
1387: {
1.1.1.2 ! misho 1388: unsigned int refno = offset >> 1;
! 1389: pcre_uchar *slotA = md->name_table;
1.1 misho 1390:
1391: for (i = 0; i < md->name_count; i++)
1392: {
1393: if (GET2(slotA, 0) == refno) break;
1394: slotA += md->name_entry_size;
1395: }
1396:
1397: /* Found a name for the number - there can be only one; duplicate names
1398: for different numbers are allowed, but not vice versa. First scan down
1399: for duplicates. */
1400:
1401: if (i < md->name_count)
1402: {
1.1.1.2 ! misho 1403: pcre_uchar *slotB = slotA;
1.1 misho 1404: while (slotB > md->name_table)
1405: {
1406: slotB -= md->name_entry_size;
1.1.1.2 ! misho 1407: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1408: {
1409: offset = GET2(slotB, 0) << 1;
1410: condition = offset < offset_top &&
1411: md->offset_vector[offset] >= 0;
1412: if (condition) break;
1413: }
1414: else break;
1415: }
1416:
1417: /* Scan up for duplicates */
1418:
1419: if (!condition)
1420: {
1421: slotB = slotA;
1422: for (i++; i < md->name_count; i++)
1423: {
1424: slotB += md->name_entry_size;
1.1.1.2 ! misho 1425: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1426: {
1427: offset = GET2(slotB, 0) << 1;
1428: condition = offset < offset_top &&
1429: md->offset_vector[offset] >= 0;
1430: if (condition) break;
1431: }
1432: else break;
1433: }
1434: }
1435: }
1436: }
1437:
1438: /* Choose branch according to the condition */
1439:
1.1.1.2 ! misho 1440: ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1 misho 1441: }
1442:
1443: else if (condcode == OP_DEF) /* DEFINE - always false */
1444: {
1445: condition = FALSE;
1446: ecode += GET(ecode, 1);
1447: }
1448:
1449: /* The condition is an assertion. Call match() to evaluate it - setting
1.1.1.2 ! misho 1450: md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
! 1451: an assertion. */
1.1 misho 1452:
1453: else
1454: {
1.1.1.2 ! misho 1455: md->match_function_type = MATCH_CONDASSERT;
! 1456: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1.1 misho 1457: if (rrc == MATCH_MATCH)
1458: {
1.1.1.2 ! misho 1459: if (md->end_offset_top > offset_top)
! 1460: offset_top = md->end_offset_top; /* Captures may have happened */
1.1 misho 1461: condition = TRUE;
1462: ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1463: while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1464: }
1.1.1.2 ! misho 1465:
! 1466: /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
! 1467: assertion; it is therefore treated as NOMATCH. */
! 1468:
! 1469: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1.1 misho 1470: {
1471: RRETURN(rrc); /* Need braces because of following else */
1472: }
1473: else
1474: {
1475: condition = FALSE;
1476: ecode += codelink;
1477: }
1478: }
1479:
1.1.1.2 ! misho 1480: /* We are now at the branch that is to be obeyed. As there is only one, can
! 1481: use tail recursion to avoid using another stack frame, except when there is
! 1482: unlimited repeat of a possibly empty group. In the latter case, a recursive
! 1483: call to match() is always required, unless the second alternative doesn't
! 1484: exist, in which case we can just plough on. Note that, for compatibility
! 1485: with Perl, the | in a conditional group is NOT treated as creating two
! 1486: alternatives. If a THEN is encountered in the branch, it propagates out to
! 1487: the enclosing alternative (unless nested in a deeper set of alternatives,
! 1488: of course). */
1.1 misho 1489:
1490: if (condition || *ecode == OP_ALT)
1491: {
1.1.1.2 ! misho 1492: if (op != OP_SCOND)
1.1 misho 1493: {
1.1.1.2 ! misho 1494: ecode += 1 + LINK_SIZE;
1.1 misho 1495: goto TAIL_RECURSE;
1496: }
1.1.1.2 ! misho 1497:
! 1498: md->match_function_type = MATCH_CBEGROUP;
! 1499: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
! 1500: RRETURN(rrc);
1.1 misho 1501: }
1.1.1.2 ! misho 1502:
! 1503: /* Condition false & no alternative; continue after the group. */
! 1504:
! 1505: else
1.1 misho 1506: {
1507: ecode += 1 + LINK_SIZE;
1508: }
1509: break;
1510:
1511:
1512: /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1513: to close any currently open capturing brackets. */
1514:
1515: case OP_CLOSE:
1516: number = GET2(ecode, 1);
1517: offset = number << 1;
1518:
1519: #ifdef PCRE_DEBUG
1520: printf("end bracket %d at *ACCEPT", number);
1521: printf("\n");
1522: #endif
1523:
1524: md->capture_last = number;
1525: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1526: {
1527: md->offset_vector[offset] =
1528: md->offset_vector[md->offset_end - number];
1529: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1530: if (offset_top <= offset) offset_top = offset + 2;
1531: }
1.1.1.2 ! misho 1532: ecode += 1 + IMM2_SIZE;
1.1 misho 1533: break;
1534:
1535:
1.1.1.2 ! misho 1536: /* End of the pattern, either real or forced. */
1.1 misho 1537:
1538: case OP_END:
1.1.1.2 ! misho 1539: case OP_ACCEPT:
! 1540: case OP_ASSERT_ACCEPT:
1.1 misho 1541:
1.1.1.2 ! misho 1542: /* If we have matched an empty string, fail if not in an assertion and not
! 1543: in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
! 1544: is set and we have matched at the start of the subject. In both cases,
! 1545: backtracking will then try other alternatives, if any. */
! 1546:
! 1547: if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
! 1548: md->recursive == NULL &&
! 1549: (md->notempty ||
! 1550: (md->notempty_atstart &&
! 1551: mstart == md->start_subject + md->start_offset)))
! 1552: RRETURN(MATCH_NOMATCH);
1.1 misho 1553:
1554: /* Otherwise, we have a match. */
1555:
1556: md->end_match_ptr = eptr; /* Record where we ended */
1557: md->end_offset_top = offset_top; /* and how many extracts were taken */
1558: md->start_match_ptr = mstart; /* and the start (\K can modify) */
1559:
1560: /* For some reason, the macros don't work properly if an expression is
1.1.1.2 ! misho 1561: given as the argument to RRETURN when the heap is in use. */
1.1 misho 1562:
1563: rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1.1.1.2 ! misho 1564: RRETURN(rrc);
1.1 misho 1565:
1566: /* Assertion brackets. Check the alternative branches in turn - the
1567: matching won't pass the KET for an assertion. If any one branch matches,
1568: the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1569: start of each branch to move the current point backwards, so the code at
1.1.1.2 ! misho 1570: this level is identical to the lookahead case. When the assertion is part
! 1571: of a condition, we want to return immediately afterwards. The caller of
! 1572: this incarnation of the match() function will have set MATCH_CONDASSERT in
! 1573: md->match_function_type, and one of these opcodes will be the first opcode
! 1574: that is processed. We use a local variable that is preserved over calls to
! 1575: match() to remember this case. */
1.1 misho 1576:
1577: case OP_ASSERT:
1578: case OP_ASSERTBACK:
1.1.1.2 ! misho 1579: save_mark = md->mark;
! 1580: if (md->match_function_type == MATCH_CONDASSERT)
! 1581: {
! 1582: condassert = TRUE;
! 1583: md->match_function_type = 0;
! 1584: }
! 1585: else condassert = FALSE;
! 1586:
1.1 misho 1587: do
1588: {
1.1.1.2 ! misho 1589: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1.1 misho 1590: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1591: {
1592: mstart = md->start_match_ptr; /* In case \K reset it */
1593: break;
1594: }
1.1.1.2 ! misho 1595: md->mark = save_mark;
! 1596:
! 1597: /* A COMMIT failure must fail the entire assertion, without trying any
! 1598: subsequent branches. */
! 1599:
! 1600: if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
! 1601:
! 1602: /* PCRE does not allow THEN to escape beyond an assertion; it
! 1603: is treated as NOMATCH. */
! 1604:
! 1605: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1 misho 1606: ecode += GET(ecode, 1);
1607: }
1608: while (*ecode == OP_ALT);
1.1.1.2 ! misho 1609:
! 1610: if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1.1 misho 1611:
1612: /* If checking an assertion for a condition, return MATCH_MATCH. */
1613:
1.1.1.2 ! misho 1614: if (condassert) RRETURN(MATCH_MATCH);
1.1 misho 1615:
1616: /* Continue from after the assertion, updating the offsets high water
1617: mark, since extracts may have been taken during the assertion. */
1618:
1619: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1620: ecode += 1 + LINK_SIZE;
1621: offset_top = md->end_offset_top;
1622: continue;
1623:
1624: /* Negative assertion: all branches must fail to match. Encountering SKIP,
1625: PRUNE, or COMMIT means we must assume failure without checking subsequent
1626: branches. */
1627:
1628: case OP_ASSERT_NOT:
1629: case OP_ASSERTBACK_NOT:
1.1.1.2 ! misho 1630: save_mark = md->mark;
! 1631: if (md->match_function_type == MATCH_CONDASSERT)
! 1632: {
! 1633: condassert = TRUE;
! 1634: md->match_function_type = 0;
! 1635: }
! 1636: else condassert = FALSE;
! 1637:
1.1 misho 1638: do
1639: {
1.1.1.2 ! misho 1640: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
! 1641: md->mark = save_mark;
! 1642: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1.1 misho 1643: if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1644: {
1645: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1646: break;
1647: }
1.1.1.2 ! misho 1648:
! 1649: /* PCRE does not allow THEN to escape beyond an assertion; it is treated
! 1650: as NOMATCH. */
! 1651:
! 1652: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1 misho 1653: ecode += GET(ecode,1);
1654: }
1655: while (*ecode == OP_ALT);
1656:
1.1.1.2 ! misho 1657: if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1.1 misho 1658:
1659: ecode += 1 + LINK_SIZE;
1660: continue;
1661:
1662: /* Move the subject pointer back. This occurs only at the start of
1663: each branch of a lookbehind assertion. If we are too close to the start to
1664: move back, this match function fails. When working with UTF-8 we move
1665: back a number of characters, not bytes. */
1666:
1667: case OP_REVERSE:
1.1.1.2 ! misho 1668: #ifdef SUPPORT_UTF
! 1669: if (utf)
1.1 misho 1670: {
1671: i = GET(ecode, 1);
1672: while (i-- > 0)
1673: {
1674: eptr--;
1.1.1.2 ! misho 1675: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1 misho 1676: BACKCHAR(eptr);
1677: }
1678: }
1679: else
1680: #endif
1681:
1682: /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1683:
1684: {
1685: eptr -= GET(ecode, 1);
1.1.1.2 ! misho 1686: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1 misho 1687: }
1688:
1689: /* Save the earliest consulted character, then skip to next op code */
1690:
1691: if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1692: ecode += 1 + LINK_SIZE;
1693: break;
1694:
1695: /* The callout item calls an external function, if one is provided, passing
1696: details of the match so far. This is mainly for debugging, though the
1697: function is able to force a failure. */
1698:
1699: case OP_CALLOUT:
1.1.1.2 ! misho 1700: if (PUBL(callout) != NULL)
1.1 misho 1701: {
1.1.1.2 ! misho 1702: PUBL(callout_block) cb;
! 1703: cb.version = 2; /* Version 2 of the callout block */
1.1 misho 1704: cb.callout_number = ecode[1];
1705: cb.offset_vector = md->offset_vector;
1.1.1.2 ! misho 1706: #if defined COMPILE_PCRE8
1.1 misho 1707: cb.subject = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho 1708: #elif defined COMPILE_PCRE16
! 1709: cb.subject = (PCRE_SPTR16)md->start_subject;
! 1710: #elif defined COMPILE_PCRE32
! 1711: cb.subject = (PCRE_SPTR32)md->start_subject;
! 1712: #endif
1.1 misho 1713: cb.subject_length = (int)(md->end_subject - md->start_subject);
1714: cb.start_match = (int)(mstart - md->start_subject);
1715: cb.current_position = (int)(eptr - md->start_subject);
1716: cb.pattern_position = GET(ecode, 2);
1717: cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1718: cb.capture_top = offset_top/2;
1719: cb.capture_last = md->capture_last;
1720: cb.callout_data = md->callout_data;
1.1.1.2 ! misho 1721: cb.mark = md->nomatch_mark;
! 1722: if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1 misho 1723: if (rrc < 0) RRETURN(rrc);
1724: }
1725: ecode += 2 + 2*LINK_SIZE;
1726: break;
1727:
1728: /* Recursion either matches the current regex, or some subexpression. The
1729: offset data is the offset to the starting bracket from the start of the
1730: whole pattern. (This is so that it works from duplicated subpatterns.)
1731:
1.1.1.2 ! misho 1732: The state of the capturing groups is preserved over recursion, and
! 1733: re-instated afterwards. We don't know how many are started and not yet
! 1734: finished (offset_top records the completed total) so we just have to save
! 1735: all the potential data. There may be up to 65535 such values, which is too
! 1736: large to put on the stack, but using malloc for small numbers seems
! 1737: expensive. As a compromise, the stack is used when there are no more than
! 1738: REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1.1 misho 1739:
1740: There are also other values that have to be saved. We use a chained
1741: sequence of blocks that actually live on the stack. Thanks to Robin Houston
1.1.1.2 ! misho 1742: for the original version of this logic. It has, however, been hacked around
! 1743: a lot, so he is not to blame for the current way it works. */
1.1 misho 1744:
1745: case OP_RECURSE:
1746: {
1.1.1.2 ! misho 1747: recursion_info *ri;
! 1748: unsigned int recno;
! 1749:
1.1 misho 1750: callpat = md->start_code + GET(ecode, 1);
1.1.1.2 ! misho 1751: recno = (callpat == md->start_code)? 0 :
1.1 misho 1752: GET2(callpat, 1 + LINK_SIZE);
1753:
1.1.1.2 ! misho 1754: /* Check for repeating a recursion without advancing the subject pointer.
! 1755: This should catch convoluted mutual recursions. (Some simple cases are
! 1756: caught at compile time.) */
! 1757:
! 1758: for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
! 1759: if (recno == ri->group_num && eptr == ri->subject_position)
! 1760: RRETURN(PCRE_ERROR_RECURSELOOP);
! 1761:
1.1 misho 1762: /* Add to "recursing stack" */
1763:
1.1.1.2 ! misho 1764: new_recursive.group_num = recno;
! 1765: new_recursive.subject_position = eptr;
1.1 misho 1766: new_recursive.prevrec = md->recursive;
1767: md->recursive = &new_recursive;
1768:
1.1.1.2 ! misho 1769: /* Where to continue from afterwards */
1.1 misho 1770:
1771: ecode += 1 + LINK_SIZE;
1772:
1.1.1.2 ! misho 1773: /* Now save the offset data */
1.1 misho 1774:
1775: new_recursive.saved_max = md->offset_end;
1776: if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1777: new_recursive.offset_save = stacksave;
1778: else
1779: {
1780: new_recursive.offset_save =
1.1.1.2 ! misho 1781: (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1 misho 1782: if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1783: }
1784: memcpy(new_recursive.offset_save, md->offset_vector,
1785: new_recursive.saved_max * sizeof(int));
1786:
1.1.1.2 ! misho 1787: /* OK, now we can do the recursion. After processing each alternative,
! 1788: restore the offset data. If there were nested recursions, md->recursive
! 1789: might be changed, so reset it before looping. */
1.1 misho 1790:
1791: DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1.1.1.2 ! misho 1792: cbegroup = (*callpat >= OP_SBRA);
1.1 misho 1793: do
1794: {
1.1.1.2 ! misho 1795: if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
! 1796: RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
! 1797: md, eptrb, RM6);
! 1798: memcpy(md->offset_vector, new_recursive.offset_save,
! 1799: new_recursive.saved_max * sizeof(int));
! 1800: md->recursive = new_recursive.prevrec;
1.1 misho 1801: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1802: {
1803: DPRINTF(("Recursion matched\n"));
1804: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1805: (PUBL(free))(new_recursive.offset_save);
! 1806:
! 1807: /* Set where we got to in the subject, and reset the start in case
! 1808: it was changed by \K. This *is* propagated back out of a recursion,
! 1809: for Perl compatibility. */
! 1810:
! 1811: eptr = md->end_match_ptr;
! 1812: mstart = md->start_match_ptr;
! 1813: goto RECURSION_MATCHED; /* Exit loop; end processing */
1.1 misho 1814: }
1.1.1.2 ! misho 1815:
! 1816: /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; each
! 1817: is treated as NOMATCH. */
! 1818:
! 1819: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
! 1820: rrc != MATCH_COMMIT)
1.1 misho 1821: {
1822: DPRINTF(("Recursion gave error %d\n", rrc));
1823: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1824: (PUBL(free))(new_recursive.offset_save);
1.1 misho 1825: RRETURN(rrc);
1826: }
1827:
1828: md->recursive = &new_recursive;
1829: callpat += GET(callpat, 1);
1830: }
1831: while (*callpat == OP_ALT);
1832:
1833: DPRINTF(("Recursion didn't match\n"));
1834: md->recursive = new_recursive.prevrec;
1835: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1836: (PUBL(free))(new_recursive.offset_save);
! 1837: RRETURN(MATCH_NOMATCH);
1.1 misho 1838: }
1839:
1.1.1.2 ! misho 1840: RECURSION_MATCHED:
! 1841: break;
1.1 misho 1842:
1843: /* An alternation is the end of a branch; scan along to find the end of the
1844: bracketed group and go to there. */
1845:
1846: case OP_ALT:
1847: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1848: break;
1849:
1850: /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1851: indicating that it may occur zero times. It may repeat infinitely, or not
1852: at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1853: with fixed upper repeat limits are compiled as a number of copies, with the
1854: optional ones preceded by BRAZERO or BRAMINZERO. */
1855:
1856: case OP_BRAZERO:
1.1.1.2 ! misho 1857: next = ecode + 1;
! 1858: RMATCH(eptr, next, offset_top, md, eptrb, RM10);
! 1859: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1860: do next += GET(next, 1); while (*next == OP_ALT);
! 1861: ecode = next + 1 + LINK_SIZE;
1.1 misho 1862: break;
1863:
1864: case OP_BRAMINZERO:
1.1.1.2 ! misho 1865: next = ecode + 1;
! 1866: do next += GET(next, 1); while (*next == OP_ALT);
! 1867: RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
! 1868: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1869: ecode++;
1.1 misho 1870: break;
1871:
1872: case OP_SKIPZERO:
1.1.1.2 ! misho 1873: next = ecode+1;
! 1874: do next += GET(next,1); while (*next == OP_ALT);
! 1875: ecode = next + 1 + LINK_SIZE;
1.1 misho 1876: break;
1877:
1.1.1.2 ! misho 1878: /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
! 1879: here; just jump to the group, with allow_zero set TRUE. */
! 1880:
! 1881: case OP_BRAPOSZERO:
! 1882: op = *(++ecode);
! 1883: allow_zero = TRUE;
! 1884: if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
! 1885: goto POSSESSIVE_NON_CAPTURE;
! 1886:
1.1 misho 1887: /* End of a group, repeated or non-repeating. */
1888:
1889: case OP_KET:
1890: case OP_KETRMIN:
1891: case OP_KETRMAX:
1.1.1.2 ! misho 1892: case OP_KETRPOS:
1.1 misho 1893: prev = ecode - GET(ecode, 1);
1894:
1895: /* If this was a group that remembered the subject start, in order to break
1896: infinite repeats of empty string matches, retrieve the subject start from
1897: the chain. Otherwise, set it NULL. */
1898:
1.1.1.2 ! misho 1899: if (*prev >= OP_SBRA || *prev == OP_ONCE)
1.1 misho 1900: {
1901: saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1902: eptrb = eptrb->epb_prev; /* Backup to previous group */
1903: }
1904: else saved_eptr = NULL;
1905:
1.1.1.2 ! misho 1906: /* If we are at the end of an assertion group or a non-capturing atomic
! 1907: group, stop matching and return MATCH_MATCH, but record the current high
! 1908: water mark for use by positive assertions. We also need to record the match
! 1909: start in case it was changed by \K. */
! 1910:
! 1911: if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
! 1912: *prev == OP_ONCE_NC)
1.1 misho 1913: {
1.1.1.2 ! misho 1914: md->end_match_ptr = eptr; /* For ONCE_NC */
1.1 misho 1915: md->end_offset_top = offset_top;
1916: md->start_match_ptr = mstart;
1.1.1.2 ! misho 1917: RRETURN(MATCH_MATCH); /* Sets md->mark */
1.1 misho 1918: }
1919:
1920: /* For capturing groups we have to check the group number back at the start
1921: and if necessary complete handling an extraction by setting the offsets and
1.1.1.2 ! misho 1922: bumping the high water mark. Whole-pattern recursion is coded as a recurse
! 1923: into group 0, so it won't be picked up here. Instead, we catch it when the
! 1924: OP_END is reached. Other recursion is handled here. We just have to record
! 1925: the current subject position and start match pointer and give a MATCH
! 1926: return. */
1.1 misho 1927:
1.1.1.2 ! misho 1928: if (*prev == OP_CBRA || *prev == OP_SCBRA ||
! 1929: *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1.1 misho 1930: {
1931: number = GET2(prev, 1+LINK_SIZE);
1932: offset = number << 1;
1933:
1934: #ifdef PCRE_DEBUG
1935: printf("end bracket %d", number);
1936: printf("\n");
1937: #endif
1938:
1.1.1.2 ! misho 1939: /* Handle a recursively called group. */
! 1940:
! 1941: if (md->recursive != NULL && md->recursive->group_num == number)
! 1942: {
! 1943: md->end_match_ptr = eptr;
! 1944: md->start_match_ptr = mstart;
! 1945: RRETURN(MATCH_MATCH);
! 1946: }
! 1947:
! 1948: /* Deal with capturing */
! 1949:
1.1 misho 1950: md->capture_last = number;
1951: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1952: {
1.1.1.2 ! misho 1953: /* If offset is greater than offset_top, it means that we are
! 1954: "skipping" a capturing group, and that group's offsets must be marked
! 1955: unset. In earlier versions of PCRE, all the offsets were unset at the
! 1956: start of matching, but this doesn't work because atomic groups and
! 1957: assertions can cause a value to be set that should later be unset.
! 1958: Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
! 1959: part of the atomic group, but this is not on the final matching path,
! 1960: so must be unset when 2 is set. (If there is no group 2, there is no
! 1961: problem, because offset_top will then be 2, indicating no capture.) */
! 1962:
! 1963: if (offset > offset_top)
! 1964: {
! 1965: register int *iptr = md->offset_vector + offset_top;
! 1966: register int *iend = md->offset_vector + offset;
! 1967: while (iptr < iend) *iptr++ = -1;
! 1968: }
! 1969:
! 1970: /* Now make the extraction */
! 1971:
1.1 misho 1972: md->offset_vector[offset] =
1973: md->offset_vector[md->offset_end - number];
1974: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1975: if (offset_top <= offset) offset_top = offset + 2;
1976: }
1977: }
1978:
1.1.1.2 ! misho 1979: /* For an ordinary non-repeating ket, just continue at this level. This
! 1980: also happens for a repeating ket if no characters were matched in the
! 1981: group. This is the forcible breaking of infinite loops as implemented in
! 1982: Perl 5.005. For a non-repeating atomic group that includes captures,
! 1983: establish a backup point by processing the rest of the pattern at a lower
! 1984: level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
! 1985: original OP_ONCE level, thereby bypassing intermediate backup points, but
! 1986: resetting any captures that happened along the way. */
1.1 misho 1987:
1988: if (*ecode == OP_KET || eptr == saved_eptr)
1989: {
1.1.1.2 ! misho 1990: if (*prev == OP_ONCE)
! 1991: {
! 1992: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
! 1993: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 1994: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
! 1995: RRETURN(MATCH_ONCE);
! 1996: }
! 1997: ecode += 1 + LINK_SIZE; /* Carry on at this level */
1.1 misho 1998: break;
1999: }
2000:
1.1.1.2 ! misho 2001: /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
! 2002: and return MATCH_KETRPOS. This makes it possible to do the repeats one
! 2003: at a time from the outer level, thus saving stack. */
! 2004:
! 2005: if (*ecode == OP_KETRPOS)
! 2006: {
! 2007: md->end_match_ptr = eptr;
! 2008: md->end_offset_top = offset_top;
! 2009: RRETURN(MATCH_KETRPOS);
! 2010: }
1.1 misho 2011:
1.1.1.2 ! misho 2012: /* The normal repeating kets try the rest of the pattern or restart from
! 2013: the preceding bracket, in the appropriate order. In the second case, we can
! 2014: use tail recursion to avoid using another stack frame, unless we have an
! 2015: atomic group or an unlimited repeat of a group that can match an empty
! 2016: string. */
1.1 misho 2017:
2018: if (*ecode == OP_KETRMIN)
2019: {
1.1.1.2 ! misho 2020: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1.1 misho 2021: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 2022: if (*prev == OP_ONCE)
1.1 misho 2023: {
1.1.1.2 ! misho 2024: RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
! 2025: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2026: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
! 2027: RRETURN(MATCH_ONCE);
! 2028: }
! 2029: if (*prev >= OP_SBRA) /* Could match an empty string */
! 2030: {
! 2031: RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1.1 misho 2032: RRETURN(rrc);
2033: }
2034: ecode = prev;
2035: goto TAIL_RECURSE;
2036: }
2037: else /* OP_KETRMAX */
2038: {
1.1.1.2 ! misho 2039: RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
! 2040: if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1.1 misho 2041: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 2042: if (*prev == OP_ONCE)
! 2043: {
! 2044: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
! 2045: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2046: md->once_target = prev;
! 2047: RRETURN(MATCH_ONCE);
! 2048: }
1.1 misho 2049: ecode += 1 + LINK_SIZE;
2050: goto TAIL_RECURSE;
2051: }
2052: /* Control never gets here */
2053:
1.1.1.2 ! misho 2054: /* Not multiline mode: start of subject assertion, unless notbol. */
1.1 misho 2055:
2056: case OP_CIRC:
1.1.1.2 ! misho 2057: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1.1 misho 2058:
2059: /* Start of subject assertion */
2060:
2061: case OP_SOD:
1.1.1.2 ! misho 2062: if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
! 2063: ecode++;
! 2064: break;
! 2065:
! 2066: /* Multiline mode: start of subject unless notbol, or after any newline. */
! 2067:
! 2068: case OP_CIRCM:
! 2069: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
! 2070: if (eptr != md->start_subject &&
! 2071: (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
! 2072: RRETURN(MATCH_NOMATCH);
1.1 misho 2073: ecode++;
2074: break;
2075:
2076: /* Start of match assertion */
2077:
2078: case OP_SOM:
1.1.1.2 ! misho 2079: if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1.1 misho 2080: ecode++;
2081: break;
2082:
2083: /* Reset the start of match point */
2084:
2085: case OP_SET_SOM:
2086: mstart = eptr;
2087: ecode++;
2088: break;
2089:
1.1.1.2 ! misho 2090: /* Multiline mode: assert before any newline, or before end of subject
! 2091: unless noteol is set. */
1.1 misho 2092:
1.1.1.2 ! misho 2093: case OP_DOLLM:
! 2094: if (eptr < md->end_subject)
1.1 misho 2095: {
1.1.1.2 ! misho 2096: if (!IS_NEWLINE(eptr))
1.1 misho 2097: {
1.1.1.2 ! misho 2098: if (md->partial != 0 &&
! 2099: eptr + 1 >= md->end_subject &&
! 2100: NLBLOCK->nltype == NLTYPE_FIXED &&
! 2101: NLBLOCK->nllen == 2 &&
! 2102: RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
! 2103: {
! 2104: md->hitend = TRUE;
! 2105: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 2106: }
! 2107: RRETURN(MATCH_NOMATCH);
1.1 misho 2108: }
2109: }
1.1.1.2 ! misho 2110: else
1.1 misho 2111: {
1.1.1.2 ! misho 2112: if (md->noteol) RRETURN(MATCH_NOMATCH);
! 2113: SCHECK_PARTIAL();
1.1 misho 2114: }
1.1.1.2 ! misho 2115: ecode++;
! 2116: break;
! 2117:
! 2118: /* Not multiline mode: assert before a terminating newline or before end of
! 2119: subject unless noteol is set. */
! 2120:
! 2121: case OP_DOLL:
! 2122: if (md->noteol) RRETURN(MATCH_NOMATCH);
! 2123: if (!md->endonly) goto ASSERT_NL_OR_EOS;
1.1 misho 2124:
2125: /* ... else fall through for endonly */
2126:
2127: /* End of subject assertion (\z) */
2128:
2129: case OP_EOD:
1.1.1.2 ! misho 2130: if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1.1 misho 2131: SCHECK_PARTIAL();
2132: ecode++;
2133: break;
2134:
2135: /* End of subject or ending \n assertion (\Z) */
2136:
2137: case OP_EODN:
2138: ASSERT_NL_OR_EOS:
2139: if (eptr < md->end_subject &&
2140: (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.1.1.2 ! misho 2141: {
! 2142: if (md->partial != 0 &&
! 2143: eptr + 1 >= md->end_subject &&
! 2144: NLBLOCK->nltype == NLTYPE_FIXED &&
! 2145: NLBLOCK->nllen == 2 &&
! 2146: RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
! 2147: {
! 2148: md->hitend = TRUE;
! 2149: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 2150: }
! 2151: RRETURN(MATCH_NOMATCH);
! 2152: }
1.1 misho 2153:
2154: /* Either at end of string or \n before end. */
2155:
2156: SCHECK_PARTIAL();
2157: ecode++;
2158: break;
2159:
2160: /* Word boundary assertions */
2161:
2162: case OP_NOT_WORD_BOUNDARY:
2163: case OP_WORD_BOUNDARY:
2164: {
2165:
2166: /* Find out if the previous and current characters are "word" characters.
2167: It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2168: be "non-word" characters. Remember the earliest consulted character for
2169: partial matching. */
2170:
1.1.1.2 ! misho 2171: #ifdef SUPPORT_UTF
! 2172: if (utf)
1.1 misho 2173: {
2174: /* Get status of previous character */
2175:
2176: if (eptr == md->start_subject) prev_is_word = FALSE; else
2177: {
1.1.1.2 ! misho 2178: PCRE_PUCHAR lastptr = eptr - 1;
! 2179: BACKCHAR(lastptr);
1.1 misho 2180: if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2181: GETCHAR(c, lastptr);
2182: #ifdef SUPPORT_UCP
2183: if (md->use_ucp)
2184: {
2185: if (c == '_') prev_is_word = TRUE; else
2186: {
2187: int cat = UCD_CATEGORY(c);
2188: prev_is_word = (cat == ucp_L || cat == ucp_N);
2189: }
2190: }
2191: else
2192: #endif
2193: prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2194: }
2195:
2196: /* Get status of next character */
2197:
2198: if (eptr >= md->end_subject)
2199: {
2200: SCHECK_PARTIAL();
2201: cur_is_word = FALSE;
2202: }
2203: else
2204: {
2205: GETCHAR(c, eptr);
2206: #ifdef SUPPORT_UCP
2207: if (md->use_ucp)
2208: {
2209: if (c == '_') cur_is_word = TRUE; else
2210: {
2211: int cat = UCD_CATEGORY(c);
2212: cur_is_word = (cat == ucp_L || cat == ucp_N);
2213: }
2214: }
2215: else
2216: #endif
2217: cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2218: }
2219: }
2220: else
2221: #endif
2222:
2223: /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2224: consistency with the behaviour of \w we do use it in this case. */
2225:
2226: {
2227: /* Get status of previous character */
2228:
2229: if (eptr == md->start_subject) prev_is_word = FALSE; else
2230: {
2231: if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2232: #ifdef SUPPORT_UCP
2233: if (md->use_ucp)
2234: {
2235: c = eptr[-1];
2236: if (c == '_') prev_is_word = TRUE; else
2237: {
2238: int cat = UCD_CATEGORY(c);
2239: prev_is_word = (cat == ucp_L || cat == ucp_N);
2240: }
2241: }
2242: else
2243: #endif
1.1.1.2 ! misho 2244: prev_is_word = MAX_255(eptr[-1])
! 2245: && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.1 misho 2246: }
2247:
2248: /* Get status of next character */
2249:
2250: if (eptr >= md->end_subject)
2251: {
2252: SCHECK_PARTIAL();
2253: cur_is_word = FALSE;
2254: }
2255: else
2256: #ifdef SUPPORT_UCP
2257: if (md->use_ucp)
2258: {
2259: c = *eptr;
2260: if (c == '_') cur_is_word = TRUE; else
2261: {
2262: int cat = UCD_CATEGORY(c);
2263: cur_is_word = (cat == ucp_L || cat == ucp_N);
2264: }
2265: }
2266: else
2267: #endif
1.1.1.2 ! misho 2268: cur_is_word = MAX_255(*eptr)
! 2269: && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1 misho 2270: }
2271:
2272: /* Now see if the situation is what we want */
2273:
2274: if ((*ecode++ == OP_WORD_BOUNDARY)?
2275: cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.1.1.2 ! misho 2276: RRETURN(MATCH_NOMATCH);
1.1 misho 2277: }
2278: break;
2279:
1.1.1.2 ! misho 2280: /* Match any single character type except newline; have to take care with
! 2281: CRLF newlines and partial matching. */
1.1 misho 2282:
2283: case OP_ANY:
1.1.1.2 ! misho 2284: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 2285: if (md->partial != 0 &&
! 2286: eptr + 1 >= md->end_subject &&
! 2287: NLBLOCK->nltype == NLTYPE_FIXED &&
! 2288: NLBLOCK->nllen == 2 &&
! 2289: RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
! 2290: {
! 2291: md->hitend = TRUE;
! 2292: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 2293: }
! 2294:
1.1 misho 2295: /* Fall through */
2296:
1.1.1.2 ! misho 2297: /* Match any single character whatsoever. */
! 2298:
1.1 misho 2299: case OP_ALLANY:
1.1.1.2 ! misho 2300: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
! 2301: { /* not be updated before SCHECK_PARTIAL. */
1.1 misho 2302: SCHECK_PARTIAL();
1.1.1.2 ! misho 2303: RRETURN(MATCH_NOMATCH);
1.1 misho 2304: }
1.1.1.2 ! misho 2305: eptr++;
! 2306: #ifdef SUPPORT_UTF
! 2307: if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
! 2308: #endif
1.1 misho 2309: ecode++;
2310: break;
2311:
2312: /* Match a single byte, even in UTF-8 mode. This opcode really does match
2313: any byte, even newline, independent of the setting of PCRE_DOTALL. */
2314:
2315: case OP_ANYBYTE:
1.1.1.2 ! misho 2316: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
! 2317: { /* not be updated before SCHECK_PARTIAL. */
1.1 misho 2318: SCHECK_PARTIAL();
1.1.1.2 ! misho 2319: RRETURN(MATCH_NOMATCH);
1.1 misho 2320: }
1.1.1.2 ! misho 2321: eptr++;
1.1 misho 2322: ecode++;
2323: break;
2324:
2325: case OP_NOT_DIGIT:
2326: if (eptr >= md->end_subject)
2327: {
2328: SCHECK_PARTIAL();
1.1.1.2 ! misho 2329: RRETURN(MATCH_NOMATCH);
1.1 misho 2330: }
2331: GETCHARINCTEST(c, eptr);
2332: if (
1.1.1.2 ! misho 2333: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2334: c < 256 &&
2335: #endif
2336: (md->ctypes[c] & ctype_digit) != 0
2337: )
1.1.1.2 ! misho 2338: RRETURN(MATCH_NOMATCH);
1.1 misho 2339: ecode++;
2340: break;
2341:
2342: case OP_DIGIT:
2343: if (eptr >= md->end_subject)
2344: {
2345: SCHECK_PARTIAL();
1.1.1.2 ! misho 2346: RRETURN(MATCH_NOMATCH);
1.1 misho 2347: }
2348: GETCHARINCTEST(c, eptr);
2349: if (
1.1.1.2 ! misho 2350: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2351: c > 255 ||
1.1 misho 2352: #endif
2353: (md->ctypes[c] & ctype_digit) == 0
2354: )
1.1.1.2 ! misho 2355: RRETURN(MATCH_NOMATCH);
1.1 misho 2356: ecode++;
2357: break;
2358:
2359: case OP_NOT_WHITESPACE:
2360: if (eptr >= md->end_subject)
2361: {
2362: SCHECK_PARTIAL();
1.1.1.2 ! misho 2363: RRETURN(MATCH_NOMATCH);
1.1 misho 2364: }
2365: GETCHARINCTEST(c, eptr);
2366: if (
1.1.1.2 ! misho 2367: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2368: c < 256 &&
2369: #endif
2370: (md->ctypes[c] & ctype_space) != 0
2371: )
1.1.1.2 ! misho 2372: RRETURN(MATCH_NOMATCH);
1.1 misho 2373: ecode++;
2374: break;
2375:
2376: case OP_WHITESPACE:
2377: if (eptr >= md->end_subject)
2378: {
2379: SCHECK_PARTIAL();
1.1.1.2 ! misho 2380: RRETURN(MATCH_NOMATCH);
1.1 misho 2381: }
2382: GETCHARINCTEST(c, eptr);
2383: if (
1.1.1.2 ! misho 2384: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2385: c > 255 ||
1.1 misho 2386: #endif
2387: (md->ctypes[c] & ctype_space) == 0
2388: )
1.1.1.2 ! misho 2389: RRETURN(MATCH_NOMATCH);
1.1 misho 2390: ecode++;
2391: break;
2392:
2393: case OP_NOT_WORDCHAR:
2394: if (eptr >= md->end_subject)
2395: {
2396: SCHECK_PARTIAL();
1.1.1.2 ! misho 2397: RRETURN(MATCH_NOMATCH);
1.1 misho 2398: }
2399: GETCHARINCTEST(c, eptr);
2400: if (
1.1.1.2 ! misho 2401: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2402: c < 256 &&
2403: #endif
2404: (md->ctypes[c] & ctype_word) != 0
2405: )
1.1.1.2 ! misho 2406: RRETURN(MATCH_NOMATCH);
1.1 misho 2407: ecode++;
2408: break;
2409:
2410: case OP_WORDCHAR:
2411: if (eptr >= md->end_subject)
2412: {
2413: SCHECK_PARTIAL();
1.1.1.2 ! misho 2414: RRETURN(MATCH_NOMATCH);
1.1 misho 2415: }
2416: GETCHARINCTEST(c, eptr);
2417: if (
1.1.1.2 ! misho 2418: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2419: c > 255 ||
1.1 misho 2420: #endif
2421: (md->ctypes[c] & ctype_word) == 0
2422: )
1.1.1.2 ! misho 2423: RRETURN(MATCH_NOMATCH);
1.1 misho 2424: ecode++;
2425: break;
2426:
2427: case OP_ANYNL:
2428: if (eptr >= md->end_subject)
2429: {
2430: SCHECK_PARTIAL();
1.1.1.2 ! misho 2431: RRETURN(MATCH_NOMATCH);
1.1 misho 2432: }
2433: GETCHARINCTEST(c, eptr);
2434: switch(c)
2435: {
1.1.1.2 ! misho 2436: default: RRETURN(MATCH_NOMATCH);
! 2437:
! 2438: case CHAR_CR:
! 2439: if (eptr >= md->end_subject)
! 2440: {
! 2441: SCHECK_PARTIAL();
! 2442: }
! 2443: else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
1.1 misho 2444: break;
2445:
1.1.1.2 ! misho 2446: case CHAR_LF:
1.1 misho 2447: break;
2448:
1.1.1.2 ! misho 2449: case CHAR_VT:
! 2450: case CHAR_FF:
! 2451: case CHAR_NEL:
! 2452: #ifndef EBCDIC
1.1 misho 2453: case 0x2028:
2454: case 0x2029:
1.1.1.2 ! misho 2455: #endif /* Not EBCDIC */
! 2456: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1 misho 2457: break;
2458: }
2459: ecode++;
2460: break;
2461:
2462: case OP_NOT_HSPACE:
2463: if (eptr >= md->end_subject)
2464: {
2465: SCHECK_PARTIAL();
1.1.1.2 ! misho 2466: RRETURN(MATCH_NOMATCH);
1.1 misho 2467: }
2468: GETCHARINCTEST(c, eptr);
2469: switch(c)
2470: {
1.1.1.2 ! misho 2471: HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
1.1 misho 2472: default: break;
2473: }
2474: ecode++;
2475: break;
2476:
2477: case OP_HSPACE:
2478: if (eptr >= md->end_subject)
2479: {
2480: SCHECK_PARTIAL();
1.1.1.2 ! misho 2481: RRETURN(MATCH_NOMATCH);
1.1 misho 2482: }
2483: GETCHARINCTEST(c, eptr);
2484: switch(c)
2485: {
1.1.1.2 ! misho 2486: HSPACE_CASES: break; /* Byte and multibyte cases */
! 2487: default: RRETURN(MATCH_NOMATCH);
1.1 misho 2488: }
2489: ecode++;
2490: break;
2491:
2492: case OP_NOT_VSPACE:
2493: if (eptr >= md->end_subject)
2494: {
2495: SCHECK_PARTIAL();
1.1.1.2 ! misho 2496: RRETURN(MATCH_NOMATCH);
1.1 misho 2497: }
2498: GETCHARINCTEST(c, eptr);
2499: switch(c)
2500: {
1.1.1.2 ! misho 2501: VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1 misho 2502: default: break;
2503: }
2504: ecode++;
2505: break;
2506:
2507: case OP_VSPACE:
2508: if (eptr >= md->end_subject)
2509: {
2510: SCHECK_PARTIAL();
1.1.1.2 ! misho 2511: RRETURN(MATCH_NOMATCH);
1.1 misho 2512: }
2513: GETCHARINCTEST(c, eptr);
2514: switch(c)
2515: {
1.1.1.2 ! misho 2516: VSPACE_CASES: break;
! 2517: default: RRETURN(MATCH_NOMATCH);
1.1 misho 2518: }
2519: ecode++;
2520: break;
2521:
2522: #ifdef SUPPORT_UCP
2523: /* Check the next character by Unicode property. We will get here only
2524: if the support is in the binary; otherwise a compile-time error occurs. */
2525:
2526: case OP_PROP:
2527: case OP_NOTPROP:
2528: if (eptr >= md->end_subject)
2529: {
2530: SCHECK_PARTIAL();
1.1.1.2 ! misho 2531: RRETURN(MATCH_NOMATCH);
1.1 misho 2532: }
2533: GETCHARINCTEST(c, eptr);
2534: {
1.1.1.2 ! misho 2535: const pcre_uint32 *cp;
1.1 misho 2536: const ucd_record *prop = GET_UCD(c);
2537:
2538: switch(ecode[1])
2539: {
2540: case PT_ANY:
1.1.1.2 ! misho 2541: if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1.1 misho 2542: break;
2543:
2544: case PT_LAMP:
2545: if ((prop->chartype == ucp_Lu ||
2546: prop->chartype == ucp_Ll ||
2547: prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.1.1.2 ! misho 2548: RRETURN(MATCH_NOMATCH);
1.1 misho 2549: break;
2550:
2551: case PT_GC:
1.1.1.2 ! misho 2552: if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
! 2553: RRETURN(MATCH_NOMATCH);
1.1 misho 2554: break;
2555:
2556: case PT_PC:
2557: if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.1.1.2 ! misho 2558: RRETURN(MATCH_NOMATCH);
1.1 misho 2559: break;
2560:
2561: case PT_SC:
2562: if ((ecode[2] != prop->script) == (op == OP_PROP))
1.1.1.2 ! misho 2563: RRETURN(MATCH_NOMATCH);
1.1 misho 2564: break;
2565:
2566: /* These are specials */
2567:
2568: case PT_ALNUM:
1.1.1.2 ! misho 2569: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
! 2570: PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
! 2571: RRETURN(MATCH_NOMATCH);
1.1 misho 2572: break;
2573:
2574: case PT_SPACE: /* Perl space */
1.1.1.2 ! misho 2575: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1 misho 2576: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2577: == (op == OP_NOTPROP))
1.1.1.2 ! misho 2578: RRETURN(MATCH_NOMATCH);
1.1 misho 2579: break;
2580:
2581: case PT_PXSPACE: /* POSIX space */
1.1.1.2 ! misho 2582: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1 misho 2583: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2584: c == CHAR_FF || c == CHAR_CR)
2585: == (op == OP_NOTPROP))
1.1.1.2 ! misho 2586: RRETURN(MATCH_NOMATCH);
1.1 misho 2587: break;
2588:
2589: case PT_WORD:
1.1.1.2 ! misho 2590: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
! 2591: PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1 misho 2592: c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
1.1.1.2 ! misho 2593: RRETURN(MATCH_NOMATCH);
! 2594: break;
! 2595:
! 2596: case PT_CLIST:
! 2597: cp = PRIV(ucd_caseless_sets) + ecode[2];
! 2598: for (;;)
! 2599: {
! 2600: if (c < *cp)
! 2601: { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
! 2602: if (c == *cp++)
! 2603: { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
! 2604: }
1.1 misho 2605: break;
2606:
2607: /* This should never occur */
2608:
2609: default:
2610: RRETURN(PCRE_ERROR_INTERNAL);
2611: }
2612:
2613: ecode += 3;
2614: }
2615: break;
2616:
2617: /* Match an extended Unicode sequence. We will get here only if the support
2618: is in the binary; otherwise a compile-time error occurs. */
2619:
2620: case OP_EXTUNI:
2621: if (eptr >= md->end_subject)
2622: {
2623: SCHECK_PARTIAL();
1.1.1.2 ! misho 2624: RRETURN(MATCH_NOMATCH);
1.1 misho 2625: }
1.1.1.2 ! misho 2626: else
1.1 misho 2627: {
1.1.1.2 ! misho 2628: int lgb, rgb;
! 2629: GETCHARINCTEST(c, eptr);
! 2630: lgb = UCD_GRAPHBREAK(c);
1.1 misho 2631: while (eptr < md->end_subject)
2632: {
2633: int len = 1;
1.1.1.2 ! misho 2634: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 2635: rgb = UCD_GRAPHBREAK(c);
! 2636: if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
! 2637: lgb = rgb;
1.1 misho 2638: eptr += len;
2639: }
2640: }
1.1.1.2 ! misho 2641: CHECK_PARTIAL();
1.1 misho 2642: ecode++;
2643: break;
1.1.1.2 ! misho 2644: #endif /* SUPPORT_UCP */
1.1 misho 2645:
2646:
2647: /* Match a back reference, possibly repeatedly. Look past the end of the
2648: item to see if there is repeat information following. The code is similar
2649: to that for character classes, but repeated for efficiency. Then obey
2650: similar code to character type repeats - written out again for speed.
2651: However, if the referenced string is the empty string, always treat
2652: it as matched, any number of times (otherwise there could be infinite
2653: loops). */
2654:
2655: case OP_REF:
1.1.1.2 ! misho 2656: case OP_REFI:
! 2657: caseless = op == OP_REFI;
! 2658: offset = GET2(ecode, 1) << 1; /* Doubled ref number */
! 2659: ecode += 1 + IMM2_SIZE;
1.1 misho 2660:
1.1.1.2 ! misho 2661: /* If the reference is unset, there are two possibilities:
1.1 misho 2662:
1.1.1.2 ! misho 2663: (a) In the default, Perl-compatible state, set the length negative;
! 2664: this ensures that every attempt at a match fails. We can't just fail
! 2665: here, because of the possibility of quantifiers with zero minima.
1.1 misho 2666:
1.1.1.2 ! misho 2667: (b) If the JavaScript compatibility flag is set, set the length to zero
! 2668: so that the back reference matches an empty string.
1.1 misho 2669:
1.1.1.2 ! misho 2670: Otherwise, set the length to the length of what was matched by the
! 2671: referenced subpattern. */
1.1 misho 2672:
1.1.1.2 ! misho 2673: if (offset >= offset_top || md->offset_vector[offset] < 0)
! 2674: length = (md->jscript_compat)? 0 : -1;
! 2675: else
! 2676: length = md->offset_vector[offset+1] - md->offset_vector[offset];
1.1 misho 2677:
1.1.1.2 ! misho 2678: /* Set up for repetition, or handle the non-repeated case */
1.1 misho 2679:
1.1.1.2 ! misho 2680: switch (*ecode)
! 2681: {
! 2682: case OP_CRSTAR:
! 2683: case OP_CRMINSTAR:
! 2684: case OP_CRPLUS:
! 2685: case OP_CRMINPLUS:
! 2686: case OP_CRQUERY:
! 2687: case OP_CRMINQUERY:
! 2688: c = *ecode++ - OP_CRSTAR;
! 2689: minimize = (c & 1) != 0;
! 2690: min = rep_min[c]; /* Pick up values from tables; */
! 2691: max = rep_max[c]; /* zero for max => infinity */
! 2692: if (max == 0) max = INT_MAX;
! 2693: break;
1.1 misho 2694:
1.1.1.2 ! misho 2695: case OP_CRRANGE:
! 2696: case OP_CRMINRANGE:
! 2697: minimize = (*ecode == OP_CRMINRANGE);
! 2698: min = GET2(ecode, 1);
! 2699: max = GET2(ecode, 1 + IMM2_SIZE);
! 2700: if (max == 0) max = INT_MAX;
! 2701: ecode += 1 + 2 * IMM2_SIZE;
! 2702: break;
1.1 misho 2703:
1.1.1.2 ! misho 2704: default: /* No repeat follows */
! 2705: if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
! 2706: {
! 2707: if (length == -2) eptr = md->end_subject; /* Partial match */
! 2708: CHECK_PARTIAL();
! 2709: RRETURN(MATCH_NOMATCH);
1.1 misho 2710: }
1.1.1.2 ! misho 2711: eptr += length;
! 2712: continue; /* With the main loop */
! 2713: }
1.1 misho 2714:
1.1.1.2 ! misho 2715: /* Handle repeated back references. If the length of the reference is
! 2716: zero, just continue with the main loop. If the length is negative, it
! 2717: means the reference is unset in non-JavaScript-compatible mode. If the minimum is
! 2718: zero, we can continue at the same level without recursion. For any other
! 2719: minimum, carrying on will result in NOMATCH. */
1.1 misho 2720:
1.1.1.2 ! misho 2721: if (length == 0) continue;
! 2722: if (length < 0 && min == 0) continue;
1.1 misho 2723:
1.1.1.2 ! misho 2724: /* First, ensure the minimum number of matches are present. We get back
! 2725: the length of the reference string explicitly rather than passing the
! 2726: address of eptr, so that eptr can be a register variable. */
1.1 misho 2727:
1.1.1.2 ! misho 2728: for (i = 1; i <= min; i++)
! 2729: {
! 2730: int slength;
! 2731: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1 misho 2732: {
1.1.1.2 ! misho 2733: if (slength == -2) eptr = md->end_subject; /* Partial match */
! 2734: CHECK_PARTIAL();
! 2735: RRETURN(MATCH_NOMATCH);
1.1 misho 2736: }
1.1.1.2 ! misho 2737: eptr += slength;
! 2738: }
1.1 misho 2739:
1.1.1.2 ! misho 2740: /* If min = max, continue at the same level without recursion.
! 2741: They are not both allowed to be zero. */
1.1 misho 2742:
1.1.1.2 ! misho 2743: if (min == max) continue;
1.1 misho 2744:
1.1.1.2 ! misho 2745: /* If minimizing, keep trying and advancing the pointer */
1.1 misho 2746:
1.1.1.2 ! misho 2747: if (minimize)
! 2748: {
! 2749: for (fi = min;; fi++)
1.1 misho 2750: {
1.1.1.2 ! misho 2751: int slength;
! 2752: RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
! 2753: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2754: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 2755: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1 misho 2756: {
1.1.1.2 ! misho 2757: if (slength == -2) eptr = md->end_subject; /* Partial match */
! 2758: CHECK_PARTIAL();
! 2759: RRETURN(MATCH_NOMATCH);
1.1 misho 2760: }
1.1.1.2 ! misho 2761: eptr += slength;
1.1 misho 2762: }
1.1.1.2 ! misho 2763: /* Control never gets here */
! 2764: }
1.1 misho 2765:
1.1.1.2 ! misho 2766: /* If maximizing, find the longest string and work backwards */
1.1 misho 2767:
1.1.1.2 ! misho 2768: else
! 2769: {
! 2770: pp = eptr;
! 2771: for (i = min; i < max; i++)
1.1 misho 2772: {
1.1.1.2 ! misho 2773: int slength;
! 2774: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1 misho 2775: {
1.1.1.2 ! misho 2776: /* Can't use CHECK_PARTIAL because we don't want to update eptr in
! 2777: the soft partial matching case. */
! 2778:
! 2779: if (slength == -2 && md->partial != 0 &&
! 2780: md->end_subject > md->start_used_ptr)
1.1 misho 2781: {
1.1.1.2 ! misho 2782: md->hitend = TRUE;
! 2783: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1 misho 2784: }
1.1.1.2 ! misho 2785: break;
1.1 misho 2786: }
1.1.1.2 ! misho 2787: eptr += slength;
! 2788: }
! 2789:
! 2790: while (eptr >= pp)
! 2791: {
! 2792: RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
! 2793: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 2794: eptr -= length;
1.1 misho 2795: }
1.1.1.2 ! misho 2796: RRETURN(MATCH_NOMATCH);
1.1 misho 2797: }
2798: /* Control never gets here */
2799:
2800: /* Match a bit-mapped character class, possibly repeatedly. This op code is
2801: used when all the characters in the class have values in the range 0-255,
2802: and either the matching is caseful, or the characters are in the range
2803: 0-127 when UTF-8 processing is enabled. The only difference between
2804: OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2805: encountered.
2806:
2807: First, look past the end of the item to see if there is repeat information
2808: following. Then obey similar code to character type repeats - written out
2809: again for speed. */
2810:
2811: case OP_NCLASS:
2812: case OP_CLASS:
2813: {
1.1.1.2 ! misho 2814: /* The data variable is saved across frames, so the byte map needs to
! 2815: be stored there. */
! 2816: #define BYTE_MAP ((pcre_uint8 *)data)
1.1 misho 2817: data = ecode + 1; /* Save for matching */
1.1.1.2 ! misho 2818: ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1 misho 2819:
2820: switch (*ecode)
2821: {
2822: case OP_CRSTAR:
2823: case OP_CRMINSTAR:
2824: case OP_CRPLUS:
2825: case OP_CRMINPLUS:
2826: case OP_CRQUERY:
2827: case OP_CRMINQUERY:
2828: c = *ecode++ - OP_CRSTAR;
2829: minimize = (c & 1) != 0;
2830: min = rep_min[c]; /* Pick up values from tables; */
2831: max = rep_max[c]; /* zero for max => infinity */
2832: if (max == 0) max = INT_MAX;
2833: break;
2834:
2835: case OP_CRRANGE:
2836: case OP_CRMINRANGE:
2837: minimize = (*ecode == OP_CRMINRANGE);
2838: min = GET2(ecode, 1);
1.1.1.2 ! misho 2839: max = GET2(ecode, 1 + IMM2_SIZE);
1.1 misho 2840: if (max == 0) max = INT_MAX;
1.1.1.2 ! misho 2841: ecode += 1 + 2 * IMM2_SIZE;
1.1 misho 2842: break;
2843:
2844: default: /* No repeat follows */
2845: min = max = 1;
2846: break;
2847: }
2848:
2849: /* First, ensure the minimum number of matches are present. */
2850:
1.1.1.2 ! misho 2851: #ifdef SUPPORT_UTF
! 2852: if (utf)
1.1 misho 2853: {
2854: for (i = 1; i <= min; i++)
2855: {
2856: if (eptr >= md->end_subject)
2857: {
2858: SCHECK_PARTIAL();
1.1.1.2 ! misho 2859: RRETURN(MATCH_NOMATCH);
1.1 misho 2860: }
2861: GETCHARINC(c, eptr);
2862: if (c > 255)
2863: {
1.1.1.2 ! misho 2864: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1 misho 2865: }
2866: else
1.1.1.2 ! misho 2867: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2868: }
2869: }
2870: else
2871: #endif
1.1.1.2 ! misho 2872: /* Not UTF mode */
1.1 misho 2873: {
2874: for (i = 1; i <= min; i++)
2875: {
2876: if (eptr >= md->end_subject)
2877: {
2878: SCHECK_PARTIAL();
1.1.1.2 ! misho 2879: RRETURN(MATCH_NOMATCH);
1.1 misho 2880: }
2881: c = *eptr++;
1.1.1.2 ! misho 2882: #ifndef COMPILE_PCRE8
! 2883: if (c > 255)
! 2884: {
! 2885: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2886: }
! 2887: else
! 2888: #endif
! 2889: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2890: }
2891: }
2892:
2893: /* If max == min we can continue with the main loop without the
2894: need to recurse. */
2895:
2896: if (min == max) continue;
2897:
2898: /* If minimizing, keep testing the rest of the expression and advancing
2899: the pointer while it matches the class. */
2900:
2901: if (minimize)
2902: {
1.1.1.2 ! misho 2903: #ifdef SUPPORT_UTF
! 2904: if (utf)
1.1 misho 2905: {
2906: for (fi = min;; fi++)
2907: {
1.1.1.2 ! misho 2908: RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
1.1 misho 2909: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 2910: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 2911: if (eptr >= md->end_subject)
2912: {
2913: SCHECK_PARTIAL();
1.1.1.2 ! misho 2914: RRETURN(MATCH_NOMATCH);
1.1 misho 2915: }
2916: GETCHARINC(c, eptr);
2917: if (c > 255)
2918: {
1.1.1.2 ! misho 2919: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1 misho 2920: }
2921: else
1.1.1.2 ! misho 2922: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2923: }
2924: }
2925: else
2926: #endif
1.1.1.2 ! misho 2927: /* Not UTF mode */
1.1 misho 2928: {
2929: for (fi = min;; fi++)
2930: {
1.1.1.2 ! misho 2931: RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
1.1 misho 2932: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 2933: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 2934: if (eptr >= md->end_subject)
2935: {
2936: SCHECK_PARTIAL();
1.1.1.2 ! misho 2937: RRETURN(MATCH_NOMATCH);
1.1 misho 2938: }
2939: c = *eptr++;
1.1.1.2 ! misho 2940: #ifndef COMPILE_PCRE8
! 2941: if (c > 255)
! 2942: {
! 2943: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2944: }
! 2945: else
! 2946: #endif
! 2947: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2948: }
2949: }
2950: /* Control never gets here */
2951: }
2952:
2953: /* If maximizing, find the longest possible run, then work backwards. */
2954:
2955: else
2956: {
2957: pp = eptr;
2958:
1.1.1.2 ! misho 2959: #ifdef SUPPORT_UTF
! 2960: if (utf)
1.1 misho 2961: {
2962: for (i = min; i < max; i++)
2963: {
2964: int len = 1;
2965: if (eptr >= md->end_subject)
2966: {
2967: SCHECK_PARTIAL();
2968: break;
2969: }
2970: GETCHARLEN(c, eptr, len);
2971: if (c > 255)
2972: {
2973: if (op == OP_CLASS) break;
2974: }
2975: else
1.1.1.2 ! misho 2976: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1 misho 2977: eptr += len;
2978: }
2979: for (;;)
2980: {
1.1.1.2 ! misho 2981: RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
1.1 misho 2982: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2983: if (eptr-- == pp) break; /* Stop if tried at original pos */
2984: BACKCHAR(eptr);
2985: }
2986: }
2987: else
2988: #endif
1.1.1.2 ! misho 2989: /* Not UTF mode */
1.1 misho 2990: {
2991: for (i = min; i < max; i++)
2992: {
2993: if (eptr >= md->end_subject)
2994: {
2995: SCHECK_PARTIAL();
2996: break;
2997: }
2998: c = *eptr;
1.1.1.2 ! misho 2999: #ifndef COMPILE_PCRE8
! 3000: if (c > 255)
! 3001: {
! 3002: if (op == OP_CLASS) break;
! 3003: }
! 3004: else
! 3005: #endif
! 3006: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1 misho 3007: eptr++;
3008: }
3009: while (eptr >= pp)
3010: {
1.1.1.2 ! misho 3011: RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
1.1 misho 3012: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3013: eptr--;
3014: }
3015: }
3016:
1.1.1.2 ! misho 3017: RRETURN(MATCH_NOMATCH);
1.1 misho 3018: }
1.1.1.2 ! misho 3019: #undef BYTE_MAP
1.1 misho 3020: }
3021: /* Control never gets here */
3022:
3023:
3024: /* Match an extended character class. This opcode is encountered only
3025: when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3026: mode, because Unicode properties are supported in non-UTF-8 mode. */
3027:
1.1.1.2 ! misho 3028: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 3029: case OP_XCLASS:
3030: {
3031: data = ecode + 1 + LINK_SIZE; /* Save for matching */
3032: ecode += GET(ecode, 1); /* Advance past the item */
3033:
3034: switch (*ecode)
3035: {
3036: case OP_CRSTAR:
3037: case OP_CRMINSTAR:
3038: case OP_CRPLUS:
3039: case OP_CRMINPLUS:
3040: case OP_CRQUERY:
3041: case OP_CRMINQUERY:
3042: c = *ecode++ - OP_CRSTAR;
3043: minimize = (c & 1) != 0;
3044: min = rep_min[c]; /* Pick up values from tables; */
3045: max = rep_max[c]; /* zero for max => infinity */
3046: if (max == 0) max = INT_MAX;
3047: break;
3048:
3049: case OP_CRRANGE:
3050: case OP_CRMINRANGE:
3051: minimize = (*ecode == OP_CRMINRANGE);
3052: min = GET2(ecode, 1);
1.1.1.2 ! misho 3053: max = GET2(ecode, 1 + IMM2_SIZE);
1.1 misho 3054: if (max == 0) max = INT_MAX;
1.1.1.2 ! misho 3055: ecode += 1 + 2 * IMM2_SIZE;
1.1 misho 3056: break;
3057:
3058: default: /* No repeat follows */
3059: min = max = 1;
3060: break;
3061: }
3062:
3063: /* First, ensure the minimum number of matches are present. */
3064:
3065: for (i = 1; i <= min; i++)
3066: {
3067: if (eptr >= md->end_subject)
3068: {
3069: SCHECK_PARTIAL();
1.1.1.2 ! misho 3070: RRETURN(MATCH_NOMATCH);
1.1 misho 3071: }
3072: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 3073: if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1 misho 3074: }
3075:
3076: /* If max == min we can continue with the main loop without the
3077: need to recurse. */
3078:
3079: if (min == max) continue;
3080:
3081: /* If minimizing, keep testing the rest of the expression and advancing
3082: the pointer while it matches the class. */
3083:
3084: if (minimize)
3085: {
3086: for (fi = min;; fi++)
3087: {
1.1.1.2 ! misho 3088: RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
1.1 misho 3089: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3090: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3091: if (eptr >= md->end_subject)
3092: {
3093: SCHECK_PARTIAL();
1.1.1.2 ! misho 3094: RRETURN(MATCH_NOMATCH);
1.1 misho 3095: }
3096: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 3097: if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1 misho 3098: }
3099: /* Control never gets here */
3100: }
3101:
3102: /* If maximizing, find the longest possible run, then work backwards. */
3103:
3104: else
3105: {
3106: pp = eptr;
3107: for (i = min; i < max; i++)
3108: {
3109: int len = 1;
3110: if (eptr >= md->end_subject)
3111: {
3112: SCHECK_PARTIAL();
3113: break;
3114: }
1.1.1.2 ! misho 3115: #ifdef SUPPORT_UTF
1.1 misho 3116: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 3117: #else
! 3118: c = *eptr;
! 3119: #endif
! 3120: if (!PRIV(xclass)(c, data, utf)) break;
1.1 misho 3121: eptr += len;
3122: }
3123: for(;;)
3124: {
1.1.1.2 ! misho 3125: RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
1.1 misho 3126: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127: if (eptr-- == pp) break; /* Stop if tried at original pos */
1.1.1.2 ! misho 3128: #ifdef SUPPORT_UTF
! 3129: if (utf) BACKCHAR(eptr);
! 3130: #endif
1.1 misho 3131: }
1.1.1.2 ! misho 3132: RRETURN(MATCH_NOMATCH);
1.1 misho 3133: }
3134:
3135: /* Control never gets here */
3136: }
3137: #endif /* End of XCLASS */
3138:
3139: /* Match a single character, casefully */
3140:
3141: case OP_CHAR:
1.1.1.2 ! misho 3142: #ifdef SUPPORT_UTF
! 3143: if (utf)
1.1 misho 3144: {
3145: length = 1;
3146: ecode++;
3147: GETCHARLEN(fc, ecode, length);
3148: if (length > md->end_subject - eptr)
3149: {
3150: CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
1.1.1.2 ! misho 3151: RRETURN(MATCH_NOMATCH);
1.1 misho 3152: }
1.1.1.2 ! misho 3153: while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
1.1 misho 3154: }
3155: else
3156: #endif
1.1.1.2 ! misho 3157: /* Not UTF mode */
1.1 misho 3158: {
3159: if (md->end_subject - eptr < 1)
3160: {
3161: SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
1.1.1.2 ! misho 3162: RRETURN(MATCH_NOMATCH);
1.1 misho 3163: }
1.1.1.2 ! misho 3164: if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1.1 misho 3165: ecode += 2;
3166: }
3167: break;
3168:
1.1.1.2 ! misho 3169: /* Match a single character, caselessly. If we are at the end of the
! 3170: subject, give up immediately. */
! 3171:
! 3172: case OP_CHARI:
! 3173: if (eptr >= md->end_subject)
! 3174: {
! 3175: SCHECK_PARTIAL();
! 3176: RRETURN(MATCH_NOMATCH);
! 3177: }
1.1 misho 3178:
1.1.1.2 ! misho 3179: #ifdef SUPPORT_UTF
! 3180: if (utf)
1.1 misho 3181: {
3182: length = 1;
3183: ecode++;
3184: GETCHARLEN(fc, ecode, length);
3185:
3186: /* If the pattern character's value is < 128, we have only one byte, and
1.1.1.2 ! misho 3187: we know that its other case must also be one byte long, so we can use the
! 3188: fast lookup table. We know that there is at least one byte left in the
! 3189: subject. */
1.1 misho 3190:
3191: if (fc < 128)
3192: {
1.1.1.2 ! misho 3193: pcre_uchar cc = RAWUCHAR(eptr);
! 3194: if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
! 3195: ecode++;
! 3196: eptr++;
1.1 misho 3197: }
3198:
1.1.1.2 ! misho 3199: /* Otherwise we must pick up the subject character. Note that we cannot
! 3200: use the value of "length" to check for sufficient bytes left, because the
! 3201: other case of the character may have more or fewer bytes. */
1.1 misho 3202:
3203: else
3204: {
1.1.1.2 ! misho 3205: pcre_uint32 dc;
1.1 misho 3206: GETCHARINC(dc, eptr);
3207: ecode += length;
3208:
3209: /* If we have Unicode property support, we can use it to test the other
3210: case of the character, if there is one. */
3211:
3212: if (fc != dc)
3213: {
3214: #ifdef SUPPORT_UCP
3215: if (dc != UCD_OTHERCASE(fc))
3216: #endif
1.1.1.2 ! misho 3217: RRETURN(MATCH_NOMATCH);
1.1 misho 3218: }
3219: }
3220: }
3221: else
1.1.1.2 ! misho 3222: #endif /* SUPPORT_UTF */
1.1 misho 3223:
1.1.1.2 ! misho 3224: /* Not UTF mode */
1.1 misho 3225: {
1.1.1.2 ! misho 3226: if (TABLE_GET(ecode[1], md->lcc, ecode[1])
! 3227: != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
! 3228: eptr++;
1.1 misho 3229: ecode += 2;
3230: }
3231: break;
3232:
3233: /* Match a single character repeatedly. */
3234:
3235: case OP_EXACT:
1.1.1.2 ! misho 3236: case OP_EXACTI:
1.1 misho 3237: min = max = GET2(ecode, 1);
1.1.1.2 ! misho 3238: ecode += 1 + IMM2_SIZE;
1.1 misho 3239: goto REPEATCHAR;
3240:
3241: case OP_POSUPTO:
1.1.1.2 ! misho 3242: case OP_POSUPTOI:
1.1 misho 3243: possessive = TRUE;
3244: /* Fall through */
3245:
3246: case OP_UPTO:
1.1.1.2 ! misho 3247: case OP_UPTOI:
1.1 misho 3248: case OP_MINUPTO:
1.1.1.2 ! misho 3249: case OP_MINUPTOI:
1.1 misho 3250: min = 0;
3251: max = GET2(ecode, 1);
1.1.1.2 ! misho 3252: minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
! 3253: ecode += 1 + IMM2_SIZE;
1.1 misho 3254: goto REPEATCHAR;
3255:
3256: case OP_POSSTAR:
1.1.1.2 ! misho 3257: case OP_POSSTARI:
1.1 misho 3258: possessive = TRUE;
3259: min = 0;
3260: max = INT_MAX;
3261: ecode++;
3262: goto REPEATCHAR;
3263:
3264: case OP_POSPLUS:
1.1.1.2 ! misho 3265: case OP_POSPLUSI:
1.1 misho 3266: possessive = TRUE;
3267: min = 1;
3268: max = INT_MAX;
3269: ecode++;
3270: goto REPEATCHAR;
3271:
3272: case OP_POSQUERY:
1.1.1.2 ! misho 3273: case OP_POSQUERYI:
1.1 misho 3274: possessive = TRUE;
3275: min = 0;
3276: max = 1;
3277: ecode++;
3278: goto REPEATCHAR;
3279:
3280: case OP_STAR:
1.1.1.2 ! misho 3281: case OP_STARI:
1.1 misho 3282: case OP_MINSTAR:
1.1.1.2 ! misho 3283: case OP_MINSTARI:
1.1 misho 3284: case OP_PLUS:
1.1.1.2 ! misho 3285: case OP_PLUSI:
1.1 misho 3286: case OP_MINPLUS:
1.1.1.2 ! misho 3287: case OP_MINPLUSI:
1.1 misho 3288: case OP_QUERY:
1.1.1.2 ! misho 3289: case OP_QUERYI:
1.1 misho 3290: case OP_MINQUERY:
1.1.1.2 ! misho 3291: case OP_MINQUERYI:
! 3292: c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
1.1 misho 3293: minimize = (c & 1) != 0;
3294: min = rep_min[c]; /* Pick up values from tables; */
3295: max = rep_max[c]; /* zero for max => infinity */
3296: if (max == 0) max = INT_MAX;
3297:
3298: /* Common code for all repeated single-character matches. */
3299:
3300: REPEATCHAR:
1.1.1.2 ! misho 3301: #ifdef SUPPORT_UTF
! 3302: if (utf)
1.1 misho 3303: {
3304: length = 1;
3305: charptr = ecode;
3306: GETCHARLEN(fc, ecode, length);
3307: ecode += length;
3308:
3309: /* Handle multibyte character matching specially here. There is
3310: support for caseless matching if UCP support is present. */
3311:
3312: if (length > 1)
3313: {
3314: #ifdef SUPPORT_UCP
1.1.1.2 ! misho 3315: pcre_uint32 othercase;
! 3316: if (op >= OP_STARI && /* Caseless */
1.1 misho 3317: (othercase = UCD_OTHERCASE(fc)) != fc)
1.1.1.2 ! misho 3318: oclength = PRIV(ord2utf)(othercase, occhars);
1.1 misho 3319: else oclength = 0;
3320: #endif /* SUPPORT_UCP */
3321:
3322: for (i = 1; i <= min; i++)
3323: {
3324: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3325: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3326: #ifdef SUPPORT_UCP
3327: else if (oclength > 0 &&
3328: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3329: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3330: #endif /* SUPPORT_UCP */
3331: else
3332: {
3333: CHECK_PARTIAL();
1.1.1.2 ! misho 3334: RRETURN(MATCH_NOMATCH);
1.1 misho 3335: }
3336: }
3337:
3338: if (min == max) continue;
3339:
3340: if (minimize)
3341: {
3342: for (fi = min;; fi++)
3343: {
1.1.1.2 ! misho 3344: RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
1.1 misho 3345: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3346: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3347: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3348: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3349: #ifdef SUPPORT_UCP
3350: else if (oclength > 0 &&
3351: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3352: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3353: #endif /* SUPPORT_UCP */
3354: else
3355: {
3356: CHECK_PARTIAL();
1.1.1.2 ! misho 3357: RRETURN(MATCH_NOMATCH);
1.1 misho 3358: }
3359: }
3360: /* Control never gets here */
3361: }
3362:
3363: else /* Maximize */
3364: {
3365: pp = eptr;
3366: for (i = min; i < max; i++)
3367: {
3368: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3369: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3370: #ifdef SUPPORT_UCP
3371: else if (oclength > 0 &&
3372: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3373: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3374: #endif /* SUPPORT_UCP */
3375: else
3376: {
3377: CHECK_PARTIAL();
3378: break;
3379: }
3380: }
3381:
3382: if (possessive) continue;
3383:
3384: for(;;)
3385: {
1.1.1.2 ! misho 3386: RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
1.1 misho 3387: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3388: if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
1.1 misho 3389: #ifdef SUPPORT_UCP
3390: eptr--;
3391: BACKCHAR(eptr);
3392: #else /* without SUPPORT_UCP */
3393: eptr -= length;
3394: #endif /* SUPPORT_UCP */
3395: }
3396: }
3397: /* Control never gets here */
3398: }
3399:
3400: /* If the length of a UTF-8 character is 1, we fall through here, and
3401: obey the code as for non-UTF-8 characters below, though in this case the
3402: value of fc will always be < 128. */
3403: }
3404: else
1.1.1.2 ! misho 3405: #endif /* SUPPORT_UTF */
! 3406: /* When not in UTF-8 mode, load a single-byte character. */
! 3407: fc = *ecode++;
1.1 misho 3408:
1.1.1.2 ! misho 3409: /* The value of fc at this point is always one character, though we may
! 3410: or may not be in UTF mode. The code is duplicated for the caseless and
1.1 misho 3411: caseful cases, for speed, since matching characters is likely to be quite
3412: common. First, ensure the minimum number of matches are present. If min =
3413: max, continue at the same level without recursing. Otherwise, if
3414: minimizing, keep trying the rest of the expression and advancing one
3415: matching character if failing, up to the maximum. Alternatively, if
3416: maximizing, find the maximum number of characters and work backwards. */
3417:
3418: DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.2 ! misho 3419: max, (char *)eptr));
1.1 misho 3420:
1.1.1.2 ! misho 3421: if (op >= OP_STARI) /* Caseless */
1.1 misho 3422: {
1.1.1.2 ! misho 3423: #ifdef COMPILE_PCRE8
! 3424: /* fc must be < 128 if UTF is enabled. */
! 3425: foc = md->fcc[fc];
! 3426: #else
! 3427: #ifdef SUPPORT_UTF
! 3428: #ifdef SUPPORT_UCP
! 3429: if (utf && fc > 127)
! 3430: foc = UCD_OTHERCASE(fc);
! 3431: #else
! 3432: if (utf && fc > 127)
! 3433: foc = fc;
! 3434: #endif /* SUPPORT_UCP */
! 3435: else
! 3436: #endif /* SUPPORT_UTF */
! 3437: foc = TABLE_GET(fc, md->fcc, fc);
! 3438: #endif /* COMPILE_PCRE8 */
! 3439:
1.1 misho 3440: for (i = 1; i <= min; i++)
3441: {
1.1.1.2 ! misho 3442: pcre_uchar cc;
! 3443:
1.1 misho 3444: if (eptr >= md->end_subject)
3445: {
3446: SCHECK_PARTIAL();
1.1.1.2 ! misho 3447: RRETURN(MATCH_NOMATCH);
1.1 misho 3448: }
1.1.1.2 ! misho 3449: cc = RAWUCHARTEST(eptr);
! 3450: if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
! 3451: eptr++;
1.1 misho 3452: }
3453: if (min == max) continue;
3454: if (minimize)
3455: {
3456: for (fi = min;; fi++)
3457: {
1.1.1.2 ! misho 3458: pcre_uchar cc;
! 3459:
! 3460: RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
1.1 misho 3461: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3462: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3463: if (eptr >= md->end_subject)
3464: {
3465: SCHECK_PARTIAL();
1.1.1.2 ! misho 3466: RRETURN(MATCH_NOMATCH);
1.1 misho 3467: }
1.1.1.2 ! misho 3468: cc = RAWUCHARTEST(eptr);
! 3469: if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
! 3470: eptr++;
1.1 misho 3471: }
3472: /* Control never gets here */
3473: }
3474: else /* Maximize */
3475: {
3476: pp = eptr;
3477: for (i = min; i < max; i++)
3478: {
1.1.1.2 ! misho 3479: pcre_uchar cc;
! 3480:
1.1 misho 3481: if (eptr >= md->end_subject)
3482: {
3483: SCHECK_PARTIAL();
3484: break;
3485: }
1.1.1.2 ! misho 3486: cc = RAWUCHARTEST(eptr);
! 3487: if (fc != cc && foc != cc) break;
1.1 misho 3488: eptr++;
3489: }
3490:
3491: if (possessive) continue;
3492:
3493: while (eptr >= pp)
3494: {
1.1.1.2 ! misho 3495: RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
1.1 misho 3496: eptr--;
3497: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498: }
1.1.1.2 ! misho 3499: RRETURN(MATCH_NOMATCH);
1.1 misho 3500: }
3501: /* Control never gets here */
3502: }
3503:
3504: /* Caseful comparisons (includes all multi-byte characters) */
3505:
3506: else
3507: {
3508: for (i = 1; i <= min; i++)
3509: {
3510: if (eptr >= md->end_subject)
3511: {
3512: SCHECK_PARTIAL();
1.1.1.2 ! misho 3513: RRETURN(MATCH_NOMATCH);
1.1 misho 3514: }
1.1.1.2 ! misho 3515: if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1 misho 3516: }
3517:
3518: if (min == max) continue;
3519:
3520: if (minimize)
3521: {
3522: for (fi = min;; fi++)
3523: {
1.1.1.2 ! misho 3524: RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
1.1 misho 3525: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3526: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3527: if (eptr >= md->end_subject)
3528: {
3529: SCHECK_PARTIAL();
1.1.1.2 ! misho 3530: RRETURN(MATCH_NOMATCH);
1.1 misho 3531: }
1.1.1.2 ! misho 3532: if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1 misho 3533: }
3534: /* Control never gets here */
3535: }
3536: else /* Maximize */
3537: {
3538: pp = eptr;
3539: for (i = min; i < max; i++)
3540: {
3541: if (eptr >= md->end_subject)
3542: {
3543: SCHECK_PARTIAL();
3544: break;
3545: }
1.1.1.2 ! misho 3546: if (fc != RAWUCHARTEST(eptr)) break;
1.1 misho 3547: eptr++;
3548: }
3549: if (possessive) continue;
3550:
3551: while (eptr >= pp)
3552: {
1.1.1.2 ! misho 3553: RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
1.1 misho 3554: eptr--;
3555: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556: }
1.1.1.2 ! misho 3557: RRETURN(MATCH_NOMATCH);
1.1 misho 3558: }
3559: }
3560: /* Control never gets here */
3561:
3562: /* Match a negated single one-byte character. The character we are
3563: checking can be multibyte. */
3564:
3565: case OP_NOT:
1.1.1.2 ! misho 3566: case OP_NOTI:
1.1 misho 3567: if (eptr >= md->end_subject)
3568: {
3569: SCHECK_PARTIAL();
1.1.1.2 ! misho 3570: RRETURN(MATCH_NOMATCH);
1.1 misho 3571: }
1.1.1.2 ! misho 3572: #ifdef SUPPORT_UTF
! 3573: if (utf)
1.1 misho 3574: {
1.1.1.2 ! misho 3575: register pcre_uint32 ch, och;
! 3576:
! 3577: ecode++;
! 3578: GETCHARINC(ch, ecode);
! 3579: GETCHARINC(c, eptr);
! 3580:
! 3581: if (op == OP_NOT)
! 3582: {
! 3583: if (ch == c) RRETURN(MATCH_NOMATCH);
! 3584: }
! 3585: else
! 3586: {
! 3587: #ifdef SUPPORT_UCP
! 3588: if (ch > 127)
! 3589: och = UCD_OTHERCASE(ch);
! 3590: #else
! 3591: if (ch > 127)
! 3592: och = ch;
! 3593: #endif /* SUPPORT_UCP */
! 3594: else
! 3595: och = TABLE_GET(ch, md->fcc, ch);
! 3596: if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
! 3597: }
1.1 misho 3598: }
3599: else
1.1.1.2 ! misho 3600: #endif
1.1 misho 3601: {
1.1.1.2 ! misho 3602: register pcre_uint32 ch = ecode[1];
! 3603: c = *eptr++;
! 3604: if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
! 3605: RRETURN(MATCH_NOMATCH);
! 3606: ecode += 2;
1.1 misho 3607: }
3608: break;
3609:
3610: /* Match a negated single one-byte character repeatedly. This is almost a
3611: repeat of the code for a repeated single character, but I haven't found a
3612: nice way of commoning these up that doesn't require a test of the
3613: positive/negative option for each character match. Maybe that wouldn't add
3614: very much to the time taken, but character matching *is* what this is all
3615: about... */
3616:
3617: case OP_NOTEXACT:
1.1.1.2 ! misho 3618: case OP_NOTEXACTI:
1.1 misho 3619: min = max = GET2(ecode, 1);
1.1.1.2 ! misho 3620: ecode += 1 + IMM2_SIZE;
1.1 misho 3621: goto REPEATNOTCHAR;
3622:
3623: case OP_NOTUPTO:
1.1.1.2 ! misho 3624: case OP_NOTUPTOI:
1.1 misho 3625: case OP_NOTMINUPTO:
1.1.1.2 ! misho 3626: case OP_NOTMINUPTOI:
1.1 misho 3627: min = 0;
3628: max = GET2(ecode, 1);
1.1.1.2 ! misho 3629: minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
! 3630: ecode += 1 + IMM2_SIZE;
1.1 misho 3631: goto REPEATNOTCHAR;
3632:
3633: case OP_NOTPOSSTAR:
1.1.1.2 ! misho 3634: case OP_NOTPOSSTARI:
1.1 misho 3635: possessive = TRUE;
3636: min = 0;
3637: max = INT_MAX;
3638: ecode++;
3639: goto REPEATNOTCHAR;
3640:
3641: case OP_NOTPOSPLUS:
1.1.1.2 ! misho 3642: case OP_NOTPOSPLUSI:
1.1 misho 3643: possessive = TRUE;
3644: min = 1;
3645: max = INT_MAX;
3646: ecode++;
3647: goto REPEATNOTCHAR;
3648:
3649: case OP_NOTPOSQUERY:
1.1.1.2 ! misho 3650: case OP_NOTPOSQUERYI:
1.1 misho 3651: possessive = TRUE;
3652: min = 0;
3653: max = 1;
3654: ecode++;
3655: goto REPEATNOTCHAR;
3656:
3657: case OP_NOTPOSUPTO:
1.1.1.2 ! misho 3658: case OP_NOTPOSUPTOI:
1.1 misho 3659: possessive = TRUE;
3660: min = 0;
3661: max = GET2(ecode, 1);
1.1.1.2 ! misho 3662: ecode += 1 + IMM2_SIZE;
1.1 misho 3663: goto REPEATNOTCHAR;
3664:
3665: case OP_NOTSTAR:
1.1.1.2 ! misho 3666: case OP_NOTSTARI:
1.1 misho 3667: case OP_NOTMINSTAR:
1.1.1.2 ! misho 3668: case OP_NOTMINSTARI:
1.1 misho 3669: case OP_NOTPLUS:
1.1.1.2 ! misho 3670: case OP_NOTPLUSI:
1.1 misho 3671: case OP_NOTMINPLUS:
1.1.1.2 ! misho 3672: case OP_NOTMINPLUSI:
1.1 misho 3673: case OP_NOTQUERY:
1.1.1.2 ! misho 3674: case OP_NOTQUERYI:
1.1 misho 3675: case OP_NOTMINQUERY:
1.1.1.2 ! misho 3676: case OP_NOTMINQUERYI:
! 3677: c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1.1 misho 3678: minimize = (c & 1) != 0;
3679: min = rep_min[c]; /* Pick up values from tables; */
3680: max = rep_max[c]; /* zero for max => infinity */
3681: if (max == 0) max = INT_MAX;
3682:
3683: /* Common code for all repeated single-byte matches. */
3684:
3685: REPEATNOTCHAR:
1.1.1.2 ! misho 3686: GETCHARINCTEST(fc, ecode);
1.1 misho 3687:
3688: /* The code is duplicated for the caseless and caseful cases, for speed,
3689: since matching characters is likely to be quite common. First, ensure the
3690: minimum number of matches are present. If min = max, continue at the same
3691: level without recursing. Otherwise, if minimizing, keep trying the rest of
3692: the expression and advancing one matching character if failing, up to the
3693: maximum. Alternatively, if maximizing, find the maximum number of
3694: characters and work backwards. */
3695:
3696: DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.1.1.2 ! misho 3697: max, (char *)eptr));
1.1 misho 3698:
1.1.1.2 ! misho 3699: if (op >= OP_NOTSTARI) /* Caseless */
1.1 misho 3700: {
1.1.1.2 ! misho 3701: #ifdef SUPPORT_UTF
! 3702: #ifdef SUPPORT_UCP
! 3703: if (utf && fc > 127)
! 3704: foc = UCD_OTHERCASE(fc);
! 3705: #else
! 3706: if (utf && fc > 127)
! 3707: foc = fc;
! 3708: #endif /* SUPPORT_UCP */
! 3709: else
! 3710: #endif /* SUPPORT_UTF */
! 3711: foc = TABLE_GET(fc, md->fcc, fc);
1.1 misho 3712:
1.1.1.2 ! misho 3713: #ifdef SUPPORT_UTF
! 3714: if (utf)
1.1 misho 3715: {
1.1.1.2 ! misho 3716: register pcre_uint32 d;
1.1 misho 3717: for (i = 1; i <= min; i++)
3718: {
3719: if (eptr >= md->end_subject)
3720: {
3721: SCHECK_PARTIAL();
1.1.1.2 ! misho 3722: RRETURN(MATCH_NOMATCH);
1.1 misho 3723: }
3724: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3725: if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3726: }
3727: }
3728: else
3729: #endif
1.1.1.2 ! misho 3730: /* Not UTF mode */
1.1 misho 3731: {
3732: for (i = 1; i <= min; i++)
3733: {
3734: if (eptr >= md->end_subject)
3735: {
3736: SCHECK_PARTIAL();
1.1.1.2 ! misho 3737: RRETURN(MATCH_NOMATCH);
1.1 misho 3738: }
1.1.1.2 ! misho 3739: if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
! 3740: eptr++;
1.1 misho 3741: }
3742: }
3743:
3744: if (min == max) continue;
3745:
3746: if (minimize)
3747: {
1.1.1.2 ! misho 3748: #ifdef SUPPORT_UTF
! 3749: if (utf)
1.1 misho 3750: {
1.1.1.2 ! misho 3751: register pcre_uint32 d;
1.1 misho 3752: for (fi = min;; fi++)
3753: {
1.1.1.2 ! misho 3754: RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
1.1 misho 3755: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3756: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3757: if (eptr >= md->end_subject)
3758: {
3759: SCHECK_PARTIAL();
1.1.1.2 ! misho 3760: RRETURN(MATCH_NOMATCH);
1.1 misho 3761: }
3762: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3763: if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3764: }
3765: }
3766: else
3767: #endif
1.1.1.2 ! misho 3768: /* Not UTF mode */
1.1 misho 3769: {
3770: for (fi = min;; fi++)
3771: {
1.1.1.2 ! misho 3772: RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
1.1 misho 3773: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3774: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3775: if (eptr >= md->end_subject)
3776: {
3777: SCHECK_PARTIAL();
1.1.1.2 ! misho 3778: RRETURN(MATCH_NOMATCH);
1.1 misho 3779: }
1.1.1.2 ! misho 3780: if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
! 3781: eptr++;
1.1 misho 3782: }
3783: }
3784: /* Control never gets here */
3785: }
3786:
3787: /* Maximize case */
3788:
3789: else
3790: {
3791: pp = eptr;
3792:
1.1.1.2 ! misho 3793: #ifdef SUPPORT_UTF
! 3794: if (utf)
1.1 misho 3795: {
1.1.1.2 ! misho 3796: register pcre_uint32 d;
1.1 misho 3797: for (i = min; i < max; i++)
3798: {
3799: int len = 1;
3800: if (eptr >= md->end_subject)
3801: {
3802: SCHECK_PARTIAL();
3803: break;
3804: }
3805: GETCHARLEN(d, eptr, len);
1.1.1.2 ! misho 3806: if (fc == d || (unsigned int)foc == d) break;
1.1 misho 3807: eptr += len;
3808: }
1.1.1.2 ! misho 3809: if (possessive) continue;
! 3810: for(;;)
1.1 misho 3811: {
1.1.1.2 ! misho 3812: RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
1.1 misho 3813: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3814: if (eptr-- == pp) break; /* Stop if tried at original pos */
3815: BACKCHAR(eptr);
3816: }
3817: }
3818: else
3819: #endif
1.1.1.2 ! misho 3820: /* Not UTF mode */
1.1 misho 3821: {
3822: for (i = min; i < max; i++)
3823: {
3824: if (eptr >= md->end_subject)
3825: {
3826: SCHECK_PARTIAL();
3827: break;
3828: }
1.1.1.2 ! misho 3829: if (fc == *eptr || foc == *eptr) break;
1.1 misho 3830: eptr++;
3831: }
3832: if (possessive) continue;
3833: while (eptr >= pp)
3834: {
1.1.1.2 ! misho 3835: RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
1.1 misho 3836: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3837: eptr--;
3838: }
3839: }
3840:
1.1.1.2 ! misho 3841: RRETURN(MATCH_NOMATCH);
1.1 misho 3842: }
3843: /* Control never gets here */
3844: }
3845:
3846: /* Caseful comparisons */
3847:
3848: else
3849: {
1.1.1.2 ! misho 3850: #ifdef SUPPORT_UTF
! 3851: if (utf)
1.1 misho 3852: {
1.1.1.2 ! misho 3853: register pcre_uint32 d;
1.1 misho 3854: for (i = 1; i <= min; i++)
3855: {
3856: if (eptr >= md->end_subject)
3857: {
3858: SCHECK_PARTIAL();
1.1.1.2 ! misho 3859: RRETURN(MATCH_NOMATCH);
1.1 misho 3860: }
3861: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3862: if (fc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3863: }
3864: }
3865: else
3866: #endif
1.1.1.2 ! misho 3867: /* Not UTF mode */
1.1 misho 3868: {
3869: for (i = 1; i <= min; i++)
3870: {
3871: if (eptr >= md->end_subject)
3872: {
3873: SCHECK_PARTIAL();
1.1.1.2 ! misho 3874: RRETURN(MATCH_NOMATCH);
1.1 misho 3875: }
1.1.1.2 ! misho 3876: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1 misho 3877: }
3878: }
3879:
3880: if (min == max) continue;
3881:
3882: if (minimize)
3883: {
1.1.1.2 ! misho 3884: #ifdef SUPPORT_UTF
! 3885: if (utf)
1.1 misho 3886: {
1.1.1.2 ! misho 3887: register pcre_uint32 d;
1.1 misho 3888: for (fi = min;; fi++)
3889: {
1.1.1.2 ! misho 3890: RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
1.1 misho 3891: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3892: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3893: if (eptr >= md->end_subject)
3894: {
3895: SCHECK_PARTIAL();
1.1.1.2 ! misho 3896: RRETURN(MATCH_NOMATCH);
1.1 misho 3897: }
3898: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3899: if (fc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3900: }
3901: }
3902: else
3903: #endif
1.1.1.2 ! misho 3904: /* Not UTF mode */
1.1 misho 3905: {
3906: for (fi = min;; fi++)
3907: {
1.1.1.2 ! misho 3908: RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
1.1 misho 3909: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 3910: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 3911: if (eptr >= md->end_subject)
3912: {
3913: SCHECK_PARTIAL();
1.1.1.2 ! misho 3914: RRETURN(MATCH_NOMATCH);
1.1 misho 3915: }
1.1.1.2 ! misho 3916: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1 misho 3917: }
3918: }
3919: /* Control never gets here */
3920: }
3921:
3922: /* Maximize case */
3923:
3924: else
3925: {
3926: pp = eptr;
3927:
1.1.1.2 ! misho 3928: #ifdef SUPPORT_UTF
! 3929: if (utf)
1.1 misho 3930: {
1.1.1.2 ! misho 3931: register pcre_uint32 d;
1.1 misho 3932: for (i = min; i < max; i++)
3933: {
3934: int len = 1;
3935: if (eptr >= md->end_subject)
3936: {
3937: SCHECK_PARTIAL();
3938: break;
3939: }
3940: GETCHARLEN(d, eptr, len);
3941: if (fc == d) break;
3942: eptr += len;
3943: }
3944: if (possessive) continue;
3945: for(;;)
3946: {
1.1.1.2 ! misho 3947: RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
1.1 misho 3948: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3949: if (eptr-- == pp) break; /* Stop if tried at original pos */
3950: BACKCHAR(eptr);
3951: }
3952: }
3953: else
3954: #endif
1.1.1.2 ! misho 3955: /* Not UTF mode */
1.1 misho 3956: {
3957: for (i = min; i < max; i++)
3958: {
3959: if (eptr >= md->end_subject)
3960: {
3961: SCHECK_PARTIAL();
3962: break;
3963: }
3964: if (fc == *eptr) break;
3965: eptr++;
3966: }
3967: if (possessive) continue;
3968: while (eptr >= pp)
3969: {
1.1.1.2 ! misho 3970: RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
1.1 misho 3971: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3972: eptr--;
3973: }
3974: }
3975:
1.1.1.2 ! misho 3976: RRETURN(MATCH_NOMATCH);
1.1 misho 3977: }
3978: }
3979: /* Control never gets here */
3980:
3981: /* Match a single character type repeatedly; several different opcodes
3982: share code. This is very similar to the code for single characters, but we
3983: repeat it in the interests of efficiency. */
3984:
3985: case OP_TYPEEXACT:
3986: min = max = GET2(ecode, 1);
3987: minimize = TRUE;
1.1.1.2 ! misho 3988: ecode += 1 + IMM2_SIZE;
1.1 misho 3989: goto REPEATTYPE;
3990:
3991: case OP_TYPEUPTO:
3992: case OP_TYPEMINUPTO:
3993: min = 0;
3994: max = GET2(ecode, 1);
3995: minimize = *ecode == OP_TYPEMINUPTO;
1.1.1.2 ! misho 3996: ecode += 1 + IMM2_SIZE;
1.1 misho 3997: goto REPEATTYPE;
3998:
3999: case OP_TYPEPOSSTAR:
4000: possessive = TRUE;
4001: min = 0;
4002: max = INT_MAX;
4003: ecode++;
4004: goto REPEATTYPE;
4005:
4006: case OP_TYPEPOSPLUS:
4007: possessive = TRUE;
4008: min = 1;
4009: max = INT_MAX;
4010: ecode++;
4011: goto REPEATTYPE;
4012:
4013: case OP_TYPEPOSQUERY:
4014: possessive = TRUE;
4015: min = 0;
4016: max = 1;
4017: ecode++;
4018: goto REPEATTYPE;
4019:
4020: case OP_TYPEPOSUPTO:
4021: possessive = TRUE;
4022: min = 0;
4023: max = GET2(ecode, 1);
1.1.1.2 ! misho 4024: ecode += 1 + IMM2_SIZE;
1.1 misho 4025: goto REPEATTYPE;
4026:
4027: case OP_TYPESTAR:
4028: case OP_TYPEMINSTAR:
4029: case OP_TYPEPLUS:
4030: case OP_TYPEMINPLUS:
4031: case OP_TYPEQUERY:
4032: case OP_TYPEMINQUERY:
4033: c = *ecode++ - OP_TYPESTAR;
4034: minimize = (c & 1) != 0;
4035: min = rep_min[c]; /* Pick up values from tables; */
4036: max = rep_max[c]; /* zero for max => infinity */
4037: if (max == 0) max = INT_MAX;
4038:
4039: /* Common code for all repeated single character type matches. Note that
4040: in UTF-8 mode, '.' matches a character of any length, but for the other
4041: character types, the valid characters are all one-byte long. */
4042:
4043: REPEATTYPE:
4044: ctype = *ecode++; /* Code for the character type */
4045:
4046: #ifdef SUPPORT_UCP
4047: if (ctype == OP_PROP || ctype == OP_NOTPROP)
4048: {
4049: prop_fail_result = ctype == OP_NOTPROP;
4050: prop_type = *ecode++;
4051: prop_value = *ecode++;
4052: }
4053: else prop_type = -1;
4054: #endif
4055:
4056: /* First, ensure the minimum number of matches are present. Use inline
4057: code for maximizing the speed, and do the type test once at the start
4058: (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4059: is tidier. Also separate the UCP code, which can be the same for both UTF-8
4060: and single-bytes. */
4061:
4062: if (min > 0)
4063: {
4064: #ifdef SUPPORT_UCP
4065: if (prop_type >= 0)
4066: {
4067: switch(prop_type)
4068: {
4069: case PT_ANY:
1.1.1.2 ! misho 4070: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1 misho 4071: for (i = 1; i <= min; i++)
4072: {
4073: if (eptr >= md->end_subject)
4074: {
4075: SCHECK_PARTIAL();
1.1.1.2 ! misho 4076: RRETURN(MATCH_NOMATCH);
1.1 misho 4077: }
4078: GETCHARINCTEST(c, eptr);
4079: }
4080: break;
4081:
4082: case PT_LAMP:
4083: for (i = 1; i <= min; i++)
4084: {
1.1.1.2 ! misho 4085: int chartype;
1.1 misho 4086: if (eptr >= md->end_subject)
4087: {
4088: SCHECK_PARTIAL();
1.1.1.2 ! misho 4089: RRETURN(MATCH_NOMATCH);
1.1 misho 4090: }
4091: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4092: chartype = UCD_CHARTYPE(c);
! 4093: if ((chartype == ucp_Lu ||
! 4094: chartype == ucp_Ll ||
! 4095: chartype == ucp_Lt) == prop_fail_result)
! 4096: RRETURN(MATCH_NOMATCH);
1.1 misho 4097: }
4098: break;
4099:
4100: case PT_GC:
4101: for (i = 1; i <= min; i++)
4102: {
4103: if (eptr >= md->end_subject)
4104: {
4105: SCHECK_PARTIAL();
1.1.1.2 ! misho 4106: RRETURN(MATCH_NOMATCH);
1.1 misho 4107: }
4108: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4109: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
! 4110: RRETURN(MATCH_NOMATCH);
1.1 misho 4111: }
4112: break;
4113:
4114: case PT_PC:
4115: for (i = 1; i <= min; i++)
4116: {
4117: if (eptr >= md->end_subject)
4118: {
4119: SCHECK_PARTIAL();
1.1.1.2 ! misho 4120: RRETURN(MATCH_NOMATCH);
1.1 misho 4121: }
4122: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4123: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
! 4124: RRETURN(MATCH_NOMATCH);
1.1 misho 4125: }
4126: break;
4127:
4128: case PT_SC:
4129: for (i = 1; i <= min; i++)
4130: {
4131: if (eptr >= md->end_subject)
4132: {
4133: SCHECK_PARTIAL();
1.1.1.2 ! misho 4134: RRETURN(MATCH_NOMATCH);
1.1 misho 4135: }
4136: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4137: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
! 4138: RRETURN(MATCH_NOMATCH);
1.1 misho 4139: }
4140: break;
4141:
4142: case PT_ALNUM:
4143: for (i = 1; i <= min; i++)
4144: {
1.1.1.2 ! misho 4145: int category;
1.1 misho 4146: if (eptr >= md->end_subject)
4147: {
4148: SCHECK_PARTIAL();
1.1.1.2 ! misho 4149: RRETURN(MATCH_NOMATCH);
1.1 misho 4150: }
4151: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4152: category = UCD_CATEGORY(c);
! 4153: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
! 4154: RRETURN(MATCH_NOMATCH);
1.1 misho 4155: }
4156: break;
4157:
4158: case PT_SPACE: /* Perl space */
4159: for (i = 1; i <= min; i++)
4160: {
4161: if (eptr >= md->end_subject)
4162: {
4163: SCHECK_PARTIAL();
1.1.1.2 ! misho 4164: RRETURN(MATCH_NOMATCH);
1.1 misho 4165: }
4166: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4167: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 4168: c == CHAR_FF || c == CHAR_CR)
4169: == prop_fail_result)
1.1.1.2 ! misho 4170: RRETURN(MATCH_NOMATCH);
1.1 misho 4171: }
4172: break;
4173:
4174: case PT_PXSPACE: /* POSIX space */
4175: for (i = 1; i <= min; i++)
4176: {
4177: if (eptr >= md->end_subject)
4178: {
4179: SCHECK_PARTIAL();
1.1.1.2 ! misho 4180: RRETURN(MATCH_NOMATCH);
1.1 misho 4181: }
4182: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4183: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 4184: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185: == prop_fail_result)
1.1.1.2 ! misho 4186: RRETURN(MATCH_NOMATCH);
1.1 misho 4187: }
4188: break;
4189:
4190: case PT_WORD:
4191: for (i = 1; i <= min; i++)
4192: {
1.1.1.2 ! misho 4193: int category;
1.1 misho 4194: if (eptr >= md->end_subject)
4195: {
4196: SCHECK_PARTIAL();
1.1.1.2 ! misho 4197: RRETURN(MATCH_NOMATCH);
1.1 misho 4198: }
4199: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4200: category = UCD_CATEGORY(c);
! 4201: if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
1.1 misho 4202: == prop_fail_result)
1.1.1.2 ! misho 4203: RRETURN(MATCH_NOMATCH);
! 4204: }
! 4205: break;
! 4206:
! 4207: case PT_CLIST:
! 4208: for (i = 1; i <= min; i++)
! 4209: {
! 4210: const pcre_uint32 *cp;
! 4211: if (eptr >= md->end_subject)
! 4212: {
! 4213: SCHECK_PARTIAL();
! 4214: RRETURN(MATCH_NOMATCH);
! 4215: }
! 4216: GETCHARINCTEST(c, eptr);
! 4217: cp = PRIV(ucd_caseless_sets) + prop_value;
! 4218: for (;;)
! 4219: {
! 4220: if (c < *cp)
! 4221: { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
! 4222: if (c == *cp++)
! 4223: { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
! 4224: }
1.1 misho 4225: }
4226: break;
4227:
4228: /* This should not occur */
4229:
4230: default:
4231: RRETURN(PCRE_ERROR_INTERNAL);
4232: }
4233: }
4234:
4235: /* Match extended Unicode sequences. We will get here only if the
4236: support is in the binary; otherwise a compile-time error occurs. */
4237:
4238: else if (ctype == OP_EXTUNI)
4239: {
4240: for (i = 1; i <= min; i++)
4241: {
4242: if (eptr >= md->end_subject)
4243: {
4244: SCHECK_PARTIAL();
1.1.1.2 ! misho 4245: RRETURN(MATCH_NOMATCH);
1.1 misho 4246: }
1.1.1.2 ! misho 4247: else
1.1 misho 4248: {
1.1.1.2 ! misho 4249: int lgb, rgb;
! 4250: GETCHARINCTEST(c, eptr);
! 4251: lgb = UCD_GRAPHBREAK(c);
! 4252: while (eptr < md->end_subject)
! 4253: {
! 4254: int len = 1;
! 4255: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 4256: rgb = UCD_GRAPHBREAK(c);
! 4257: if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
! 4258: lgb = rgb;
! 4259: eptr += len;
! 4260: }
1.1 misho 4261: }
1.1.1.2 ! misho 4262: CHECK_PARTIAL();
1.1 misho 4263: }
4264: }
4265:
4266: else
4267: #endif /* SUPPORT_UCP */
4268:
4269: /* Handle all other cases when the coding is UTF-8 */
4270:
1.1.1.2 ! misho 4271: #ifdef SUPPORT_UTF
! 4272: if (utf) switch(ctype)
1.1 misho 4273: {
4274: case OP_ANY:
4275: for (i = 1; i <= min; i++)
4276: {
4277: if (eptr >= md->end_subject)
4278: {
4279: SCHECK_PARTIAL();
1.1.1.2 ! misho 4280: RRETURN(MATCH_NOMATCH);
! 4281: }
! 4282: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 4283: if (md->partial != 0 &&
! 4284: eptr + 1 >= md->end_subject &&
! 4285: NLBLOCK->nltype == NLTYPE_FIXED &&
! 4286: NLBLOCK->nllen == 2 &&
! 4287: RAWUCHAR(eptr) == NLBLOCK->nl[0])
! 4288: {
! 4289: md->hitend = TRUE;
! 4290: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1 misho 4291: }
4292: eptr++;
1.1.1.2 ! misho 4293: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4294: }
4295: break;
4296:
4297: case OP_ALLANY:
4298: for (i = 1; i <= min; i++)
4299: {
4300: if (eptr >= md->end_subject)
4301: {
4302: SCHECK_PARTIAL();
1.1.1.2 ! misho 4303: RRETURN(MATCH_NOMATCH);
1.1 misho 4304: }
4305: eptr++;
1.1.1.2 ! misho 4306: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4307: }
4308: break;
4309:
4310: case OP_ANYBYTE:
1.1.1.2 ! misho 4311: if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
1.1 misho 4312: eptr += min;
4313: break;
4314:
4315: case OP_ANYNL:
4316: for (i = 1; i <= min; i++)
4317: {
4318: if (eptr >= md->end_subject)
4319: {
4320: SCHECK_PARTIAL();
1.1.1.2 ! misho 4321: RRETURN(MATCH_NOMATCH);
1.1 misho 4322: }
4323: GETCHARINC(c, eptr);
4324: switch(c)
4325: {
1.1.1.2 ! misho 4326: default: RRETURN(MATCH_NOMATCH);
! 4327:
! 4328: case CHAR_CR:
! 4329: if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1 misho 4330: break;
4331:
1.1.1.2 ! misho 4332: case CHAR_LF:
1.1 misho 4333: break;
4334:
1.1.1.2 ! misho 4335: case CHAR_VT:
! 4336: case CHAR_FF:
! 4337: case CHAR_NEL:
! 4338: #ifndef EBCDIC
1.1 misho 4339: case 0x2028:
4340: case 0x2029:
1.1.1.2 ! misho 4341: #endif /* Not EBCDIC */
! 4342: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1 misho 4343: break;
4344: }
4345: }
4346: break;
4347:
4348: case OP_NOT_HSPACE:
4349: for (i = 1; i <= min; i++)
4350: {
4351: if (eptr >= md->end_subject)
4352: {
4353: SCHECK_PARTIAL();
1.1.1.2 ! misho 4354: RRETURN(MATCH_NOMATCH);
1.1 misho 4355: }
4356: GETCHARINC(c, eptr);
4357: switch(c)
4358: {
1.1.1.2 ! misho 4359: HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
1.1 misho 4360: default: break;
4361: }
4362: }
4363: break;
4364:
4365: case OP_HSPACE:
4366: for (i = 1; i <= min; i++)
4367: {
4368: if (eptr >= md->end_subject)
4369: {
4370: SCHECK_PARTIAL();
1.1.1.2 ! misho 4371: RRETURN(MATCH_NOMATCH);
1.1 misho 4372: }
4373: GETCHARINC(c, eptr);
4374: switch(c)
4375: {
1.1.1.2 ! misho 4376: HSPACE_CASES: break; /* Byte and multibyte cases */
! 4377: default: RRETURN(MATCH_NOMATCH);
1.1 misho 4378: }
4379: }
4380: break;
4381:
4382: case OP_NOT_VSPACE:
4383: for (i = 1; i <= min; i++)
4384: {
4385: if (eptr >= md->end_subject)
4386: {
4387: SCHECK_PARTIAL();
1.1.1.2 ! misho 4388: RRETURN(MATCH_NOMATCH);
1.1 misho 4389: }
4390: GETCHARINC(c, eptr);
4391: switch(c)
4392: {
1.1.1.2 ! misho 4393: VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1 misho 4394: default: break;
4395: }
4396: }
4397: break;
4398:
4399: case OP_VSPACE:
4400: for (i = 1; i <= min; i++)
4401: {
4402: if (eptr >= md->end_subject)
4403: {
4404: SCHECK_PARTIAL();
1.1.1.2 ! misho 4405: RRETURN(MATCH_NOMATCH);
1.1 misho 4406: }
4407: GETCHARINC(c, eptr);
4408: switch(c)
4409: {
1.1.1.2 ! misho 4410: VSPACE_CASES: break;
! 4411: default: RRETURN(MATCH_NOMATCH);
1.1 misho 4412: }
4413: }
4414: break;
4415:
4416: case OP_NOT_DIGIT:
4417: for (i = 1; i <= min; i++)
4418: {
4419: if (eptr >= md->end_subject)
4420: {
4421: SCHECK_PARTIAL();
1.1.1.2 ! misho 4422: RRETURN(MATCH_NOMATCH);
1.1 misho 4423: }
4424: GETCHARINC(c, eptr);
4425: if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.1.1.2 ! misho 4426: RRETURN(MATCH_NOMATCH);
1.1 misho 4427: }
4428: break;
4429:
4430: case OP_DIGIT:
4431: for (i = 1; i <= min; i++)
4432: {
1.1.1.2 ! misho 4433: pcre_uchar cc;
! 4434:
1.1 misho 4435: if (eptr >= md->end_subject)
4436: {
4437: SCHECK_PARTIAL();
1.1.1.2 ! misho 4438: RRETURN(MATCH_NOMATCH);
1.1 misho 4439: }
1.1.1.2 ! misho 4440: cc = RAWUCHAR(eptr);
! 4441: if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
! 4442: RRETURN(MATCH_NOMATCH);
! 4443: eptr++;
1.1 misho 4444: /* No need to skip more bytes - we know it's a 1-byte character */
4445: }
4446: break;
4447:
4448: case OP_NOT_WHITESPACE:
4449: for (i = 1; i <= min; i++)
4450: {
1.1.1.2 ! misho 4451: pcre_uchar cc;
! 4452:
1.1 misho 4453: if (eptr >= md->end_subject)
4454: {
4455: SCHECK_PARTIAL();
1.1.1.2 ! misho 4456: RRETURN(MATCH_NOMATCH);
1.1 misho 4457: }
1.1.1.2 ! misho 4458: cc = RAWUCHAR(eptr);
! 4459: if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
! 4460: RRETURN(MATCH_NOMATCH);
! 4461: eptr++;
! 4462: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4463: }
4464: break;
4465:
4466: case OP_WHITESPACE:
4467: for (i = 1; i <= min; i++)
4468: {
1.1.1.2 ! misho 4469: pcre_uchar cc;
! 4470:
1.1 misho 4471: if (eptr >= md->end_subject)
4472: {
4473: SCHECK_PARTIAL();
1.1.1.2 ! misho 4474: RRETURN(MATCH_NOMATCH);
1.1 misho 4475: }
1.1.1.2 ! misho 4476: cc = RAWUCHAR(eptr);
! 4477: if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
! 4478: RRETURN(MATCH_NOMATCH);
! 4479: eptr++;
1.1 misho 4480: /* No need to skip more bytes - we know it's a 1-byte character */
4481: }
4482: break;
4483:
4484: case OP_NOT_WORDCHAR:
4485: for (i = 1; i <= min; i++)
4486: {
1.1.1.2 ! misho 4487: pcre_uchar cc;
! 4488:
1.1 misho 4489: if (eptr >= md->end_subject)
4490: {
4491: SCHECK_PARTIAL();
1.1.1.2 ! misho 4492: RRETURN(MATCH_NOMATCH);
1.1 misho 4493: }
1.1.1.2 ! misho 4494: cc = RAWUCHAR(eptr);
! 4495: if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
! 4496: RRETURN(MATCH_NOMATCH);
! 4497: eptr++;
! 4498: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4499: }
4500: break;
4501:
4502: case OP_WORDCHAR:
4503: for (i = 1; i <= min; i++)
4504: {
1.1.1.2 ! misho 4505: pcre_uchar cc;
! 4506:
1.1 misho 4507: if (eptr >= md->end_subject)
4508: {
4509: SCHECK_PARTIAL();
1.1.1.2 ! misho 4510: RRETURN(MATCH_NOMATCH);
1.1 misho 4511: }
1.1.1.2 ! misho 4512: cc = RAWUCHAR(eptr);
! 4513: if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
! 4514: RRETURN(MATCH_NOMATCH);
! 4515: eptr++;
1.1 misho 4516: /* No need to skip more bytes - we know it's a 1-byte character */
4517: }
4518: break;
4519:
4520: default:
4521: RRETURN(PCRE_ERROR_INTERNAL);
4522: } /* End switch(ctype) */
4523:
4524: else
1.1.1.2 ! misho 4525: #endif /* SUPPORT_UTF */
1.1 misho 4526:
4527: /* Code for the non-UTF-8 case for minimum matching of operators other
4528: than OP_PROP and OP_NOTPROP. */
4529:
4530: switch(ctype)
4531: {
4532: case OP_ANY:
4533: for (i = 1; i <= min; i++)
4534: {
4535: if (eptr >= md->end_subject)
4536: {
4537: SCHECK_PARTIAL();
1.1.1.2 ! misho 4538: RRETURN(MATCH_NOMATCH);
! 4539: }
! 4540: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
! 4541: if (md->partial != 0 &&
! 4542: eptr + 1 >= md->end_subject &&
! 4543: NLBLOCK->nltype == NLTYPE_FIXED &&
! 4544: NLBLOCK->nllen == 2 &&
! 4545: *eptr == NLBLOCK->nl[0])
! 4546: {
! 4547: md->hitend = TRUE;
! 4548: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1 misho 4549: }
4550: eptr++;
4551: }
4552: break;
4553:
4554: case OP_ALLANY:
4555: if (eptr > md->end_subject - min)
4556: {
4557: SCHECK_PARTIAL();
1.1.1.2 ! misho 4558: RRETURN(MATCH_NOMATCH);
1.1 misho 4559: }
4560: eptr += min;
4561: break;
4562:
4563: case OP_ANYBYTE:
4564: if (eptr > md->end_subject - min)
4565: {
4566: SCHECK_PARTIAL();
1.1.1.2 ! misho 4567: RRETURN(MATCH_NOMATCH);
1.1 misho 4568: }
4569: eptr += min;
4570: break;
4571:
4572: case OP_ANYNL:
4573: for (i = 1; i <= min; i++)
4574: {
4575: if (eptr >= md->end_subject)
4576: {
4577: SCHECK_PARTIAL();
1.1.1.2 ! misho 4578: RRETURN(MATCH_NOMATCH);
1.1 misho 4579: }
4580: switch(*eptr++)
4581: {
1.1.1.2 ! misho 4582: default: RRETURN(MATCH_NOMATCH);
! 4583:
! 4584: case CHAR_CR:
! 4585: if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1 misho 4586: break;
1.1.1.2 ! misho 4587:
! 4588: case CHAR_LF:
1.1 misho 4589: break;
4590:
1.1.1.2 ! misho 4591: case CHAR_VT:
! 4592: case CHAR_FF:
! 4593: case CHAR_NEL:
! 4594: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 4595: case 0x2028:
! 4596: case 0x2029:
! 4597: #endif
! 4598: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1 misho 4599: break;
4600: }
4601: }
4602: break;
4603:
4604: case OP_NOT_HSPACE:
4605: for (i = 1; i <= min; i++)
4606: {
4607: if (eptr >= md->end_subject)
4608: {
4609: SCHECK_PARTIAL();
1.1.1.2 ! misho 4610: RRETURN(MATCH_NOMATCH);
1.1 misho 4611: }
4612: switch(*eptr++)
4613: {
4614: default: break;
1.1.1.2 ! misho 4615: HSPACE_BYTE_CASES:
! 4616: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 4617: HSPACE_MULTIBYTE_CASES:
! 4618: #endif
! 4619: RRETURN(MATCH_NOMATCH);
1.1 misho 4620: }
4621: }
4622: break;
4623:
4624: case OP_HSPACE:
4625: for (i = 1; i <= min; i++)
4626: {
4627: if (eptr >= md->end_subject)
4628: {
4629: SCHECK_PARTIAL();
1.1.1.2 ! misho 4630: RRETURN(MATCH_NOMATCH);
1.1 misho 4631: }
4632: switch(*eptr++)
4633: {
1.1.1.2 ! misho 4634: default: RRETURN(MATCH_NOMATCH);
! 4635: HSPACE_BYTE_CASES:
! 4636: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 4637: HSPACE_MULTIBYTE_CASES:
! 4638: #endif
1.1 misho 4639: break;
4640: }
4641: }
4642: break;
4643:
4644: case OP_NOT_VSPACE:
4645: for (i = 1; i <= min; i++)
4646: {
4647: if (eptr >= md->end_subject)
4648: {
4649: SCHECK_PARTIAL();
1.1.1.2 ! misho 4650: RRETURN(MATCH_NOMATCH);
1.1 misho 4651: }
4652: switch(*eptr++)
4653: {
1.1.1.2 ! misho 4654: VSPACE_BYTE_CASES:
! 4655: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 4656: VSPACE_MULTIBYTE_CASES:
! 4657: #endif
! 4658: RRETURN(MATCH_NOMATCH);
1.1 misho 4659: default: break;
4660: }
4661: }
4662: break;
4663:
4664: case OP_VSPACE:
4665: for (i = 1; i <= min; i++)
4666: {
4667: if (eptr >= md->end_subject)
4668: {
4669: SCHECK_PARTIAL();
1.1.1.2 ! misho 4670: RRETURN(MATCH_NOMATCH);
1.1 misho 4671: }
4672: switch(*eptr++)
4673: {
1.1.1.2 ! misho 4674: default: RRETURN(MATCH_NOMATCH);
! 4675: VSPACE_BYTE_CASES:
! 4676: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 4677: VSPACE_MULTIBYTE_CASES:
! 4678: #endif
1.1 misho 4679: break;
4680: }
4681: }
4682: break;
4683:
4684: case OP_NOT_DIGIT:
4685: for (i = 1; i <= min; i++)
4686: {
4687: if (eptr >= md->end_subject)
4688: {
4689: SCHECK_PARTIAL();
1.1.1.2 ! misho 4690: RRETURN(MATCH_NOMATCH);
1.1 misho 4691: }
1.1.1.2 ! misho 4692: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
! 4693: RRETURN(MATCH_NOMATCH);
! 4694: eptr++;
1.1 misho 4695: }
4696: break;
4697:
4698: case OP_DIGIT:
4699: for (i = 1; i <= min; i++)
4700: {
4701: if (eptr >= md->end_subject)
4702: {
4703: SCHECK_PARTIAL();
1.1.1.2 ! misho 4704: RRETURN(MATCH_NOMATCH);
1.1 misho 4705: }
1.1.1.2 ! misho 4706: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
! 4707: RRETURN(MATCH_NOMATCH);
! 4708: eptr++;
1.1 misho 4709: }
4710: break;
4711:
4712: case OP_NOT_WHITESPACE:
4713: for (i = 1; i <= min; i++)
4714: {
4715: if (eptr >= md->end_subject)
4716: {
4717: SCHECK_PARTIAL();
1.1.1.2 ! misho 4718: RRETURN(MATCH_NOMATCH);
1.1 misho 4719: }
1.1.1.2 ! misho 4720: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
! 4721: RRETURN(MATCH_NOMATCH);
! 4722: eptr++;
1.1 misho 4723: }
4724: break;
4725:
4726: case OP_WHITESPACE:
4727: for (i = 1; i <= min; i++)
4728: {
4729: if (eptr >= md->end_subject)
4730: {
4731: SCHECK_PARTIAL();
1.1.1.2 ! misho 4732: RRETURN(MATCH_NOMATCH);
1.1 misho 4733: }
1.1.1.2 ! misho 4734: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
! 4735: RRETURN(MATCH_NOMATCH);
! 4736: eptr++;
1.1 misho 4737: }
4738: break;
4739:
4740: case OP_NOT_WORDCHAR:
4741: for (i = 1; i <= min; i++)
4742: {
4743: if (eptr >= md->end_subject)
4744: {
4745: SCHECK_PARTIAL();
1.1.1.2 ! misho 4746: RRETURN(MATCH_NOMATCH);
1.1 misho 4747: }
1.1.1.2 ! misho 4748: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
! 4749: RRETURN(MATCH_NOMATCH);
! 4750: eptr++;
1.1 misho 4751: }
4752: break;
4753:
4754: case OP_WORDCHAR:
4755: for (i = 1; i <= min; i++)
4756: {
4757: if (eptr >= md->end_subject)
4758: {
4759: SCHECK_PARTIAL();
1.1.1.2 ! misho 4760: RRETURN(MATCH_NOMATCH);
1.1 misho 4761: }
1.1.1.2 ! misho 4762: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
! 4763: RRETURN(MATCH_NOMATCH);
! 4764: eptr++;
1.1 misho 4765: }
4766: break;
4767:
4768: default:
4769: RRETURN(PCRE_ERROR_INTERNAL);
4770: }
4771: }
4772:
4773: /* If min = max, continue at the same level without recursing */
4774:
4775: if (min == max) continue;
4776:
4777: /* If minimizing, we have to test the rest of the pattern before each
4778: subsequent match. Again, separate the UTF-8 case for speed, and also
4779: separate the UCP cases. */
4780:
4781: if (minimize)
4782: {
4783: #ifdef SUPPORT_UCP
4784: if (prop_type >= 0)
4785: {
4786: switch(prop_type)
4787: {
4788: case PT_ANY:
4789: for (fi = min;; fi++)
4790: {
1.1.1.2 ! misho 4791: RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
1.1 misho 4792: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4793: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4794: if (eptr >= md->end_subject)
4795: {
4796: SCHECK_PARTIAL();
1.1.1.2 ! misho 4797: RRETURN(MATCH_NOMATCH);
1.1 misho 4798: }
4799: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4800: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1 misho 4801: }
4802: /* Control never gets here */
4803:
4804: case PT_LAMP:
4805: for (fi = min;; fi++)
4806: {
1.1.1.2 ! misho 4807: int chartype;
! 4808: RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
1.1 misho 4809: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4810: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4811: if (eptr >= md->end_subject)
4812: {
4813: SCHECK_PARTIAL();
1.1.1.2 ! misho 4814: RRETURN(MATCH_NOMATCH);
1.1 misho 4815: }
4816: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4817: chartype = UCD_CHARTYPE(c);
! 4818: if ((chartype == ucp_Lu ||
! 4819: chartype == ucp_Ll ||
! 4820: chartype == ucp_Lt) == prop_fail_result)
! 4821: RRETURN(MATCH_NOMATCH);
1.1 misho 4822: }
4823: /* Control never gets here */
4824:
4825: case PT_GC:
4826: for (fi = min;; fi++)
4827: {
1.1.1.2 ! misho 4828: RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
1.1 misho 4829: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4830: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4831: if (eptr >= md->end_subject)
4832: {
4833: SCHECK_PARTIAL();
1.1.1.2 ! misho 4834: RRETURN(MATCH_NOMATCH);
1.1 misho 4835: }
4836: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4837: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
! 4838: RRETURN(MATCH_NOMATCH);
1.1 misho 4839: }
4840: /* Control never gets here */
4841:
4842: case PT_PC:
4843: for (fi = min;; fi++)
4844: {
1.1.1.2 ! misho 4845: RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
1.1 misho 4846: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4847: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4848: if (eptr >= md->end_subject)
4849: {
4850: SCHECK_PARTIAL();
1.1.1.2 ! misho 4851: RRETURN(MATCH_NOMATCH);
1.1 misho 4852: }
4853: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4854: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
! 4855: RRETURN(MATCH_NOMATCH);
1.1 misho 4856: }
4857: /* Control never gets here */
4858:
4859: case PT_SC:
4860: for (fi = min;; fi++)
4861: {
1.1.1.2 ! misho 4862: RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
1.1 misho 4863: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4864: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4865: if (eptr >= md->end_subject)
4866: {
4867: SCHECK_PARTIAL();
1.1.1.2 ! misho 4868: RRETURN(MATCH_NOMATCH);
1.1 misho 4869: }
4870: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4871: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
! 4872: RRETURN(MATCH_NOMATCH);
1.1 misho 4873: }
4874: /* Control never gets here */
4875:
4876: case PT_ALNUM:
4877: for (fi = min;; fi++)
4878: {
1.1.1.2 ! misho 4879: int category;
! 4880: RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
1.1 misho 4881: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4882: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4883: if (eptr >= md->end_subject)
4884: {
4885: SCHECK_PARTIAL();
1.1.1.2 ! misho 4886: RRETURN(MATCH_NOMATCH);
1.1 misho 4887: }
4888: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4889: category = UCD_CATEGORY(c);
! 4890: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
! 4891: RRETURN(MATCH_NOMATCH);
1.1 misho 4892: }
4893: /* Control never gets here */
4894:
4895: case PT_SPACE: /* Perl space */
4896: for (fi = min;; fi++)
4897: {
1.1.1.2 ! misho 4898: RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
1.1 misho 4899: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4900: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4901: if (eptr >= md->end_subject)
4902: {
4903: SCHECK_PARTIAL();
1.1.1.2 ! misho 4904: RRETURN(MATCH_NOMATCH);
1.1 misho 4905: }
4906: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4907: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 4908: c == CHAR_FF || c == CHAR_CR)
4909: == prop_fail_result)
1.1.1.2 ! misho 4910: RRETURN(MATCH_NOMATCH);
1.1 misho 4911: }
4912: /* Control never gets here */
4913:
4914: case PT_PXSPACE: /* POSIX space */
4915: for (fi = min;; fi++)
4916: {
1.1.1.2 ! misho 4917: RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
1.1 misho 4918: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4919: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4920: if (eptr >= md->end_subject)
4921: {
4922: SCHECK_PARTIAL();
1.1.1.2 ! misho 4923: RRETURN(MATCH_NOMATCH);
1.1 misho 4924: }
4925: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4926: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 4927: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4928: == prop_fail_result)
1.1.1.2 ! misho 4929: RRETURN(MATCH_NOMATCH);
1.1 misho 4930: }
4931: /* Control never gets here */
4932:
4933: case PT_WORD:
4934: for (fi = min;; fi++)
4935: {
1.1.1.2 ! misho 4936: int category;
! 4937: RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
1.1 misho 4938: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4939: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4940: if (eptr >= md->end_subject)
4941: {
4942: SCHECK_PARTIAL();
1.1.1.2 ! misho 4943: RRETURN(MATCH_NOMATCH);
1.1 misho 4944: }
4945: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 4946: category = UCD_CATEGORY(c);
! 4947: if ((category == ucp_L ||
! 4948: category == ucp_N ||
1.1 misho 4949: c == CHAR_UNDERSCORE)
4950: == prop_fail_result)
1.1.1.2 ! misho 4951: RRETURN(MATCH_NOMATCH);
1.1 misho 4952: }
4953: /* Control never gets here */
4954:
1.1.1.2 ! misho 4955: case PT_CLIST:
! 4956: for (fi = min;; fi++)
! 4957: {
! 4958: const pcre_uint32 *cp;
! 4959: RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
! 4960: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
! 4961: if (fi >= max) RRETURN(MATCH_NOMATCH);
! 4962: if (eptr >= md->end_subject)
! 4963: {
! 4964: SCHECK_PARTIAL();
! 4965: RRETURN(MATCH_NOMATCH);
! 4966: }
! 4967: GETCHARINCTEST(c, eptr);
! 4968: cp = PRIV(ucd_caseless_sets) + prop_value;
! 4969: for (;;)
! 4970: {
! 4971: if (c < *cp)
! 4972: { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
! 4973: if (c == *cp++)
! 4974: { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
! 4975: }
! 4976: }
! 4977: /* Control never gets here */
1.1 misho 4978:
1.1.1.2 ! misho 4979: /* This should never occur */
1.1 misho 4980: default:
4981: RRETURN(PCRE_ERROR_INTERNAL);
4982: }
4983: }
4984:
4985: /* Match extended Unicode sequences. We will get here only if the
4986: support is in the binary; otherwise a compile-time error occurs. */
4987:
4988: else if (ctype == OP_EXTUNI)
4989: {
4990: for (fi = min;; fi++)
4991: {
1.1.1.2 ! misho 4992: RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
1.1 misho 4993: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 4994: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 4995: if (eptr >= md->end_subject)
4996: {
4997: SCHECK_PARTIAL();
1.1.1.2 ! misho 4998: RRETURN(MATCH_NOMATCH);
1.1 misho 4999: }
1.1.1.2 ! misho 5000: else
1.1 misho 5001: {
1.1.1.2 ! misho 5002: int lgb, rgb;
! 5003: GETCHARINCTEST(c, eptr);
! 5004: lgb = UCD_GRAPHBREAK(c);
! 5005: while (eptr < md->end_subject)
! 5006: {
! 5007: int len = 1;
! 5008: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 5009: rgb = UCD_GRAPHBREAK(c);
! 5010: if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
! 5011: lgb = rgb;
! 5012: eptr += len;
! 5013: }
1.1 misho 5014: }
1.1.1.2 ! misho 5015: CHECK_PARTIAL();
1.1 misho 5016: }
5017: }
5018: else
5019: #endif /* SUPPORT_UCP */
5020:
1.1.1.2 ! misho 5021: #ifdef SUPPORT_UTF
! 5022: if (utf)
1.1 misho 5023: {
5024: for (fi = min;; fi++)
5025: {
1.1.1.2 ! misho 5026: RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
1.1 misho 5027: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 5028: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 5029: if (eptr >= md->end_subject)
5030: {
5031: SCHECK_PARTIAL();
1.1.1.2 ! misho 5032: RRETURN(MATCH_NOMATCH);
1.1 misho 5033: }
5034: if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.1.1.2 ! misho 5035: RRETURN(MATCH_NOMATCH);
1.1 misho 5036: GETCHARINC(c, eptr);
5037: switch(ctype)
5038: {
1.1.1.2 ! misho 5039: case OP_ANY: /* This is the non-NL case */
! 5040: if (md->partial != 0 && /* Take care with CRLF partial */
! 5041: eptr >= md->end_subject &&
! 5042: NLBLOCK->nltype == NLTYPE_FIXED &&
! 5043: NLBLOCK->nllen == 2 &&
! 5044: c == NLBLOCK->nl[0])
! 5045: {
! 5046: md->hitend = TRUE;
! 5047: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 5048: }
! 5049: break;
! 5050:
1.1 misho 5051: case OP_ALLANY:
5052: case OP_ANYBYTE:
5053: break;
5054:
5055: case OP_ANYNL:
5056: switch(c)
5057: {
1.1.1.2 ! misho 5058: default: RRETURN(MATCH_NOMATCH);
! 5059: case CHAR_CR:
! 5060: if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1 misho 5061: break;
1.1.1.2 ! misho 5062:
! 5063: case CHAR_LF:
1.1 misho 5064: break;
5065:
1.1.1.2 ! misho 5066: case CHAR_VT:
! 5067: case CHAR_FF:
! 5068: case CHAR_NEL:
! 5069: #ifndef EBCDIC
1.1 misho 5070: case 0x2028:
5071: case 0x2029:
1.1.1.2 ! misho 5072: #endif /* Not EBCDIC */
! 5073: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1 misho 5074: break;
5075: }
5076: break;
5077:
5078: case OP_NOT_HSPACE:
5079: switch(c)
5080: {
1.1.1.2 ! misho 5081: HSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1 misho 5082: default: break;
5083: }
5084: break;
5085:
5086: case OP_HSPACE:
5087: switch(c)
5088: {
1.1.1.2 ! misho 5089: HSPACE_CASES: break;
! 5090: default: RRETURN(MATCH_NOMATCH);
1.1 misho 5091: }
5092: break;
5093:
5094: case OP_NOT_VSPACE:
5095: switch(c)
5096: {
1.1.1.2 ! misho 5097: VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1 misho 5098: default: break;
5099: }
5100: break;
5101:
5102: case OP_VSPACE:
5103: switch(c)
5104: {
1.1.1.2 ! misho 5105: VSPACE_CASES: break;
! 5106: default: RRETURN(MATCH_NOMATCH);
1.1 misho 5107: }
5108: break;
5109:
5110: case OP_NOT_DIGIT:
5111: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.1.1.2 ! misho 5112: RRETURN(MATCH_NOMATCH);
1.1 misho 5113: break;
5114:
5115: case OP_DIGIT:
5116: if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.1.1.2 ! misho 5117: RRETURN(MATCH_NOMATCH);
1.1 misho 5118: break;
5119:
5120: case OP_NOT_WHITESPACE:
5121: if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.1.1.2 ! misho 5122: RRETURN(MATCH_NOMATCH);
1.1 misho 5123: break;
5124:
5125: case OP_WHITESPACE:
1.1.1.2 ! misho 5126: if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
! 5127: RRETURN(MATCH_NOMATCH);
1.1 misho 5128: break;
5129:
5130: case OP_NOT_WORDCHAR:
5131: if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.1.1.2 ! misho 5132: RRETURN(MATCH_NOMATCH);
1.1 misho 5133: break;
5134:
5135: case OP_WORDCHAR:
5136: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.1.1.2 ! misho 5137: RRETURN(MATCH_NOMATCH);
1.1 misho 5138: break;
5139:
5140: default:
5141: RRETURN(PCRE_ERROR_INTERNAL);
5142: }
5143: }
5144: }
5145: else
5146: #endif
1.1.1.2 ! misho 5147: /* Not UTF mode */
1.1 misho 5148: {
5149: for (fi = min;; fi++)
5150: {
1.1.1.2 ! misho 5151: RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
1.1 misho 5152: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 5153: if (fi >= max) RRETURN(MATCH_NOMATCH);
1.1 misho 5154: if (eptr >= md->end_subject)
5155: {
5156: SCHECK_PARTIAL();
1.1.1.2 ! misho 5157: RRETURN(MATCH_NOMATCH);
1.1 misho 5158: }
5159: if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.1.1.2 ! misho 5160: RRETURN(MATCH_NOMATCH);
1.1 misho 5161: c = *eptr++;
5162: switch(ctype)
5163: {
1.1.1.2 ! misho 5164: case OP_ANY: /* This is the non-NL case */
! 5165: if (md->partial != 0 && /* Take care with CRLF partial */
! 5166: eptr >= md->end_subject &&
! 5167: NLBLOCK->nltype == NLTYPE_FIXED &&
! 5168: NLBLOCK->nllen == 2 &&
! 5169: c == NLBLOCK->nl[0])
! 5170: {
! 5171: md->hitend = TRUE;
! 5172: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 5173: }
! 5174: break;
! 5175:
1.1 misho 5176: case OP_ALLANY:
5177: case OP_ANYBYTE:
5178: break;
5179:
5180: case OP_ANYNL:
5181: switch(c)
5182: {
1.1.1.2 ! misho 5183: default: RRETURN(MATCH_NOMATCH);
! 5184: case CHAR_CR:
! 5185: if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1 misho 5186: break;
5187:
1.1.1.2 ! misho 5188: case CHAR_LF:
1.1 misho 5189: break;
5190:
1.1.1.2 ! misho 5191: case CHAR_VT:
! 5192: case CHAR_FF:
! 5193: case CHAR_NEL:
! 5194: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5195: case 0x2028:
! 5196: case 0x2029:
! 5197: #endif
! 5198: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1 misho 5199: break;
5200: }
5201: break;
5202:
5203: case OP_NOT_HSPACE:
5204: switch(c)
5205: {
5206: default: break;
1.1.1.2 ! misho 5207: HSPACE_BYTE_CASES:
! 5208: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5209: HSPACE_MULTIBYTE_CASES:
! 5210: #endif
! 5211: RRETURN(MATCH_NOMATCH);
1.1 misho 5212: }
5213: break;
5214:
5215: case OP_HSPACE:
5216: switch(c)
5217: {
1.1.1.2 ! misho 5218: default: RRETURN(MATCH_NOMATCH);
! 5219: HSPACE_BYTE_CASES:
! 5220: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5221: HSPACE_MULTIBYTE_CASES:
! 5222: #endif
1.1 misho 5223: break;
5224: }
5225: break;
5226:
5227: case OP_NOT_VSPACE:
5228: switch(c)
5229: {
5230: default: break;
1.1.1.2 ! misho 5231: VSPACE_BYTE_CASES:
! 5232: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5233: VSPACE_MULTIBYTE_CASES:
! 5234: #endif
! 5235: RRETURN(MATCH_NOMATCH);
1.1 misho 5236: }
5237: break;
5238:
5239: case OP_VSPACE:
5240: switch(c)
5241: {
1.1.1.2 ! misho 5242: default: RRETURN(MATCH_NOMATCH);
! 5243: VSPACE_BYTE_CASES:
! 5244: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5245: VSPACE_MULTIBYTE_CASES:
! 5246: #endif
1.1 misho 5247: break;
5248: }
5249: break;
5250:
5251: case OP_NOT_DIGIT:
1.1.1.2 ! misho 5252: if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5253: break;
5254:
5255: case OP_DIGIT:
1.1.1.2 ! misho 5256: if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5257: break;
5258:
5259: case OP_NOT_WHITESPACE:
1.1.1.2 ! misho 5260: if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5261: break;
5262:
5263: case OP_WHITESPACE:
1.1.1.2 ! misho 5264: if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5265: break;
5266:
5267: case OP_NOT_WORDCHAR:
1.1.1.2 ! misho 5268: if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5269: break;
5270:
5271: case OP_WORDCHAR:
1.1.1.2 ! misho 5272: if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5273: break;
5274:
5275: default:
5276: RRETURN(PCRE_ERROR_INTERNAL);
5277: }
5278: }
5279: }
5280: /* Control never gets here */
5281: }
5282:
5283: /* If maximizing, it is worth using inline code for speed, doing the type
5284: test once at the start (i.e. keep it out of the loop). Again, keep the
5285: UTF and UCP stuff separate. */
5286:
5287: else
5288: {
5289: pp = eptr; /* Remember where we started */
5290:
5291: #ifdef SUPPORT_UCP
5292: if (prop_type >= 0)
5293: {
5294: switch(prop_type)
5295: {
5296: case PT_ANY:
5297: for (i = min; i < max; i++)
5298: {
5299: int len = 1;
5300: if (eptr >= md->end_subject)
5301: {
5302: SCHECK_PARTIAL();
5303: break;
5304: }
5305: GETCHARLENTEST(c, eptr, len);
5306: if (prop_fail_result) break;
5307: eptr+= len;
5308: }
5309: break;
5310:
5311: case PT_LAMP:
5312: for (i = min; i < max; i++)
5313: {
1.1.1.2 ! misho 5314: int chartype;
1.1 misho 5315: int len = 1;
5316: if (eptr >= md->end_subject)
5317: {
5318: SCHECK_PARTIAL();
5319: break;
5320: }
5321: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5322: chartype = UCD_CHARTYPE(c);
! 5323: if ((chartype == ucp_Lu ||
! 5324: chartype == ucp_Ll ||
! 5325: chartype == ucp_Lt) == prop_fail_result)
1.1 misho 5326: break;
5327: eptr+= len;
5328: }
5329: break;
5330:
5331: case PT_GC:
5332: for (i = min; i < max; i++)
5333: {
5334: int len = 1;
5335: if (eptr >= md->end_subject)
5336: {
5337: SCHECK_PARTIAL();
5338: break;
5339: }
5340: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5341: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
1.1 misho 5342: eptr+= len;
5343: }
5344: break;
5345:
5346: case PT_PC:
5347: for (i = min; i < max; i++)
5348: {
5349: int len = 1;
5350: if (eptr >= md->end_subject)
5351: {
5352: SCHECK_PARTIAL();
5353: break;
5354: }
5355: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5356: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
1.1 misho 5357: eptr+= len;
5358: }
5359: break;
5360:
5361: case PT_SC:
5362: for (i = min; i < max; i++)
5363: {
5364: int len = 1;
5365: if (eptr >= md->end_subject)
5366: {
5367: SCHECK_PARTIAL();
5368: break;
5369: }
5370: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5371: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
1.1 misho 5372: eptr+= len;
5373: }
5374: break;
5375:
5376: case PT_ALNUM:
5377: for (i = min; i < max; i++)
5378: {
1.1.1.2 ! misho 5379: int category;
1.1 misho 5380: int len = 1;
5381: if (eptr >= md->end_subject)
5382: {
5383: SCHECK_PARTIAL();
5384: break;
5385: }
5386: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5387: category = UCD_CATEGORY(c);
! 5388: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
1.1 misho 5389: break;
5390: eptr+= len;
5391: }
5392: break;
5393:
5394: case PT_SPACE: /* Perl space */
5395: for (i = min; i < max; i++)
5396: {
5397: int len = 1;
5398: if (eptr >= md->end_subject)
5399: {
5400: SCHECK_PARTIAL();
5401: break;
5402: }
5403: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5404: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 5405: c == CHAR_FF || c == CHAR_CR)
5406: == prop_fail_result)
5407: break;
5408: eptr+= len;
5409: }
5410: break;
5411:
5412: case PT_PXSPACE: /* POSIX space */
5413: for (i = min; i < max; i++)
5414: {
5415: int len = 1;
5416: if (eptr >= md->end_subject)
5417: {
5418: SCHECK_PARTIAL();
5419: break;
5420: }
5421: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5422: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.1 misho 5423: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5424: == prop_fail_result)
5425: break;
5426: eptr+= len;
5427: }
5428: break;
5429:
5430: case PT_WORD:
5431: for (i = min; i < max; i++)
5432: {
1.1.1.2 ! misho 5433: int category;
1.1 misho 5434: int len = 1;
5435: if (eptr >= md->end_subject)
5436: {
5437: SCHECK_PARTIAL();
5438: break;
5439: }
5440: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 5441: category = UCD_CATEGORY(c);
! 5442: if ((category == ucp_L || category == ucp_N ||
1.1 misho 5443: c == CHAR_UNDERSCORE) == prop_fail_result)
5444: break;
5445: eptr+= len;
5446: }
5447: break;
5448:
1.1.1.2 ! misho 5449: case PT_CLIST:
! 5450: for (i = min; i < max; i++)
! 5451: {
! 5452: const pcre_uint32 *cp;
! 5453: int len = 1;
! 5454: if (eptr >= md->end_subject)
! 5455: {
! 5456: SCHECK_PARTIAL();
! 5457: break;
! 5458: }
! 5459: GETCHARLENTEST(c, eptr, len);
! 5460: cp = PRIV(ucd_caseless_sets) + prop_value;
! 5461: for (;;)
! 5462: {
! 5463: if (c < *cp)
! 5464: { if (prop_fail_result) break; else goto GOT_MAX; }
! 5465: if (c == *cp++)
! 5466: { if (prop_fail_result) goto GOT_MAX; else break; }
! 5467: }
! 5468: eptr += len;
! 5469: }
! 5470: GOT_MAX:
! 5471: break;
! 5472:
1.1 misho 5473: default:
5474: RRETURN(PCRE_ERROR_INTERNAL);
5475: }
5476:
5477: /* eptr is now past the end of the maximum run */
5478:
5479: if (possessive) continue;
5480: for(;;)
5481: {
1.1.1.2 ! misho 5482: RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
1.1 misho 5483: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5484: if (eptr-- == pp) break; /* Stop if tried at original pos */
1.1.1.2 ! misho 5485: if (utf) BACKCHAR(eptr);
1.1 misho 5486: }
5487: }
5488:
5489: /* Match extended Unicode sequences. We will get here only if the
5490: support is in the binary; otherwise a compile-time error occurs. */
5491:
5492: else if (ctype == OP_EXTUNI)
5493: {
5494: for (i = min; i < max; i++)
5495: {
5496: if (eptr >= md->end_subject)
5497: {
5498: SCHECK_PARTIAL();
5499: break;
5500: }
1.1.1.2 ! misho 5501: else
1.1 misho 5502: {
1.1.1.2 ! misho 5503: int lgb, rgb;
! 5504: GETCHARINCTEST(c, eptr);
! 5505: lgb = UCD_GRAPHBREAK(c);
! 5506: while (eptr < md->end_subject)
1.1 misho 5507: {
1.1.1.2 ! misho 5508: int len = 1;
! 5509: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
! 5510: rgb = UCD_GRAPHBREAK(c);
! 5511: if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
! 5512: lgb = rgb;
! 5513: eptr += len;
1.1 misho 5514: }
5515: }
1.1.1.2 ! misho 5516: CHECK_PARTIAL();
1.1 misho 5517: }
5518:
5519: /* eptr is now past the end of the maximum run */
5520:
5521: if (possessive) continue;
5522:
5523: for(;;)
5524: {
1.1.1.2 ! misho 5525: RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
1.1 misho 5526: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5527: if (eptr-- == pp) break; /* Stop if tried at original pos */
5528: for (;;) /* Move back over one extended */
5529: {
1.1.1.2 ! misho 5530: if (!utf) c = *eptr; else
1.1 misho 5531: {
5532: BACKCHAR(eptr);
1.1.1.2 ! misho 5533: GETCHAR(c, eptr);
1.1 misho 5534: }
1.1.1.2 ! misho 5535: if (UCD_CATEGORY(c) != ucp_M) break;
1.1 misho 5536: eptr--;
5537: }
5538: }
5539: }
5540:
5541: else
5542: #endif /* SUPPORT_UCP */
5543:
1.1.1.2 ! misho 5544: #ifdef SUPPORT_UTF
! 5545: if (utf)
1.1 misho 5546: {
5547: switch(ctype)
5548: {
5549: case OP_ANY:
5550: if (max < INT_MAX)
5551: {
5552: for (i = min; i < max; i++)
5553: {
5554: if (eptr >= md->end_subject)
5555: {
5556: SCHECK_PARTIAL();
5557: break;
5558: }
5559: if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho 5560: if (md->partial != 0 && /* Take care with CRLF partial */
! 5561: eptr + 1 >= md->end_subject &&
! 5562: NLBLOCK->nltype == NLTYPE_FIXED &&
! 5563: NLBLOCK->nllen == 2 &&
! 5564: RAWUCHAR(eptr) == NLBLOCK->nl[0])
! 5565: {
! 5566: md->hitend = TRUE;
! 5567: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 5568: }
1.1 misho 5569: eptr++;
1.1.1.2 ! misho 5570: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5571: }
5572: }
5573:
5574: /* Handle unlimited UTF-8 repeat */
5575:
5576: else
5577: {
5578: for (i = min; i < max; i++)
5579: {
5580: if (eptr >= md->end_subject)
5581: {
5582: SCHECK_PARTIAL();
5583: break;
5584: }
5585: if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho 5586: if (md->partial != 0 && /* Take care with CRLF partial */
! 5587: eptr + 1 >= md->end_subject &&
! 5588: NLBLOCK->nltype == NLTYPE_FIXED &&
! 5589: NLBLOCK->nllen == 2 &&
! 5590: RAWUCHAR(eptr) == NLBLOCK->nl[0])
! 5591: {
! 5592: md->hitend = TRUE;
! 5593: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 5594: }
1.1 misho 5595: eptr++;
1.1.1.2 ! misho 5596: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5597: }
5598: }
5599: break;
5600:
5601: case OP_ALLANY:
5602: if (max < INT_MAX)
5603: {
5604: for (i = min; i < max; i++)
5605: {
5606: if (eptr >= md->end_subject)
5607: {
5608: SCHECK_PARTIAL();
5609: break;
5610: }
5611: eptr++;
1.1.1.2 ! misho 5612: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5613: }
5614: }
1.1.1.2 ! misho 5615: else
! 5616: {
! 5617: eptr = md->end_subject; /* Unlimited UTF-8 repeat */
! 5618: SCHECK_PARTIAL();
! 5619: }
1.1 misho 5620: break;
5621:
5622: /* The byte case is the same as in non-UTF mode */
5623:
5624: case OP_ANYBYTE:
5625: c = max - min;
5626: if (c > (unsigned int)(md->end_subject - eptr))
5627: {
5628: eptr = md->end_subject;
5629: SCHECK_PARTIAL();
5630: }
5631: else eptr += c;
5632: break;
5633:
5634: case OP_ANYNL:
5635: for (i = min; i < max; i++)
5636: {
5637: int len = 1;
5638: if (eptr >= md->end_subject)
5639: {
5640: SCHECK_PARTIAL();
5641: break;
5642: }
5643: GETCHARLEN(c, eptr, len);
1.1.1.2 ! misho 5644: if (c == CHAR_CR)
1.1 misho 5645: {
5646: if (++eptr >= md->end_subject) break;
1.1.1.2 ! misho 5647: if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
1.1 misho 5648: }
5649: else
5650: {
1.1.1.2 ! misho 5651: if (c != CHAR_LF &&
1.1 misho 5652: (md->bsr_anycrlf ||
1.1.1.2 ! misho 5653: (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
! 5654: #ifndef EBCDIC
! 5655: && c != 0x2028 && c != 0x2029
! 5656: #endif /* Not EBCDIC */
! 5657: )))
1.1 misho 5658: break;
5659: eptr += len;
5660: }
5661: }
5662: break;
5663:
5664: case OP_NOT_HSPACE:
5665: case OP_HSPACE:
5666: for (i = min; i < max; i++)
5667: {
5668: BOOL gotspace;
5669: int len = 1;
5670: if (eptr >= md->end_subject)
5671: {
5672: SCHECK_PARTIAL();
5673: break;
5674: }
5675: GETCHARLEN(c, eptr, len);
5676: switch(c)
5677: {
1.1.1.2 ! misho 5678: HSPACE_CASES: gotspace = TRUE; break;
1.1 misho 5679: default: gotspace = FALSE; break;
5680: }
5681: if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5682: eptr += len;
5683: }
5684: break;
5685:
5686: case OP_NOT_VSPACE:
5687: case OP_VSPACE:
5688: for (i = min; i < max; i++)
5689: {
5690: BOOL gotspace;
5691: int len = 1;
5692: if (eptr >= md->end_subject)
5693: {
5694: SCHECK_PARTIAL();
5695: break;
5696: }
5697: GETCHARLEN(c, eptr, len);
5698: switch(c)
5699: {
1.1.1.2 ! misho 5700: VSPACE_CASES: gotspace = TRUE; break;
1.1 misho 5701: default: gotspace = FALSE; break;
5702: }
5703: if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5704: eptr += len;
5705: }
5706: break;
5707:
5708: case OP_NOT_DIGIT:
5709: for (i = min; i < max; i++)
5710: {
5711: int len = 1;
5712: if (eptr >= md->end_subject)
5713: {
5714: SCHECK_PARTIAL();
5715: break;
5716: }
5717: GETCHARLEN(c, eptr, len);
5718: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5719: eptr+= len;
5720: }
5721: break;
5722:
5723: case OP_DIGIT:
5724: for (i = min; i < max; i++)
5725: {
5726: int len = 1;
5727: if (eptr >= md->end_subject)
5728: {
5729: SCHECK_PARTIAL();
5730: break;
5731: }
5732: GETCHARLEN(c, eptr, len);
5733: if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5734: eptr+= len;
5735: }
5736: break;
5737:
5738: case OP_NOT_WHITESPACE:
5739: for (i = min; i < max; i++)
5740: {
5741: int len = 1;
5742: if (eptr >= md->end_subject)
5743: {
5744: SCHECK_PARTIAL();
5745: break;
5746: }
5747: GETCHARLEN(c, eptr, len);
5748: if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5749: eptr+= len;
5750: }
5751: break;
5752:
5753: case OP_WHITESPACE:
5754: for (i = min; i < max; i++)
5755: {
5756: int len = 1;
5757: if (eptr >= md->end_subject)
5758: {
5759: SCHECK_PARTIAL();
5760: break;
5761: }
5762: GETCHARLEN(c, eptr, len);
5763: if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5764: eptr+= len;
5765: }
5766: break;
5767:
5768: case OP_NOT_WORDCHAR:
5769: for (i = min; i < max; i++)
5770: {
5771: int len = 1;
5772: if (eptr >= md->end_subject)
5773: {
5774: SCHECK_PARTIAL();
5775: break;
5776: }
5777: GETCHARLEN(c, eptr, len);
5778: if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5779: eptr+= len;
5780: }
5781: break;
5782:
5783: case OP_WORDCHAR:
5784: for (i = min; i < max; i++)
5785: {
5786: int len = 1;
5787: if (eptr >= md->end_subject)
5788: {
5789: SCHECK_PARTIAL();
5790: break;
5791: }
5792: GETCHARLEN(c, eptr, len);
5793: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5794: eptr+= len;
5795: }
5796: break;
5797:
5798: default:
5799: RRETURN(PCRE_ERROR_INTERNAL);
5800: }
5801:
1.1.1.2 ! misho 5802: /* eptr is now past the end of the maximum run. If possessive, we are
! 5803: done (no backing up). Otherwise, match at this position; anything other
! 5804: than no match is immediately returned. For nomatch, back up one
! 5805: character, unless we are matching \R and the last thing matched was
! 5806: \r\n, in which case, back up two bytes. */
1.1 misho 5807:
5808: if (possessive) continue;
5809: for(;;)
5810: {
1.1.1.2 ! misho 5811: RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
1.1 misho 5812: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5813: if (eptr-- == pp) break; /* Stop if tried at original pos */
5814: BACKCHAR(eptr);
1.1.1.2 ! misho 5815: if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
! 5816: RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
1.1 misho 5817: }
5818: }
5819: else
1.1.1.2 ! misho 5820: #endif /* SUPPORT_UTF */
! 5821: /* Not UTF mode */
1.1 misho 5822: {
5823: switch(ctype)
5824: {
5825: case OP_ANY:
5826: for (i = min; i < max; i++)
5827: {
5828: if (eptr >= md->end_subject)
5829: {
5830: SCHECK_PARTIAL();
5831: break;
5832: }
5833: if (IS_NEWLINE(eptr)) break;
1.1.1.2 ! misho 5834: if (md->partial != 0 && /* Take care with CRLF partial */
! 5835: eptr + 1 >= md->end_subject &&
! 5836: NLBLOCK->nltype == NLTYPE_FIXED &&
! 5837: NLBLOCK->nllen == 2 &&
! 5838: *eptr == NLBLOCK->nl[0])
! 5839: {
! 5840: md->hitend = TRUE;
! 5841: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
! 5842: }
1.1 misho 5843: eptr++;
5844: }
5845: break;
5846:
5847: case OP_ALLANY:
5848: case OP_ANYBYTE:
5849: c = max - min;
5850: if (c > (unsigned int)(md->end_subject - eptr))
5851: {
5852: eptr = md->end_subject;
5853: SCHECK_PARTIAL();
5854: }
5855: else eptr += c;
5856: break;
5857:
5858: case OP_ANYNL:
5859: for (i = min; i < max; i++)
5860: {
5861: if (eptr >= md->end_subject)
5862: {
5863: SCHECK_PARTIAL();
5864: break;
5865: }
5866: c = *eptr;
1.1.1.2 ! misho 5867: if (c == CHAR_CR)
1.1 misho 5868: {
5869: if (++eptr >= md->end_subject) break;
1.1.1.2 ! misho 5870: if (*eptr == CHAR_LF) eptr++;
1.1 misho 5871: }
5872: else
5873: {
1.1.1.2 ! misho 5874: if (c != CHAR_LF && (md->bsr_anycrlf ||
! 5875: (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
! 5876: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5877: && c != 0x2028 && c != 0x2029
! 5878: #endif
! 5879: ))) break;
1.1 misho 5880: eptr++;
5881: }
5882: }
5883: break;
5884:
5885: case OP_NOT_HSPACE:
5886: for (i = min; i < max; i++)
5887: {
5888: if (eptr >= md->end_subject)
5889: {
5890: SCHECK_PARTIAL();
5891: break;
5892: }
1.1.1.2 ! misho 5893: switch(*eptr)
! 5894: {
! 5895: default: eptr++; break;
! 5896: HSPACE_BYTE_CASES:
! 5897: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5898: HSPACE_MULTIBYTE_CASES:
! 5899: #endif
! 5900: goto ENDLOOP00;
! 5901: }
1.1 misho 5902: }
1.1.1.2 ! misho 5903: ENDLOOP00:
1.1 misho 5904: break;
5905:
5906: case OP_HSPACE:
5907: for (i = min; i < max; i++)
5908: {
5909: if (eptr >= md->end_subject)
5910: {
5911: SCHECK_PARTIAL();
5912: break;
5913: }
1.1.1.2 ! misho 5914: switch(*eptr)
! 5915: {
! 5916: default: goto ENDLOOP01;
! 5917: HSPACE_BYTE_CASES:
! 5918: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5919: HSPACE_MULTIBYTE_CASES:
! 5920: #endif
! 5921: eptr++; break;
! 5922: }
1.1 misho 5923: }
1.1.1.2 ! misho 5924: ENDLOOP01:
1.1 misho 5925: break;
5926:
5927: case OP_NOT_VSPACE:
5928: for (i = min; i < max; i++)
5929: {
5930: if (eptr >= md->end_subject)
5931: {
5932: SCHECK_PARTIAL();
5933: break;
5934: }
1.1.1.2 ! misho 5935: switch(*eptr)
! 5936: {
! 5937: default: eptr++; break;
! 5938: VSPACE_BYTE_CASES:
! 5939: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5940: VSPACE_MULTIBYTE_CASES:
! 5941: #endif
! 5942: goto ENDLOOP02;
! 5943: }
1.1 misho 5944: }
1.1.1.2 ! misho 5945: ENDLOOP02:
1.1 misho 5946: break;
5947:
5948: case OP_VSPACE:
5949: for (i = min; i < max; i++)
5950: {
5951: if (eptr >= md->end_subject)
5952: {
5953: SCHECK_PARTIAL();
5954: break;
5955: }
1.1.1.2 ! misho 5956: switch(*eptr)
! 5957: {
! 5958: default: goto ENDLOOP03;
! 5959: VSPACE_BYTE_CASES:
! 5960: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
! 5961: VSPACE_MULTIBYTE_CASES:
! 5962: #endif
! 5963: eptr++; break;
! 5964: }
1.1 misho 5965: }
1.1.1.2 ! misho 5966: ENDLOOP03:
1.1 misho 5967: break;
5968:
5969: case OP_NOT_DIGIT:
5970: for (i = min; i < max; i++)
5971: {
5972: if (eptr >= md->end_subject)
5973: {
5974: SCHECK_PARTIAL();
5975: break;
5976: }
1.1.1.2 ! misho 5977: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1 misho 5978: eptr++;
5979: }
5980: break;
5981:
5982: case OP_DIGIT:
5983: for (i = min; i < max; i++)
5984: {
5985: if (eptr >= md->end_subject)
5986: {
5987: SCHECK_PARTIAL();
5988: break;
5989: }
1.1.1.2 ! misho 5990: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1 misho 5991: eptr++;
5992: }
5993: break;
5994:
5995: case OP_NOT_WHITESPACE:
5996: for (i = min; i < max; i++)
5997: {
5998: if (eptr >= md->end_subject)
5999: {
6000: SCHECK_PARTIAL();
6001: break;
6002: }
1.1.1.2 ! misho 6003: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1 misho 6004: eptr++;
6005: }
6006: break;
6007:
6008: case OP_WHITESPACE:
6009: for (i = min; i < max; i++)
6010: {
6011: if (eptr >= md->end_subject)
6012: {
6013: SCHECK_PARTIAL();
6014: break;
6015: }
1.1.1.2 ! misho 6016: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1 misho 6017: eptr++;
6018: }
6019: break;
6020:
6021: case OP_NOT_WORDCHAR:
6022: for (i = min; i < max; i++)
6023: {
6024: if (eptr >= md->end_subject)
6025: {
6026: SCHECK_PARTIAL();
6027: break;
6028: }
1.1.1.2 ! misho 6029: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1 misho 6030: eptr++;
6031: }
6032: break;
6033:
6034: case OP_WORDCHAR:
6035: for (i = min; i < max; i++)
6036: {
6037: if (eptr >= md->end_subject)
6038: {
6039: SCHECK_PARTIAL();
6040: break;
6041: }
1.1.1.2 ! misho 6042: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1 misho 6043: eptr++;
6044: }
6045: break;
6046:
6047: default:
6048: RRETURN(PCRE_ERROR_INTERNAL);
6049: }
6050:
1.1.1.2 ! misho 6051: /* eptr is now past the end of the maximum run. If possessive, we are
! 6052: done (no backing up). Otherwise, match at this position; anything other
! 6053: than no match is immediately returned. For nomatch, back up one
! 6054: character (byte), unless we are matching \R and the last thing matched
! 6055: was \r\n, in which case, back up two bytes. */
1.1 misho 6056:
6057: if (possessive) continue;
6058: while (eptr >= pp)
6059: {
1.1.1.2 ! misho 6060: RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
1.1 misho 6061: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1.1.2 ! misho 6062: eptr--;
! 6063: if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
! 6064: eptr[-1] == CHAR_CR) eptr--;
1.1 misho 6065: }
6066: }
6067:
6068: /* Get here if we can't make it match with any permitted repetitions */
6069:
1.1.1.2 ! misho 6070: RRETURN(MATCH_NOMATCH);
1.1 misho 6071: }
6072: /* Control never gets here */
6073:
6074: /* There's been some horrible disaster. Arrival here can only mean there is
6075: something seriously wrong in the code above or the OP_xxx definitions. */
6076:
6077: default:
6078: DPRINTF(("Unknown opcode %d\n", *ecode));
6079: RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6080: }
6081:
6082: /* Do not stick any code in here without much thought; it is assumed
6083: that "continue" in the code above comes out to here to repeat the main
6084: loop. */
6085:
6086: } /* End of main loop */
6087: /* Control never reaches here */
6088:
6089:
6090: /* When compiling to use the heap rather than the stack for recursive calls to
6091: match(), the RRETURN() macro jumps here. The number that is saved in
6092: frame->Xwhere indicates which label we actually want to return to. */
6093:
6094: #ifdef NO_RECURSE
6095: #define LBL(val) case val: goto L_RM##val;
6096: HEAP_RETURN:
6097: switch (frame->Xwhere)
6098: {
6099: LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6100: LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6101: LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6102: LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.1.1.2 ! misho 6103: LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
! 6104: LBL(65) LBL(66)
! 6105: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
! 6106: LBL(21)
! 6107: #endif
! 6108: #ifdef SUPPORT_UTF
! 6109: LBL(16) LBL(18) LBL(20)
! 6110: LBL(22) LBL(23) LBL(28) LBL(30)
1.1 misho 6111: LBL(32) LBL(34) LBL(42) LBL(46)
6112: #ifdef SUPPORT_UCP
6113: LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.1.1.2 ! misho 6114: LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
1.1 misho 6115: #endif /* SUPPORT_UCP */
1.1.1.2 ! misho 6116: #endif /* SUPPORT_UTF */
1.1 misho 6117: default:
6118: DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6119: return PCRE_ERROR_INTERNAL;
6120: }
6121: #undef LBL
6122: #endif /* NO_RECURSE */
6123: }
6124:
6125:
6126: /***************************************************************************
6127: ****************************************************************************
6128: RECURSION IN THE match() FUNCTION
6129:
6130: Undefine all the macros that were defined above to handle this. */
6131:
6132: #ifdef NO_RECURSE
6133: #undef eptr
6134: #undef ecode
6135: #undef mstart
6136: #undef offset_top
6137: #undef eptrb
6138: #undef flags
6139:
6140: #undef callpat
6141: #undef charptr
6142: #undef data
6143: #undef next
6144: #undef pp
6145: #undef prev
6146: #undef saved_eptr
6147:
6148: #undef new_recursive
6149:
6150: #undef cur_is_word
6151: #undef condition
6152: #undef prev_is_word
6153:
6154: #undef ctype
6155: #undef length
6156: #undef max
6157: #undef min
6158: #undef number
6159: #undef offset
6160: #undef op
6161: #undef save_capture_last
6162: #undef save_offset1
6163: #undef save_offset2
6164: #undef save_offset3
6165: #undef stacksave
6166:
6167: #undef newptrb
6168:
6169: #endif /* NO_RECURSE */
6170:
6171: /* These two are defined as macros in both cases */
6172:
6173: #undef fc
6174: #undef fi
6175:
6176: /***************************************************************************
6177: ***************************************************************************/
6178:
6179:
1.1.1.2 ! misho 6180: #ifdef NO_RECURSE
! 6181: /*************************************************
! 6182: * Release allocated heap frames *
! 6183: *************************************************/
! 6184:
! 6185: /* This function releases all the allocated frames by walking the Xnextframe
! 6186: chain. The base frame is on the machine stack, and so must not be freed.
! 6187:
! 6188: Argument: the address of the base frame (only its Xnextframe field is read)
! 6189: Returns: nothing; each chained frame is passed to PUBL(stack_free)
! 6190: */
! 6191:
! 6192: static void
! 6193: release_match_heapframes (heapframe *frame_base)
! 6194: {
! 6195: heapframe *nextframe = frame_base->Xnextframe; /* Start after the base frame */
! 6196: while (nextframe != NULL)
! 6197: {
! 6198: heapframe *oldframe = nextframe;
! 6199: nextframe = nextframe->Xnextframe; /* Fetch the link before freeing */
! 6200: (PUBL(stack_free))(oldframe);
! 6201: }
! 6202: }
! 6203: #endif /* NO_RECURSE */
! 6204:
1.1 misho 6205:
6206: /*************************************************
6207: * Execute a Regular Expression *
6208: *************************************************/
6209:
6210: /* This function applies a compiled re to a subject string and picks out
6211: portions of the string if it matches. Two elements in the vector are set for
6212: each substring: the offsets to the start and end of the substring.
6213:
6214: Arguments:
6215: argument_re points to the compiled expression
6216: extra_data points to extra data or is NULL
6217: subject points to the subject string
6218: length length of subject string (may contain binary zeros)
6219: start_offset where to start in the subject string
6220: options option bits
6221: offsets points to a vector of ints to be filled in with offsets
6222: offsetcount the number of elements in the vector
6223:
6224: Returns: > 0 => success; value is the number of elements filled in
6225: = 0 => success, but offsets is not big enough
6226: -1 => failed to match
6227: < -1 => some kind of unexpected problem
6228: */
6229:
1.1.1.2 ! misho 6230: #if defined COMPILE_PCRE8
1.1 misho 6231: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6232: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6233: PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6234: int offsetcount)
1.1.1.2 ! misho 6235: #elif defined COMPILE_PCRE16
! 6236: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 6237: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
! 6238: PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
! 6239: int offsetcount)
! 6240: #elif defined COMPILE_PCRE32
! 6241: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 6242: pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
! 6243: PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
! 6244: int offsetcount)
! 6245: #endif
1.1 misho 6246: {
1.1.1.2 ! misho 6247: int rc, ocount, arg_offset_max;
1.1 misho 6248: int newline;
6249: BOOL using_temporary_offsets = FALSE;
6250: BOOL anchored;
6251: BOOL startline;
6252: BOOL firstline;
1.1.1.2 ! misho 6253: BOOL utf;
! 6254: BOOL has_first_char = FALSE;
! 6255: BOOL has_req_char = FALSE;
! 6256: pcre_uchar first_char = 0;
! 6257: pcre_uchar first_char2 = 0;
! 6258: pcre_uchar req_char = 0;
! 6259: pcre_uchar req_char2 = 0;
1.1 misho 6260: match_data match_block;
6261: match_data *md = &match_block;
1.1.1.2 ! misho 6262: const pcre_uint8 *tables;
! 6263: const pcre_uint8 *start_bits = NULL;
! 6264: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
! 6265: PCRE_PUCHAR end_subject;
! 6266: PCRE_PUCHAR start_partial = NULL;
! 6267: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1 misho 6268:
6269: const pcre_study_data *study;
1.1.1.2 ! misho 6270: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
! 6271:
! 6272: #ifdef NO_RECURSE
! 6273: heapframe frame_zero;
! 6274: frame_zero.Xprevframe = NULL; /* Marks the top level */
! 6275: frame_zero.Xnextframe = NULL; /* None are allocated yet */
! 6276: md->match_frames_base = &frame_zero;
! 6277: #endif
! 6278:
! 6279: /* Check for the special magic call that measures the size of the stack used
! 6280: per recursive call of match(). Without the funny casting for sizeof, a Windows
! 6281: compiler gave this error: "unary minus operator applied to unsigned type,
! 6282: result still unsigned". Hopefully the cast fixes that. */
1.1 misho 6283:
1.1.1.2 ! misho 6284: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
! 6285: start_offset == -999)
! 6286: #ifdef NO_RECURSE
! 6287: return -((int)sizeof(heapframe));
! 6288: #else
! 6289: return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
! 6290: #endif
1.1 misho 6291:
6292: /* Plausibility checks */
6293:
6294: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.1.1.2 ! misho 6295: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
! 6296: return PCRE_ERROR_NULL;
1.1 misho 6297: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.1.1.2 ! misho 6298: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.1 misho 6299: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6300:
1.1.1.2 ! misho 6301: /* Check that the first field in the block is the magic number. If it is not,
! 6302: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
! 6303: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
! 6304: means that the pattern is likely compiled with different endianness. */
! 6305:
! 6306: if (re->magic_number != MAGIC_NUMBER)
! 6307: return re->magic_number == REVERSED_MAGIC_NUMBER?
! 6308: PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
! 6309: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
! 6310:
! 6311: /* These two settings are used in the code for checking a UTF-8 string that
! 6312: follows immediately afterwards. Other values in the md block are used only
! 6313: during "normal" pcre_exec() processing, not when the JIT support is in use,
! 6314: so they are set up later. */
! 6315:
! 6316: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
! 6317: utf = md->utf = (re->options & PCRE_UTF8) != 0;
! 6318: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
! 6319: ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
! 6320:
! 6321: /* Check a UTF-8 string if required. Pass back the character offset and error
! 6322: code for an invalid string if a results vector is available. */
! 6323:
! 6324: #ifdef SUPPORT_UTF
! 6325: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
! 6326: {
! 6327: int erroroffset;
! 6328: int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
! 6329: if (errorcode != 0)
! 6330: {
! 6331: if (offsetcount >= 2)
! 6332: {
! 6333: offsets[0] = erroroffset;
! 6334: offsets[1] = errorcode;
! 6335: }
! 6336: #if defined COMPILE_PCRE8
! 6337: return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
! 6338: PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
! 6339: #elif defined COMPILE_PCRE16
! 6340: return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
! 6341: PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
! 6342: #elif defined COMPILE_PCRE32
! 6343: return PCRE_ERROR_BADUTF32;
! 6344: #endif
! 6345: }
! 6346: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
! 6347: /* Check that a start_offset points to the start of a UTF character. */
! 6348: if (start_offset > 0 && start_offset < length &&
! 6349: NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
! 6350: return PCRE_ERROR_BADUTF8_OFFSET;
! 6351: #endif
! 6352: }
! 6353: #endif
! 6354:
! 6355: /* If the pattern was successfully studied with JIT support, run the JIT
! 6356: executable instead of the rest of this function. Most options must be set at
! 6357: compile time for the JIT code to be usable. Fallback to the normal code path if
! 6358: an unsupported flag is set. */
! 6359:
! 6360: #ifdef SUPPORT_JIT
! 6361: if (extra_data != NULL
! 6362: && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
! 6363: PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
! 6364: && extra_data->executable_jit != NULL
! 6365: && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
! 6366: {
! 6367: rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
! 6368: start_offset, options, offsets, offsetcount);
! 6369:
! 6370: /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
! 6371: mode is not compiled. In this case we simply fallback to interpreter. */
! 6372:
! 6373: if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
! 6374: }
! 6375: #endif
! 6376:
! 6377: /* Carry on with non-JIT matching. This information is for finding all the
! 6378: numbers associated with a given name, for condition testing. */
1.1 misho 6379:
1.1.1.2 ! misho 6380: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.1 misho 6381: md->name_count = re->name_count;
6382: md->name_entry_size = re->name_entry_size;
6383:
6384: /* Fish out the optional data from the extra_data structure, first setting
6385: the default values. */
6386:
6387: study = NULL;
6388: md->match_limit = MATCH_LIMIT;
6389: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6390: md->callout_data = NULL;
6391:
6392: /* The table pointer is always in native byte order. */
6393:
1.1.1.2 ! misho 6394: tables = re->tables;
1.1 misho 6395:
6396: if (extra_data != NULL)
6397: {
6398: register unsigned int flags = extra_data->flags;
6399: if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6400: study = (const pcre_study_data *)extra_data->study_data;
6401: if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6402: md->match_limit = extra_data->match_limit;
6403: if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6404: md->match_limit_recursion = extra_data->match_limit_recursion;
6405: if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6406: md->callout_data = extra_data->callout_data;
6407: if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6408: }
6409:
6410: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6411: is a feature that makes it possible to save compiled regex and re-use them
6412: in other programs later. */
6413:
1.1.1.2 ! misho 6414: if (tables == NULL) tables = PRIV(default_tables);
1.1 misho 6415:
6416: /* Set up other data */
6417:
6418: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6419: startline = (re->flags & PCRE_STARTLINE) != 0;
6420: firstline = (re->options & PCRE_FIRSTLINE) != 0;
6421:
6422: /* The code starts after the real_pcre block and the capture name table. */
6423:
1.1.1.2 ! misho 6424: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1 misho 6425: re->name_count * re->name_entry_size;
6426:
1.1.1.2 ! misho 6427: md->start_subject = (PCRE_PUCHAR)subject;
1.1 misho 6428: md->start_offset = start_offset;
6429: md->end_subject = md->start_subject + length;
6430: end_subject = md->end_subject;
6431:
6432: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6433: md->use_ucp = (re->options & PCRE_UCP) != 0;
6434: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.1.1.2 ! misho 6435: md->ignore_skip_arg = FALSE;
! 6436:
! 6437: /* Some options are unpacked into BOOL variables in the hope that testing
! 6438: them will be faster than individual option bits. */
1.1 misho 6439:
6440: md->notbol = (options & PCRE_NOTBOL) != 0;
6441: md->noteol = (options & PCRE_NOTEOL) != 0;
6442: md->notempty = (options & PCRE_NOTEMPTY) != 0;
6443: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
1.1.1.2 ! misho 6444:
1.1 misho 6445: md->hitend = FALSE;
1.1.1.2 ! misho 6446: md->mark = md->nomatch_mark = NULL; /* In case never set */
1.1 misho 6447:
6448: md->recursive = NULL; /* No recursion at top level */
1.1.1.2 ! misho 6449: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
1.1 misho 6450:
6451: md->lcc = tables + lcc_offset;
1.1.1.2 ! misho 6452: md->fcc = tables + fcc_offset;
1.1 misho 6453: md->ctypes = tables + ctypes_offset;
6454:
6455: /* Handle different \R options. */
6456:
6457: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6458: {
6459: case 0:
6460: if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6461: md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6462: else
6463: #ifdef BSR_ANYCRLF
6464: md->bsr_anycrlf = TRUE;
6465: #else
6466: md->bsr_anycrlf = FALSE;
6467: #endif
6468: break;
6469:
6470: case PCRE_BSR_ANYCRLF:
6471: md->bsr_anycrlf = TRUE;
6472: break;
6473:
6474: case PCRE_BSR_UNICODE:
6475: md->bsr_anycrlf = FALSE;
6476: break;
6477:
6478: default: return PCRE_ERROR_BADNEWLINE;
6479: }
6480:
6481: /* Handle different types of newline. The three bits give eight cases. If
6482: nothing is set at run time, whatever was used at compile time applies. */
6483:
6484: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6485: (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6486: {
6487: case 0: newline = NEWLINE; break; /* Compile-time default */
6488: case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6489: case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6490: case PCRE_NEWLINE_CR+
6491: PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6492: case PCRE_NEWLINE_ANY: newline = -1; break;
6493: case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6494: default: return PCRE_ERROR_BADNEWLINE;
6495: }
6496:
6497: if (newline == -2)
6498: {
6499: md->nltype = NLTYPE_ANYCRLF;
6500: }
6501: else if (newline < 0)
6502: {
6503: md->nltype = NLTYPE_ANY;
6504: }
6505: else
6506: {
6507: md->nltype = NLTYPE_FIXED;
6508: if (newline > 255)
6509: {
6510: md->nllen = 2;
6511: md->nl[0] = (newline >> 8) & 255;
6512: md->nl[1] = newline & 255;
6513: }
6514: else
6515: {
6516: md->nllen = 1;
6517: md->nl[0] = newline;
6518: }
6519: }
6520:
6521: /* Partial matching was originally supported only for a restricted set of
6522: regexes; from release 8.00 there are no restrictions, but the bits are still
6523: defined (though never set). So there's no harm in leaving this code. */
6524:
6525: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6526: return PCRE_ERROR_BADPARTIAL;
6527:
6528: /* If the expression has got more back references than the offsets supplied can
6529: hold, we get a temporary chunk of working store to use during the matching.
6530: Otherwise, we can use the vector supplied, rounding down its size to a multiple
6531: of 3. */
6532:
6533: ocount = offsetcount - (offsetcount % 3);
1.1.1.2 ! misho 6534: arg_offset_max = (2*ocount)/3;
1.1 misho 6535:
6536: if (re->top_backref > 0 && re->top_backref >= ocount/3)
6537: {
6538: ocount = re->top_backref * 3 + 3;
1.1.1.2 ! misho 6539: md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1 misho 6540: if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6541: using_temporary_offsets = TRUE;
6542: DPRINTF(("Got memory to hold back references\n"));
6543: }
6544: else md->offset_vector = offsets;
6545:
6546: md->offset_end = ocount;
6547: md->offset_max = (2*ocount)/3;
6548: md->offset_overflow = FALSE;
6549: md->capture_last = -1;
6550:
6551: /* Reset the working variable associated with each extraction. These should
6552: never be used unless previously set, but they get saved and restored, and so we
1.1.1.2 ! misho 6553: initialize them to avoid reading uninitialized locations. Also, unset the
! 6554: offsets for the matched string. This is really just for tidiness with callouts,
! 6555: in case they inspect these fields. */
1.1 misho 6556:
6557: if (md->offset_vector != NULL)
6558: {
6559: register int *iptr = md->offset_vector + ocount;
1.1.1.2 ! misho 6560: register int *iend = iptr - re->top_bracket;
! 6561: if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
1.1 misho 6562: while (--iptr >= iend) *iptr = -1;
1.1.1.2 ! misho 6563: md->offset_vector[0] = md->offset_vector[1] = -1;
1.1 misho 6564: }
6565:
1.1.1.2 ! misho 6566: /* Set up the first character to match, if available. The first_char value is
1.1 misho 6567: never set for an anchored regular expression, but the anchoring may be forced
6568: at run time, so we have to test for anchoring. The first char may be unset for
6569: an unanchored pattern, of course. If there's no first char and the pattern was
6570: studied, there may be a bitmap of possible first characters. */
6571:
6572: if (!anchored)
6573: {
6574: if ((re->flags & PCRE_FIRSTSET) != 0)
6575: {
1.1.1.2 ! misho 6576: has_first_char = TRUE;
! 6577: first_char = first_char2 = (pcre_uchar)(re->first_char);
! 6578: if ((re->flags & PCRE_FCH_CASELESS) != 0)
! 6579: {
! 6580: first_char2 = TABLE_GET(first_char, md->fcc, first_char);
! 6581: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
! 6582: if (utf && first_char > 127)
! 6583: first_char2 = UCD_OTHERCASE(first_char);
! 6584: #endif
! 6585: }
1.1 misho 6586: }
6587: else
6588: if (!startline && study != NULL &&
6589: (study->flags & PCRE_STUDY_MAPPED) != 0)
6590: start_bits = study->start_bits;
6591: }
6592:
6593: /* For anchored or unanchored matches, there may be a "last known required
6594: character" set. */
6595:
6596: if ((re->flags & PCRE_REQCHSET) != 0)
6597: {
1.1.1.2 ! misho 6598: has_req_char = TRUE;
! 6599: req_char = req_char2 = (pcre_uchar)(re->req_char);
! 6600: if ((re->flags & PCRE_RCH_CASELESS) != 0)
! 6601: {
! 6602: req_char2 = TABLE_GET(req_char, md->fcc, req_char);
! 6603: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
! 6604: if (utf && req_char > 127)
! 6605: req_char2 = UCD_OTHERCASE(req_char);
! 6606: #endif
! 6607: }
1.1 misho 6608: }
6609:
6610:
6611: /* ==========================================================================*/
6612:
6613: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6614: the loop runs just once. */
6615:
6616: for(;;)
6617: {
1.1.1.2 ! misho 6618: PCRE_PUCHAR save_end_subject = end_subject;
! 6619: PCRE_PUCHAR new_start_match;
1.1 misho 6620:
6621: /* If firstline is TRUE, the start of the match is constrained to the first
6622: line of a multiline string. That is, the match must be before or at the first
6623: newline. Implement this by temporarily adjusting end_subject so that we stop
6624: scanning at a newline. If the match fails at the newline, later code breaks
6625: this loop. */
6626:
6627: if (firstline)
6628: {
1.1.1.2 ! misho 6629: PCRE_PUCHAR t = start_match;
! 6630: #ifdef SUPPORT_UTF
! 6631: if (utf)
1.1 misho 6632: {
6633: while (t < md->end_subject && !IS_NEWLINE(t))
6634: {
6635: t++;
1.1.1.2 ! misho 6636: ACROSSCHAR(t < end_subject, *t, t++);
1.1 misho 6637: }
6638: }
6639: else
6640: #endif
6641: while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6642: end_subject = t;
6643: }
6644:
6645: /* There are some optimizations that avoid running the match if a known
6646: starting point is not found, or if a known later character is not present.
6647: However, there is an option that disables these, for testing and for ensuring
6648: that all callouts do actually occur. The option can be set in the regex by
6649: (*NO_START_OPT) or passed in match-time options. */
6650:
6651: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6652: {
1.1.1.2 ! misho 6653: /* Advance to a unique first char if there is one. */
1.1 misho 6654:
1.1.1.2 ! misho 6655: if (has_first_char)
1.1 misho 6656: {
1.1.1.2 ! misho 6657: pcre_uchar smc;
! 6658:
! 6659: if (first_char != first_char2)
! 6660: while (start_match < end_subject &&
! 6661: (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
1.1 misho 6662: start_match++;
6663: else
1.1.1.2 ! misho 6664: while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
1.1 misho 6665: start_match++;
6666: }
6667:
6668: /* Or to just after a linebreak for a multiline match */
6669:
6670: else if (startline)
6671: {
6672: if (start_match > md->start_subject + start_offset)
6673: {
1.1.1.2 ! misho 6674: #ifdef SUPPORT_UTF
! 6675: if (utf)
1.1 misho 6676: {
6677: while (start_match < end_subject && !WAS_NEWLINE(start_match))
6678: {
6679: start_match++;
1.1.1.2 ! misho 6680: ACROSSCHAR(start_match < end_subject, *start_match,
! 6681: start_match++);
1.1 misho 6682: }
6683: }
6684: else
6685: #endif
6686: while (start_match < end_subject && !WAS_NEWLINE(start_match))
6687: start_match++;
6688:
6689: /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6690: and we are now at a LF, advance the match position by one more character.
6691: */
6692:
6693: if (start_match[-1] == CHAR_CR &&
6694: (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6695: start_match < end_subject &&
1.1.1.2 ! misho 6696: RAWUCHARTEST(start_match) == CHAR_NL)
1.1 misho 6697: start_match++;
6698: }
6699: }
6700:
6701: /* Or to a non-unique first byte after study */
6702:
6703: else if (start_bits != NULL)
6704: {
6705: while (start_match < end_subject)
6706: {
1.1.1.2 ! misho 6707: register pcre_uint32 c = RAWUCHARTEST(start_match);
! 6708: #ifndef COMPILE_PCRE8
! 6709: if (c > 255) c = 255;
! 6710: #endif
1.1 misho 6711: if ((start_bits[c/8] & (1 << (c&7))) == 0)
6712: {
6713: start_match++;
1.1.1.2 ! misho 6714: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
! 6715: /* In non 8-bit mode, the iteration will stop for
! 6716: characters > 255 at the beginning or not stop at all. */
! 6717: if (utf)
! 6718: ACROSSCHAR(start_match < end_subject, *start_match,
! 6719: start_match++);
1.1 misho 6720: #endif
6721: }
6722: else break;
6723: }
6724: }
6725: } /* Starting optimizations */
6726:
6727: /* Restore fudged end_subject */
6728:
6729: end_subject = save_end_subject;
6730:
6731: /* The following two optimizations are disabled for partial matching or if
6732: disabling is explicitly requested. */
6733:
1.1.1.2 ! misho 6734: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
1.1 misho 6735: {
6736: /* If the pattern was studied, a minimum subject length may be set. This is
6737: a lower bound; no actual string of that length may actually match the
6738: pattern. Although the value is, strictly, in characters, we treat it as
6739: bytes to avoid spending too much time in this optimization. */
6740:
6741: if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6742: (pcre_uint32)(end_subject - start_match) < study->minlength)
6743: {
6744: rc = MATCH_NOMATCH;
6745: break;
6746: }
6747:
1.1.1.2 ! misho 6748: /* If req_char is set, we know that that character must appear in the
! 6749: subject for the match to succeed. If the first character is set, req_char
1.1 misho 6750: must be later in the subject; otherwise the test starts at the match point.
6751: This optimization can save a huge amount of backtracking in patterns with
6752: nested unlimited repeats that aren't going to match. Writing separate code
6753: for cased/caseless versions makes it go faster, as does using an
6754: autoincrement and backing off on a match.
6755:
6756: HOWEVER: when the subject string is very, very long, searching to its end
6757: can take a long time, and give bad performance on quite ordinary patterns.
6758: This showed up when somebody was matching something like /^\d+C/ on a
6759: 32-megabyte string... so we don't do this when the string is sufficiently
6760: long. */
6761:
1.1.1.2 ! misho 6762: if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1 misho 6763: {
1.1.1.2 ! misho 6764: register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.1 misho 6765:
6766: /* We don't need to repeat the search if we haven't yet reached the
6767: place we found it at last time. */
6768:
1.1.1.2 ! misho 6769: if (p > req_char_ptr)
1.1 misho 6770: {
1.1.1.2 ! misho 6771: if (req_char != req_char2)
1.1 misho 6772: {
6773: while (p < end_subject)
6774: {
1.1.1.2 ! misho 6775: register pcre_uint32 pp = RAWUCHARINCTEST(p);
! 6776: if (pp == req_char || pp == req_char2) { p--; break; }
1.1 misho 6777: }
6778: }
6779: else
6780: {
6781: while (p < end_subject)
6782: {
1.1.1.2 ! misho 6783: if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
1.1 misho 6784: }
6785: }
6786:
6787: /* If we can't find the required character, break the matching loop,
6788: forcing a match failure. */
6789:
6790: if (p >= end_subject)
6791: {
6792: rc = MATCH_NOMATCH;
6793: break;
6794: }
6795:
6796: /* If we have found the required character, save the point where we
6797: found it, so that we don't search again next time round the loop if
6798: the start hasn't passed this character yet. */
6799:
1.1.1.2 ! misho 6800: req_char_ptr = p;
1.1 misho 6801: }
6802: }
6803: }
6804:
6805: #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6806: printf(">>>> Match against: ");
6807: pchars(start_match, end_subject - start_match, TRUE, md);
6808: printf("\n");
6809: #endif
6810:
6811: /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6812: first starting point for which a partial match was found. */
6813:
6814: md->start_match_ptr = start_match;
6815: md->start_used_ptr = start_match;
6816: md->match_call_count = 0;
1.1.1.2 ! misho 6817: md->match_function_type = 0;
! 6818: md->end_offset_top = 0;
! 6819: rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
1.1 misho 6820: if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6821:
6822: switch(rc)
6823: {
1.1.1.2 ! misho 6824: /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
! 6825: the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
! 6826: entirely. The only way we can do that is to re-do the match at the same
! 6827: point, with a flag to force SKIP with an argument to be ignored. Just
! 6828: treating this case as NOMATCH does not work because it does not check other
! 6829: alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
! 6830:
! 6831: case MATCH_SKIP_ARG:
! 6832: new_start_match = start_match;
! 6833: md->ignore_skip_arg = TRUE;
! 6834: break;
! 6835:
1.1 misho 6836: /* SKIP passes back the next starting point explicitly, but if it is the
6837: same as the match we have just done, treat it as NOMATCH. */
6838:
6839: case MATCH_SKIP:
6840: if (md->start_match_ptr != start_match)
6841: {
6842: new_start_match = md->start_match_ptr;
6843: break;
6844: }
6845: /* Fall through */
6846:
6847: /* NOMATCH and PRUNE advance by one character. THEN at this level acts
1.1.1.2 ! misho 6848: exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
1.1 misho 6849:
6850: case MATCH_NOMATCH:
6851: case MATCH_PRUNE:
6852: case MATCH_THEN:
1.1.1.2 ! misho 6853: md->ignore_skip_arg = FALSE;
1.1 misho 6854: new_start_match = start_match + 1;
1.1.1.2 ! misho 6855: #ifdef SUPPORT_UTF
! 6856: if (utf)
! 6857: ACROSSCHAR(new_start_match < end_subject, *new_start_match,
! 6858: new_start_match++);
1.1 misho 6859: #endif
6860: break;
6861:
6862: /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6863:
6864: case MATCH_COMMIT:
6865: rc = MATCH_NOMATCH;
6866: goto ENDLOOP;
6867:
6868: /* Any other return is either a match, or some kind of error. */
6869:
6870: default:
6871: goto ENDLOOP;
6872: }
6873:
6874: /* Control reaches here for the various types of "no match at this point"
6875: result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6876:
6877: rc = MATCH_NOMATCH;
6878:
6879: /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6880: newline in the subject (though it may continue over the newline). Therefore,
6881: if we have just failed to match, starting at a newline, do not continue. */
6882:
6883: if (firstline && IS_NEWLINE(start_match)) break;
6884:
6885: /* Advance to new matching position */
6886:
6887: start_match = new_start_match;
6888:
6889: /* Break the loop if the pattern is anchored or if we have passed the end of
6890: the subject. */
6891:
6892: if (anchored || start_match > end_subject) break;
6893:
6894: /* If we have just passed a CR and we are now at a LF, and the pattern does
6895: not contain any explicit matches for \r or \n, and the newline option is CRLF
1.1.1.2 ! misho 6896: or ANY or ANYCRLF, advance the match position by one more character. In
! 6897: normal matching start_match will always be greater than the first position at
! 6898: this stage, but a failed *SKIP can cause a return at the same point, which is
! 6899: why the first test exists. */
1.1 misho 6900:
1.1.1.2 ! misho 6901: if (start_match > (PCRE_PUCHAR)subject + start_offset &&
! 6902: start_match[-1] == CHAR_CR &&
1.1 misho 6903: start_match < end_subject &&
6904: *start_match == CHAR_NL &&
6905: (re->flags & PCRE_HASCRORLF) == 0 &&
6906: (md->nltype == NLTYPE_ANY ||
6907: md->nltype == NLTYPE_ANYCRLF ||
6908: md->nllen == 2))
6909: start_match++;
6910:
6911: md->mark = NULL; /* Reset for start of next match attempt */
6912: } /* End of for(;;) "bumpalong" loop */
6913:
6914: /* ==========================================================================*/
6915:
6916: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6917: conditions is true:
6918:
6919: (1) The pattern is anchored or the match was failed by (*COMMIT);
6920:
6921: (2) We are past the end of the subject;
6922:
6923: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6924: this option requests that a match occur at or before the first newline in
6925: the subject.
6926:
6927: When we have a match and the offset vector is big enough to deal with any
6928: backreferences, captured substring offsets will already be set up. In the case
6929: where we had to get some local store to hold offsets for backreference
6930: processing, copy those that we can. In this case there need not be overflow if
6931: certain parts of the pattern were not used, even though there are more
6932: capturing parentheses than vector slots. */
6933:
6934: ENDLOOP:
6935:
6936: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6937: {
6938: if (using_temporary_offsets)
6939: {
1.1.1.2 ! misho 6940: if (arg_offset_max >= 4)
1.1 misho 6941: {
6942: memcpy(offsets + 2, md->offset_vector + 2,
1.1.1.2 ! misho 6943: (arg_offset_max - 2) * sizeof(int));
1.1 misho 6944: DPRINTF(("Copied offsets from temporary memory\n"));
6945: }
1.1.1.2 ! misho 6946: if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
1.1 misho 6947: DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho 6948: (PUBL(free))(md->offset_vector);
1.1 misho 6949: }
6950:
1.1.1.2 ! misho 6951: /* Set the return code to the number of captured strings, or 0 if there were
1.1 misho 6952: too many to fit into the vector. */
6953:
1.1.1.2 ! misho 6954: rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
! 6955: 0 : md->end_offset_top/2;
! 6956:
! 6957: /* If there is space in the offset vector, set any unused pairs at the end of
! 6958: the pattern to -1 for backwards compatibility. It is documented that this
! 6959: happens. In earlier versions, the whole set of potential capturing offsets
! 6960: was set to -1 each time round the loop, but this is handled differently now.
! 6961: "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
! 6962: those at the end that need unsetting here. We can't just unset them all at
! 6963: the start of the whole thing because they may get set in one branch that is
! 6964: not the final matching branch. */
! 6965:
! 6966: if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
! 6967: {
! 6968: register int *iptr, *iend;
! 6969: int resetcount = 2 + re->top_bracket * 2;
! 6970: if (resetcount > offsetcount) resetcount = offsetcount;
! 6971: iptr = offsets + md->end_offset_top;
! 6972: iend = offsets + resetcount;
! 6973: while (iptr < iend) *iptr++ = -1;
! 6974: }
1.1 misho 6975:
6976: /* If there is space, set up the whole thing as substring 0. The value of
6977: md->start_match_ptr might be modified if \K was encountered on the successful
6978: matching path. */
6979:
6980: if (offsetcount < 2) rc = 0; else
6981: {
6982: offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6983: offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6984: }
6985:
1.1.1.2 ! misho 6986: /* Return MARK data if requested */
! 6987:
! 6988: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
! 6989: *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1 misho 6990: DPRINTF((">>>> returning %d\n", rc));
1.1.1.2 ! misho 6991: #ifdef NO_RECURSE
! 6992: release_match_heapframes(&frame_zero);
! 6993: #endif
! 6994: return rc;
1.1 misho 6995: }
6996:
6997: /* Control gets here if there has been an error, or if the overall match
6998: attempt has failed at all permitted starting positions. */
6999:
7000: if (using_temporary_offsets)
7001: {
7002: DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho 7003: (PUBL(free))(md->offset_vector);
1.1 misho 7004: }
7005:
7006: /* For anything other than nomatch or partial match, just return the code. */
7007:
7008: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7009: {
7010: DPRINTF((">>>> error: returning %d\n", rc));
1.1.1.2 ! misho 7011: #ifdef NO_RECURSE
! 7012: release_match_heapframes(&frame_zero);
! 7013: #endif
1.1 misho 7014: return rc;
7015: }
7016:
7017: /* Handle partial matches - disable any mark data */
7018:
7019: if (start_partial != NULL)
7020: {
7021: DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7022: md->mark = NULL;
7023: if (offsetcount > 1)
7024: {
1.1.1.2 ! misho 7025: offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
! 7026: offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.1 misho 7027: }
7028: rc = PCRE_ERROR_PARTIAL;
7029: }
7030:
7031: /* This is the classic nomatch case */
7032:
7033: else
7034: {
7035: DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7036: rc = PCRE_ERROR_NOMATCH;
7037: }
7038:
7039: /* Return the MARK data if it has been requested. */
7040:
7041: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2 ! misho 7042: *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
! 7043: #ifdef NO_RECURSE
! 7044: release_match_heapframes(&frame_zero);
! 7045: #endif
1.1 misho 7046: return rc;
7047: }
7048:
7049: /* End of pcre_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>