Annotation of embedaddon/pcre/pcre_exec.c, revision 1.1.1.2
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
1.1.1.2 ! misho 9: Copyright (c) 1997-2012 University of Cambridge
1.1 misho 10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains pcre_exec(), the externally visible function that does
42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43: possible. There are also some static supporting functions. */
44:
45: #ifdef HAVE_CONFIG_H
46: #include "config.h"
47: #endif
48:
49: #define NLBLOCK md /* Block containing newline information */
50: #define PSSTART start_subject /* Field containing processed string start */
51: #define PSEND end_subject /* Field containing processed string end */
52:
53: #include "pcre_internal.h"
54:
55: /* Undefine some potentially clashing cpp symbols */
56:
57: #undef min
58: #undef max
59:
60: /* Values for setting in md->match_function_type to indicate two special types
61: of call to match(). We do it this way to save on using another stack variable,
62: as stack usage is to be discouraged. */
63:
64: #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
65: #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
66:
67: /* Non-error returns from the match() function. Error returns are externally
68: defined PCRE_ERROR_xxx codes, which are all negative. */
69:
70: #define MATCH_MATCH 1
71: #define MATCH_NOMATCH 0
72:
73: /* Special internal returns from the match() function. Make them sufficiently
74: negative to avoid the external error codes. */
75:
76: #define MATCH_ACCEPT (-999)
77: #define MATCH_COMMIT (-998)
78: #define MATCH_KETRPOS (-997)
79: #define MATCH_ONCE (-996)
80: #define MATCH_PRUNE (-995)
81: #define MATCH_SKIP (-994)
82: #define MATCH_SKIP_ARG (-993)
83: #define MATCH_THEN (-992)
84:
85: /* Maximum number of ints of offset to save on the stack for recursive calls.
86: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
87: because the offset vector is always a multiple of 3 long. */
88:
89: #define REC_STACK_SAVE_MAX 30
90:
/* Lower and upper bounds for the common repeats, one entry per repeat kind.
An upper bound of 0 stands for "no limit". */

static const char rep_min[] = {
  0, 0,    /* entries 0-1: at least 0 */
  1, 1,    /* entries 2-3: at least 1 */
  0, 0     /* entries 4-5: at least 0 */
};

static const char rep_max[] = {
  0, 0,    /* entries 0-1: no limit */
  0, 0,    /* entries 2-3: no limit */
  1, 1     /* entries 4-5: at most 1 */
};
95:
96:
97:
98: #ifdef PCRE_DEBUG
99: /*************************************************
100: * Debugging function to print chars *
101: *************************************************/
102:
103: /* Print a sequence of chars in printable format, stopping at the end of the
104: subject if the requested.
105:
106: Arguments:
107: p points to characters
108: length number to print
109: is_subject TRUE if printing from within md->start_subject
110: md pointer to matching data block, if is_subject is TRUE
111:
112: Returns: nothing
113: */
114:
115: static void
1.1.1.2 ! misho 116: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1 misho 117: {
118: unsigned int c;
119: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
120: while (length-- > 0)
121: if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
122: }
123: #endif
124:
125:
126:
127: /*************************************************
128: * Match a back-reference *
129: *************************************************/
130:
131: /* Normally, if a back reference hasn't been set, the length that is passed is
132: negative, so the match always fails. However, in JavaScript compatibility mode,
133: the length passed is zero. Note that in caseless UTF-8 mode, the number of
134: subject bytes matched may be different to the number of reference bytes.
135:
136: Arguments:
137: offset index into the offset vector
138: eptr pointer into the subject
139: length length of reference to be matched (number of bytes)
140: md points to match data block
141: caseless TRUE if caseless
142:
143: Returns: < 0 if not matched, otherwise the number of subject bytes matched
144: */
145:
146: static int
1.1.1.2 ! misho 147: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
1.1 misho 148: BOOL caseless)
149: {
1.1.1.2 ! misho 150: PCRE_PUCHAR eptr_start = eptr;
! 151: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
1.1 misho 152:
153: #ifdef PCRE_DEBUG
154: if (eptr >= md->end_subject)
155: printf("matching subject <null>");
156: else
157: {
158: printf("matching subject ");
159: pchars(eptr, length, TRUE, md);
160: }
161: printf(" against backref ");
162: pchars(p, length, FALSE, md);
163: printf("\n");
164: #endif
165:
166: /* Always fail if reference not set (and not JavaScript compatible). */
167:
168: if (length < 0) return -1;
169:
170: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171: properly if Unicode properties are supported. Otherwise, we can check only
172: ASCII characters. */
173:
174: if (caseless)
175: {
1.1.1.2 ! misho 176: #ifdef SUPPORT_UTF
1.1 misho 177: #ifdef SUPPORT_UCP
1.1.1.2 ! misho 178: if (md->utf)
1.1 misho 179: {
180: /* Match characters up to the end of the reference. NOTE: the number of
181: bytes matched may differ, because there are some characters whose upper and
182: lower case versions code as different numbers of bytes. For example, U+023A
183: (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
184: a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
185: the latter. It is important, therefore, to check the length along the
186: reference, not along the subject (earlier code did this wrong). */
187:
1.1.1.2 ! misho 188: PCRE_PUCHAR endptr = p + length;
1.1 misho 189: while (p < endptr)
190: {
191: int c, d;
192: if (eptr >= md->end_subject) return -1;
193: GETCHARINC(c, eptr);
194: GETCHARINC(d, p);
195: if (c != d && c != UCD_OTHERCASE(d)) return -1;
196: }
197: }
198: else
199: #endif
200: #endif
201:
202: /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
203: is no UCP support. */
204: {
205: if (eptr + length > md->end_subject) return -1;
206: while (length-- > 0)
1.1.1.2 ! misho 207: {
! 208: if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
! 209: p++;
! 210: eptr++;
! 211: }
1.1 misho 212: }
213: }
214:
215: /* In the caseful case, we can just compare the bytes, whether or not we
216: are in UTF-8 mode. */
217:
218: else
219: {
220: if (eptr + length > md->end_subject) return -1;
221: while (length-- > 0) if (*p++ != *eptr++) return -1;
222: }
223:
224: return (int)(eptr - eptr_start);
225: }
226:
227:
228:
229: /***************************************************************************
230: ****************************************************************************
231: RECURSION IN THE match() FUNCTION
232:
233: The match() function is highly recursive, though not every recursive call
234: increases the recursive depth. Nevertheless, some regular expressions can cause
235: it to recurse to a great depth. I was writing for Unix, so I just let it call
236: itself recursively. This uses the stack for saving everything that has to be
237: saved for a recursive call. On Unix, the stack can be large, and this works
238: fine.
239:
240: It turns out that on some non-Unix-like systems there are problems with
241: programs that use a lot of stack. (This despite the fact that every last chip
242: has oodles of memory these days, and techniques for extending the stack have
243: been known for decades.) So....
244:
245: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246: calls by keeping local variables that need to be preserved in blocks of memory
247: obtained from malloc() instead of on the stack. Macros are used to
248: achieve this so that the actual code doesn't look very different to what it
249: always used to.
250:
251: The original heap-recursive code used longjmp(). However, it seems that this
252: can be very slow on some operating systems. Following a suggestion from Stan
253: Switzer, the use of longjmp() has been abolished, at the cost of having to
254: provide a unique number for each call to RMATCH. There is no way of generating
255: a sequence of numbers at compile time in C. I have given them names, to make
256: them stand out more clearly.
257:
258: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260: tests. Furthermore, not using longjmp() means that local dynamic variables
261: don't have indeterminate values; this has meant that the frame size can be
262: reduced because the result can be "passed back" by straight setting of the
263: variable instead of being passed in the frame.
264: ****************************************************************************
265: ***************************************************************************/
266:
267: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268: below must be updated in sync. */
269:
/* One identifier for each RMATCH call, numbered consecutively from 1. Every
row restates the value of its first member so that the numbering can be checked
at a glance. */

enum {
  RM1  =  1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  RM61 = 61, RM62, RM63, RM64, RM65, RM66
};
277:
278: /* These versions of the macros use the stack, as normal. There are debugging
279: versions and production versions. Note that the "rw" argument of RMATCH isn't
280: actually used in this definition. */
281:
282: #ifndef NO_RECURSE
/* In this build REGISTER expands to the "register" storage class; match()
applies it to its eptr and ecode parameters. */
283: #define REGISTER register
284:
285: #ifdef PCRE_DEBUG
/* Debugging RMATCH: a genuine recursive call of match(), bracketed by trace
output giving the source line. The result is left in rrc. The current mstart
is passed on unchanged and the depth is rdepth+1; rw is ignored. */
286: #define RMATCH(ra,rb,rc,rd,re,rw) \
287: { \
288: printf("match() called in line %d\n", __LINE__); \
289: rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
290: printf("to line %d\n", __LINE__); \
291: }
/* Debugging RRETURN: trace the value and the source line, then return. Note
that ra appears twice in the expansion, so it is evaluated twice. */
292: #define RRETURN(ra) \
293: { \
294: printf("match() returned %d from line %d ", ra, __LINE__); \
295: return ra; \
296: }
297: #else
/* Production RMATCH: a plain recursive call of match() whose result is left in
rrc. The expansion is a single expression statement without a trailing
semicolon; rw is ignored. */
298: #define RMATCH(ra,rb,rc,rd,re,rw) \
299: rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
/* Production RRETURN: an ordinary return statement. */
300: #define RRETURN(ra) return ra
301: #endif
302:
303: #else
304:
305:
306: /* These versions of the macros manage a private stack on the heap. Note that
307: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308: argument of match(), which never changes. */
309:
/* In this build REGISTER expands to nothing. */
310: #define REGISTER
311:
/* RMATCH, heap version: imitate a call of match() without using the C stack.
A new heapframe is obtained through PUBL(stack_malloc); if that fails, the
current "call" returns PCRE_ERROR_NOMEMORY via RRETURN. Otherwise the resume
number rw is recorded in the current frame, the new frame is filled in with
ra, rb, rc and re together with the current mstart and a depth one greater
than the current one, it is linked back to the current frame and made current,
and control jumps to HEAP_RECURSE. The label L_##rw marks the point at which
execution continues afterwards.
NOTE(review): the jump back to L_##rw is presumably made by the code at
HEAP_RETURN, which is not visible here - confirm. */
312: #define RMATCH(ra,rb,rc,rd,re,rw)\
313: {\
1.1.1.2 ! misho 314: heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
1.1 misho 315: if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316: frame->Xwhere = rw; \
317: newframe->Xeptr = ra;\
318: newframe->Xecode = rb;\
319: newframe->Xmstart = mstart;\
320: newframe->Xoffset_top = rc;\
321: newframe->Xeptrb = re;\
322: newframe->Xrdepth = frame->Xrdepth + 1;\
323: newframe->Xprevframe = frame;\
324: frame = newframe;\
325: DPRINTF(("restarting from line %d\n", __LINE__));\
326: goto HEAP_RECURSE;\
327: L_##rw:\
328: DPRINTF(("jumped back to line %d\n", __LINE__));\
329: }
330:
/* RRETURN, heap version: imitate a return from match(). The current frame is
unlinked and, unless it is frame_zero, handed to PUBL(stack_free). If there is
a previous frame, the result is put in rrc and control jumps to HEAP_RETURN;
otherwise a real return of ra is done. Note that ra is evaluated only after
"frame" has been switched to the previous frame. */
331: #define RRETURN(ra)\
332: {\
333: heapframe *oldframe = frame;\
334: frame = oldframe->Xprevframe;\
1.1.1.2 ! misho 335: if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
1.1 misho 336: if (frame != NULL)\
337: {\
338: rrc = ra;\
339: goto HEAP_RETURN;\
340: }\
341: return ra;\
342: }
343:
344:
345: /* Structure for remembering the local variables in a private frame */
346:
/* One frame of the heap-based replacement for match()'s stack frame; this
structure is defined only when NO_RECURSE is defined. Every member has an X
prefix: in that build match() defines macros with the unprefixed names (eptr,
ecode, length, ...) that expand to frame->X<name>, so the body of match() can
be written as though these were ordinary variables. */
347: typedef struct heapframe {
/* Frame that was current when RMATCH created this one; match() sets it to
NULL in its top-level frame. */
348: struct heapframe *Xprevframe;
349:
350: /* Function arguments that may change */
351:
1.1.1.2 ! misho 352: PCRE_PUCHAR Xeptr;
! 353: const pcre_uchar *Xecode;
! 354: PCRE_PUCHAR Xmstart;
1.1 misho 355: int Xoffset_top;
356: eptrblock *Xeptrb;
/* RMATCH sets this to the creating frame's depth plus one. */
357: unsigned int Xrdepth;
358:
359: /* Function local variables */
360:
1.1.1.2 ! misho 361: PCRE_PUCHAR Xcallpat;
! 362: #ifdef SUPPORT_UTF
! 363: PCRE_PUCHAR Xcharptr;
! 364: #endif
! 365: PCRE_PUCHAR Xdata;
! 366: PCRE_PUCHAR Xnext;
! 367: PCRE_PUCHAR Xpp;
! 368: PCRE_PUCHAR Xprev;
! 369: PCRE_PUCHAR Xsaved_eptr;
1.1 misho 370:
371: recursion_info Xnew_recursive;
372:
373: BOOL Xcur_is_word;
374: BOOL Xcondition;
375: BOOL Xprev_is_word;
376:
377: #ifdef SUPPORT_UCP
378: int Xprop_type;
379: int Xprop_value;
380: int Xprop_fail_result;
381: int Xoclength;
1.1.1.2 ! misho 382: pcre_uchar Xocchars[6];
1.1 misho 383: #endif
384:
385: int Xcodelink;
386: int Xctype;
/* fc and fi have members of their own here; in the recursive build match()
defines them as aliases for its locals c and i instead. */
387: unsigned int Xfc;
388: int Xfi;
389: int Xlength;
390: int Xmax;
391: int Xmin;
392: int Xnumber;
393: int Xoffset;
394: int Xop;
395: int Xsave_capture_last;
396: int Xsave_offset1, Xsave_offset2, Xsave_offset3;
397: int Xstacksave[REC_STACK_SAVE_MAX];
398:
399: eptrblock Xnewptrb;
400:
401: /* Where to jump back to */
402:
/* RMATCH stores its rw number (one of the RMn values) here, in the frame that
makes the "call", before switching to the new frame. */
403: int Xwhere;
404:
405: } heapframe;
406:
407: #endif
408:
409:
410: /***************************************************************************
411: ***************************************************************************/
412:
413:
414:
415: /*************************************************
416: * Match from current position *
417: *************************************************/
418:
419: /* This function is called recursively in many circumstances. Whenever it
420: returns a negative (error) response, the outer incarnation must also return the
421: same response. */
422:
423: /* These macros pack up tests that are used for partial matching, and which
424: appear several times in the code. We set the "hit end" flag if the pointer is
425: at the end of the subject and also past the start of the subject (i.e.
426: something has been matched). For hard partial matching, we then return
427: immediately. The second one is used when we already know we are past the end of
428: the subject. */
429:
/* CHECK_PARTIAL: when partial matching is enabled (md->partial != 0), eptr is
at or beyond md->end_subject, and eptr is beyond md->start_used_ptr, set
md->hitend; if md->partial is greater than 1, also return PCRE_ERROR_PARTIAL
through RRETURN. The expansion is a bare "if" statement, so an "else" written
directly after a use of the macro would attach to it. */
430: #define CHECK_PARTIAL()\
431: if (md->partial != 0 && eptr >= md->end_subject && \
432: eptr > md->start_used_ptr) \
433: { \
434: md->hitend = TRUE; \
435: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
436: }
437:
/* SCHECK_PARTIAL: the same as CHECK_PARTIAL except that the comparison of eptr
with md->end_subject is left out; it is for places where the caller already
knows that the end of the subject has been reached. */
438: #define SCHECK_PARTIAL()\
439: if (md->partial != 0 && eptr > md->start_used_ptr) \
440: { \
441: md->hitend = TRUE; \
442: if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
443: }
444:
445:
446: /* Performance note: It might be tempting to extract commonly used fields from
1.1.1.2 ! misho 447: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1 misho 448: performance. Tests using gcc on a SPARC disproved this; in the first case, it
449: made performance worse.
450:
451: Arguments:
452: eptr pointer to current character in subject
453: ecode pointer to current position in compiled code
454: mstart pointer to the current match start position (can be modified
455: by encountering \K)
456: offset_top current top pointer
457: md pointer to "static" info for the match
458: eptrb pointer to chain of blocks containing eptr at start of
459: brackets - for testing for empty matches
460: rdepth the recursion depth
461:
462: Returns: MATCH_MATCH if matched ) these values are >= 0
463: MATCH_NOMATCH if failed to match )
464: a negative MATCH_xxx value for PRUNE, SKIP, etc
465: a negative PCRE_ERROR_xxx value if aborted by an error condition
466: (e.g. stopped by repeated call or recursion limit)
467: */
468:
469: static int
1.1.1.2 ! misho 470: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
! 471: PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
! 472: unsigned int rdepth)
1.1 misho 473: {
474: /* These variables do not need to be preserved over recursion in this function,
475: so they can be ordinary variables in all cases. Mark some of them with
476: "register" because they are used a lot in loops. */
477:
478: register int rrc; /* Returns from recursive calls */
479: register int i; /* Used for loops not involving calls to RMATCH() */
480: register unsigned int c; /* Character values not kept over RMATCH() calls */
1.1.1.2 ! misho 481: register BOOL utf; /* Local copy of UTF flag for speed */
1.1 misho 482:
483: BOOL minimize, possessive; /* Quantifier options */
484: BOOL caseless;
485: int condcode;
486:
487: /* When recursion is not being used, all "local" variables that have to be
1.1.1.2 ! misho 488: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
! 489: frame on the stack here; subsequent instantiations are obtained from the heap
! 490: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
! 491: the top-level on the stack rather than malloc-ing them all gives a performance
! 492: boost in many cases where there is not much "recursion". */
1.1 misho 493:
494: #ifdef NO_RECURSE
1.1.1.2 ! misho 495: heapframe frame_zero;
! 496: heapframe *frame = &frame_zero;
1.1 misho 497: frame->Xprevframe = NULL; /* Marks the top level */
498:
499: /* Copy in the original argument variables */
500:
501: frame->Xeptr = eptr;
502: frame->Xecode = ecode;
503: frame->Xmstart = mstart;
504: frame->Xoffset_top = offset_top;
505: frame->Xeptrb = eptrb;
506: frame->Xrdepth = rdepth;
507:
508: /* This is where control jumps back to to effect "recursion" */
509:
510: HEAP_RECURSE:
511:
512: /* Macros make the argument variables come from the current frame */
513:
514: #define eptr frame->Xeptr
515: #define ecode frame->Xecode
516: #define mstart frame->Xmstart
517: #define offset_top frame->Xoffset_top
518: #define eptrb frame->Xeptrb
519: #define rdepth frame->Xrdepth
520:
521: /* Ditto for the local variables */
522:
1.1.1.2 ! misho 523: #ifdef SUPPORT_UTF
1.1 misho 524: #define charptr frame->Xcharptr
525: #endif
526: #define callpat frame->Xcallpat
527: #define codelink frame->Xcodelink
528: #define data frame->Xdata
529: #define next frame->Xnext
530: #define pp frame->Xpp
531: #define prev frame->Xprev
532: #define saved_eptr frame->Xsaved_eptr
533:
534: #define new_recursive frame->Xnew_recursive
535:
536: #define cur_is_word frame->Xcur_is_word
537: #define condition frame->Xcondition
538: #define prev_is_word frame->Xprev_is_word
539:
540: #ifdef SUPPORT_UCP
541: #define prop_type frame->Xprop_type
542: #define prop_value frame->Xprop_value
543: #define prop_fail_result frame->Xprop_fail_result
544: #define oclength frame->Xoclength
545: #define occhars frame->Xocchars
546: #endif
547:
548: #define ctype frame->Xctype
549: #define fc frame->Xfc
550: #define fi frame->Xfi
551: #define length frame->Xlength
552: #define max frame->Xmax
553: #define min frame->Xmin
554: #define number frame->Xnumber
555: #define offset frame->Xoffset
556: #define op frame->Xop
557: #define save_capture_last frame->Xsave_capture_last
558: #define save_offset1 frame->Xsave_offset1
559: #define save_offset2 frame->Xsave_offset2
560: #define save_offset3 frame->Xsave_offset3
561: #define stacksave frame->Xstacksave
562:
563: #define newptrb frame->Xnewptrb
564:
565: /* When recursion is being used, local variables are allocated on the stack and
566: get preserved during recursion in the normal way. In this environment, fi and
567: i, and fc and c, can be the same variables. */
568:
569: #else /* NO_RECURSE not defined */
570: #define fi i
571: #define fc c
572:
573: /* Many of the following variables are used only in small blocks of the code.
574: My normal style of coding would have declared them within each of those blocks.
575: However, in order to accommodate the version of this code that uses an external
576: "stack" implemented on the heap, it is easier to declare them all here, so the
577: declarations can be cut out in a block. The only declarations within blocks
578: below are for variables that do not have to be preserved over a recursive call
579: to RMATCH(). */
580:
1.1.1.2 ! misho 581: #ifdef SUPPORT_UTF
! 582: const pcre_uchar *charptr;
1.1 misho 583: #endif
1.1.1.2 ! misho 584: const pcre_uchar *callpat;
! 585: const pcre_uchar *data;
! 586: const pcre_uchar *next;
! 587: PCRE_PUCHAR pp;
! 588: const pcre_uchar *prev;
! 589: PCRE_PUCHAR saved_eptr;
1.1 misho 590:
591: recursion_info new_recursive;
592:
593: BOOL cur_is_word;
594: BOOL condition;
595: BOOL prev_is_word;
596:
597: #ifdef SUPPORT_UCP
598: int prop_type;
599: int prop_value;
600: int prop_fail_result;
601: int oclength;
1.1.1.2 ! misho 602: pcre_uchar occhars[6];
1.1 misho 603: #endif
604:
605: int codelink;
606: int ctype;
607: int length;
608: int max;
609: int min;
610: int number;
611: int offset;
612: int op;
613: int save_capture_last;
614: int save_offset1, save_offset2, save_offset3;
615: int stacksave[REC_STACK_SAVE_MAX];
616:
617: eptrblock newptrb;
1.1.1.2 ! misho 618:
! 619: /* There is a special fudge for calling match() in a way that causes it to
! 620: measure the size of its basic stack frame when the stack is being used for
! 621: recursion. The second argument (ecode) being NULL triggers this behaviour. It
! 622: cannot normally ever be NULL. The return is the negated value of the frame
! 623: size. */
! 624:
! 625: if (ecode == NULL)
! 626: {
! 627: if (rdepth == 0)
! 628: return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
! 629: else
! 630: {
! 631: int len = (char *)&rdepth - (char *)eptr;
! 632: return (len > 0)? -len : len;
! 633: }
! 634: }
1.1 misho 635: #endif /* NO_RECURSE */
636:
637: /* To save space on the stack and in the heap frame, I have doubled up on some
638: of the local variables that are used only in localised parts of the code, but
639: still need to be preserved over recursive calls of match(). These macros define
640: the alternative names that are used. */
641:
642: #define allow_zero cur_is_word
643: #define cbegroup condition
644: #define code_offset codelink
645: #define condassert condition
646: #define matched_once prev_is_word
1.1.1.2 ! misho 647: #define foc number
! 648: #define save_mark data
1.1 misho 649:
650: /* These statements are here to stop the compiler complaining about uninitialized
651: variables. */
652:
653: #ifdef SUPPORT_UCP
654: prop_value = 0;
655: prop_fail_result = 0;
656: #endif
657:
658:
659: /* This label is used for tail recursion, which is used in a few cases even
660: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
661: used. Thanks to Ian Taylor for noticing this possibility and sending the
662: original patch. */
663:
664: TAIL_RECURSE:
665:
666: /* OK, now we can get on with the real code of the function. Recursive calls
667: are specified by the macro RMATCH and RRETURN is used to return. When
668: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
669: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
670: defined). However, RMATCH isn't like a function call because it's quite a
671: complicated macro. It has to be used in one particular way. This shouldn't,
672: however, impact performance when true recursion is being used. */
673:
1.1.1.2 ! misho 674: #ifdef SUPPORT_UTF
! 675: utf = md->utf; /* Local copy of the flag */
1.1 misho 676: #else
1.1.1.2 ! misho 677: utf = FALSE;
1.1 misho 678: #endif
679:
680: /* First check that we haven't called match() too many times, or that we
681: haven't exceeded the recursive call limit. */
682:
683: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
684: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
685:
686: /* At the start of a group with an unlimited repeat that may match an empty
687: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
688: done this way to save having to use another function argument, which would take
689: up space on the stack. See also MATCH_CONDASSERT below.
690:
691: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
692: such remembered pointers, to be checked when we hit the closing ket, in order
693: to break infinite loops that match no characters. When match() is called in
694: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
695: NOT be used with tail recursion, because the memory block that is used is on
696: the stack, so a new one may be required for each match(). */
697:
698: if (md->match_function_type == MATCH_CBEGROUP)
699: {
700: newptrb.epb_saved_eptr = eptr;
701: newptrb.epb_prev = eptrb;
702: eptrb = &newptrb;
703: md->match_function_type = 0;
704: }
705:
706: /* Now start processing the opcodes. */
707:
708: for (;;)
709: {
710: minimize = possessive = FALSE;
711: op = *ecode;
712:
713: switch(op)
714: {
715: case OP_MARK:
716: md->nomatch_mark = ecode + 2;
717: md->mark = NULL; /* In case previously set by assertion */
1.1.1.2 ! misho 718: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1 misho 719: eptrb, RM55);
720: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
721: md->mark == NULL) md->mark = ecode + 2;
722:
723: /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
724: argument, and we must check whether that argument matches this MARK's
725: argument. It is passed back in md->start_match_ptr (an overloading of that
726: variable). If it does match, we reset that variable to the current subject
727: position and return MATCH_SKIP. Otherwise, pass back the return code
728: unaltered. */
729:
730: else if (rrc == MATCH_SKIP_ARG &&
1.1.1.2 ! misho 731: STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
1.1 misho 732: {
733: md->start_match_ptr = eptr;
734: RRETURN(MATCH_SKIP);
735: }
736: RRETURN(rrc);
737:
738: case OP_FAIL:
739: RRETURN(MATCH_NOMATCH);
740:
741: /* COMMIT overrides PRUNE, SKIP, and THEN */
742:
743: case OP_COMMIT:
1.1.1.2 ! misho 744: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 745: eptrb, RM52);
746: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
747: rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
748: rrc != MATCH_THEN)
749: RRETURN(rrc);
750: RRETURN(MATCH_COMMIT);
751:
752: /* PRUNE overrides THEN */
753:
754: case OP_PRUNE:
1.1.1.2 ! misho 755: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 756: eptrb, RM51);
757: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
758: RRETURN(MATCH_PRUNE);
759:
760: case OP_PRUNE_ARG:
761: md->nomatch_mark = ecode + 2;
762: md->mark = NULL; /* In case previously set by assertion */
1.1.1.2 ! misho 763: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1 misho 764: eptrb, RM56);
765: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
766: md->mark == NULL) md->mark = ecode + 2;
767: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
768: RRETURN(MATCH_PRUNE);
769:
770: /* SKIP overrides PRUNE and THEN */
771:
772: case OP_SKIP:
1.1.1.2 ! misho 773: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 774: eptrb, RM53);
775: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
776: RRETURN(rrc);
777: md->start_match_ptr = eptr; /* Pass back current position */
778: RRETURN(MATCH_SKIP);
779:
780: /* Note that, for Perl compatibility, SKIP with an argument does NOT set
781: nomatch_mark. There is a flag that disables this opcode when re-matching a
782: pattern that ended with a SKIP for which there was not a matching MARK. */
783:
784: case OP_SKIP_ARG:
785: if (md->ignore_skip_arg)
786: {
1.1.1.2 ! misho 787: ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
1.1 misho 788: break;
789: }
1.1.1.2 ! misho 790: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
1.1 misho 791: eptrb, RM57);
792: if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
793: RRETURN(rrc);
794:
795: /* Pass back the current skip name by overloading md->start_match_ptr and
796: returning the special MATCH_SKIP_ARG return code. This will either be
797: caught by a matching MARK, or get to the top, where it causes a rematch
798: with the md->ignore_skip_arg flag set. */
799:
800: md->start_match_ptr = ecode + 2;
801: RRETURN(MATCH_SKIP_ARG);
802:
803: /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
804: the branch in which it occurs can be determined. Overload the start of
805: match pointer to do this. */
806:
807: case OP_THEN:
1.1.1.2 ! misho 808: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 809: eptrb, RM54);
810: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
811: md->start_match_ptr = ecode;
812: RRETURN(MATCH_THEN);
813:
814: case OP_THEN_ARG:
815: md->nomatch_mark = ecode + 2;
816: md->mark = NULL; /* In case previously set by assertion */
1.1.1.2 ! misho 817: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
1.1 misho 818: md, eptrb, RM58);
819: if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
820: md->mark == NULL) md->mark = ecode + 2;
821: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
822: md->start_match_ptr = ecode;
823: RRETURN(MATCH_THEN);
824:
825: /* Handle an atomic group that does not contain any capturing parentheses.
826: This can be handled like an assertion. Prior to 8.13, all atomic groups
827: were handled this way. In 8.13, the code was changed as below for ONCE, so
828: that backups pass through the group and thereby reset captured values.
829: However, this uses a lot more stack, so in 8.20, atomic groups that do not
830: contain any captures generate OP_ONCE_NC, which can be handled in the old,
831: less stack intensive way.
832:
833: Check the alternative branches in turn - the matching won't pass the KET
834: for this kind of subpattern. If any one branch matches, we carry on as at
835: the end of a normal bracket, leaving the subject pointer, but resetting
836: the start-of-match value in case it was changed by \K. */
837:
838: case OP_ONCE_NC:
839: prev = ecode;
840: saved_eptr = eptr;
1.1.1.2 ! misho 841: save_mark = md->mark;
1.1 misho 842: do
843: {
844: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
845: if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
846: {
847: mstart = md->start_match_ptr;
848: break;
849: }
850: if (rrc == MATCH_THEN)
851: {
852: next = ecode + GET(ecode,1);
853: if (md->start_match_ptr < next &&
854: (*ecode == OP_ALT || *next == OP_ALT))
855: rrc = MATCH_NOMATCH;
856: }
857:
858: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
859: ecode += GET(ecode,1);
1.1.1.2 ! misho 860: md->mark = save_mark;
1.1 misho 861: }
862: while (*ecode == OP_ALT);
863:
864: /* If we hit the end of the group (which could be repeated), fail */
865:
866: if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
867:
868: /* Continue as from after the group, updating the offsets high water
869: mark, since extracts may have been taken. */
870:
871: do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
872:
873: offset_top = md->end_offset_top;
874: eptr = md->end_match_ptr;
875:
876: /* For a non-repeating ket, just continue at this level. This also
877: happens for a repeating ket if no characters were matched in the group.
878: This is the forcible breaking of infinite loops as implemented in Perl
879: 5.005. */
880:
881: if (*ecode == OP_KET || eptr == saved_eptr)
882: {
883: ecode += 1+LINK_SIZE;
884: break;
885: }
886:
887: /* The repeating kets try the rest of the pattern or restart from the
888: preceding bracket, in the appropriate order. The second "call" of match()
889: uses tail recursion, to avoid using another stack frame. */
890:
891: if (*ecode == OP_KETRMIN)
892: {
893: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
894: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
895: ecode = prev;
896: goto TAIL_RECURSE;
897: }
898: else /* OP_KETRMAX */
899: {
900: md->match_function_type = MATCH_CBEGROUP;
901: RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
902: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
903: ecode += 1 + LINK_SIZE;
904: goto TAIL_RECURSE;
905: }
906: /* Control never gets here */
907:
908: /* Handle a capturing bracket, other than those that are possessive with an
909: unlimited repeat. If there is space in the offset vector, save the current
910: subject position in the working slot at the top of the vector. We mustn't
911: change the current values of the data slot, because they may be set from a
912: previous iteration of this group, and be referred to by a reference inside
913: the group. A failure to match might occur after the group has succeeded,
914: if something later on doesn't match. For this reason, we need to restore
915: the working value and also the values of the final offsets, in case they
916: were set by a previous iteration of the same bracket.
917:
918: If there isn't enough space in the offset vector, treat this as if it were
919: a non-capturing bracket. Don't worry about setting the flag for the error
920: case here; that is handled in the code for KET. */
921:
922: case OP_CBRA:
923: case OP_SCBRA:
924: number = GET2(ecode, 1+LINK_SIZE);
925: offset = number << 1;
926:
927: #ifdef PCRE_DEBUG
928: printf("start bracket %d\n", number);
929: printf("subject=");
930: pchars(eptr, 16, TRUE, md);
931: printf("\n");
932: #endif
933:
934: if (offset < md->offset_max)
935: {
936: save_offset1 = md->offset_vector[offset];
937: save_offset2 = md->offset_vector[offset+1];
938: save_offset3 = md->offset_vector[md->offset_end - number];
939: save_capture_last = md->capture_last;
1.1.1.2 ! misho 940: save_mark = md->mark;
1.1 misho 941:
942: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
943: md->offset_vector[md->offset_end - number] =
944: (int)(eptr - md->start_subject);
945:
946: for (;;)
947: {
948: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2 ! misho 949: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 950: eptrb, RM1);
951: if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
952:
953: /* If we backed up to a THEN, check whether it is within the current
954: branch by comparing the address of the THEN that is passed back with
955: the end of the branch. If it is within the current branch, and the
956: branch is one of two or more alternatives (it either starts or ends
957: with OP_ALT), we have reached the limit of THEN's action, so convert
958: the return code to NOMATCH, which will cause normal backtracking to
959: happen from now on. Otherwise, THEN is passed back to an outer
960: alternative. This implements Perl's treatment of parenthesized groups,
961: where a group not containing | does not affect the current alternative,
962: that is, (X) is NOT the same as (X|(*F)). */
963:
964: if (rrc == MATCH_THEN)
965: {
966: next = ecode + GET(ecode,1);
967: if (md->start_match_ptr < next &&
968: (*ecode == OP_ALT || *next == OP_ALT))
969: rrc = MATCH_NOMATCH;
970: }
971:
972: /* Anything other than NOMATCH is passed back. */
973:
974: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
975: md->capture_last = save_capture_last;
976: ecode += GET(ecode, 1);
1.1.1.2 ! misho 977: md->mark = save_mark;
1.1 misho 978: if (*ecode != OP_ALT) break;
979: }
980:
981: DPRINTF(("bracket %d failed\n", number));
982: md->offset_vector[offset] = save_offset1;
983: md->offset_vector[offset+1] = save_offset2;
984: md->offset_vector[md->offset_end - number] = save_offset3;
985:
986: /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
987:
988: RRETURN(rrc);
989: }
990:
991: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
992: as a non-capturing bracket. */
993:
994: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
995: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
996:
997: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
998:
999: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1000: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1001:
1002: /* Non-capturing or atomic group, except for possessive with unlimited
1003: repeat and ONCE group with no captures. Loop for all the alternatives.
1004:
1005: When we get to the final alternative within the brackets, we used to return
1006: the result of a recursive call to match() whatever happened so it was
1007: possible to reduce stack usage by turning this into a tail recursion,
1008: except in the case of a possibly empty group. However, now that there is
1009: the possibility of (*THEN) occurring in the final alternative, this
1010: optimization is no longer always possible.
1011:
1012: We can optimize if we know there are no (*THEN)s in the pattern; at present
1013: this is the best that can be done.
1014:
1015: MATCH_ONCE is returned when the end of an atomic group is successfully
1016: reached, but subsequent matching fails. It passes back up the tree (causing
1017: captured values to be reset) until the original atomic group level is
1018: reached. This is tested by comparing md->once_target with the start of the
1019: group. At this point, the return is converted into MATCH_NOMATCH so that
1020: previous backup points can be taken. */
1021:
1022: case OP_ONCE:
1023: case OP_BRA:
1024: case OP_SBRA:
1025: DPRINTF(("start non-capturing bracket\n"));
1026:
1027: for (;;)
1028: {
1029: if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1030:
1031: /* If this is not a possibly empty group, and there are no (*THEN)s in
1032: the pattern, and this is the final alternative, optimize as described
1033: above. */
1034:
1035: else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1036: {
1.1.1.2 ! misho 1037: ecode += PRIV(OP_lengths)[*ecode];
1.1 misho 1038: goto TAIL_RECURSE;
1039: }
1040:
1041: /* In all other cases, we have to make another call to match(). */
1042:
1.1.1.2 ! misho 1043: save_mark = md->mark;
! 1044: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1.1 misho 1045: RM2);
1046:
1047: /* See comment in the code for capturing groups above about handling
1048: THEN. */
1049:
1050: if (rrc == MATCH_THEN)
1051: {
1052: next = ecode + GET(ecode,1);
1053: if (md->start_match_ptr < next &&
1054: (*ecode == OP_ALT || *next == OP_ALT))
1055: rrc = MATCH_NOMATCH;
1056: }
1057:
1058: if (rrc != MATCH_NOMATCH)
1059: {
1060: if (rrc == MATCH_ONCE)
1061: {
1.1.1.2 ! misho 1062: const pcre_uchar *scode = ecode;
1.1 misho 1063: if (*scode != OP_ONCE) /* If not at start, find it */
1064: {
1065: while (*scode == OP_ALT) scode += GET(scode, 1);
1066: scode -= GET(scode, 1);
1067: }
1068: if (md->once_target == scode) rrc = MATCH_NOMATCH;
1069: }
1070: RRETURN(rrc);
1071: }
1072: ecode += GET(ecode, 1);
1.1.1.2 ! misho 1073: md->mark = save_mark;
1.1 misho 1074: if (*ecode != OP_ALT) break;
1075: }
1076:
1077: RRETURN(MATCH_NOMATCH);
1078:
1079: /* Handle possessive capturing brackets with an unlimited repeat. We come
1080: here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1081: handled similarly to the normal case above. However, the matching is
1082: different. The end of these brackets will always be OP_KETRPOS, which
1083: returns MATCH_KETRPOS without going further in the pattern. By this means
1084: we can handle the group by iteration rather than recursion, thereby
1085: reducing the amount of stack needed. */
1086:
1087: case OP_CBRAPOS:
1088: case OP_SCBRAPOS:
1089: allow_zero = FALSE;
1090:
1091: POSSESSIVE_CAPTURE:
1092: number = GET2(ecode, 1+LINK_SIZE);
1093: offset = number << 1;
1094:
1095: #ifdef PCRE_DEBUG
1096: printf("start possessive bracket %d\n", number);
1097: printf("subject=");
1098: pchars(eptr, 16, TRUE, md);
1099: printf("\n");
1100: #endif
1101:
1102: if (offset < md->offset_max)
1103: {
1104: matched_once = FALSE;
1105: code_offset = (int)(ecode - md->start_code);
1106:
1107: save_offset1 = md->offset_vector[offset];
1108: save_offset2 = md->offset_vector[offset+1];
1109: save_offset3 = md->offset_vector[md->offset_end - number];
1110: save_capture_last = md->capture_last;
1111:
1112: DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1113:
1114: /* Each time round the loop, save the current subject position for use
1115: when the group matches. For MATCH_MATCH, the group has matched, so we
1116: restart it with a new subject starting position, remembering that we had
1117: at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1118: usual. If we haven't matched any alternatives in any iteration, check to
1119: see if a previous iteration matched. If so, the group has matched;
1120: continue from afterwards. Otherwise it has failed; restore the previous
1121: capture values before returning NOMATCH. */
1122:
1123: for (;;)
1124: {
1125: md->offset_vector[md->offset_end - number] =
1126: (int)(eptr - md->start_subject);
1127: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2 ! misho 1128: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 1129: eptrb, RM63);
1130: if (rrc == MATCH_KETRPOS)
1131: {
1132: offset_top = md->end_offset_top;
1133: eptr = md->end_match_ptr;
1134: ecode = md->start_code + code_offset;
1135: save_capture_last = md->capture_last;
1136: matched_once = TRUE;
1137: continue;
1138: }
1139:
1140: /* See comment in the code for capturing groups above about handling
1141: THEN. */
1142:
1143: if (rrc == MATCH_THEN)
1144: {
1145: next = ecode + GET(ecode,1);
1146: if (md->start_match_ptr < next &&
1147: (*ecode == OP_ALT || *next == OP_ALT))
1148: rrc = MATCH_NOMATCH;
1149: }
1150:
1151: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1152: md->capture_last = save_capture_last;
1153: ecode += GET(ecode, 1);
1154: if (*ecode != OP_ALT) break;
1155: }
1156:
1157: if (!matched_once)
1158: {
1159: md->offset_vector[offset] = save_offset1;
1160: md->offset_vector[offset+1] = save_offset2;
1161: md->offset_vector[md->offset_end - number] = save_offset3;
1162: }
1163:
1164: if (allow_zero || matched_once)
1165: {
1166: ecode += 1 + LINK_SIZE;
1167: break;
1168: }
1169:
1170: RRETURN(MATCH_NOMATCH);
1171: }
1172:
1173: /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1174: as a non-capturing bracket. */
1175:
1176: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1177: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1178:
1179: DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1180:
1181: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1182: /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1183:
1184: /* Non-capturing possessive bracket with unlimited repeat. We come here
1185: from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1186: without the capturing complication. It is written out separately for speed
1187: and cleanliness. */
1188:
1189: case OP_BRAPOS:
1190: case OP_SBRAPOS:
1191: allow_zero = FALSE;
1192:
1193: POSSESSIVE_NON_CAPTURE:
1194: matched_once = FALSE;
1195: code_offset = (int)(ecode - md->start_code);
1196:
1197: for (;;)
1198: {
1199: if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2 ! misho 1200: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1.1 misho 1201: eptrb, RM48);
1202: if (rrc == MATCH_KETRPOS)
1203: {
1204: offset_top = md->end_offset_top;
1205: eptr = md->end_match_ptr;
1206: ecode = md->start_code + code_offset;
1207: matched_once = TRUE;
1208: continue;
1209: }
1210:
1211: /* See comment in the code for capturing groups above about handling
1212: THEN. */
1213:
1214: if (rrc == MATCH_THEN)
1215: {
1216: next = ecode + GET(ecode,1);
1217: if (md->start_match_ptr < next &&
1218: (*ecode == OP_ALT || *next == OP_ALT))
1219: rrc = MATCH_NOMATCH;
1220: }
1221:
1222: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1223: ecode += GET(ecode, 1);
1224: if (*ecode != OP_ALT) break;
1225: }
1226:
1227: if (matched_once || allow_zero)
1228: {
1229: ecode += 1 + LINK_SIZE;
1230: break;
1231: }
1232: RRETURN(MATCH_NOMATCH);
1233:
1234: /* Control never reaches here. */
1235:
1236: /* Conditional group: compilation checked that there are no more than
1237: two branches. If the condition is false, skipping the first branch takes us
1238: past the end if there is only one branch, but that's OK because that is
1239: exactly what going to the ket would do. */
1240:
1241: case OP_COND:
1242: case OP_SCOND:
1243: codelink = GET(ecode, 1);
1244:
1245: /* Because of the way auto-callout works during compile, a callout item is
1246: inserted between OP_COND and an assertion condition. */
1247:
1248: if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1249: {
1.1.1.2 ! misho 1250: if (PUBL(callout) != NULL)
1.1 misho 1251: {
1.1.1.2 ! misho 1252: PUBL(callout_block) cb;
1.1 misho 1253: cb.version = 2; /* Version 2 of the callout block */
1254: cb.callout_number = ecode[LINK_SIZE+2];
1255: cb.offset_vector = md->offset_vector;
1.1.1.2 ! misho 1256: #ifdef COMPILE_PCRE8
1.1 misho 1257: cb.subject = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho 1258: #else
! 1259: cb.subject = (PCRE_SPTR16)md->start_subject;
! 1260: #endif
1.1 misho 1261: cb.subject_length = (int)(md->end_subject - md->start_subject);
1262: cb.start_match = (int)(mstart - md->start_subject);
1263: cb.current_position = (int)(eptr - md->start_subject);
1264: cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1265: cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1266: cb.capture_top = offset_top/2;
1267: cb.capture_last = md->capture_last;
1268: cb.callout_data = md->callout_data;
1269: cb.mark = md->nomatch_mark;
1.1.1.2 ! misho 1270: if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1 misho 1271: if (rrc < 0) RRETURN(rrc);
1272: }
1.1.1.2 ! misho 1273: ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.1 misho 1274: }
1275:
1276: condcode = ecode[LINK_SIZE+1];
1277:
1278: /* Now see what the actual condition is */
1279:
1280: if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
1281: {
1282: if (md->recursive == NULL) /* Not recursing => FALSE */
1283: {
1284: condition = FALSE;
1285: ecode += GET(ecode, 1);
1286: }
1287: else
1288: {
1289: int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
1290: condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1291:
1292: /* If the test is for recursion into a specific subpattern, and it is
1293: false, but the test was set up by name, scan the table to see if the
1294: name refers to any other numbers, and test them. The condition is true
1295: if any one is set. */
1296:
1297: if (!condition && condcode == OP_NRREF)
1298: {
1.1.1.2 ! misho 1299: pcre_uchar *slotA = md->name_table;
1.1 misho 1300: for (i = 0; i < md->name_count; i++)
1301: {
1302: if (GET2(slotA, 0) == recno) break;
1303: slotA += md->name_entry_size;
1304: }
1305:
1306: /* Found a name for the number - there can be only one; duplicate
1307: names for different numbers are allowed, but not vice versa. First
1308: scan down for duplicates. */
1309:
1310: if (i < md->name_count)
1311: {
1.1.1.2 ! misho 1312: pcre_uchar *slotB = slotA;
1.1 misho 1313: while (slotB > md->name_table)
1314: {
1315: slotB -= md->name_entry_size;
1.1.1.2 ! misho 1316: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1317: {
1318: condition = GET2(slotB, 0) == md->recursive->group_num;
1319: if (condition) break;
1320: }
1321: else break;
1322: }
1323:
1324: /* Scan up for duplicates */
1325:
1326: if (!condition)
1327: {
1328: slotB = slotA;
1329: for (i++; i < md->name_count; i++)
1330: {
1331: slotB += md->name_entry_size;
1.1.1.2 ! misho 1332: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1333: {
1334: condition = GET2(slotB, 0) == md->recursive->group_num;
1335: if (condition) break;
1336: }
1337: else break;
1338: }
1339: }
1340: }
1341: }
1342:
1343: /* Choose the branch according to the condition */
1344:
1.1.1.2 ! misho 1345: ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1 misho 1346: }
1347: }
1348:
1349: else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1350: {
1351: offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1352: condition = offset < offset_top && md->offset_vector[offset] >= 0;
1353:
1354: /* If the numbered capture is unset, but the reference was by name,
1355: scan the table to see if the name refers to any other numbers, and test
1356: them. The condition is true if any one is set. This is tediously similar
1357: to the code above, but not close enough to try to amalgamate. */
1358:
1359: if (!condition && condcode == OP_NCREF)
1360: {
1361: int refno = offset >> 1;
1.1.1.2 ! misho 1362: pcre_uchar *slotA = md->name_table;
1.1 misho 1363:
1364: for (i = 0; i < md->name_count; i++)
1365: {
1366: if (GET2(slotA, 0) == refno) break;
1367: slotA += md->name_entry_size;
1368: }
1369:
1370: /* Found a name for the number - there can be only one; duplicate names
1371: for different numbers are allowed, but not vice versa. First scan down
1372: for duplicates. */
1373:
1374: if (i < md->name_count)
1375: {
1.1.1.2 ! misho 1376: pcre_uchar *slotB = slotA;
1.1 misho 1377: while (slotB > md->name_table)
1378: {
1379: slotB -= md->name_entry_size;
1.1.1.2 ! misho 1380: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1381: {
1382: offset = GET2(slotB, 0) << 1;
1383: condition = offset < offset_top &&
1384: md->offset_vector[offset] >= 0;
1385: if (condition) break;
1386: }
1387: else break;
1388: }
1389:
1390: /* Scan up for duplicates */
1391:
1392: if (!condition)
1393: {
1394: slotB = slotA;
1395: for (i++; i < md->name_count; i++)
1396: {
1397: slotB += md->name_entry_size;
1.1.1.2 ! misho 1398: if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.1 misho 1399: {
1400: offset = GET2(slotB, 0) << 1;
1401: condition = offset < offset_top &&
1402: md->offset_vector[offset] >= 0;
1403: if (condition) break;
1404: }
1405: else break;
1406: }
1407: }
1408: }
1409: }
1410:
1411: /* Choose the branch according to the condition */
1412:
1.1.1.2 ! misho 1413: ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1 misho 1414: }
1415:
1416: else if (condcode == OP_DEF) /* DEFINE - always false */
1417: {
1418: condition = FALSE;
1419: ecode += GET(ecode, 1);
1420: }
1421:
1422: /* The condition is an assertion. Call match() to evaluate it - setting
1423: md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1424: an assertion. */
1425:
1426: else
1427: {
1428: md->match_function_type = MATCH_CONDASSERT;
1429: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1430: if (rrc == MATCH_MATCH)
1431: {
1432: if (md->end_offset_top > offset_top)
1433: offset_top = md->end_offset_top; /* Captures may have happened */
1434: condition = TRUE;
1435: ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1436: while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1437: }
1438:
1439: /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1440: assertion; it is therefore treated as NOMATCH. */
1441:
1442: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1443: {
1444: RRETURN(rrc); /* Need braces because of following else */
1445: }
1446: else
1447: {
1448: condition = FALSE;
1449: ecode += codelink;
1450: }
1451: }
1452:
1453: /* We are now at the branch that is to be obeyed. As there is only one, can
1454: use tail recursion to avoid using another stack frame, except when there is
1455: unlimited repeat of a possibly empty group. In the latter case, a recursive
1456: call to match() is always required, unless the second alternative doesn't
1457: exist, in which case we can just plough on. Note that, for compatibility
1458: with Perl, the | in a conditional group is NOT treated as creating two
1459: alternatives. If a THEN is encountered in the branch, it propagates out to
1460: the enclosing alternative (unless nested in a deeper set of alternatives,
1461: of course). */
1462:
1463: if (condition || *ecode == OP_ALT)
1464: {
1465: if (op != OP_SCOND)
1466: {
1467: ecode += 1 + LINK_SIZE;
1468: goto TAIL_RECURSE;
1469: }
1470:
1471: md->match_function_type = MATCH_CBEGROUP;
1472: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1473: RRETURN(rrc);
1474: }
1475:
1476: /* Condition false & no alternative; continue after the group. */
1477:
1478: else
1479: {
1480: ecode += 1 + LINK_SIZE;
1481: }
1482: break;
1483:
1484:
1485: /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1486: to close any currently open capturing brackets. */
1487:
1488: case OP_CLOSE:
1489: number = GET2(ecode, 1);
1490: offset = number << 1;
1491:
1492: #ifdef PCRE_DEBUG
1493: printf("end bracket %d at *ACCEPT", number);
1494: printf("\n");
1495: #endif
1496:
1497: md->capture_last = number;
1498: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1499: {
1500: md->offset_vector[offset] =
1501: md->offset_vector[md->offset_end - number];
1502: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1503: if (offset_top <= offset) offset_top = offset + 2;
1504: }
1.1.1.2 ! misho 1505: ecode += 1 + IMM2_SIZE;
1.1 misho 1506: break;
1507:
1508:
1509: /* End of the pattern, either real or forced. */
1510:
1511: case OP_END:
1512: case OP_ACCEPT:
1513: case OP_ASSERT_ACCEPT:
1514:
1515: /* If we have matched an empty string, fail if not in an assertion and not
1516: in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1517: is set and we have matched at the start of the subject. In both cases,
1518: backtracking will then try other alternatives, if any. */
1519:
1520: if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1521: md->recursive == NULL &&
1522: (md->notempty ||
1523: (md->notempty_atstart &&
1524: mstart == md->start_subject + md->start_offset)))
1525: RRETURN(MATCH_NOMATCH);
1526:
1527: /* Otherwise, we have a match. */
1528:
1529: md->end_match_ptr = eptr; /* Record where we ended */
1530: md->end_offset_top = offset_top; /* and how many extracts were taken */
1531: md->start_match_ptr = mstart; /* and the start (\K can modify) */
1532:
1533: /* For some reason, the macros don't work properly if an expression is
1534: given as the argument to RRETURN when the heap is in use. */
1535:
1536: rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1537: RRETURN(rrc);
1538:
1539: /* Assertion brackets. Check the alternative branches in turn - the
1540: matching won't pass the KET for an assertion. If any one branch matches,
1541: the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1542: start of each branch to move the current point backwards, so the code at
1543: this level is identical to the lookahead case. When the assertion is part
1544: of a condition, we want to return immediately afterwards. The caller of
1545: this incarnation of the match() function will have set MATCH_CONDASSERT in
1546: md->match_function_type, and one of these opcodes will be the first opcode
1547: that is processed. We use a local variable that is preserved over calls to
1548: match() to remember this case. */
1549:
1550: case OP_ASSERT:
1551: case OP_ASSERTBACK:
1.1.1.2 ! misho 1552: save_mark = md->mark;
1.1 misho 1553: if (md->match_function_type == MATCH_CONDASSERT)
1554: {
1555: condassert = TRUE;
1556: md->match_function_type = 0;
1557: }
1558: else condassert = FALSE;
1559:
1560: do
1561: {
1562: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1563: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1564: {
1565: mstart = md->start_match_ptr; /* In case \K reset it */
1566: break;
1567: }
1568:
1569: /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1570: as NOMATCH. */
1571:
1572: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1573: ecode += GET(ecode, 1);
1.1.1.2 ! misho 1574: md->mark = save_mark;
1.1 misho 1575: }
1576: while (*ecode == OP_ALT);
1577:
1578: if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1579:
1580: /* If checking an assertion for a condition, return MATCH_MATCH. */
1581:
1582: if (condassert) RRETURN(MATCH_MATCH);
1583:
1584: /* Continue from after the assertion, updating the offsets high water
1585: mark, since extracts may have been taken during the assertion. */
1586:
1587: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1588: ecode += 1 + LINK_SIZE;
1589: offset_top = md->end_offset_top;
1590: continue;
1591:
1592: /* Negative assertion: all branches must fail to match. Encountering SKIP,
1593: PRUNE, or COMMIT means we must assume failure without checking subsequent
1594: branches. */
1595:
1596: case OP_ASSERT_NOT:
1597: case OP_ASSERTBACK_NOT:
1.1.1.2 ! misho 1598: save_mark = md->mark;
1.1 misho 1599: if (md->match_function_type == MATCH_CONDASSERT)
1600: {
1601: condassert = TRUE;
1602: md->match_function_type = 0;
1603: }
1604: else condassert = FALSE;
1605:
1606: do
1607: {
1608: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1.1.1.2 ! misho 1609: md->mark = save_mark;
1.1 misho 1610: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1611: if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1612: {
1613: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1614: break;
1615: }
1616:
1617: /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1618: as NOMATCH. */
1619:
1620: if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1621: ecode += GET(ecode,1);
1622: }
1623: while (*ecode == OP_ALT);
1624:
1625: if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1626:
1627: ecode += 1 + LINK_SIZE;
1628: continue;
1629:
1630: /* Move the subject pointer back. This occurs only at the start of
1631: each branch of a lookbehind assertion. If we are too close to the start to
1632: move back, this match function fails. When working with UTF-8 we move
1633: back a number of characters, not bytes. */
1634:
1635: case OP_REVERSE:
1.1.1.2 ! misho 1636: #ifdef SUPPORT_UTF
! 1637: if (utf)
1.1 misho 1638: {
1639: i = GET(ecode, 1);
1640: while (i-- > 0)
1641: {
1642: eptr--;
1643: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1644: BACKCHAR(eptr);
1645: }
1646: }
1647: else
1648: #endif
1649:
1650: /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1651:
1652: {
1653: eptr -= GET(ecode, 1);
1654: if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1655: }
1656:
1657: /* Save the earliest consulted character, then skip to next op code */
1658:
1659: if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1660: ecode += 1 + LINK_SIZE;
1661: break;
1662:
1663: /* The callout item calls an external function, if one is provided, passing
1664: details of the match so far. This is mainly for debugging, though the
1665: function is able to force a failure. */
1666:
1667: case OP_CALLOUT:
1.1.1.2 ! misho 1668: if (PUBL(callout) != NULL)
1.1 misho 1669: {
1.1.1.2 ! misho 1670: PUBL(callout_block) cb;
1.1 misho 1671: cb.version = 2; /* Version 2 of the callout block */
1672: cb.callout_number = ecode[1];
1673: cb.offset_vector = md->offset_vector;
1.1.1.2 ! misho 1674: #ifdef COMPILE_PCRE8
1.1 misho 1675: cb.subject = (PCRE_SPTR)md->start_subject;
1.1.1.2 ! misho 1676: #else
! 1677: cb.subject = (PCRE_SPTR16)md->start_subject;
! 1678: #endif
1.1 misho 1679: cb.subject_length = (int)(md->end_subject - md->start_subject);
1680: cb.start_match = (int)(mstart - md->start_subject);
1681: cb.current_position = (int)(eptr - md->start_subject);
1682: cb.pattern_position = GET(ecode, 2);
1683: cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1684: cb.capture_top = offset_top/2;
1685: cb.capture_last = md->capture_last;
1686: cb.callout_data = md->callout_data;
1687: cb.mark = md->nomatch_mark;
1.1.1.2 ! misho 1688: if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1 misho 1689: if (rrc < 0) RRETURN(rrc);
1690: }
1691: ecode += 2 + 2*LINK_SIZE;
1692: break;
1693:
1694: /* Recursion either matches the current regex, or some subexpression. The
1695: offset data is the offset to the starting bracket from the start of the
1696: whole pattern. (This is so that it works from duplicated subpatterns.)
1697:
1698: The state of the capturing groups is preserved over recursion, and
1699: re-instated afterwards. We don't know how many are started and not yet
1700: finished (offset_top records the completed total) so we just have to save
1701: all the potential data. There may be up to 65535 such values, which is too
1702: large to put on the stack, but using malloc for small numbers seems
1703: expensive. As a compromise, the stack is used when there are no more than
1704: REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1705:
1706: There are also other values that have to be saved. We use a chained
1707: sequence of blocks that actually live on the stack. Thanks to Robin Houston
1708: for the original version of this logic. It has, however, been hacked around
1709: a lot, so he is not to blame for the current way it works. */
1710:
1711: case OP_RECURSE:
1712: {
1713: recursion_info *ri;
1714: int recno;
1715:
1716: callpat = md->start_code + GET(ecode, 1);
1717: recno = (callpat == md->start_code)? 0 :
1718: GET2(callpat, 1 + LINK_SIZE);
1719:
1720: /* Check for repeating a recursion without advancing the subject pointer.
1721: This should catch convoluted mutual recursions. (Some simple cases are
1722: caught at compile time.) */
1723:
1724: for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1725: if (recno == ri->group_num && eptr == ri->subject_position)
1726: RRETURN(PCRE_ERROR_RECURSELOOP);
1727:
1728: /* Add to "recursing stack" */
1729:
1730: new_recursive.group_num = recno;
1731: new_recursive.subject_position = eptr;
1732: new_recursive.prevrec = md->recursive;
1733: md->recursive = &new_recursive;
1734:
1735: /* Where to continue from afterwards */
1736:
1737: ecode += 1 + LINK_SIZE;
1738:
1739: /* Now save the offset data */
1740:
1741: new_recursive.saved_max = md->offset_end;
1742: if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1743: new_recursive.offset_save = stacksave;
1744: else
1745: {
1746: new_recursive.offset_save =
1.1.1.2 ! misho 1747: (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1 misho 1748: if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1749: }
1750: memcpy(new_recursive.offset_save, md->offset_vector,
1751: new_recursive.saved_max * sizeof(int));
1752:
1753: /* OK, now we can do the recursion. After processing each alternative,
1754: restore the offset data. If there were nested recursions, md->recursive
1755: might be changed, so reset it before looping. */
1756:
1757: DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1758: cbegroup = (*callpat >= OP_SBRA);
1759: do
1760: {
1761: if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1.1.1.2 ! misho 1762: RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1.1 misho 1763: md, eptrb, RM6);
1764: memcpy(md->offset_vector, new_recursive.offset_save,
1765: new_recursive.saved_max * sizeof(int));
1766: md->recursive = new_recursive.prevrec;
1767: if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1768: {
1769: DPRINTF(("Recursion matched\n"));
1770: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1771: (PUBL(free))(new_recursive.offset_save);
1.1 misho 1772:
1773: /* Set where we got to in the subject, and reset the start in case
1774: it was changed by \K. This *is* propagated back out of a recursion,
1775: for Perl compatibility. */
1776:
1777: eptr = md->end_match_ptr;
1778: mstart = md->start_match_ptr;
1779: goto RECURSION_MATCHED; /* Exit loop; end processing */
1780: }
1781:
1782: /* PCRE does not allow THEN to escape beyond a recursion; it is treated
1783: as NOMATCH. */
1784:
1785: else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1786: {
1787: DPRINTF(("Recursion gave error %d\n", rrc));
1788: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1789: (PUBL(free))(new_recursive.offset_save);
1.1 misho 1790: RRETURN(rrc);
1791: }
1792:
1793: md->recursive = &new_recursive;
1794: callpat += GET(callpat, 1);
1795: }
1796: while (*callpat == OP_ALT);
1797:
1798: DPRINTF(("Recursion didn't match\n"));
1799: md->recursive = new_recursive.prevrec;
1800: if (new_recursive.offset_save != stacksave)
1.1.1.2 ! misho 1801: (PUBL(free))(new_recursive.offset_save);
1.1 misho 1802: RRETURN(MATCH_NOMATCH);
1803: }
1804:
1805: RECURSION_MATCHED:
1806: break;
1807:
1808: /* An alternation is the end of a branch; scan along to find the end of the
1809: bracketed group and go to there. */
1810:
1811: case OP_ALT:
1812: do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1813: break;
1814:
1815: /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1816: indicating that it may occur zero times. It may repeat infinitely, or not
1817: at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1818: with fixed upper repeat limits are compiled as a number of copies, with the
1819: optional ones preceded by BRAZERO or BRAMINZERO. */
1820:
1821: case OP_BRAZERO:
1822: next = ecode + 1;
1823: RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1824: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1825: do next += GET(next, 1); while (*next == OP_ALT);
1826: ecode = next + 1 + LINK_SIZE;
1827: break;
1828:
1829: case OP_BRAMINZERO:
1830: next = ecode + 1;
1831: do next += GET(next, 1); while (*next == OP_ALT);
1832: RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1833: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1834: ecode++;
1835: break;
1836:
1837: case OP_SKIPZERO:
1838: next = ecode+1;
1839: do next += GET(next,1); while (*next == OP_ALT);
1840: ecode = next + 1 + LINK_SIZE;
1841: break;
1842:
1843: /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1844: here; just jump to the group, with allow_zero set TRUE. */
1845:
1846: case OP_BRAPOSZERO:
1847: op = *(++ecode);
1848: allow_zero = TRUE;
1849: if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1850: goto POSSESSIVE_NON_CAPTURE;
1851:
1852: /* End of a group, repeated or non-repeating. */
1853:
1854: case OP_KET:
1855: case OP_KETRMIN:
1856: case OP_KETRMAX:
1857: case OP_KETRPOS:
1858: prev = ecode - GET(ecode, 1);
1859:
1860: /* If this was a group that remembered the subject start, in order to break
1861: infinite repeats of empty string matches, retrieve the subject start from
1862: the chain. Otherwise, set it NULL. */
1863:
1864: if (*prev >= OP_SBRA || *prev == OP_ONCE)
1865: {
1866: saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1867: eptrb = eptrb->epb_prev; /* Backup to previous group */
1868: }
1869: else saved_eptr = NULL;
1870:
1871: /* If we are at the end of an assertion group or a non-capturing atomic
1872: group, stop matching and return MATCH_MATCH, but record the current high
1873: water mark for use by positive assertions. We also need to record the match
1874: start in case it was changed by \K. */
1875:
1876: if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1877: *prev == OP_ONCE_NC)
1878: {
1879: md->end_match_ptr = eptr; /* For ONCE_NC */
1880: md->end_offset_top = offset_top;
1881: md->start_match_ptr = mstart;
1882: RRETURN(MATCH_MATCH); /* Sets md->mark */
1883: }
1884:
1885: /* For capturing groups we have to check the group number back at the start
1886: and if necessary complete handling an extraction by setting the offsets and
1887: bumping the high water mark. Whole-pattern recursion is coded as a recurse
1888: into group 0, so it won't be picked up here. Instead, we catch it when the
1889: OP_END is reached. Other recursion is handled here. We just have to record
1890: the current subject position and start match pointer and give a MATCH
1891: return. */
1892:
1893: if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1894: *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1895: {
1896: number = GET2(prev, 1+LINK_SIZE);
1897: offset = number << 1;
1898:
1899: #ifdef PCRE_DEBUG
1900: printf("end bracket %d", number);
1901: printf("\n");
1902: #endif
1903:
1904: /* Handle a recursively called group. */
1905:
1906: if (md->recursive != NULL && md->recursive->group_num == number)
1907: {
1908: md->end_match_ptr = eptr;
1909: md->start_match_ptr = mstart;
1910: RRETURN(MATCH_MATCH);
1911: }
1912:
1913: /* Deal with capturing */
1914:
1915: md->capture_last = number;
1916: if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1917: {
1918: /* If offset is greater than offset_top, it means that we are
1919: "skipping" a capturing group, and that group's offsets must be marked
1920: unset. In earlier versions of PCRE, all the offsets were unset at the
1921: start of matching, but this doesn't work because atomic groups and
1922: assertions can cause a value to be set that should later be unset.
1923: Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1924: part of the atomic group, but this is not on the final matching path,
1925: so must be unset when 2 is set. (If there is no group 2, there is no
1926: problem, because offset_top will then be 2, indicating no capture.) */
1927:
1928: if (offset > offset_top)
1929: {
1930: register int *iptr = md->offset_vector + offset_top;
1931: register int *iend = md->offset_vector + offset;
1932: while (iptr < iend) *iptr++ = -1;
1933: }
1934:
1935: /* Now make the extraction */
1936:
1937: md->offset_vector[offset] =
1938: md->offset_vector[md->offset_end - number];
1939: md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1940: if (offset_top <= offset) offset_top = offset + 2;
1941: }
1942: }
1943:
1944: /* For an ordinary non-repeating ket, just continue at this level. This
1945: also happens for a repeating ket if no characters were matched in the
1946: group. This is the forcible breaking of infinite loops as implemented in
1947: Perl 5.005. For a non-repeating atomic group that includes captures,
1948: establish a backup point by processing the rest of the pattern at a lower
1949: level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1950: original OP_ONCE level, thereby bypassing intermediate backup points, but
1951: resetting any captures that happened along the way. */
1952:
1953: if (*ecode == OP_KET || eptr == saved_eptr)
1954: {
1955: if (*prev == OP_ONCE)
1956: {
1957: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1958: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1959: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1960: RRETURN(MATCH_ONCE);
1961: }
1962: ecode += 1 + LINK_SIZE; /* Carry on at this level */
1963: break;
1964: }
1965:
1966: /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1967: and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1968: at a time from the outer level, thus saving stack. */
1969:
1970: if (*ecode == OP_KETRPOS)
1971: {
1972: md->end_match_ptr = eptr;
1973: md->end_offset_top = offset_top;
1974: RRETURN(MATCH_KETRPOS);
1975: }
1976:
1977: /* The normal repeating kets try the rest of the pattern or restart from
1978: the preceding bracket, in the appropriate order. In the second case, we can
1979: use tail recursion to avoid using another stack frame, unless we have an
1980: atomic group or an unlimited repeat of a group that can match an empty
1981: string. */
1982:
1983: if (*ecode == OP_KETRMIN)
1984: {
1985: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1986: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1987: if (*prev == OP_ONCE)
1988: {
1989: RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1990: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1991: md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1992: RRETURN(MATCH_ONCE);
1993: }
1994: if (*prev >= OP_SBRA) /* Could match an empty string */
1995: {
1996: md->match_function_type = MATCH_CBEGROUP;
1997: RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1998: RRETURN(rrc);
1999: }
2000: ecode = prev;
2001: goto TAIL_RECURSE;
2002: }
2003: else /* OP_KETRMAX */
2004: {
2005: if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
2006: RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2007: if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2008: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2009: if (*prev == OP_ONCE)
2010: {
2011: RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2012: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2013: md->once_target = prev;
2014: RRETURN(MATCH_ONCE);
2015: }
2016: ecode += 1 + LINK_SIZE;
2017: goto TAIL_RECURSE;
2018: }
2019: /* Control never gets here */
2020:
2021: /* Not multiline mode: start of subject assertion, unless notbol. */
2022:
2023: case OP_CIRC:
2024: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2025:
2026: /* Start of subject assertion */
2027:
2028: case OP_SOD:
2029: if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2030: ecode++;
2031: break;
2032:
2033: /* Multiline mode: start of subject unless notbol, or after any newline. */
2034:
2035: case OP_CIRCM:
2036: if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2037: if (eptr != md->start_subject &&
2038: (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2039: RRETURN(MATCH_NOMATCH);
2040: ecode++;
2041: break;
2042:
2043: /* Start of match assertion */
2044:
2045: case OP_SOM:
2046: if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2047: ecode++;
2048: break;
2049:
2050: /* Reset the start of match point */
2051:
2052: case OP_SET_SOM:
2053: mstart = eptr;
2054: ecode++;
2055: break;
2056:
2057: /* Multiline mode: assert before any newline, or before end of subject
2058: unless noteol is set. */
2059:
2060: case OP_DOLLM:
2061: if (eptr < md->end_subject)
2062: { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
2063: else
2064: {
2065: if (md->noteol) RRETURN(MATCH_NOMATCH);
2066: SCHECK_PARTIAL();
2067: }
2068: ecode++;
2069: break;
2070:
2071: /* Not multiline mode: assert before a terminating newline or before end of
2072: subject unless noteol is set. */
2073:
2074: case OP_DOLL:
2075: if (md->noteol) RRETURN(MATCH_NOMATCH);
2076: if (!md->endonly) goto ASSERT_NL_OR_EOS;
2077:
2078: /* ... else fall through for endonly */
2079:
2080: /* End of subject assertion (\z) */
2081:
2082: case OP_EOD:
2083: if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2084: SCHECK_PARTIAL();
2085: ecode++;
2086: break;
2087:
2088: /* End of subject or ending \n assertion (\Z) */
2089:
2090: case OP_EODN:
2091: ASSERT_NL_OR_EOS:
2092: if (eptr < md->end_subject &&
2093: (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2094: RRETURN(MATCH_NOMATCH);
2095:
2096: /* Either at end of string or \n before end. */
2097:
2098: SCHECK_PARTIAL();
2099: ecode++;
2100: break;
2101:
2102: /* Word boundary assertions */
2103:
2104: case OP_NOT_WORD_BOUNDARY:
2105: case OP_WORD_BOUNDARY:
2106: {
2107:
2108: /* Find out if the previous and current characters are "word" characters.
2109: It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2110: be "non-word" characters. Remember the earliest consulted character for
2111: partial matching. */
2112:
1.1.1.2 ! misho 2113: #ifdef SUPPORT_UTF
! 2114: if (utf)
1.1 misho 2115: {
2116: /* Get status of previous character */
2117:
2118: if (eptr == md->start_subject) prev_is_word = FALSE; else
2119: {
1.1.1.2 ! misho 2120: PCRE_PUCHAR lastptr = eptr - 1;
! 2121: BACKCHAR(lastptr);
1.1 misho 2122: if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2123: GETCHAR(c, lastptr);
2124: #ifdef SUPPORT_UCP
2125: if (md->use_ucp)
2126: {
2127: if (c == '_') prev_is_word = TRUE; else
2128: {
2129: int cat = UCD_CATEGORY(c);
2130: prev_is_word = (cat == ucp_L || cat == ucp_N);
2131: }
2132: }
2133: else
2134: #endif
2135: prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2136: }
2137:
2138: /* Get status of next character */
2139:
2140: if (eptr >= md->end_subject)
2141: {
2142: SCHECK_PARTIAL();
2143: cur_is_word = FALSE;
2144: }
2145: else
2146: {
2147: GETCHAR(c, eptr);
2148: #ifdef SUPPORT_UCP
2149: if (md->use_ucp)
2150: {
2151: if (c == '_') cur_is_word = TRUE; else
2152: {
2153: int cat = UCD_CATEGORY(c);
2154: cur_is_word = (cat == ucp_L || cat == ucp_N);
2155: }
2156: }
2157: else
2158: #endif
2159: cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2160: }
2161: }
2162: else
2163: #endif
2164:
2165: /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2166: consistency with the behaviour of \w we do use it in this case. */
2167:
2168: {
2169: /* Get status of previous character */
2170:
2171: if (eptr == md->start_subject) prev_is_word = FALSE; else
2172: {
2173: if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2174: #ifdef SUPPORT_UCP
2175: if (md->use_ucp)
2176: {
2177: c = eptr[-1];
2178: if (c == '_') prev_is_word = TRUE; else
2179: {
2180: int cat = UCD_CATEGORY(c);
2181: prev_is_word = (cat == ucp_L || cat == ucp_N);
2182: }
2183: }
2184: else
2185: #endif
1.1.1.2 ! misho 2186: prev_is_word = MAX_255(eptr[-1])
! 2187: && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.1 misho 2188: }
2189:
2190: /* Get status of next character */
2191:
2192: if (eptr >= md->end_subject)
2193: {
2194: SCHECK_PARTIAL();
2195: cur_is_word = FALSE;
2196: }
2197: else
2198: #ifdef SUPPORT_UCP
2199: if (md->use_ucp)
2200: {
2201: c = *eptr;
2202: if (c == '_') cur_is_word = TRUE; else
2203: {
2204: int cat = UCD_CATEGORY(c);
2205: cur_is_word = (cat == ucp_L || cat == ucp_N);
2206: }
2207: }
2208: else
2209: #endif
1.1.1.2 ! misho 2210: cur_is_word = MAX_255(*eptr)
! 2211: && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1 misho 2212: }
2213:
2214: /* Now see if the situation is what we want */
2215:
2216: if ((*ecode++ == OP_WORD_BOUNDARY)?
2217: cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2218: RRETURN(MATCH_NOMATCH);
2219: }
2220: break;
2221:
2222: /* Match a single character type; inline for speed */
2223:
2224: case OP_ANY:
2225: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2226: /* Fall through */
2227:
2228: case OP_ALLANY:
2229: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2230: { /* not be updated before SCHECK_PARTIAL. */
2231: SCHECK_PARTIAL();
2232: RRETURN(MATCH_NOMATCH);
2233: }
2234: eptr++;
1.1.1.2 ! misho 2235: #ifdef SUPPORT_UTF
! 2236: if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
! 2237: #endif
1.1 misho 2238: ecode++;
2239: break;
2240:
2241: /* Match a single byte, even in UTF-8 mode. This opcode really does match
2242: any byte, even newline, independent of the setting of PCRE_DOTALL. */
2243:
2244: case OP_ANYBYTE:
2245: if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2246: { /* not be updated before SCHECK_PARTIAL. */
2247: SCHECK_PARTIAL();
2248: RRETURN(MATCH_NOMATCH);
2249: }
2250: eptr++;
2251: ecode++;
2252: break;
2253:
2254: case OP_NOT_DIGIT:
2255: if (eptr >= md->end_subject)
2256: {
2257: SCHECK_PARTIAL();
2258: RRETURN(MATCH_NOMATCH);
2259: }
2260: GETCHARINCTEST(c, eptr);
2261: if (
1.1.1.2 ! misho 2262: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2263: c < 256 &&
2264: #endif
2265: (md->ctypes[c] & ctype_digit) != 0
2266: )
2267: RRETURN(MATCH_NOMATCH);
2268: ecode++;
2269: break;
2270:
2271: case OP_DIGIT:
2272: if (eptr >= md->end_subject)
2273: {
2274: SCHECK_PARTIAL();
2275: RRETURN(MATCH_NOMATCH);
2276: }
2277: GETCHARINCTEST(c, eptr);
2278: if (
1.1.1.2 ! misho 2279: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2280: c > 255 ||
1.1 misho 2281: #endif
2282: (md->ctypes[c] & ctype_digit) == 0
2283: )
2284: RRETURN(MATCH_NOMATCH);
2285: ecode++;
2286: break;
2287:
2288: case OP_NOT_WHITESPACE:
2289: if (eptr >= md->end_subject)
2290: {
2291: SCHECK_PARTIAL();
2292: RRETURN(MATCH_NOMATCH);
2293: }
2294: GETCHARINCTEST(c, eptr);
2295: if (
1.1.1.2 ! misho 2296: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2297: c < 256 &&
2298: #endif
2299: (md->ctypes[c] & ctype_space) != 0
2300: )
2301: RRETURN(MATCH_NOMATCH);
2302: ecode++;
2303: break;
2304:
2305: case OP_WHITESPACE:
2306: if (eptr >= md->end_subject)
2307: {
2308: SCHECK_PARTIAL();
2309: RRETURN(MATCH_NOMATCH);
2310: }
2311: GETCHARINCTEST(c, eptr);
2312: if (
1.1.1.2 ! misho 2313: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2314: c > 255 ||
1.1 misho 2315: #endif
2316: (md->ctypes[c] & ctype_space) == 0
2317: )
2318: RRETURN(MATCH_NOMATCH);
2319: ecode++;
2320: break;
2321:
2322: case OP_NOT_WORDCHAR:
2323: if (eptr >= md->end_subject)
2324: {
2325: SCHECK_PARTIAL();
2326: RRETURN(MATCH_NOMATCH);
2327: }
2328: GETCHARINCTEST(c, eptr);
2329: if (
1.1.1.2 ! misho 2330: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1 misho 2331: c < 256 &&
2332: #endif
2333: (md->ctypes[c] & ctype_word) != 0
2334: )
2335: RRETURN(MATCH_NOMATCH);
2336: ecode++;
2337: break;
2338:
2339: case OP_WORDCHAR:
2340: if (eptr >= md->end_subject)
2341: {
2342: SCHECK_PARTIAL();
2343: RRETURN(MATCH_NOMATCH);
2344: }
2345: GETCHARINCTEST(c, eptr);
2346: if (
1.1.1.2 ! misho 2347: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
! 2348: c > 255 ||
1.1 misho 2349: #endif
2350: (md->ctypes[c] & ctype_word) == 0
2351: )
2352: RRETURN(MATCH_NOMATCH);
2353: ecode++;
2354: break;
2355:
2356: case OP_ANYNL:
2357: if (eptr >= md->end_subject)
2358: {
2359: SCHECK_PARTIAL();
2360: RRETURN(MATCH_NOMATCH);
2361: }
2362: GETCHARINCTEST(c, eptr);
2363: switch(c)
2364: {
2365: default: RRETURN(MATCH_NOMATCH);
2366:
2367: case 0x000d:
2368: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2369: break;
2370:
2371: case 0x000a:
2372: break;
2373:
2374: case 0x000b:
2375: case 0x000c:
2376: case 0x0085:
2377: case 0x2028:
2378: case 0x2029:
2379: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2380: break;
2381: }
2382: ecode++;
2383: break;
2384:
2385: case OP_NOT_HSPACE:
2386: if (eptr >= md->end_subject)
2387: {
2388: SCHECK_PARTIAL();
2389: RRETURN(MATCH_NOMATCH);
2390: }
2391: GETCHARINCTEST(c, eptr);
2392: switch(c)
2393: {
2394: default: break;
2395: case 0x09: /* HT */
2396: case 0x20: /* SPACE */
2397: case 0xa0: /* NBSP */
2398: case 0x1680: /* OGHAM SPACE MARK */
2399: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2400: case 0x2000: /* EN QUAD */
2401: case 0x2001: /* EM QUAD */
2402: case 0x2002: /* EN SPACE */
2403: case 0x2003: /* EM SPACE */
2404: case 0x2004: /* THREE-PER-EM SPACE */
2405: case 0x2005: /* FOUR-PER-EM SPACE */
2406: case 0x2006: /* SIX-PER-EM SPACE */
2407: case 0x2007: /* FIGURE SPACE */
2408: case 0x2008: /* PUNCTUATION SPACE */
2409: case 0x2009: /* THIN SPACE */
2410: case 0x200A: /* HAIR SPACE */
2411: case 0x202f: /* NARROW NO-BREAK SPACE */
2412: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2413: case 0x3000: /* IDEOGRAPHIC SPACE */
2414: RRETURN(MATCH_NOMATCH);
2415: }
2416: ecode++;
2417: break;
2418:
2419: case OP_HSPACE:
2420: if (eptr >= md->end_subject)
2421: {
2422: SCHECK_PARTIAL();
2423: RRETURN(MATCH_NOMATCH);
2424: }
2425: GETCHARINCTEST(c, eptr);
2426: switch(c)
2427: {
2428: default: RRETURN(MATCH_NOMATCH);
2429: case 0x09: /* HT */
2430: case 0x20: /* SPACE */
2431: case 0xa0: /* NBSP */
2432: case 0x1680: /* OGHAM SPACE MARK */
2433: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2434: case 0x2000: /* EN QUAD */
2435: case 0x2001: /* EM QUAD */
2436: case 0x2002: /* EN SPACE */
2437: case 0x2003: /* EM SPACE */
2438: case 0x2004: /* THREE-PER-EM SPACE */
2439: case 0x2005: /* FOUR-PER-EM SPACE */
2440: case 0x2006: /* SIX-PER-EM SPACE */
2441: case 0x2007: /* FIGURE SPACE */
2442: case 0x2008: /* PUNCTUATION SPACE */
2443: case 0x2009: /* THIN SPACE */
2444: case 0x200A: /* HAIR SPACE */
2445: case 0x202f: /* NARROW NO-BREAK SPACE */
2446: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2447: case 0x3000: /* IDEOGRAPHIC SPACE */
2448: break;
2449: }
2450: ecode++;
2451: break;
2452:
2453: case OP_NOT_VSPACE:
2454: if (eptr >= md->end_subject)
2455: {
2456: SCHECK_PARTIAL();
2457: RRETURN(MATCH_NOMATCH);
2458: }
2459: GETCHARINCTEST(c, eptr);
2460: switch(c)
2461: {
2462: default: break;
2463: case 0x0a: /* LF */
2464: case 0x0b: /* VT */
2465: case 0x0c: /* FF */
2466: case 0x0d: /* CR */
2467: case 0x85: /* NEL */
2468: case 0x2028: /* LINE SEPARATOR */
2469: case 0x2029: /* PARAGRAPH SEPARATOR */
2470: RRETURN(MATCH_NOMATCH);
2471: }
2472: ecode++;
2473: break;
2474:
2475: case OP_VSPACE:
2476: if (eptr >= md->end_subject)
2477: {
2478: SCHECK_PARTIAL();
2479: RRETURN(MATCH_NOMATCH);
2480: }
2481: GETCHARINCTEST(c, eptr);
2482: switch(c)
2483: {
2484: default: RRETURN(MATCH_NOMATCH);
2485: case 0x0a: /* LF */
2486: case 0x0b: /* VT */
2487: case 0x0c: /* FF */
2488: case 0x0d: /* CR */
2489: case 0x85: /* NEL */
2490: case 0x2028: /* LINE SEPARATOR */
2491: case 0x2029: /* PARAGRAPH SEPARATOR */
2492: break;
2493: }
2494: ecode++;
2495: break;
2496:
2497: #ifdef SUPPORT_UCP
2498: /* Check the next character by Unicode property. We will get here only
2499: if the support is in the binary; otherwise a compile-time error occurs. */
2500:
2501: case OP_PROP:
2502: case OP_NOTPROP:
2503: if (eptr >= md->end_subject)
2504: {
2505: SCHECK_PARTIAL();
2506: RRETURN(MATCH_NOMATCH);
2507: }
2508: GETCHARINCTEST(c, eptr);
2509: {
2510: const ucd_record *prop = GET_UCD(c);
2511:
2512: switch(ecode[1])
2513: {
2514: case PT_ANY:
2515: if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2516: break;
2517:
2518: case PT_LAMP:
2519: if ((prop->chartype == ucp_Lu ||
2520: prop->chartype == ucp_Ll ||
2521: prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2522: RRETURN(MATCH_NOMATCH);
2523: break;
2524:
2525: case PT_GC:
1.1.1.2 ! misho 2526: if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
1.1 misho 2527: RRETURN(MATCH_NOMATCH);
2528: break;
2529:
2530: case PT_PC:
2531: if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2532: RRETURN(MATCH_NOMATCH);
2533: break;
2534:
2535: case PT_SC:
2536: if ((ecode[2] != prop->script) == (op == OP_PROP))
2537: RRETURN(MATCH_NOMATCH);
2538: break;
2539:
2540: /* These are specials */
2541:
2542: case PT_ALNUM:
1.1.1.2 ! misho 2543: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
! 2544: PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
1.1 misho 2545: RRETURN(MATCH_NOMATCH);
2546: break;
2547:
2548: case PT_SPACE: /* Perl space */
1.1.1.2 ! misho 2549: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1 misho 2550: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2551: == (op == OP_NOTPROP))
2552: RRETURN(MATCH_NOMATCH);
2553: break;
2554:
2555: case PT_PXSPACE: /* POSIX space */
1.1.1.2 ! misho 2556: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.1 misho 2557: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2558: c == CHAR_FF || c == CHAR_CR)
2559: == (op == OP_NOTPROP))
2560: RRETURN(MATCH_NOMATCH);
2561: break;
2562:
2563: case PT_WORD:
1.1.1.2 ! misho 2564: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
! 2565: PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.1 misho 2566: c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2567: RRETURN(MATCH_NOMATCH);
2568: break;
2569:
2570: /* This should never occur */
2571:
2572: default:
2573: RRETURN(PCRE_ERROR_INTERNAL);
2574: }
2575:
2576: ecode += 3;
2577: }
2578: break;
2579:
2580: /* Match an extended Unicode sequence. We will get here only if the support
2581: is in the binary; otherwise a compile-time error occurs. */
2582:
2583: case OP_EXTUNI:
2584: if (eptr >= md->end_subject)
2585: {
2586: SCHECK_PARTIAL();
2587: RRETURN(MATCH_NOMATCH);
2588: }
2589: GETCHARINCTEST(c, eptr);
2590: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2591: while (eptr < md->end_subject)
2592: {
2593: int len = 1;
1.1.1.2 ! misho 2594: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1 misho 2595: if (UCD_CATEGORY(c) != ucp_M) break;
2596: eptr += len;
2597: }
2598: ecode++;
2599: break;
2600: #endif
2601:
2602:
2603: /* Match a back reference, possibly repeatedly. Look past the end of the
2604: item to see if there is repeat information following. The code is similar
2605: to that for character classes, but repeated for efficiency. Then obey
2606: similar code to character type repeats - written out again for speed.
2607: However, if the referenced string is the empty string, always treat
2608: it as matched, any number of times (otherwise there could be infinite
2609: loops). */
2610:
2611: case OP_REF:
2612: case OP_REFI:
2613: caseless = op == OP_REFI;
2614: offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1.1.1.2 ! misho 2615: ecode += 1 + IMM2_SIZE;
1.1 misho 2616:
2617: /* If the reference is unset, there are two possibilities:
2618:
2619: (a) In the default, Perl-compatible state, set the length negative;
2620: this ensures that every attempt at a match fails. We can't just fail
2621: here, because of the possibility of quantifiers with zero minima.
2622:
2623: (b) If the JavaScript compatibility flag is set, set the length to zero
2624: so that the back reference matches an empty string.
2625:
2626: Otherwise, set the length to the length of what was matched by the
2627: referenced subpattern. */
2628:
2629: if (offset >= offset_top || md->offset_vector[offset] < 0)
2630: length = (md->jscript_compat)? 0 : -1;
2631: else
2632: length = md->offset_vector[offset+1] - md->offset_vector[offset];
2633:
2634: /* Set up for repetition, or handle the non-repeated case */
2635:
2636: switch (*ecode)
2637: {
2638: case OP_CRSTAR:
2639: case OP_CRMINSTAR:
2640: case OP_CRPLUS:
2641: case OP_CRMINPLUS:
2642: case OP_CRQUERY:
2643: case OP_CRMINQUERY:
2644: c = *ecode++ - OP_CRSTAR;
2645: minimize = (c & 1) != 0;
2646: min = rep_min[c]; /* Pick up values from tables; */
2647: max = rep_max[c]; /* zero for max => infinity */
2648: if (max == 0) max = INT_MAX;
2649: break;
2650:
2651: case OP_CRRANGE:
2652: case OP_CRMINRANGE:
2653: minimize = (*ecode == OP_CRMINRANGE);
2654: min = GET2(ecode, 1);
1.1.1.2 ! misho 2655: max = GET2(ecode, 1 + IMM2_SIZE);
1.1 misho 2656: if (max == 0) max = INT_MAX;
1.1.1.2 ! misho 2657: ecode += 1 + 2 * IMM2_SIZE;
1.1 misho 2658: break;
2659:
2660: default: /* No repeat follows */
2661: if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2662: {
2663: CHECK_PARTIAL();
2664: RRETURN(MATCH_NOMATCH);
2665: }
2666: eptr += length;
2667: continue; /* With the main loop */
2668: }
2669:
2670: /* Handle repeated back references. If the length of the reference is
1.1.1.2 ! misho 2671: zero, just continue with the main loop. If the length is negative, it
! 2672: means the reference is unset in non-JavaScript-compatible mode. If the minimum is
! 2673: zero, we can continue at the same level without recursion. For any other
! 2674: minimum, carrying on will result in NOMATCH. */
1.1 misho 2675:
2676: if (length == 0) continue;
1.1.1.2 ! misho 2677: if (length < 0 && min == 0) continue;
1.1 misho 2678:
2679: /* First, ensure the minimum number of matches are present. We get back
2680: the length of the reference string explicitly rather than passing the
2681: address of eptr, so that eptr can be a register variable. */
2682:
2683: for (i = 1; i <= min; i++)
2684: {
2685: int slength;
2686: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2687: {
2688: CHECK_PARTIAL();
2689: RRETURN(MATCH_NOMATCH);
2690: }
2691: eptr += slength;
2692: }
2693:
2694: /* If min = max, continue at the same level without recursion.
2695: They are not both allowed to be zero. */
2696:
2697: if (min == max) continue;
2698:
2699: /* If minimizing, keep trying and advancing the pointer */
2700:
2701: if (minimize)
2702: {
2703: for (fi = min;; fi++)
2704: {
2705: int slength;
2706: RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2707: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2708: if (fi >= max) RRETURN(MATCH_NOMATCH);
2709: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2710: {
2711: CHECK_PARTIAL();
2712: RRETURN(MATCH_NOMATCH);
2713: }
2714: eptr += slength;
2715: }
2716: /* Control never gets here */
2717: }
2718:
2719: /* If maximizing, find the longest string and work backwards */
2720:
2721: else
2722: {
2723: pp = eptr;
2724: for (i = min; i < max; i++)
2725: {
2726: int slength;
2727: if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2728: {
2729: CHECK_PARTIAL();
2730: break;
2731: }
2732: eptr += slength;
2733: }
2734: while (eptr >= pp)
2735: {
2736: RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2737: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2738: eptr -= length;
2739: }
2740: RRETURN(MATCH_NOMATCH);
2741: }
2742: /* Control never gets here */
2743:
2744: /* Match a bit-mapped character class, possibly repeatedly. This op code is
2745: used when all the characters in the class have values in the range 0-255,
2746: and either the matching is caseful, or the characters are in the range
2747: 0-127 when UTF-8 processing is enabled. The only difference between
2748: OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2749: encountered.
2750:
2751: First, look past the end of the item to see if there is repeat information
2752: following. Then obey similar code to character type repeats - written out
2753: again for speed. */
2754:
2755: case OP_NCLASS:
2756: case OP_CLASS:
2757: {
1.1.1.2 ! misho 2758: /* The data variable is saved across frames, so the byte map needs to
! 2759: be stored there. */
! 2760: #define BYTE_MAP ((pcre_uint8 *)data)
1.1 misho 2761: data = ecode + 1; /* Save for matching */
1.1.1.2 ! misho 2762: ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1 misho 2763:
2764: switch (*ecode)
2765: {
2766: case OP_CRSTAR:
2767: case OP_CRMINSTAR:
2768: case OP_CRPLUS:
2769: case OP_CRMINPLUS:
2770: case OP_CRQUERY:
2771: case OP_CRMINQUERY:
2772: c = *ecode++ - OP_CRSTAR;
2773: minimize = (c & 1) != 0;
2774: min = rep_min[c]; /* Pick up values from tables; */
2775: max = rep_max[c]; /* zero for max => infinity */
2776: if (max == 0) max = INT_MAX;
2777: break;
2778:
2779: case OP_CRRANGE:
2780: case OP_CRMINRANGE:
2781: minimize = (*ecode == OP_CRMINRANGE);
2782: min = GET2(ecode, 1);
1.1.1.2 ! misho 2783: max = GET2(ecode, 1 + IMM2_SIZE);
1.1 misho 2784: if (max == 0) max = INT_MAX;
1.1.1.2 ! misho 2785: ecode += 1 + 2 * IMM2_SIZE;
1.1 misho 2786: break;
2787:
2788: default: /* No repeat follows */
2789: min = max = 1;
2790: break;
2791: }
2792:
2793: /* First, ensure the minimum number of matches are present. */
2794:
1.1.1.2 ! misho 2795: #ifdef SUPPORT_UTF
! 2796: if (utf)
1.1 misho 2797: {
2798: for (i = 1; i <= min; i++)
2799: {
2800: if (eptr >= md->end_subject)
2801: {
2802: SCHECK_PARTIAL();
2803: RRETURN(MATCH_NOMATCH);
2804: }
2805: GETCHARINC(c, eptr);
2806: if (c > 255)
2807: {
2808: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2809: }
2810: else
1.1.1.2 ! misho 2811: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2812: }
2813: }
2814: else
2815: #endif
1.1.1.2 ! misho 2816: /* Not UTF mode */
1.1 misho 2817: {
2818: for (i = 1; i <= min; i++)
2819: {
2820: if (eptr >= md->end_subject)
2821: {
2822: SCHECK_PARTIAL();
2823: RRETURN(MATCH_NOMATCH);
2824: }
2825: c = *eptr++;
1.1.1.2 ! misho 2826: #ifndef COMPILE_PCRE8
! 2827: if (c > 255)
! 2828: {
! 2829: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2830: }
! 2831: else
! 2832: #endif
! 2833: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2834: }
2835: }
2836:
2837: /* If max == min we can continue with the main loop without the
2838: need to recurse. */
2839:
2840: if (min == max) continue;
2841:
2842: /* If minimizing, keep testing the rest of the expression and advancing
2843: the pointer while it matches the class. */
2844:
2845: if (minimize)
2846: {
1.1.1.2 ! misho 2847: #ifdef SUPPORT_UTF
! 2848: if (utf)
1.1 misho 2849: {
2850: for (fi = min;; fi++)
2851: {
2852: RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2853: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2854: if (fi >= max) RRETURN(MATCH_NOMATCH);
2855: if (eptr >= md->end_subject)
2856: {
2857: SCHECK_PARTIAL();
2858: RRETURN(MATCH_NOMATCH);
2859: }
2860: GETCHARINC(c, eptr);
2861: if (c > 255)
2862: {
2863: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2864: }
2865: else
1.1.1.2 ! misho 2866: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2867: }
2868: }
2869: else
2870: #endif
1.1.1.2 ! misho 2871: /* Not UTF mode */
1.1 misho 2872: {
2873: for (fi = min;; fi++)
2874: {
2875: RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2876: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2877: if (fi >= max) RRETURN(MATCH_NOMATCH);
2878: if (eptr >= md->end_subject)
2879: {
2880: SCHECK_PARTIAL();
2881: RRETURN(MATCH_NOMATCH);
2882: }
2883: c = *eptr++;
1.1.1.2 ! misho 2884: #ifndef COMPILE_PCRE8
! 2885: if (c > 255)
! 2886: {
! 2887: if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
! 2888: }
! 2889: else
! 2890: #endif
! 2891: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 2892: }
2893: }
2894: /* Control never gets here */
2895: }
2896:
2897: /* If maximizing, find the longest possible run, then work backwards. */
2898:
2899: else
2900: {
2901: pp = eptr;
2902:
1.1.1.2 ! misho 2903: #ifdef SUPPORT_UTF
! 2904: if (utf)
1.1 misho 2905: {
2906: for (i = min; i < max; i++)
2907: {
2908: int len = 1;
2909: if (eptr >= md->end_subject)
2910: {
2911: SCHECK_PARTIAL();
2912: break;
2913: }
2914: GETCHARLEN(c, eptr, len);
2915: if (c > 255)
2916: {
2917: if (op == OP_CLASS) break;
2918: }
2919: else
1.1.1.2 ! misho 2920: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1 misho 2921: eptr += len;
2922: }
2923: for (;;)
2924: {
2925: RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2926: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927: if (eptr-- == pp) break; /* Stop if tried at original pos */
2928: BACKCHAR(eptr);
2929: }
2930: }
2931: else
2932: #endif
1.1.1.2 ! misho 2933: /* Not UTF mode */
1.1 misho 2934: {
2935: for (i = min; i < max; i++)
2936: {
2937: if (eptr >= md->end_subject)
2938: {
2939: SCHECK_PARTIAL();
2940: break;
2941: }
2942: c = *eptr;
1.1.1.2 ! misho 2943: #ifndef COMPILE_PCRE8
! 2944: if (c > 255)
! 2945: {
! 2946: if (op == OP_CLASS) break;
! 2947: }
! 2948: else
! 2949: #endif
! 2950: if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1 misho 2951: eptr++;
2952: }
2953: while (eptr >= pp)
2954: {
2955: RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
2956: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2957: eptr--;
2958: }
2959: }
2960:
2961: RRETURN(MATCH_NOMATCH);
2962: }
1.1.1.2 ! misho 2963: #undef BYTE_MAP
1.1 misho 2964: }
2965: /* Control never gets here */
2966:
2967:
2968: /* Match an extended character class. This opcode is encountered only
2969: when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2970: mode, because Unicode properties are supported in non-UTF-8 mode. */
2971:
1.1.1.2 ! misho 2972: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1 misho 2973: case OP_XCLASS:
2974: {
2975: data = ecode + 1 + LINK_SIZE; /* Save for matching */
2976: ecode += GET(ecode, 1); /* Advance past the item */
2977:
2978: switch (*ecode)
2979: {
2980: case OP_CRSTAR:
2981: case OP_CRMINSTAR:
2982: case OP_CRPLUS:
2983: case OP_CRMINPLUS:
2984: case OP_CRQUERY:
2985: case OP_CRMINQUERY:
2986: c = *ecode++ - OP_CRSTAR;
2987: minimize = (c & 1) != 0;
2988: min = rep_min[c]; /* Pick up values from tables; */
2989: max = rep_max[c]; /* zero for max => infinity */
2990: if (max == 0) max = INT_MAX;
2991: break;
2992:
2993: case OP_CRRANGE:
2994: case OP_CRMINRANGE:
2995: minimize = (*ecode == OP_CRMINRANGE);
2996: min = GET2(ecode, 1);
1.1.1.2 ! misho 2997: max = GET2(ecode, 1 + IMM2_SIZE);
1.1 misho 2998: if (max == 0) max = INT_MAX;
1.1.1.2 ! misho 2999: ecode += 1 + 2 * IMM2_SIZE;
1.1 misho 3000: break;
3001:
3002: default: /* No repeat follows */
3003: min = max = 1;
3004: break;
3005: }
3006:
3007: /* First, ensure the minimum number of matches are present. */
3008:
3009: for (i = 1; i <= min; i++)
3010: {
3011: if (eptr >= md->end_subject)
3012: {
3013: SCHECK_PARTIAL();
3014: RRETURN(MATCH_NOMATCH);
3015: }
3016: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 3017: if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1 misho 3018: }
3019:
3020: /* If max == min we can continue with the main loop without the
3021: need to recurse. */
3022:
3023: if (min == max) continue;
3024:
3025: /* If minimizing, keep testing the rest of the expression and advancing
3026: the pointer while it matches the class. */
3027:
3028: if (minimize)
3029: {
3030: for (fi = min;; fi++)
3031: {
3032: RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3033: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3034: if (fi >= max) RRETURN(MATCH_NOMATCH);
3035: if (eptr >= md->end_subject)
3036: {
3037: SCHECK_PARTIAL();
3038: RRETURN(MATCH_NOMATCH);
3039: }
3040: GETCHARINCTEST(c, eptr);
1.1.1.2 ! misho 3041: if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1 misho 3042: }
3043: /* Control never gets here */
3044: }
3045:
3046: /* If maximizing, find the longest possible run, then work backwards. */
3047:
3048: else
3049: {
3050: pp = eptr;
3051: for (i = min; i < max; i++)
3052: {
3053: int len = 1;
3054: if (eptr >= md->end_subject)
3055: {
3056: SCHECK_PARTIAL();
3057: break;
3058: }
1.1.1.2 ! misho 3059: #ifdef SUPPORT_UTF
1.1 misho 3060: GETCHARLENTEST(c, eptr, len);
1.1.1.2 ! misho 3061: #else
! 3062: c = *eptr;
! 3063: #endif
! 3064: if (!PRIV(xclass)(c, data, utf)) break;
1.1 misho 3065: eptr += len;
3066: }
3067: for(;;)
3068: {
3069: RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3070: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3071: if (eptr-- == pp) break; /* Stop if tried at original pos */
1.1.1.2 ! misho 3072: #ifdef SUPPORT_UTF
! 3073: if (utf) BACKCHAR(eptr);
! 3074: #endif
1.1 misho 3075: }
3076: RRETURN(MATCH_NOMATCH);
3077: }
3078:
3079: /* Control never gets here */
3080: }
3081: #endif /* End of XCLASS */
3082:
3083: /* Match a single character, casefully */
3084:
3085: case OP_CHAR:
1.1.1.2 ! misho 3086: #ifdef SUPPORT_UTF
! 3087: if (utf)
1.1 misho 3088: {
3089: length = 1;
3090: ecode++;
3091: GETCHARLEN(fc, ecode, length);
3092: if (length > md->end_subject - eptr)
3093: {
3094: CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
3095: RRETURN(MATCH_NOMATCH);
3096: }
3097: while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3098: }
3099: else
3100: #endif
1.1.1.2 ! misho 3101: /* Not UTF mode */
1.1 misho 3102: {
3103: if (md->end_subject - eptr < 1)
3104: {
3105: SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
3106: RRETURN(MATCH_NOMATCH);
3107: }
3108: if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3109: ecode += 2;
3110: }
3111: break;
3112:
3113: /* Match a single character, caselessly. If we are at the end of the
3114: subject, give up immediately. */
3115:
3116: case OP_CHARI:
3117: if (eptr >= md->end_subject)
3118: {
3119: SCHECK_PARTIAL();
3120: RRETURN(MATCH_NOMATCH);
3121: }
3122:
1.1.1.2 ! misho 3123: #ifdef SUPPORT_UTF
! 3124: if (utf)
1.1 misho 3125: {
3126: length = 1;
3127: ecode++;
3128: GETCHARLEN(fc, ecode, length);
3129:
3130: /* If the pattern character's value is < 128, we have only one byte, and
3131: we know that its other case must also be one byte long, so we can use the
3132: fast lookup table. We know that there is at least one byte left in the
3133: subject. */
3134:
3135: if (fc < 128)
3136: {
1.1.1.2 ! misho 3137: if (md->lcc[fc]
! 3138: != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
! 3139: ecode++;
! 3140: eptr++;
1.1 misho 3141: }
3142:
3143: /* Otherwise we must pick up the subject character. Note that we cannot
3144: use the value of "length" to check for sufficient bytes left, because the
3145: other case of the character may have more or fewer bytes. */
3146:
3147: else
3148: {
3149: unsigned int dc;
3150: GETCHARINC(dc, eptr);
3151: ecode += length;
3152:
3153: /* If we have Unicode property support, we can use it to test the other
3154: case of the character, if there is one. */
3155:
3156: if (fc != dc)
3157: {
3158: #ifdef SUPPORT_UCP
3159: if (dc != UCD_OTHERCASE(fc))
3160: #endif
3161: RRETURN(MATCH_NOMATCH);
3162: }
3163: }
3164: }
3165: else
1.1.1.2 ! misho 3166: #endif /* SUPPORT_UTF */
1.1 misho 3167:
1.1.1.2 ! misho 3168: /* Not UTF mode */
1.1 misho 3169: {
1.1.1.2 ! misho 3170: if (TABLE_GET(ecode[1], md->lcc, ecode[1])
! 3171: != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
! 3172: eptr++;
1.1 misho 3173: ecode += 2;
3174: }
3175: break;
3176:
3177: /* Match a single character repeatedly. */
3178:
3179: case OP_EXACT:
3180: case OP_EXACTI:
3181: min = max = GET2(ecode, 1);
1.1.1.2 ! misho 3182: ecode += 1 + IMM2_SIZE;
1.1 misho 3183: goto REPEATCHAR;
3184:
3185: case OP_POSUPTO:
3186: case OP_POSUPTOI:
3187: possessive = TRUE;
3188: /* Fall through */
3189:
3190: case OP_UPTO:
3191: case OP_UPTOI:
3192: case OP_MINUPTO:
3193: case OP_MINUPTOI:
3194: min = 0;
3195: max = GET2(ecode, 1);
3196: minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
1.1.1.2 ! misho 3197: ecode += 1 + IMM2_SIZE;
1.1 misho 3198: goto REPEATCHAR;
3199:
3200: case OP_POSSTAR:
3201: case OP_POSSTARI:
3202: possessive = TRUE;
3203: min = 0;
3204: max = INT_MAX;
3205: ecode++;
3206: goto REPEATCHAR;
3207:
3208: case OP_POSPLUS:
3209: case OP_POSPLUSI:
3210: possessive = TRUE;
3211: min = 1;
3212: max = INT_MAX;
3213: ecode++;
3214: goto REPEATCHAR;
3215:
3216: case OP_POSQUERY:
3217: case OP_POSQUERYI:
3218: possessive = TRUE;
3219: min = 0;
3220: max = 1;
3221: ecode++;
3222: goto REPEATCHAR;
3223:
3224: case OP_STAR:
3225: case OP_STARI:
3226: case OP_MINSTAR:
3227: case OP_MINSTARI:
3228: case OP_PLUS:
3229: case OP_PLUSI:
3230: case OP_MINPLUS:
3231: case OP_MINPLUSI:
3232: case OP_QUERY:
3233: case OP_QUERYI:
3234: case OP_MINQUERY:
3235: case OP_MINQUERYI:
3236: c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3237: minimize = (c & 1) != 0;
3238: min = rep_min[c]; /* Pick up values from tables; */
3239: max = rep_max[c]; /* zero for max => infinity */
3240: if (max == 0) max = INT_MAX;
3241:
3242: /* Common code for all repeated single-character matches. */
3243:
3244: REPEATCHAR:
1.1.1.2 ! misho 3245: #ifdef SUPPORT_UTF
! 3246: if (utf)
1.1 misho 3247: {
3248: length = 1;
3249: charptr = ecode;
3250: GETCHARLEN(fc, ecode, length);
3251: ecode += length;
3252:
3253: /* Handle multibyte character matching specially here. There is
3254: support for caseless matching if UCP support is present. */
3255:
3256: if (length > 1)
3257: {
3258: #ifdef SUPPORT_UCP
3259: unsigned int othercase;
3260: if (op >= OP_STARI && /* Caseless */
3261: (othercase = UCD_OTHERCASE(fc)) != fc)
1.1.1.2 ! misho 3262: oclength = PRIV(ord2utf)(othercase, occhars);
1.1 misho 3263: else oclength = 0;
3264: #endif /* SUPPORT_UCP */
3265:
3266: for (i = 1; i <= min; i++)
3267: {
3268: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3269: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3270: #ifdef SUPPORT_UCP
3271: else if (oclength > 0 &&
3272: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3273: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3274: #endif /* SUPPORT_UCP */
3275: else
3276: {
3277: CHECK_PARTIAL();
3278: RRETURN(MATCH_NOMATCH);
3279: }
3280: }
3281:
3282: if (min == max) continue;
3283:
3284: if (minimize)
3285: {
3286: for (fi = min;; fi++)
3287: {
3288: RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3289: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3290: if (fi >= max) RRETURN(MATCH_NOMATCH);
3291: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3292: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3293: #ifdef SUPPORT_UCP
3294: else if (oclength > 0 &&
3295: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3296: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3297: #endif /* SUPPORT_UCP */
3298: else
3299: {
3300: CHECK_PARTIAL();
3301: RRETURN(MATCH_NOMATCH);
3302: }
3303: }
3304: /* Control never gets here */
3305: }
3306:
3307: else /* Maximize */
3308: {
3309: pp = eptr;
3310: for (i = min; i < max; i++)
3311: {
3312: if (eptr <= md->end_subject - length &&
1.1.1.2 ! misho 3313: memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1 misho 3314: #ifdef SUPPORT_UCP
3315: else if (oclength > 0 &&
3316: eptr <= md->end_subject - oclength &&
1.1.1.2 ! misho 3317: memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.1 misho 3318: #endif /* SUPPORT_UCP */
3319: else
3320: {
3321: CHECK_PARTIAL();
3322: break;
3323: }
3324: }
3325:
3326: if (possessive) continue;
3327:
3328: for(;;)
3329: {
3330: RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3331: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3332: if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3333: #ifdef SUPPORT_UCP
3334: eptr--;
3335: BACKCHAR(eptr);
3336: #else /* without SUPPORT_UCP */
3337: eptr -= length;
3338: #endif /* SUPPORT_UCP */
3339: }
3340: }
3341: /* Control never gets here */
3342: }
3343:
3344: /* If the length of a UTF-8 character is 1, we fall through here, and
3345: obey the code as for non-UTF-8 characters below, though in this case the
3346: value of fc will always be < 128. */
3347: }
3348: else
1.1.1.2 ! misho 3349: #endif /* SUPPORT_UTF */
! 3350: /* When not in UTF-8 mode, load a single-byte character. */
! 3351: fc = *ecode++;
1.1 misho 3352:
1.1.1.2 ! misho 3353: /* The value of fc at this point is always one character, though we may
! 3354: or may not be in UTF mode. The code is duplicated for the caseless and
1.1 misho 3355: caseful cases, for speed, since matching characters is likely to be quite
3356: common. First, ensure the minimum number of matches are present. If min =
3357: max, continue at the same level without recursing. Otherwise, if
3358: minimizing, keep trying the rest of the expression and advancing one
3359: matching character if failing, up to the maximum. Alternatively, if
3360: maximizing, find the maximum number of characters and work backwards. */
3361:
3362: DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3363: max, eptr));
3364:
3365: if (op >= OP_STARI) /* Caseless */
3366: {
1.1.1.2 ! misho 3367: #ifdef COMPILE_PCRE8
! 3368: /* fc must be < 128 if UTF is enabled. */
! 3369: foc = md->fcc[fc];
! 3370: #else
! 3371: #ifdef SUPPORT_UTF
! 3372: #ifdef SUPPORT_UCP
! 3373: if (utf && fc > 127)
! 3374: foc = UCD_OTHERCASE(fc);
! 3375: #else
! 3376: if (utf && fc > 127)
! 3377: foc = fc;
! 3378: #endif /* SUPPORT_UCP */
! 3379: else
! 3380: #endif /* SUPPORT_UTF */
! 3381: foc = TABLE_GET(fc, md->fcc, fc);
! 3382: #endif /* COMPILE_PCRE8 */
! 3383:
1.1 misho 3384: for (i = 1; i <= min; i++)
3385: {
3386: if (eptr >= md->end_subject)
3387: {
3388: SCHECK_PARTIAL();
3389: RRETURN(MATCH_NOMATCH);
3390: }
1.1.1.2 ! misho 3391: if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
! 3392: eptr++;
1.1 misho 3393: }
3394: if (min == max) continue;
3395: if (minimize)
3396: {
3397: for (fi = min;; fi++)
3398: {
3399: RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3400: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3401: if (fi >= max) RRETURN(MATCH_NOMATCH);
3402: if (eptr >= md->end_subject)
3403: {
3404: SCHECK_PARTIAL();
3405: RRETURN(MATCH_NOMATCH);
3406: }
1.1.1.2 ! misho 3407: if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
! 3408: eptr++;
1.1 misho 3409: }
3410: /* Control never gets here */
3411: }
3412: else /* Maximize */
3413: {
3414: pp = eptr;
3415: for (i = min; i < max; i++)
3416: {
3417: if (eptr >= md->end_subject)
3418: {
3419: SCHECK_PARTIAL();
3420: break;
3421: }
1.1.1.2 ! misho 3422: if (fc != *eptr && foc != *eptr) break;
1.1 misho 3423: eptr++;
3424: }
3425:
3426: if (possessive) continue;
3427:
3428: while (eptr >= pp)
3429: {
3430: RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3431: eptr--;
3432: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3433: }
3434: RRETURN(MATCH_NOMATCH);
3435: }
3436: /* Control never gets here */
3437: }
3438:
3439: /* Caseful comparisons (includes all multi-byte characters) */
3440:
3441: else
3442: {
3443: for (i = 1; i <= min; i++)
3444: {
3445: if (eptr >= md->end_subject)
3446: {
3447: SCHECK_PARTIAL();
3448: RRETURN(MATCH_NOMATCH);
3449: }
3450: if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3451: }
3452:
3453: if (min == max) continue;
3454:
3455: if (minimize)
3456: {
3457: for (fi = min;; fi++)
3458: {
3459: RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3460: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3461: if (fi >= max) RRETURN(MATCH_NOMATCH);
3462: if (eptr >= md->end_subject)
3463: {
3464: SCHECK_PARTIAL();
3465: RRETURN(MATCH_NOMATCH);
3466: }
3467: if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3468: }
3469: /* Control never gets here */
3470: }
3471: else /* Maximize */
3472: {
3473: pp = eptr;
3474: for (i = min; i < max; i++)
3475: {
3476: if (eptr >= md->end_subject)
3477: {
3478: SCHECK_PARTIAL();
3479: break;
3480: }
3481: if (fc != *eptr) break;
3482: eptr++;
3483: }
3484: if (possessive) continue;
3485:
3486: while (eptr >= pp)
3487: {
3488: RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3489: eptr--;
3490: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3491: }
3492: RRETURN(MATCH_NOMATCH);
3493: }
3494: }
3495: /* Control never gets here */
3496:
3497: /* Match a negated single one-byte character. The character we are
3498: checking can be multibyte. */
3499:
3500: case OP_NOT:
3501: case OP_NOTI:
3502: if (eptr >= md->end_subject)
3503: {
3504: SCHECK_PARTIAL();
3505: RRETURN(MATCH_NOMATCH);
3506: }
3507: ecode++;
3508: GETCHARINCTEST(c, eptr);
3509: if (op == OP_NOTI) /* The caseless case */
3510: {
1.1.1.2 ! misho 3511: register unsigned int ch, och;
! 3512: ch = *ecode++;
! 3513: #ifdef COMPILE_PCRE8
! 3514: /* ch must be < 128 if UTF is enabled. */
! 3515: och = md->fcc[ch];
! 3516: #else
! 3517: #ifdef SUPPORT_UTF
! 3518: #ifdef SUPPORT_UCP
! 3519: if (utf && ch > 127)
! 3520: och = UCD_OTHERCASE(ch);
! 3521: #else
! 3522: if (utf && ch > 127)
! 3523: och = ch;
! 3524: #endif /* SUPPORT_UCP */
! 3525: else
! 3526: #endif /* SUPPORT_UTF */
! 3527: och = TABLE_GET(ch, md->fcc, ch);
! 3528: #endif /* COMPILE_PCRE8 */
! 3529: if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
1.1 misho 3530: }
3531: else /* Caseful */
3532: {
3533: if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
3534: }
3535: break;
3536:
3537: /* Match a negated single one-byte character repeatedly. This is almost a
3538: repeat of the code for a repeated single character, but I haven't found a
3539: nice way of commoning these up that doesn't require a test of the
3540: positive/negative option for each character match. Maybe that wouldn't add
3541: very much to the time taken, but character matching *is* what this is all
3542: about... */
3543:
3544: case OP_NOTEXACT:
3545: case OP_NOTEXACTI:
3546: min = max = GET2(ecode, 1);
1.1.1.2 ! misho 3547: ecode += 1 + IMM2_SIZE;
1.1 misho 3548: goto REPEATNOTCHAR;
3549:
3550: case OP_NOTUPTO:
3551: case OP_NOTUPTOI:
3552: case OP_NOTMINUPTO:
3553: case OP_NOTMINUPTOI:
3554: min = 0;
3555: max = GET2(ecode, 1);
3556: minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
1.1.1.2 ! misho 3557: ecode += 1 + IMM2_SIZE;
1.1 misho 3558: goto REPEATNOTCHAR;
3559:
3560: case OP_NOTPOSSTAR:
3561: case OP_NOTPOSSTARI:
3562: possessive = TRUE;
3563: min = 0;
3564: max = INT_MAX;
3565: ecode++;
3566: goto REPEATNOTCHAR;
3567:
3568: case OP_NOTPOSPLUS:
3569: case OP_NOTPOSPLUSI:
3570: possessive = TRUE;
3571: min = 1;
3572: max = INT_MAX;
3573: ecode++;
3574: goto REPEATNOTCHAR;
3575:
3576: case OP_NOTPOSQUERY:
3577: case OP_NOTPOSQUERYI:
3578: possessive = TRUE;
3579: min = 0;
3580: max = 1;
3581: ecode++;
3582: goto REPEATNOTCHAR;
3583:
3584: case OP_NOTPOSUPTO:
3585: case OP_NOTPOSUPTOI:
3586: possessive = TRUE;
3587: min = 0;
3588: max = GET2(ecode, 1);
1.1.1.2 ! misho 3589: ecode += 1 + IMM2_SIZE;
1.1 misho 3590: goto REPEATNOTCHAR;
3591:
3592: case OP_NOTSTAR:
3593: case OP_NOTSTARI:
3594: case OP_NOTMINSTAR:
3595: case OP_NOTMINSTARI:
3596: case OP_NOTPLUS:
3597: case OP_NOTPLUSI:
3598: case OP_NOTMINPLUS:
3599: case OP_NOTMINPLUSI:
3600: case OP_NOTQUERY:
3601: case OP_NOTQUERYI:
3602: case OP_NOTMINQUERY:
3603: case OP_NOTMINQUERYI:
3604: c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3605: minimize = (c & 1) != 0;
3606: min = rep_min[c]; /* Pick up values from tables; */
3607: max = rep_max[c]; /* zero for max => infinity */
3608: if (max == 0) max = INT_MAX;
3609:
3610: /* Common code for all repeated single-byte matches. */
3611:
3612: REPEATNOTCHAR:
3613: fc = *ecode++;
3614:
3615: /* The code is duplicated for the caseless and caseful cases, for speed,
3616: since matching characters is likely to be quite common. First, ensure the
3617: minimum number of matches are present. If min = max, continue at the same
3618: level without recursing. Otherwise, if minimizing, keep trying the rest of
3619: the expression and advancing one matching character if failing, up to the
3620: maximum. Alternatively, if maximizing, find the maximum number of
3621: characters and work backwards. */
3622:
3623: DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3624: max, eptr));
3625:
3626: if (op >= OP_NOTSTARI) /* Caseless */
3627: {
1.1.1.2 ! misho 3628: #ifdef COMPILE_PCRE8
! 3629: /* fc must be < 128 if UTF is enabled. */
! 3630: foc = md->fcc[fc];
! 3631: #else
! 3632: #ifdef SUPPORT_UTF
! 3633: #ifdef SUPPORT_UCP
! 3634: if (utf && fc > 127)
! 3635: foc = UCD_OTHERCASE(fc);
! 3636: #else
! 3637: if (utf && fc > 127)
! 3638: foc = fc;
! 3639: #endif /* SUPPORT_UCP */
! 3640: else
! 3641: #endif /* SUPPORT_UTF */
! 3642: foc = TABLE_GET(fc, md->fcc, fc);
! 3643: #endif /* COMPILE_PCRE8 */
1.1 misho 3644:
1.1.1.2 ! misho 3645: #ifdef SUPPORT_UTF
! 3646: if (utf)
1.1 misho 3647: {
3648: register unsigned int d;
3649: for (i = 1; i <= min; i++)
3650: {
3651: if (eptr >= md->end_subject)
3652: {
3653: SCHECK_PARTIAL();
3654: RRETURN(MATCH_NOMATCH);
3655: }
3656: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3657: if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3658: }
3659: }
3660: else
3661: #endif
1.1.1.2 ! misho 3662: /* Not UTF mode */
1.1 misho 3663: {
3664: for (i = 1; i <= min; i++)
3665: {
3666: if (eptr >= md->end_subject)
3667: {
3668: SCHECK_PARTIAL();
3669: RRETURN(MATCH_NOMATCH);
3670: }
1.1.1.2 ! misho 3671: if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
! 3672: eptr++;
1.1 misho 3673: }
3674: }
3675:
3676: if (min == max) continue;
3677:
3678: if (minimize)
3679: {
1.1.1.2 ! misho 3680: #ifdef SUPPORT_UTF
! 3681: if (utf)
1.1 misho 3682: {
3683: register unsigned int d;
3684: for (fi = min;; fi++)
3685: {
3686: RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3687: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3688: if (fi >= max) RRETURN(MATCH_NOMATCH);
3689: if (eptr >= md->end_subject)
3690: {
3691: SCHECK_PARTIAL();
3692: RRETURN(MATCH_NOMATCH);
3693: }
3694: GETCHARINC(d, eptr);
1.1.1.2 ! misho 3695: if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1 misho 3696: }
3697: }
3698: else
3699: #endif
1.1.1.2 ! misho 3700: /* Not UTF mode */
1.1 misho 3701: {
3702: for (fi = min;; fi++)
3703: {
3704: RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3705: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3706: if (fi >= max) RRETURN(MATCH_NOMATCH);
3707: if (eptr >= md->end_subject)
3708: {
3709: SCHECK_PARTIAL();
3710: RRETURN(MATCH_NOMATCH);
3711: }
1.1.1.2 ! misho 3712: if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
! 3713: eptr++;
1.1 misho 3714: }
3715: }
3716: /* Control never gets here */
3717: }
3718:
3719: /* Maximize case */
3720:
3721: else
3722: {
3723: pp = eptr;
3724:
1.1.1.2 ! misho 3725: #ifdef SUPPORT_UTF
! 3726: if (utf)
1.1 misho 3727: {
3728: register unsigned int d;
3729: for (i = min; i < max; i++)
3730: {
3731: int len = 1;
3732: if (eptr >= md->end_subject)
3733: {
3734: SCHECK_PARTIAL();
3735: break;
3736: }
3737: GETCHARLEN(d, eptr, len);
1.1.1.2 ! misho 3738: if (fc == d || (unsigned int)foc == d) break;
1.1 misho 3739: eptr += len;
3740: }
1.1.1.2 ! misho 3741: if (possessive) continue;
! 3742: for(;;)
1.1 misho 3743: {
3744: RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3745: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3746: if (eptr-- == pp) break; /* Stop if tried at original pos */
3747: BACKCHAR(eptr);
3748: }
3749: }
3750: else
3751: #endif
1.1.1.2 ! misho 3752: /* Not UTF mode */
1.1 misho 3753: {
3754: for (i = min; i < max; i++)
3755: {
3756: if (eptr >= md->end_subject)
3757: {
3758: SCHECK_PARTIAL();
3759: break;
3760: }
1.1.1.2 ! misho 3761: if (fc == *eptr || foc == *eptr) break;
1.1 misho 3762: eptr++;
3763: }
3764: if (possessive) continue;
3765: while (eptr >= pp)
3766: {
3767: RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3768: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769: eptr--;
3770: }
3771: }
3772:
3773: RRETURN(MATCH_NOMATCH);
3774: }
3775: /* Control never gets here */
3776: }
3777:
3778: /* Caseful comparisons */
3779:
3780: else
3781: {
1.1.1.2 ! misho 3782: #ifdef SUPPORT_UTF
! 3783: if (utf)
1.1 misho 3784: {
3785: register unsigned int d;
3786: for (i = 1; i <= min; i++)
3787: {
3788: if (eptr >= md->end_subject)
3789: {
3790: SCHECK_PARTIAL();
3791: RRETURN(MATCH_NOMATCH);
3792: }
3793: GETCHARINC(d, eptr);
3794: if (fc == d) RRETURN(MATCH_NOMATCH);
3795: }
3796: }
3797: else
3798: #endif
1.1.1.2 ! misho 3799: /* Not UTF mode */
1.1 misho 3800: {
3801: for (i = 1; i <= min; i++)
3802: {
3803: if (eptr >= md->end_subject)
3804: {
3805: SCHECK_PARTIAL();
3806: RRETURN(MATCH_NOMATCH);
3807: }
3808: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3809: }
3810: }
3811:
3812: if (min == max) continue;
3813:
3814: if (minimize)
3815: {
1.1.1.2 ! misho 3816: #ifdef SUPPORT_UTF
! 3817: if (utf)
1.1 misho 3818: {
3819: register unsigned int d;
3820: for (fi = min;; fi++)
3821: {
3822: RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3823: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3824: if (fi >= max) RRETURN(MATCH_NOMATCH);
3825: if (eptr >= md->end_subject)
3826: {
3827: SCHECK_PARTIAL();
3828: RRETURN(MATCH_NOMATCH);
3829: }
3830: GETCHARINC(d, eptr);
3831: if (fc == d) RRETURN(MATCH_NOMATCH);
3832: }
3833: }
3834: else
3835: #endif
1.1.1.2 ! misho 3836: /* Not UTF mode */
1.1 misho 3837: {
3838: for (fi = min;; fi++)
3839: {
3840: RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3841: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3842: if (fi >= max) RRETURN(MATCH_NOMATCH);
3843: if (eptr >= md->end_subject)
3844: {
3845: SCHECK_PARTIAL();
3846: RRETURN(MATCH_NOMATCH);
3847: }
3848: if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3849: }
3850: }
3851: /* Control never gets here */
3852: }
3853:
3854: /* Maximize case */
3855:
3856: else
3857: {
3858: pp = eptr;
3859:
1.1.1.2 ! misho 3860: #ifdef SUPPORT_UTF
! 3861: if (utf)
1.1 misho 3862: {
3863: register unsigned int d;
3864: for (i = min; i < max; i++)
3865: {
3866: int len = 1;
3867: if (eptr >= md->end_subject)
3868: {
3869: SCHECK_PARTIAL();
3870: break;
3871: }
3872: GETCHARLEN(d, eptr, len);
3873: if (fc == d) break;
3874: eptr += len;
3875: }
3876: if (possessive) continue;
3877: for(;;)
3878: {
3879: RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3880: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3881: if (eptr-- == pp) break; /* Stop if tried at original pos */
3882: BACKCHAR(eptr);
3883: }
3884: }
3885: else
3886: #endif
1.1.1.2 ! misho 3887: /* Not UTF mode */
1.1 misho 3888: {
3889: for (i = min; i < max; i++)
3890: {
3891: if (eptr >= md->end_subject)
3892: {
3893: SCHECK_PARTIAL();
3894: break;
3895: }
3896: if (fc == *eptr) break;
3897: eptr++;
3898: }
3899: if (possessive) continue;
3900: while (eptr >= pp)
3901: {
3902: RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3903: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3904: eptr--;
3905: }
3906: }
3907:
3908: RRETURN(MATCH_NOMATCH);
3909: }
3910: }
3911: /* Control never gets here */
3912:
3913: /* Match a single character type repeatedly; several different opcodes
3914: share code. This is very similar to the code for single characters, but we
3915: repeat it in the interests of efficiency. */
3916:
3917: case OP_TYPEEXACT:
3918: min = max = GET2(ecode, 1);
3919: minimize = TRUE;
1.1.1.2 ! misho 3920: ecode += 1 + IMM2_SIZE;
1.1 misho 3921: goto REPEATTYPE;
3922:
3923: case OP_TYPEUPTO:
3924: case OP_TYPEMINUPTO:
3925: min = 0;
3926: max = GET2(ecode, 1);
3927: minimize = *ecode == OP_TYPEMINUPTO;
1.1.1.2 ! misho 3928: ecode += 1 + IMM2_SIZE;
1.1 misho 3929: goto REPEATTYPE;
3930:
3931: case OP_TYPEPOSSTAR:
3932: possessive = TRUE;
3933: min = 0;
3934: max = INT_MAX;
3935: ecode++;
3936: goto REPEATTYPE;
3937:
3938: case OP_TYPEPOSPLUS:
3939: possessive = TRUE;
3940: min = 1;
3941: max = INT_MAX;
3942: ecode++;
3943: goto REPEATTYPE;
3944:
3945: case OP_TYPEPOSQUERY:
3946: possessive = TRUE;
3947: min = 0;
3948: max = 1;
3949: ecode++;
3950: goto REPEATTYPE;
3951:
3952: case OP_TYPEPOSUPTO:
3953: possessive = TRUE;
3954: min = 0;
3955: max = GET2(ecode, 1);
1.1.1.2 ! misho 3956: ecode += 1 + IMM2_SIZE;
1.1 misho 3957: goto REPEATTYPE;
3958:
3959: case OP_TYPESTAR:
3960: case OP_TYPEMINSTAR:
3961: case OP_TYPEPLUS:
3962: case OP_TYPEMINPLUS:
3963: case OP_TYPEQUERY:
3964: case OP_TYPEMINQUERY:
3965: c = *ecode++ - OP_TYPESTAR;
3966: minimize = (c & 1) != 0;
3967: min = rep_min[c]; /* Pick up values from tables; */
3968: max = rep_max[c]; /* zero for max => infinity */
3969: if (max == 0) max = INT_MAX;
3970:
3971: /* Common code for all repeated single character type matches. Note that
3972: in UTF-8 mode, '.' matches a character of any length, but for the other
3973: character types, the valid characters are all one-byte long. */
3974:
3975: REPEATTYPE:
3976: ctype = *ecode++; /* Code for the character type */
3977:
3978: #ifdef SUPPORT_UCP
3979: if (ctype == OP_PROP || ctype == OP_NOTPROP)
3980: {
3981: prop_fail_result = ctype == OP_NOTPROP;
3982: prop_type = *ecode++;
3983: prop_value = *ecode++;
3984: }
3985: else prop_type = -1;
3986: #endif
3987:
3988: /* First, ensure the minimum number of matches are present. Use inline
3989: code for maximizing the speed, and do the type test once at the start
3990: (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3991: is tidier. Also separate the UCP code, which can be the same for both UTF-8
3992: and single-bytes. */
3993:
3994: if (min > 0)
3995: {
3996: #ifdef SUPPORT_UCP
3997: if (prop_type >= 0)
3998: {
3999: switch(prop_type)
4000: {
4001: case PT_ANY:
4002: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4003: for (i = 1; i <= min; i++)
4004: {
4005: if (eptr >= md->end_subject)
4006: {
4007: SCHECK_PARTIAL();
4008: RRETURN(MATCH_NOMATCH);
4009: }
4010: GETCHARINCTEST(c, eptr);
4011: }
4012: break;
4013:
4014: case PT_LAMP:
4015: for (i = 1; i <= min; i++)
4016: {
4017: int chartype;
4018: if (eptr >= md->end_subject)
4019: {
4020: SCHECK_PARTIAL();
4021: RRETURN(MATCH_NOMATCH);
4022: }
4023: GETCHARINCTEST(c, eptr);
4024: chartype = UCD_CHARTYPE(c);
4025: if ((chartype == ucp_Lu ||
4026: chartype == ucp_Ll ||
4027: chartype == ucp_Lt) == prop_fail_result)
4028: RRETURN(MATCH_NOMATCH);
4029: }
4030: break;
4031:
4032: case PT_GC:
4033: for (i = 1; i <= min; i++)
4034: {
4035: if (eptr >= md->end_subject)
4036: {
4037: SCHECK_PARTIAL();
4038: RRETURN(MATCH_NOMATCH);
4039: }
4040: GETCHARINCTEST(c, eptr);
4041: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4042: RRETURN(MATCH_NOMATCH);
4043: }
4044: break;
4045:
4046: case PT_PC:
4047: for (i = 1; i <= min; i++)
4048: {
4049: if (eptr >= md->end_subject)
4050: {
4051: SCHECK_PARTIAL();
4052: RRETURN(MATCH_NOMATCH);
4053: }
4054: GETCHARINCTEST(c, eptr);
4055: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4056: RRETURN(MATCH_NOMATCH);
4057: }
4058: break;
4059:
4060: case PT_SC:
4061: for (i = 1; i <= min; i++)
4062: {
4063: if (eptr >= md->end_subject)
4064: {
4065: SCHECK_PARTIAL();
4066: RRETURN(MATCH_NOMATCH);
4067: }
4068: GETCHARINCTEST(c, eptr);
4069: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4070: RRETURN(MATCH_NOMATCH);
4071: }
4072: break;
4073:
4074: case PT_ALNUM:
4075: for (i = 1; i <= min; i++)
4076: {
4077: int category;
4078: if (eptr >= md->end_subject)
4079: {
4080: SCHECK_PARTIAL();
4081: RRETURN(MATCH_NOMATCH);
4082: }
4083: GETCHARINCTEST(c, eptr);
4084: category = UCD_CATEGORY(c);
4085: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4086: RRETURN(MATCH_NOMATCH);
4087: }
4088: break;
4089:
4090: case PT_SPACE: /* Perl space */
4091: for (i = 1; i <= min; i++)
4092: {
4093: if (eptr >= md->end_subject)
4094: {
4095: SCHECK_PARTIAL();
4096: RRETURN(MATCH_NOMATCH);
4097: }
4098: GETCHARINCTEST(c, eptr);
4099: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4100: c == CHAR_FF || c == CHAR_CR)
4101: == prop_fail_result)
4102: RRETURN(MATCH_NOMATCH);
4103: }
4104: break;
4105:
4106: case PT_PXSPACE: /* POSIX space */
4107: for (i = 1; i <= min; i++)
4108: {
4109: if (eptr >= md->end_subject)
4110: {
4111: SCHECK_PARTIAL();
4112: RRETURN(MATCH_NOMATCH);
4113: }
4114: GETCHARINCTEST(c, eptr);
4115: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4116: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4117: == prop_fail_result)
4118: RRETURN(MATCH_NOMATCH);
4119: }
4120: break;
4121:
4122: case PT_WORD:
4123: for (i = 1; i <= min; i++)
4124: {
4125: int category;
4126: if (eptr >= md->end_subject)
4127: {
4128: SCHECK_PARTIAL();
4129: RRETURN(MATCH_NOMATCH);
4130: }
4131: GETCHARINCTEST(c, eptr);
4132: category = UCD_CATEGORY(c);
4133: if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4134: == prop_fail_result)
4135: RRETURN(MATCH_NOMATCH);
4136: }
4137: break;
4138:
4139: /* This should not occur */
4140:
4141: default:
4142: RRETURN(PCRE_ERROR_INTERNAL);
4143: }
4144: }
4145:
4146: /* Match extended Unicode sequences. We will get here only if the
4147: support is in the binary; otherwise a compile-time error occurs. */
4148:
4149: else if (ctype == OP_EXTUNI)
4150: {
4151: for (i = 1; i <= min; i++)
4152: {
4153: if (eptr >= md->end_subject)
4154: {
4155: SCHECK_PARTIAL();
4156: RRETURN(MATCH_NOMATCH);
4157: }
4158: GETCHARINCTEST(c, eptr);
4159: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4160: while (eptr < md->end_subject)
4161: {
4162: int len = 1;
1.1.1.2 ! misho 4163: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1 misho 4164: if (UCD_CATEGORY(c) != ucp_M) break;
4165: eptr += len;
4166: }
4167: }
4168: }
4169:
4170: else
4171: #endif /* SUPPORT_UCP */
4172:
4173: /* Handle all other cases when the coding is UTF-8 */
4174:
1.1.1.2 ! misho 4175: #ifdef SUPPORT_UTF
! 4176: if (utf) switch(ctype)
1.1 misho 4177: {
4178: case OP_ANY:
4179: for (i = 1; i <= min; i++)
4180: {
4181: if (eptr >= md->end_subject)
4182: {
4183: SCHECK_PARTIAL();
4184: RRETURN(MATCH_NOMATCH);
4185: }
4186: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4187: eptr++;
1.1.1.2 ! misho 4188: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4189: }
4190: break;
4191:
4192: case OP_ALLANY:
4193: for (i = 1; i <= min; i++)
4194: {
4195: if (eptr >= md->end_subject)
4196: {
4197: SCHECK_PARTIAL();
4198: RRETURN(MATCH_NOMATCH);
4199: }
4200: eptr++;
1.1.1.2 ! misho 4201: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4202: }
4203: break;
4204:
4205: case OP_ANYBYTE:
4206: if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4207: eptr += min;
4208: break;
4209:
4210: case OP_ANYNL:
4211: for (i = 1; i <= min; i++)
4212: {
4213: if (eptr >= md->end_subject)
4214: {
4215: SCHECK_PARTIAL();
4216: RRETURN(MATCH_NOMATCH);
4217: }
4218: GETCHARINC(c, eptr);
4219: switch(c)
4220: {
4221: default: RRETURN(MATCH_NOMATCH);
4222:
4223: case 0x000d:
4224: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4225: break;
4226:
4227: case 0x000a:
4228: break;
4229:
4230: case 0x000b:
4231: case 0x000c:
4232: case 0x0085:
4233: case 0x2028:
4234: case 0x2029:
4235: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4236: break;
4237: }
4238: }
4239: break;
4240:
4241: case OP_NOT_HSPACE:
4242: for (i = 1; i <= min; i++)
4243: {
4244: if (eptr >= md->end_subject)
4245: {
4246: SCHECK_PARTIAL();
4247: RRETURN(MATCH_NOMATCH);
4248: }
4249: GETCHARINC(c, eptr);
4250: switch(c)
4251: {
4252: default: break;
4253: case 0x09: /* HT */
4254: case 0x20: /* SPACE */
4255: case 0xa0: /* NBSP */
4256: case 0x1680: /* OGHAM SPACE MARK */
4257: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4258: case 0x2000: /* EN QUAD */
4259: case 0x2001: /* EM QUAD */
4260: case 0x2002: /* EN SPACE */
4261: case 0x2003: /* EM SPACE */
4262: case 0x2004: /* THREE-PER-EM SPACE */
4263: case 0x2005: /* FOUR-PER-EM SPACE */
4264: case 0x2006: /* SIX-PER-EM SPACE */
4265: case 0x2007: /* FIGURE SPACE */
4266: case 0x2008: /* PUNCTUATION SPACE */
4267: case 0x2009: /* THIN SPACE */
4268: case 0x200A: /* HAIR SPACE */
4269: case 0x202f: /* NARROW NO-BREAK SPACE */
4270: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4271: case 0x3000: /* IDEOGRAPHIC SPACE */
4272: RRETURN(MATCH_NOMATCH);
4273: }
4274: }
4275: break;
4276:
4277: case OP_HSPACE:
4278: for (i = 1; i <= min; i++)
4279: {
4280: if (eptr >= md->end_subject)
4281: {
4282: SCHECK_PARTIAL();
4283: RRETURN(MATCH_NOMATCH);
4284: }
4285: GETCHARINC(c, eptr);
4286: switch(c)
4287: {
4288: default: RRETURN(MATCH_NOMATCH);
4289: case 0x09: /* HT */
4290: case 0x20: /* SPACE */
4291: case 0xa0: /* NBSP */
4292: case 0x1680: /* OGHAM SPACE MARK */
4293: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4294: case 0x2000: /* EN QUAD */
4295: case 0x2001: /* EM QUAD */
4296: case 0x2002: /* EN SPACE */
4297: case 0x2003: /* EM SPACE */
4298: case 0x2004: /* THREE-PER-EM SPACE */
4299: case 0x2005: /* FOUR-PER-EM SPACE */
4300: case 0x2006: /* SIX-PER-EM SPACE */
4301: case 0x2007: /* FIGURE SPACE */
4302: case 0x2008: /* PUNCTUATION SPACE */
4303: case 0x2009: /* THIN SPACE */
4304: case 0x200A: /* HAIR SPACE */
4305: case 0x202f: /* NARROW NO-BREAK SPACE */
4306: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4307: case 0x3000: /* IDEOGRAPHIC SPACE */
4308: break;
4309: }
4310: }
4311: break;
4312:
4313: case OP_NOT_VSPACE:
4314: for (i = 1; i <= min; i++)
4315: {
4316: if (eptr >= md->end_subject)
4317: {
4318: SCHECK_PARTIAL();
4319: RRETURN(MATCH_NOMATCH);
4320: }
4321: GETCHARINC(c, eptr);
4322: switch(c)
4323: {
4324: default: break;
4325: case 0x0a: /* LF */
4326: case 0x0b: /* VT */
4327: case 0x0c: /* FF */
4328: case 0x0d: /* CR */
4329: case 0x85: /* NEL */
4330: case 0x2028: /* LINE SEPARATOR */
4331: case 0x2029: /* PARAGRAPH SEPARATOR */
4332: RRETURN(MATCH_NOMATCH);
4333: }
4334: }
4335: break;
4336:
4337: case OP_VSPACE:
4338: for (i = 1; i <= min; i++)
4339: {
4340: if (eptr >= md->end_subject)
4341: {
4342: SCHECK_PARTIAL();
4343: RRETURN(MATCH_NOMATCH);
4344: }
4345: GETCHARINC(c, eptr);
4346: switch(c)
4347: {
4348: default: RRETURN(MATCH_NOMATCH);
4349: case 0x0a: /* LF */
4350: case 0x0b: /* VT */
4351: case 0x0c: /* FF */
4352: case 0x0d: /* CR */
4353: case 0x85: /* NEL */
4354: case 0x2028: /* LINE SEPARATOR */
4355: case 0x2029: /* PARAGRAPH SEPARATOR */
4356: break;
4357: }
4358: }
4359: break;
4360:
4361: case OP_NOT_DIGIT:
4362: for (i = 1; i <= min; i++)
4363: {
4364: if (eptr >= md->end_subject)
4365: {
4366: SCHECK_PARTIAL();
4367: RRETURN(MATCH_NOMATCH);
4368: }
4369: GETCHARINC(c, eptr);
4370: if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4371: RRETURN(MATCH_NOMATCH);
4372: }
4373: break;
4374:
4375: case OP_DIGIT:
4376: for (i = 1; i <= min; i++)
4377: {
4378: if (eptr >= md->end_subject)
4379: {
4380: SCHECK_PARTIAL();
4381: RRETURN(MATCH_NOMATCH);
4382: }
1.1.1.2 ! misho 4383: if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
1.1 misho 4384: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4385: eptr++;
1.1 misho 4386: /* No need to skip more bytes - we know it's a 1-byte character */
4387: }
4388: break;
4389:
4390: case OP_NOT_WHITESPACE:
4391: for (i = 1; i <= min; i++)
4392: {
4393: if (eptr >= md->end_subject)
4394: {
4395: SCHECK_PARTIAL();
4396: RRETURN(MATCH_NOMATCH);
4397: }
4398: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4399: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4400: eptr++;
! 4401: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4402: }
4403: break;
4404:
4405: case OP_WHITESPACE:
4406: for (i = 1; i <= min; i++)
4407: {
4408: if (eptr >= md->end_subject)
4409: {
4410: SCHECK_PARTIAL();
4411: RRETURN(MATCH_NOMATCH);
4412: }
1.1.1.2 ! misho 4413: if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
1.1 misho 4414: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4415: eptr++;
1.1 misho 4416: /* No need to skip more bytes - we know it's a 1-byte character */
4417: }
4418: break;
4419:
4420: case OP_NOT_WORDCHAR:
4421: for (i = 1; i <= min; i++)
4422: {
4423: if (eptr >= md->end_subject)
4424: {
4425: SCHECK_PARTIAL();
4426: RRETURN(MATCH_NOMATCH);
4427: }
4428: if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4429: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4430: eptr++;
! 4431: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 4432: }
4433: break;
4434:
4435: case OP_WORDCHAR:
4436: for (i = 1; i <= min; i++)
4437: {
4438: if (eptr >= md->end_subject)
4439: {
4440: SCHECK_PARTIAL();
4441: RRETURN(MATCH_NOMATCH);
4442: }
1.1.1.2 ! misho 4443: if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
1.1 misho 4444: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4445: eptr++;
1.1 misho 4446: /* No need to skip more bytes - we know it's a 1-byte character */
4447: }
4448: break;
4449:
4450: default:
4451: RRETURN(PCRE_ERROR_INTERNAL);
4452: } /* End switch(ctype) */
4453:
4454: else
1.1.1.2 ! misho 4455: #endif /* SUPPORT_UTF */
1.1 misho 4456:
4457: /* Code for the non-UTF-8 case for minimum matching of operators other
4458: than OP_PROP and OP_NOTPROP. */
4459:
4460: switch(ctype)
4461: {
4462: case OP_ANY:
4463: for (i = 1; i <= min; i++)
4464: {
4465: if (eptr >= md->end_subject)
4466: {
4467: SCHECK_PARTIAL();
4468: RRETURN(MATCH_NOMATCH);
4469: }
4470: if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4471: eptr++;
4472: }
4473: break;
4474:
4475: case OP_ALLANY:
4476: if (eptr > md->end_subject - min)
4477: {
4478: SCHECK_PARTIAL();
4479: RRETURN(MATCH_NOMATCH);
4480: }
4481: eptr += min;
4482: break;
4483:
4484: case OP_ANYBYTE:
4485: if (eptr > md->end_subject - min)
4486: {
4487: SCHECK_PARTIAL();
4488: RRETURN(MATCH_NOMATCH);
4489: }
4490: eptr += min;
4491: break;
4492:
4493: case OP_ANYNL:
4494: for (i = 1; i <= min; i++)
4495: {
4496: if (eptr >= md->end_subject)
4497: {
4498: SCHECK_PARTIAL();
4499: RRETURN(MATCH_NOMATCH);
4500: }
4501: switch(*eptr++)
4502: {
4503: default: RRETURN(MATCH_NOMATCH);
4504:
4505: case 0x000d:
4506: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4507: break;
4508:
4509: case 0x000a:
4510: break;
4511:
4512: case 0x000b:
4513: case 0x000c:
4514: case 0x0085:
1.1.1.2 ! misho 4515: #ifdef COMPILE_PCRE16
! 4516: case 0x2028:
! 4517: case 0x2029:
! 4518: #endif
1.1 misho 4519: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4520: break;
4521: }
4522: }
4523: break;
4524:
4525: case OP_NOT_HSPACE:
4526: for (i = 1; i <= min; i++)
4527: {
4528: if (eptr >= md->end_subject)
4529: {
4530: SCHECK_PARTIAL();
4531: RRETURN(MATCH_NOMATCH);
4532: }
4533: switch(*eptr++)
4534: {
4535: default: break;
4536: case 0x09: /* HT */
4537: case 0x20: /* SPACE */
4538: case 0xa0: /* NBSP */
1.1.1.2 ! misho 4539: #ifdef COMPILE_PCRE16
! 4540: case 0x1680: /* OGHAM SPACE MARK */
! 4541: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4542: case 0x2000: /* EN QUAD */
! 4543: case 0x2001: /* EM QUAD */
! 4544: case 0x2002: /* EN SPACE */
! 4545: case 0x2003: /* EM SPACE */
! 4546: case 0x2004: /* THREE-PER-EM SPACE */
! 4547: case 0x2005: /* FOUR-PER-EM SPACE */
! 4548: case 0x2006: /* SIX-PER-EM SPACE */
! 4549: case 0x2007: /* FIGURE SPACE */
! 4550: case 0x2008: /* PUNCTUATION SPACE */
! 4551: case 0x2009: /* THIN SPACE */
! 4552: case 0x200A: /* HAIR SPACE */
! 4553: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4554: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4555: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4556: #endif
1.1 misho 4557: RRETURN(MATCH_NOMATCH);
4558: }
4559: }
4560: break;
4561:
4562: case OP_HSPACE:
4563: for (i = 1; i <= min; i++)
4564: {
4565: if (eptr >= md->end_subject)
4566: {
4567: SCHECK_PARTIAL();
4568: RRETURN(MATCH_NOMATCH);
4569: }
4570: switch(*eptr++)
4571: {
4572: default: RRETURN(MATCH_NOMATCH);
4573: case 0x09: /* HT */
4574: case 0x20: /* SPACE */
4575: case 0xa0: /* NBSP */
1.1.1.2 ! misho 4576: #ifdef COMPILE_PCRE16
! 4577: case 0x1680: /* OGHAM SPACE MARK */
! 4578: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 4579: case 0x2000: /* EN QUAD */
! 4580: case 0x2001: /* EM QUAD */
! 4581: case 0x2002: /* EN SPACE */
! 4582: case 0x2003: /* EM SPACE */
! 4583: case 0x2004: /* THREE-PER-EM SPACE */
! 4584: case 0x2005: /* FOUR-PER-EM SPACE */
! 4585: case 0x2006: /* SIX-PER-EM SPACE */
! 4586: case 0x2007: /* FIGURE SPACE */
! 4587: case 0x2008: /* PUNCTUATION SPACE */
! 4588: case 0x2009: /* THIN SPACE */
! 4589: case 0x200A: /* HAIR SPACE */
! 4590: case 0x202f: /* NARROW NO-BREAK SPACE */
! 4591: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 4592: case 0x3000: /* IDEOGRAPHIC SPACE */
! 4593: #endif
1.1 misho 4594: break;
4595: }
4596: }
4597: break;
4598:
4599: case OP_NOT_VSPACE:
4600: for (i = 1; i <= min; i++)
4601: {
4602: if (eptr >= md->end_subject)
4603: {
4604: SCHECK_PARTIAL();
4605: RRETURN(MATCH_NOMATCH);
4606: }
4607: switch(*eptr++)
4608: {
4609: default: break;
4610: case 0x0a: /* LF */
4611: case 0x0b: /* VT */
4612: case 0x0c: /* FF */
4613: case 0x0d: /* CR */
4614: case 0x85: /* NEL */
1.1.1.2 ! misho 4615: #ifdef COMPILE_PCRE16
! 4616: case 0x2028: /* LINE SEPARATOR */
! 4617: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4618: #endif
1.1 misho 4619: RRETURN(MATCH_NOMATCH);
4620: }
4621: }
4622: break;
4623:
4624: case OP_VSPACE:
4625: for (i = 1; i <= min; i++)
4626: {
4627: if (eptr >= md->end_subject)
4628: {
4629: SCHECK_PARTIAL();
4630: RRETURN(MATCH_NOMATCH);
4631: }
4632: switch(*eptr++)
4633: {
4634: default: RRETURN(MATCH_NOMATCH);
4635: case 0x0a: /* LF */
4636: case 0x0b: /* VT */
4637: case 0x0c: /* FF */
4638: case 0x0d: /* CR */
4639: case 0x85: /* NEL */
1.1.1.2 ! misho 4640: #ifdef COMPILE_PCRE16
! 4641: case 0x2028: /* LINE SEPARATOR */
! 4642: case 0x2029: /* PARAGRAPH SEPARATOR */
! 4643: #endif
1.1 misho 4644: break;
4645: }
4646: }
4647: break;
4648:
4649: case OP_NOT_DIGIT:
4650: for (i = 1; i <= min; i++)
4651: {
4652: if (eptr >= md->end_subject)
4653: {
4654: SCHECK_PARTIAL();
4655: RRETURN(MATCH_NOMATCH);
4656: }
1.1.1.2 ! misho 4657: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
! 4658: RRETURN(MATCH_NOMATCH);
! 4659: eptr++;
1.1 misho 4660: }
4661: break;
4662:
4663: case OP_DIGIT:
4664: for (i = 1; i <= min; i++)
4665: {
4666: if (eptr >= md->end_subject)
4667: {
4668: SCHECK_PARTIAL();
4669: RRETURN(MATCH_NOMATCH);
4670: }
1.1.1.2 ! misho 4671: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
! 4672: RRETURN(MATCH_NOMATCH);
! 4673: eptr++;
1.1 misho 4674: }
4675: break;
4676:
4677: case OP_NOT_WHITESPACE:
4678: for (i = 1; i <= min; i++)
4679: {
4680: if (eptr >= md->end_subject)
4681: {
4682: SCHECK_PARTIAL();
4683: RRETURN(MATCH_NOMATCH);
4684: }
1.1.1.2 ! misho 4685: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
! 4686: RRETURN(MATCH_NOMATCH);
! 4687: eptr++;
1.1 misho 4688: }
4689: break;
4690:
4691: case OP_WHITESPACE:
4692: for (i = 1; i <= min; i++)
4693: {
4694: if (eptr >= md->end_subject)
4695: {
4696: SCHECK_PARTIAL();
4697: RRETURN(MATCH_NOMATCH);
4698: }
1.1.1.2 ! misho 4699: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
! 4700: RRETURN(MATCH_NOMATCH);
! 4701: eptr++;
1.1 misho 4702: }
4703: break;
4704:
4705: case OP_NOT_WORDCHAR:
4706: for (i = 1; i <= min; i++)
4707: {
4708: if (eptr >= md->end_subject)
4709: {
4710: SCHECK_PARTIAL();
4711: RRETURN(MATCH_NOMATCH);
4712: }
1.1.1.2 ! misho 4713: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
1.1 misho 4714: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4715: eptr++;
1.1 misho 4716: }
4717: break;
4718:
4719: case OP_WORDCHAR:
4720: for (i = 1; i <= min; i++)
4721: {
4722: if (eptr >= md->end_subject)
4723: {
4724: SCHECK_PARTIAL();
4725: RRETURN(MATCH_NOMATCH);
4726: }
1.1.1.2 ! misho 4727: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
1.1 misho 4728: RRETURN(MATCH_NOMATCH);
1.1.1.2 ! misho 4729: eptr++;
1.1 misho 4730: }
4731: break;
4732:
4733: default:
4734: RRETURN(PCRE_ERROR_INTERNAL);
4735: }
4736: }
4737:
4738: /* If min = max, continue at the same level without recursing */
4739:
4740: if (min == max) continue;
4741:
4742: /* If minimizing, we have to test the rest of the pattern before each
4743: subsequent match. Again, separate the UTF-8 case for speed, and also
4744: separate the UCP cases. */
4745:
4746: if (minimize)
4747: {
4748: #ifdef SUPPORT_UCP
4749: if (prop_type >= 0)
4750: {
4751: switch(prop_type)
4752: {
4753: case PT_ANY:
4754: for (fi = min;; fi++)
4755: {
4756: RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4757: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4758: if (fi >= max) RRETURN(MATCH_NOMATCH);
4759: if (eptr >= md->end_subject)
4760: {
4761: SCHECK_PARTIAL();
4762: RRETURN(MATCH_NOMATCH);
4763: }
4764: GETCHARINCTEST(c, eptr);
4765: if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4766: }
4767: /* Control never gets here */
4768:
4769: case PT_LAMP:
4770: for (fi = min;; fi++)
4771: {
4772: int chartype;
4773: RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4774: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4775: if (fi >= max) RRETURN(MATCH_NOMATCH);
4776: if (eptr >= md->end_subject)
4777: {
4778: SCHECK_PARTIAL();
4779: RRETURN(MATCH_NOMATCH);
4780: }
4781: GETCHARINCTEST(c, eptr);
4782: chartype = UCD_CHARTYPE(c);
4783: if ((chartype == ucp_Lu ||
4784: chartype == ucp_Ll ||
4785: chartype == ucp_Lt) == prop_fail_result)
4786: RRETURN(MATCH_NOMATCH);
4787: }
4788: /* Control never gets here */
4789:
4790: case PT_GC:
4791: for (fi = min;; fi++)
4792: {
4793: RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4794: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4795: if (fi >= max) RRETURN(MATCH_NOMATCH);
4796: if (eptr >= md->end_subject)
4797: {
4798: SCHECK_PARTIAL();
4799: RRETURN(MATCH_NOMATCH);
4800: }
4801: GETCHARINCTEST(c, eptr);
4802: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4803: RRETURN(MATCH_NOMATCH);
4804: }
4805: /* Control never gets here */
4806:
4807: case PT_PC:
4808: for (fi = min;; fi++)
4809: {
4810: RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4811: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4812: if (fi >= max) RRETURN(MATCH_NOMATCH);
4813: if (eptr >= md->end_subject)
4814: {
4815: SCHECK_PARTIAL();
4816: RRETURN(MATCH_NOMATCH);
4817: }
4818: GETCHARINCTEST(c, eptr);
4819: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4820: RRETURN(MATCH_NOMATCH);
4821: }
4822: /* Control never gets here */
4823:
4824: case PT_SC:
4825: for (fi = min;; fi++)
4826: {
4827: RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4828: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4829: if (fi >= max) RRETURN(MATCH_NOMATCH);
4830: if (eptr >= md->end_subject)
4831: {
4832: SCHECK_PARTIAL();
4833: RRETURN(MATCH_NOMATCH);
4834: }
4835: GETCHARINCTEST(c, eptr);
4836: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4837: RRETURN(MATCH_NOMATCH);
4838: }
4839: /* Control never gets here */
4840:
4841: case PT_ALNUM:
4842: for (fi = min;; fi++)
4843: {
4844: int category;
4845: RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4846: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4847: if (fi >= max) RRETURN(MATCH_NOMATCH);
4848: if (eptr >= md->end_subject)
4849: {
4850: SCHECK_PARTIAL();
4851: RRETURN(MATCH_NOMATCH);
4852: }
4853: GETCHARINCTEST(c, eptr);
4854: category = UCD_CATEGORY(c);
4855: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4856: RRETURN(MATCH_NOMATCH);
4857: }
4858: /* Control never gets here */
4859:
4860: case PT_SPACE: /* Perl space */
4861: for (fi = min;; fi++)
4862: {
4863: RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4864: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4865: if (fi >= max) RRETURN(MATCH_NOMATCH);
4866: if (eptr >= md->end_subject)
4867: {
4868: SCHECK_PARTIAL();
4869: RRETURN(MATCH_NOMATCH);
4870: }
4871: GETCHARINCTEST(c, eptr);
4872: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4873: c == CHAR_FF || c == CHAR_CR)
4874: == prop_fail_result)
4875: RRETURN(MATCH_NOMATCH);
4876: }
4877: /* Control never gets here */
4878:
4879: case PT_PXSPACE: /* POSIX space */
4880: for (fi = min;; fi++)
4881: {
4882: RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4883: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4884: if (fi >= max) RRETURN(MATCH_NOMATCH);
4885: if (eptr >= md->end_subject)
4886: {
4887: SCHECK_PARTIAL();
4888: RRETURN(MATCH_NOMATCH);
4889: }
4890: GETCHARINCTEST(c, eptr);
4891: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4892: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4893: == prop_fail_result)
4894: RRETURN(MATCH_NOMATCH);
4895: }
4896: /* Control never gets here */
4897:
4898: case PT_WORD:
4899: for (fi = min;; fi++)
4900: {
4901: int category;
4902: RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4903: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4904: if (fi >= max) RRETURN(MATCH_NOMATCH);
4905: if (eptr >= md->end_subject)
4906: {
4907: SCHECK_PARTIAL();
4908: RRETURN(MATCH_NOMATCH);
4909: }
4910: GETCHARINCTEST(c, eptr);
4911: category = UCD_CATEGORY(c);
4912: if ((category == ucp_L ||
4913: category == ucp_N ||
4914: c == CHAR_UNDERSCORE)
4915: == prop_fail_result)
4916: RRETURN(MATCH_NOMATCH);
4917: }
4918: /* Control never gets here */
4919:
4920: /* This should never occur */
4921:
4922: default:
4923: RRETURN(PCRE_ERROR_INTERNAL);
4924: }
4925: }
4926:
4927: /* Match extended Unicode sequences. We will get here only if the
4928: support is in the binary; otherwise a compile-time error occurs. */
4929:
4930: else if (ctype == OP_EXTUNI)
4931: {
4932: for (fi = min;; fi++)
4933: {
4934: RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
4935: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4936: if (fi >= max) RRETURN(MATCH_NOMATCH);
4937: if (eptr >= md->end_subject)
4938: {
4939: SCHECK_PARTIAL();
4940: RRETURN(MATCH_NOMATCH);
4941: }
4942: GETCHARINCTEST(c, eptr);
4943: if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4944: while (eptr < md->end_subject)
4945: {
4946: int len = 1;
1.1.1.2 ! misho 4947: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1 misho 4948: if (UCD_CATEGORY(c) != ucp_M) break;
4949: eptr += len;
4950: }
4951: }
4952: }
4953: else
4954: #endif /* SUPPORT_UCP */
4955:
1.1.1.2 ! misho 4956: #ifdef SUPPORT_UTF
! 4957: if (utf)
1.1 misho 4958: {
4959: for (fi = min;; fi++)
4960: {
4961: RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
4962: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4963: if (fi >= max) RRETURN(MATCH_NOMATCH);
4964: if (eptr >= md->end_subject)
4965: {
4966: SCHECK_PARTIAL();
4967: RRETURN(MATCH_NOMATCH);
4968: }
4969: if (ctype == OP_ANY && IS_NEWLINE(eptr))
4970: RRETURN(MATCH_NOMATCH);
4971: GETCHARINC(c, eptr);
4972: switch(ctype)
4973: {
4974: case OP_ANY: /* This is the non-NL case */
4975: case OP_ALLANY:
4976: case OP_ANYBYTE:
4977: break;
4978:
4979: case OP_ANYNL:
4980: switch(c)
4981: {
4982: default: RRETURN(MATCH_NOMATCH);
4983: case 0x000d:
4984: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4985: break;
4986: case 0x000a:
4987: break;
4988:
4989: case 0x000b:
4990: case 0x000c:
4991: case 0x0085:
4992: case 0x2028:
4993: case 0x2029:
4994: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4995: break;
4996: }
4997: break;
4998:
4999: case OP_NOT_HSPACE:
5000: switch(c)
5001: {
5002: default: break;
5003: case 0x09: /* HT */
5004: case 0x20: /* SPACE */
5005: case 0xa0: /* NBSP */
5006: case 0x1680: /* OGHAM SPACE MARK */
5007: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5008: case 0x2000: /* EN QUAD */
5009: case 0x2001: /* EM QUAD */
5010: case 0x2002: /* EN SPACE */
5011: case 0x2003: /* EM SPACE */
5012: case 0x2004: /* THREE-PER-EM SPACE */
5013: case 0x2005: /* FOUR-PER-EM SPACE */
5014: case 0x2006: /* SIX-PER-EM SPACE */
5015: case 0x2007: /* FIGURE SPACE */
5016: case 0x2008: /* PUNCTUATION SPACE */
5017: case 0x2009: /* THIN SPACE */
5018: case 0x200A: /* HAIR SPACE */
5019: case 0x202f: /* NARROW NO-BREAK SPACE */
5020: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5021: case 0x3000: /* IDEOGRAPHIC SPACE */
5022: RRETURN(MATCH_NOMATCH);
5023: }
5024: break;
5025:
5026: case OP_HSPACE:
5027: switch(c)
5028: {
5029: default: RRETURN(MATCH_NOMATCH);
5030: case 0x09: /* HT */
5031: case 0x20: /* SPACE */
5032: case 0xa0: /* NBSP */
5033: case 0x1680: /* OGHAM SPACE MARK */
5034: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5035: case 0x2000: /* EN QUAD */
5036: case 0x2001: /* EM QUAD */
5037: case 0x2002: /* EN SPACE */
5038: case 0x2003: /* EM SPACE */
5039: case 0x2004: /* THREE-PER-EM SPACE */
5040: case 0x2005: /* FOUR-PER-EM SPACE */
5041: case 0x2006: /* SIX-PER-EM SPACE */
5042: case 0x2007: /* FIGURE SPACE */
5043: case 0x2008: /* PUNCTUATION SPACE */
5044: case 0x2009: /* THIN SPACE */
5045: case 0x200A: /* HAIR SPACE */
5046: case 0x202f: /* NARROW NO-BREAK SPACE */
5047: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5048: case 0x3000: /* IDEOGRAPHIC SPACE */
5049: break;
5050: }
5051: break;
5052:
5053: case OP_NOT_VSPACE:
5054: switch(c)
5055: {
5056: default: break;
5057: case 0x0a: /* LF */
5058: case 0x0b: /* VT */
5059: case 0x0c: /* FF */
5060: case 0x0d: /* CR */
5061: case 0x85: /* NEL */
5062: case 0x2028: /* LINE SEPARATOR */
5063: case 0x2029: /* PARAGRAPH SEPARATOR */
5064: RRETURN(MATCH_NOMATCH);
5065: }
5066: break;
5067:
5068: case OP_VSPACE:
5069: switch(c)
5070: {
5071: default: RRETURN(MATCH_NOMATCH);
5072: case 0x0a: /* LF */
5073: case 0x0b: /* VT */
5074: case 0x0c: /* FF */
5075: case 0x0d: /* CR */
5076: case 0x85: /* NEL */
5077: case 0x2028: /* LINE SEPARATOR */
5078: case 0x2029: /* PARAGRAPH SEPARATOR */
5079: break;
5080: }
5081: break;
5082:
5083: case OP_NOT_DIGIT:
5084: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5085: RRETURN(MATCH_NOMATCH);
5086: break;
5087:
5088: case OP_DIGIT:
5089: if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5090: RRETURN(MATCH_NOMATCH);
5091: break;
5092:
5093: case OP_NOT_WHITESPACE:
5094: if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5095: RRETURN(MATCH_NOMATCH);
5096: break;
5097:
5098: case OP_WHITESPACE:
1.1.1.2 ! misho 5099: if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
1.1 misho 5100: RRETURN(MATCH_NOMATCH);
5101: break;
5102:
5103: case OP_NOT_WORDCHAR:
5104: if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5105: RRETURN(MATCH_NOMATCH);
5106: break;
5107:
5108: case OP_WORDCHAR:
5109: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5110: RRETURN(MATCH_NOMATCH);
5111: break;
5112:
5113: default:
5114: RRETURN(PCRE_ERROR_INTERNAL);
5115: }
5116: }
5117: }
5118: else
5119: #endif
1.1.1.2 ! misho 5120: /* Not UTF mode */
1.1 misho 5121: {
5122: for (fi = min;; fi++)
5123: {
5124: RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5125: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5126: if (fi >= max) RRETURN(MATCH_NOMATCH);
5127: if (eptr >= md->end_subject)
5128: {
5129: SCHECK_PARTIAL();
5130: RRETURN(MATCH_NOMATCH);
5131: }
5132: if (ctype == OP_ANY && IS_NEWLINE(eptr))
5133: RRETURN(MATCH_NOMATCH);
5134: c = *eptr++;
5135: switch(ctype)
5136: {
5137: case OP_ANY: /* This is the non-NL case */
5138: case OP_ALLANY:
5139: case OP_ANYBYTE:
5140: break;
5141:
5142: case OP_ANYNL:
5143: switch(c)
5144: {
5145: default: RRETURN(MATCH_NOMATCH);
5146: case 0x000d:
5147: if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5148: break;
5149:
5150: case 0x000a:
5151: break;
5152:
5153: case 0x000b:
5154: case 0x000c:
5155: case 0x0085:
1.1.1.2 ! misho 5156: #ifdef COMPILE_PCRE16
! 5157: case 0x2028:
! 5158: case 0x2029:
! 5159: #endif
1.1 misho 5160: if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5161: break;
5162: }
5163: break;
5164:
5165: case OP_NOT_HSPACE:
5166: switch(c)
5167: {
5168: default: break;
5169: case 0x09: /* HT */
5170: case 0x20: /* SPACE */
5171: case 0xa0: /* NBSP */
1.1.1.2 ! misho 5172: #ifdef COMPILE_PCRE16
! 5173: case 0x1680: /* OGHAM SPACE MARK */
! 5174: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 5175: case 0x2000: /* EN QUAD */
! 5176: case 0x2001: /* EM QUAD */
! 5177: case 0x2002: /* EN SPACE */
! 5178: case 0x2003: /* EM SPACE */
! 5179: case 0x2004: /* THREE-PER-EM SPACE */
! 5180: case 0x2005: /* FOUR-PER-EM SPACE */
! 5181: case 0x2006: /* SIX-PER-EM SPACE */
! 5182: case 0x2007: /* FIGURE SPACE */
! 5183: case 0x2008: /* PUNCTUATION SPACE */
! 5184: case 0x2009: /* THIN SPACE */
! 5185: case 0x200A: /* HAIR SPACE */
! 5186: case 0x202f: /* NARROW NO-BREAK SPACE */
! 5187: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 5188: case 0x3000: /* IDEOGRAPHIC SPACE */
! 5189: #endif
1.1 misho 5190: RRETURN(MATCH_NOMATCH);
5191: }
5192: break;
5193:
5194: case OP_HSPACE:
5195: switch(c)
5196: {
5197: default: RRETURN(MATCH_NOMATCH);
5198: case 0x09: /* HT */
5199: case 0x20: /* SPACE */
5200: case 0xa0: /* NBSP */
1.1.1.2 ! misho 5201: #ifdef COMPILE_PCRE16
! 5202: case 0x1680: /* OGHAM SPACE MARK */
! 5203: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
! 5204: case 0x2000: /* EN QUAD */
! 5205: case 0x2001: /* EM QUAD */
! 5206: case 0x2002: /* EN SPACE */
! 5207: case 0x2003: /* EM SPACE */
! 5208: case 0x2004: /* THREE-PER-EM SPACE */
! 5209: case 0x2005: /* FOUR-PER-EM SPACE */
! 5210: case 0x2006: /* SIX-PER-EM SPACE */
! 5211: case 0x2007: /* FIGURE SPACE */
! 5212: case 0x2008: /* PUNCTUATION SPACE */
! 5213: case 0x2009: /* THIN SPACE */
! 5214: case 0x200A: /* HAIR SPACE */
! 5215: case 0x202f: /* NARROW NO-BREAK SPACE */
! 5216: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
! 5217: case 0x3000: /* IDEOGRAPHIC SPACE */
! 5218: #endif
1.1 misho 5219: break;
5220: }
5221: break;
5222:
5223: case OP_NOT_VSPACE:
5224: switch(c)
5225: {
5226: default: break;
5227: case 0x0a: /* LF */
5228: case 0x0b: /* VT */
5229: case 0x0c: /* FF */
5230: case 0x0d: /* CR */
5231: case 0x85: /* NEL */
1.1.1.2 ! misho 5232: #ifdef COMPILE_PCRE16
! 5233: case 0x2028: /* LINE SEPARATOR */
! 5234: case 0x2029: /* PARAGRAPH SEPARATOR */
! 5235: #endif
1.1 misho 5236: RRETURN(MATCH_NOMATCH);
5237: }
5238: break;
5239:
5240: case OP_VSPACE:
5241: switch(c)
5242: {
5243: default: RRETURN(MATCH_NOMATCH);
5244: case 0x0a: /* LF */
5245: case 0x0b: /* VT */
5246: case 0x0c: /* FF */
5247: case 0x0d: /* CR */
5248: case 0x85: /* NEL */
1.1.1.2 ! misho 5249: #ifdef COMPILE_PCRE16
! 5250: case 0x2028: /* LINE SEPARATOR */
! 5251: case 0x2029: /* PARAGRAPH SEPARATOR */
! 5252: #endif
1.1 misho 5253: break;
5254: }
5255: break;
5256:
5257: case OP_NOT_DIGIT:
1.1.1.2 ! misho 5258: if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5259: break;
5260:
5261: case OP_DIGIT:
1.1.1.2 ! misho 5262: if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5263: break;
5264:
5265: case OP_NOT_WHITESPACE:
1.1.1.2 ! misho 5266: if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5267: break;
5268:
5269: case OP_WHITESPACE:
1.1.1.2 ! misho 5270: if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5271: break;
5272:
5273: case OP_NOT_WORDCHAR:
1.1.1.2 ! misho 5274: if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5275: break;
5276:
5277: case OP_WORDCHAR:
1.1.1.2 ! misho 5278: if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1 misho 5279: break;
5280:
5281: default:
5282: RRETURN(PCRE_ERROR_INTERNAL);
5283: }
5284: }
5285: }
5286: /* Control never gets here */
5287: }
5288:
5289: /* If maximizing, it is worth using inline code for speed, doing the type
5290: test once at the start (i.e. keep it out of the loop). Again, keep the
5291: UTF-8 and UCP stuff separate. */
5292:
5293: else
5294: {
5295: pp = eptr; /* Remember where we started */
5296:
5297: #ifdef SUPPORT_UCP
5298: if (prop_type >= 0)
5299: {
5300: switch(prop_type)
5301: {
5302: case PT_ANY:
5303: for (i = min; i < max; i++)
5304: {
5305: int len = 1;
5306: if (eptr >= md->end_subject)
5307: {
5308: SCHECK_PARTIAL();
5309: break;
5310: }
5311: GETCHARLENTEST(c, eptr, len);
5312: if (prop_fail_result) break;
5313: eptr+= len;
5314: }
5315: break;
5316:
5317: case PT_LAMP:
5318: for (i = min; i < max; i++)
5319: {
5320: int chartype;
5321: int len = 1;
5322: if (eptr >= md->end_subject)
5323: {
5324: SCHECK_PARTIAL();
5325: break;
5326: }
5327: GETCHARLENTEST(c, eptr, len);
5328: chartype = UCD_CHARTYPE(c);
5329: if ((chartype == ucp_Lu ||
5330: chartype == ucp_Ll ||
5331: chartype == ucp_Lt) == prop_fail_result)
5332: break;
5333: eptr+= len;
5334: }
5335: break;
5336:
5337: case PT_GC:
5338: for (i = min; i < max; i++)
5339: {
5340: int len = 1;
5341: if (eptr >= md->end_subject)
5342: {
5343: SCHECK_PARTIAL();
5344: break;
5345: }
5346: GETCHARLENTEST(c, eptr, len);
5347: if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5348: eptr+= len;
5349: }
5350: break;
5351:
5352: case PT_PC:
5353: for (i = min; i < max; i++)
5354: {
5355: int len = 1;
5356: if (eptr >= md->end_subject)
5357: {
5358: SCHECK_PARTIAL();
5359: break;
5360: }
5361: GETCHARLENTEST(c, eptr, len);
5362: if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5363: eptr+= len;
5364: }
5365: break;
5366:
5367: case PT_SC:
5368: for (i = min; i < max; i++)
5369: {
5370: int len = 1;
5371: if (eptr >= md->end_subject)
5372: {
5373: SCHECK_PARTIAL();
5374: break;
5375: }
5376: GETCHARLENTEST(c, eptr, len);
5377: if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5378: eptr+= len;
5379: }
5380: break;
5381:
5382: case PT_ALNUM:
5383: for (i = min; i < max; i++)
5384: {
5385: int category;
5386: int len = 1;
5387: if (eptr >= md->end_subject)
5388: {
5389: SCHECK_PARTIAL();
5390: break;
5391: }
5392: GETCHARLENTEST(c, eptr, len);
5393: category = UCD_CATEGORY(c);
5394: if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5395: break;
5396: eptr+= len;
5397: }
5398: break;
5399:
5400: case PT_SPACE: /* Perl space */
5401: for (i = min; i < max; i++)
5402: {
5403: int len = 1;
5404: if (eptr >= md->end_subject)
5405: {
5406: SCHECK_PARTIAL();
5407: break;
5408: }
5409: GETCHARLENTEST(c, eptr, len);
5410: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5411: c == CHAR_FF || c == CHAR_CR)
5412: == prop_fail_result)
5413: break;
5414: eptr+= len;
5415: }
5416: break;
5417:
5418: case PT_PXSPACE: /* POSIX space */
5419: for (i = min; i < max; i++)
5420: {
5421: int len = 1;
5422: if (eptr >= md->end_subject)
5423: {
5424: SCHECK_PARTIAL();
5425: break;
5426: }
5427: GETCHARLENTEST(c, eptr, len);
5428: if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5429: c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5430: == prop_fail_result)
5431: break;
5432: eptr+= len;
5433: }
5434: break;
5435:
5436: case PT_WORD:
5437: for (i = min; i < max; i++)
5438: {
5439: int category;
5440: int len = 1;
5441: if (eptr >= md->end_subject)
5442: {
5443: SCHECK_PARTIAL();
5444: break;
5445: }
5446: GETCHARLENTEST(c, eptr, len);
5447: category = UCD_CATEGORY(c);
5448: if ((category == ucp_L || category == ucp_N ||
5449: c == CHAR_UNDERSCORE) == prop_fail_result)
5450: break;
5451: eptr+= len;
5452: }
5453: break;
5454:
5455: default:
5456: RRETURN(PCRE_ERROR_INTERNAL);
5457: }
5458:
5459: /* eptr is now past the end of the maximum run */
5460:
5461: if (possessive) continue;
5462: for(;;)
5463: {
5464: RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5465: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5466: if (eptr-- == pp) break; /* Stop if tried at original pos */
1.1.1.2 ! misho 5467: if (utf) BACKCHAR(eptr);
1.1 misho 5468: }
5469: }
5470:
5471: /* Match extended Unicode sequences. We will get here only if the
5472: support is in the binary; otherwise a compile-time error occurs. */
5473:
5474: else if (ctype == OP_EXTUNI)
5475: {
5476: for (i = min; i < max; i++)
5477: {
5478: int len = 1;
5479: if (eptr >= md->end_subject)
5480: {
5481: SCHECK_PARTIAL();
5482: break;
5483: }
1.1.1.2 ! misho 5484: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1 misho 5485: if (UCD_CATEGORY(c) == ucp_M) break;
5486: eptr += len;
5487: while (eptr < md->end_subject)
5488: {
5489: len = 1;
1.1.1.2 ! misho 5490: if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
1.1 misho 5491: if (UCD_CATEGORY(c) != ucp_M) break;
5492: eptr += len;
5493: }
5494: }
5495:
5496: /* eptr is now past the end of the maximum run */
5497:
5498: if (possessive) continue;
5499:
5500: for(;;)
5501: {
5502: RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5503: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5504: if (eptr-- == pp) break; /* Stop if tried at original pos */
5505: for (;;) /* Move back over one extended */
5506: {
1.1.1.2 ! misho 5507: if (!utf) c = *eptr; else
1.1 misho 5508: {
5509: BACKCHAR(eptr);
5510: GETCHAR(c, eptr);
5511: }
5512: if (UCD_CATEGORY(c) != ucp_M) break;
5513: eptr--;
5514: }
5515: }
5516: }
5517:
5518: else
5519: #endif /* SUPPORT_UCP */
5520:
1.1.1.2 ! misho 5521: #ifdef SUPPORT_UTF
! 5522: if (utf)
1.1 misho 5523: {
5524: switch(ctype)
5525: {
5526: case OP_ANY:
5527: if (max < INT_MAX)
5528: {
5529: for (i = min; i < max; i++)
5530: {
5531: if (eptr >= md->end_subject)
5532: {
5533: SCHECK_PARTIAL();
5534: break;
5535: }
5536: if (IS_NEWLINE(eptr)) break;
5537: eptr++;
1.1.1.2 ! misho 5538: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5539: }
5540: }
5541:
5542: /* Handle unlimited UTF-8 repeat */
5543:
5544: else
5545: {
5546: for (i = min; i < max; i++)
5547: {
5548: if (eptr >= md->end_subject)
5549: {
5550: SCHECK_PARTIAL();
5551: break;
5552: }
5553: if (IS_NEWLINE(eptr)) break;
5554: eptr++;
1.1.1.2 ! misho 5555: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5556: }
5557: }
5558: break;
5559:
5560: case OP_ALLANY:
5561: if (max < INT_MAX)
5562: {
5563: for (i = min; i < max; i++)
5564: {
5565: if (eptr >= md->end_subject)
5566: {
5567: SCHECK_PARTIAL();
5568: break;
5569: }
5570: eptr++;
1.1.1.2 ! misho 5571: ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1 misho 5572: }
5573: }
5574: else
5575: {
5576: eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5577: SCHECK_PARTIAL();
5578: }
5579: break;
5580:
5581: /* The byte case is the same as non-UTF8 */
5582:
5583: case OP_ANYBYTE:
5584: c = max - min;
5585: if (c > (unsigned int)(md->end_subject - eptr))
5586: {
5587: eptr = md->end_subject;
5588: SCHECK_PARTIAL();
5589: }
5590: else eptr += c;
5591: break;
5592:
5593: case OP_ANYNL:
5594: for (i = min; i < max; i++)
5595: {
5596: int len = 1;
5597: if (eptr >= md->end_subject)
5598: {
5599: SCHECK_PARTIAL();
5600: break;
5601: }
5602: GETCHARLEN(c, eptr, len);
5603: if (c == 0x000d)
5604: {
5605: if (++eptr >= md->end_subject) break;
5606: if (*eptr == 0x000a) eptr++;
5607: }
5608: else
5609: {
5610: if (c != 0x000a &&
5611: (md->bsr_anycrlf ||
5612: (c != 0x000b && c != 0x000c &&
5613: c != 0x0085 && c != 0x2028 && c != 0x2029)))
5614: break;
5615: eptr += len;
5616: }
5617: }
5618: break;
5619:
5620: case OP_NOT_HSPACE:
5621: case OP_HSPACE:
5622: for (i = min; i < max; i++)
5623: {
5624: BOOL gotspace;
5625: int len = 1;
5626: if (eptr >= md->end_subject)
5627: {
5628: SCHECK_PARTIAL();
5629: break;
5630: }
5631: GETCHARLEN(c, eptr, len);
5632: switch(c)
5633: {
5634: default: gotspace = FALSE; break;
5635: case 0x09: /* HT */
5636: case 0x20: /* SPACE */
5637: case 0xa0: /* NBSP */
5638: case 0x1680: /* OGHAM SPACE MARK */
5639: case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5640: case 0x2000: /* EN QUAD */
5641: case 0x2001: /* EM QUAD */
5642: case 0x2002: /* EN SPACE */
5643: case 0x2003: /* EM SPACE */
5644: case 0x2004: /* THREE-PER-EM SPACE */
5645: case 0x2005: /* FOUR-PER-EM SPACE */
5646: case 0x2006: /* SIX-PER-EM SPACE */
5647: case 0x2007: /* FIGURE SPACE */
5648: case 0x2008: /* PUNCTUATION SPACE */
5649: case 0x2009: /* THIN SPACE */
5650: case 0x200A: /* HAIR SPACE */
5651: case 0x202f: /* NARROW NO-BREAK SPACE */
5652: case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5653: case 0x3000: /* IDEOGRAPHIC SPACE */
5654: gotspace = TRUE;
5655: break;
5656: }
5657: if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5658: eptr += len;
5659: }
5660: break;
5661:
5662: case OP_NOT_VSPACE:
5663: case OP_VSPACE:
5664: for (i = min; i < max; i++)
5665: {
5666: BOOL gotspace;
5667: int len = 1;
5668: if (eptr >= md->end_subject)
5669: {
5670: SCHECK_PARTIAL();
5671: break;
5672: }
5673: GETCHARLEN(c, eptr, len);
5674: switch(c)
5675: {
5676: default: gotspace = FALSE; break;
5677: case 0x0a: /* LF */
5678: case 0x0b: /* VT */
5679: case 0x0c: /* FF */
5680: case 0x0d: /* CR */
5681: case 0x85: /* NEL */
5682: case 0x2028: /* LINE SEPARATOR */
5683: case 0x2029: /* PARAGRAPH SEPARATOR */
5684: gotspace = TRUE;
5685: break;
5686: }
5687: if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5688: eptr += len;
5689: }
5690: break;
5691:
5692: case OP_NOT_DIGIT:
5693: for (i = min; i < max; i++)
5694: {
5695: int len = 1;
5696: if (eptr >= md->end_subject)
5697: {
5698: SCHECK_PARTIAL();
5699: break;
5700: }
5701: GETCHARLEN(c, eptr, len);
5702: if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5703: eptr+= len;
5704: }
5705: break;
5706:
5707: case OP_DIGIT:
5708: for (i = min; i < max; i++)
5709: {
5710: int len = 1;
5711: if (eptr >= md->end_subject)
5712: {
5713: SCHECK_PARTIAL();
5714: break;
5715: }
5716: GETCHARLEN(c, eptr, len);
5717: if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5718: eptr+= len;
5719: }
5720: break;
5721:
5722: case OP_NOT_WHITESPACE:
5723: for (i = min; i < max; i++)
5724: {
5725: int len = 1;
5726: if (eptr >= md->end_subject)
5727: {
5728: SCHECK_PARTIAL();
5729: break;
5730: }
5731: GETCHARLEN(c, eptr, len);
5732: if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5733: eptr+= len;
5734: }
5735: break;
5736:
5737: case OP_WHITESPACE:
5738: for (i = min; i < max; i++)
5739: {
5740: int len = 1;
5741: if (eptr >= md->end_subject)
5742: {
5743: SCHECK_PARTIAL();
5744: break;
5745: }
5746: GETCHARLEN(c, eptr, len);
5747: if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5748: eptr+= len;
5749: }
5750: break;
5751:
5752: case OP_NOT_WORDCHAR:
5753: for (i = min; i < max; i++)
5754: {
5755: int len = 1;
5756: if (eptr >= md->end_subject)
5757: {
5758: SCHECK_PARTIAL();
5759: break;
5760: }
5761: GETCHARLEN(c, eptr, len);
5762: if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5763: eptr+= len;
5764: }
5765: break;
5766:
5767: case OP_WORDCHAR:
5768: for (i = min; i < max; i++)
5769: {
5770: int len = 1;
5771: if (eptr >= md->end_subject)
5772: {
5773: SCHECK_PARTIAL();
5774: break;
5775: }
5776: GETCHARLEN(c, eptr, len);
5777: if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5778: eptr+= len;
5779: }
5780: break;
5781:
5782: default:
5783: RRETURN(PCRE_ERROR_INTERNAL);
5784: }
5785:
5786: /* eptr is now past the end of the maximum run. If possessive, we are
5787: done (no backing up). Otherwise, match at this position; anything other
5788: than no match is immediately returned. For nomatch, back up one
5789: character, unless we are matching \R and the last thing matched was
5790: \r\n, in which case, back up two bytes. */
5791:
5792: if (possessive) continue;
5793: for(;;)
5794: {
5795: RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5796: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5797: if (eptr-- == pp) break; /* Stop if tried at original pos */
5798: BACKCHAR(eptr);
5799: if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
5800: eptr[-1] == '\r') eptr--;
5801: }
5802: }
5803: else
1.1.1.2 ! misho 5804: #endif /* SUPPORT_UTF */
! 5805: /* Not UTF mode */
1.1 misho 5806: {
5807: switch(ctype)
5808: {
5809: case OP_ANY:
5810: for (i = min; i < max; i++)
5811: {
5812: if (eptr >= md->end_subject)
5813: {
5814: SCHECK_PARTIAL();
5815: break;
5816: }
5817: if (IS_NEWLINE(eptr)) break;
5818: eptr++;
5819: }
5820: break;
5821:
5822: case OP_ALLANY:
5823: case OP_ANYBYTE:
5824: c = max - min;
5825: if (c > (unsigned int)(md->end_subject - eptr))
5826: {
5827: eptr = md->end_subject;
5828: SCHECK_PARTIAL();
5829: }
5830: else eptr += c;
5831: break;
5832:
5833: case OP_ANYNL:
5834: for (i = min; i < max; i++)
5835: {
5836: if (eptr >= md->end_subject)
5837: {
5838: SCHECK_PARTIAL();
5839: break;
5840: }
5841: c = *eptr;
5842: if (c == 0x000d)
5843: {
5844: if (++eptr >= md->end_subject) break;
5845: if (*eptr == 0x000a) eptr++;
5846: }
5847: else
5848: {
1.1.1.2 ! misho 5849: if (c != 0x000a && (md->bsr_anycrlf ||
! 5850: (c != 0x000b && c != 0x000c && c != 0x0085
! 5851: #ifdef COMPILE_PCRE16
! 5852: && c != 0x2028 && c != 0x2029
! 5853: #endif
! 5854: ))) break;
1.1 misho 5855: eptr++;
5856: }
5857: }
5858: break;
5859:
5860: case OP_NOT_HSPACE:
5861: for (i = min; i < max; i++)
5862: {
5863: if (eptr >= md->end_subject)
5864: {
5865: SCHECK_PARTIAL();
5866: break;
5867: }
5868: c = *eptr;
1.1.1.2 ! misho 5869: if (c == 0x09 || c == 0x20 || c == 0xa0
! 5870: #ifdef COMPILE_PCRE16
! 5871: || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
! 5872: || c == 0x202f || c == 0x205f || c == 0x3000
! 5873: #endif
! 5874: ) break;
1.1 misho 5875: eptr++;
5876: }
5877: break;
5878:
5879: case OP_HSPACE:
5880: for (i = min; i < max; i++)
5881: {
5882: if (eptr >= md->end_subject)
5883: {
5884: SCHECK_PARTIAL();
5885: break;
5886: }
5887: c = *eptr;
1.1.1.2 ! misho 5888: if (c != 0x09 && c != 0x20 && c != 0xa0
! 5889: #ifdef COMPILE_PCRE16
! 5890: && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
! 5891: && c != 0x202f && c != 0x205f && c != 0x3000
! 5892: #endif
! 5893: ) break;
1.1 misho 5894: eptr++;
5895: }
5896: break;
5897:
5898: case OP_NOT_VSPACE:
5899: for (i = min; i < max; i++)
5900: {
5901: if (eptr >= md->end_subject)
5902: {
5903: SCHECK_PARTIAL();
5904: break;
5905: }
5906: c = *eptr;
1.1.1.2 ! misho 5907: if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
! 5908: #ifdef COMPILE_PCRE16
! 5909: || c == 0x2028 || c == 0x2029
! 5910: #endif
! 5911: ) break;
1.1 misho 5912: eptr++;
5913: }
5914: break;
5915:
5916: case OP_VSPACE:
5917: for (i = min; i < max; i++)
5918: {
5919: if (eptr >= md->end_subject)
5920: {
5921: SCHECK_PARTIAL();
5922: break;
5923: }
5924: c = *eptr;
1.1.1.2 ! misho 5925: if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
! 5926: #ifdef COMPILE_PCRE16
! 5927: && c != 0x2028 && c != 0x2029
! 5928: #endif
! 5929: ) break;
1.1 misho 5930: eptr++;
5931: }
5932: break;
5933:
5934: case OP_NOT_DIGIT:
5935: for (i = min; i < max; i++)
5936: {
5937: if (eptr >= md->end_subject)
5938: {
5939: SCHECK_PARTIAL();
5940: break;
5941: }
1.1.1.2 ! misho 5942: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1 misho 5943: eptr++;
5944: }
5945: break;
5946:
5947: case OP_DIGIT:
5948: for (i = min; i < max; i++)
5949: {
5950: if (eptr >= md->end_subject)
5951: {
5952: SCHECK_PARTIAL();
5953: break;
5954: }
1.1.1.2 ! misho 5955: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1 misho 5956: eptr++;
5957: }
5958: break;
5959:
5960: case OP_NOT_WHITESPACE:
5961: for (i = min; i < max; i++)
5962: {
5963: if (eptr >= md->end_subject)
5964: {
5965: SCHECK_PARTIAL();
5966: break;
5967: }
1.1.1.2 ! misho 5968: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1 misho 5969: eptr++;
5970: }
5971: break;
5972:
5973: case OP_WHITESPACE:
5974: for (i = min; i < max; i++)
5975: {
5976: if (eptr >= md->end_subject)
5977: {
5978: SCHECK_PARTIAL();
5979: break;
5980: }
1.1.1.2 ! misho 5981: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1 misho 5982: eptr++;
5983: }
5984: break;
5985:
5986: case OP_NOT_WORDCHAR:
5987: for (i = min; i < max; i++)
5988: {
5989: if (eptr >= md->end_subject)
5990: {
5991: SCHECK_PARTIAL();
5992: break;
5993: }
1.1.1.2 ! misho 5994: if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1 misho 5995: eptr++;
5996: }
5997: break;
5998:
5999: case OP_WORDCHAR:
6000: for (i = min; i < max; i++)
6001: {
6002: if (eptr >= md->end_subject)
6003: {
6004: SCHECK_PARTIAL();
6005: break;
6006: }
1.1.1.2 ! misho 6007: if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1 misho 6008: eptr++;
6009: }
6010: break;
6011:
6012: default:
6013: RRETURN(PCRE_ERROR_INTERNAL);
6014: }
6015:
6016: /* eptr is now past the end of the maximum run. If possessive, we are
6017: done (no backing up). Otherwise, match at this position; anything other
6018: than no match is immediately returned. For nomatch, back up one
6019: character (byte), unless we are matching \R and the last thing matched
6020: was \r\n, in which case, back up two bytes. */
6021:
6022: if (possessive) continue;
6023: while (eptr >= pp)
6024: {
6025: RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6026: if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6027: eptr--;
6028: if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
6029: eptr[-1] == '\r') eptr--;
6030: }
6031: }
6032:
6033: /* Get here if we can't make it match with any permitted repetitions */
6034:
6035: RRETURN(MATCH_NOMATCH);
6036: }
6037: /* Control never gets here */
6038:
6039: /* There's been some horrible disaster. Arrival here can only mean there is
6040: something seriously wrong in the code above or the OP_xxx definitions. */
6041:
6042: default:
6043: DPRINTF(("Unknown opcode %d\n", *ecode));
6044: RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6045: }
6046:
6047: /* Do not stick any code in here without much thought; it is assumed
6048: that "continue" in the code above comes out to here to repeat the main
6049: loop. */
6050:
6051: } /* End of main loop */
6052: /* Control never reaches here */
6053:
6054:
6055: /* When compiling to use the heap rather than the stack for recursive calls to
6056: match(), the RRETURN() macro jumps here. The number that is saved in
6057: frame->Xwhere indicates which label we actually want to return to. */
6058:
6059: #ifdef NO_RECURSE
6060: #define LBL(val) case val: goto L_RM##val;
6061: HEAP_RETURN:
6062: switch (frame->Xwhere)
6063: {
6064: LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6065: LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6066: LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6067: LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6068: LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6069: LBL(65) LBL(66)
1.1.1.2 ! misho 6070: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
! 6071: LBL(21)
! 6072: #endif
! 6073: #ifdef SUPPORT_UTF
! 6074: LBL(16) LBL(18) LBL(20)
! 6075: LBL(22) LBL(23) LBL(28) LBL(30)
1.1 misho 6076: LBL(32) LBL(34) LBL(42) LBL(46)
6077: #ifdef SUPPORT_UCP
6078: LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6079: LBL(59) LBL(60) LBL(61) LBL(62)
6080: #endif /* SUPPORT_UCP */
1.1.1.2 ! misho 6081: #endif /* SUPPORT_UTF */
1.1 misho 6082: default:
6083: DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
1.1.1.2 ! misho 6084:
! 6085: printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
! 6086:
1.1 misho 6087: return PCRE_ERROR_INTERNAL;
6088: }
6089: #undef LBL
6090: #endif /* NO_RECURSE */
6091: }
6092:
6093:
6094: /***************************************************************************
6095: ****************************************************************************
6096: RECURSION IN THE match() FUNCTION
6097:
6098: Undefine all the macros that were defined above to handle recursion in match(). */
6099:
6100: #ifdef NO_RECURSE /* heap-based recursion build: remove the macros that were defined above for match() */
6101: #undef eptr
6102: #undef ecode
6103: #undef mstart
6104: #undef offset_top
6105: #undef eptrb
6106: #undef flags /* pcre_exec() below declares a local variable named flags */
6107:
6108: #undef callpat
6109: #undef charptr
6110: #undef data
6111: #undef next
6112: #undef pp
6113: #undef prev
6114: #undef saved_eptr
6115:
6116: #undef new_recursive
6117:
6118: #undef cur_is_word
6119: #undef condition
6120: #undef prev_is_word
6121:
6122: #undef ctype
6123: #undef length /* pcre_exec() below has a parameter named length */
6124: #undef max
6125: #undef min
6126: #undef number
6127: #undef offset
6128: #undef op
6129: #undef save_capture_last
6130: #undef save_offset1
6131: #undef save_offset2
6132: #undef save_offset3
6133: #undef stacksave
6134:
6135: #undef newptrb
6136:
6137: #endif /* NO_RECURSE */
6138:
6139: /* These two are defined as macros in both cases (with and without NO_RECURSE), so they are undefined unconditionally, outside the #ifdef above */
6140:
6141: #undef fc
6142: #undef fi
6143:
6144: /***************************************************************************
6145: ***************************************************************************/
6146:
6147:
6148:
6149: /*************************************************
6150: * Execute a Regular Expression *
6151: *************************************************/
6152:
6153: /* This function applies a compiled re to a subject string and picks out
6154: portions of the string if it matches. Two elements in the vector are set for
6155: each substring: the offsets to the start and end of the substring.
6156:
6157: Arguments:
6158: argument_re points to the compiled expression
6159: extra_data points to extra data or is NULL
6160: subject points to the subject string
6161: length length of subject string (may contain binary zeros)
6162: start_offset where to start in the subject string
6163: options option bits
6164: offsets points to a vector of ints to be filled in with offsets
6165: offsetcount the number of elements in the vector
6166:
6167: Returns: > 0 => success; value is the number of elements filled in
6168: = 0 => success, but offsets is not big enough
6169: -1 => failed to match
6170: < -1 => some kind of unexpected problem
6171: */
6172:
1.1.1.2 ! misho 6173: #ifdef COMPILE_PCRE8
1.1 misho 6174: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6175: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6176: PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6177: int offsetcount)
1.1.1.2 ! misho 6178: #else
! 6179: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 6180: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
! 6181: PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
! 6182: int offsetcount)
! 6183: #endif
1.1 misho 6184: {
6185: int rc, ocount, arg_offset_max;
6186: int newline;
6187: BOOL using_temporary_offsets = FALSE;
6188: BOOL anchored;
6189: BOOL startline;
6190: BOOL firstline;
1.1.1.2 ! misho 6191: BOOL utf;
! 6192: BOOL has_first_char = FALSE;
! 6193: BOOL has_req_char = FALSE;
! 6194: pcre_uchar first_char = 0;
! 6195: pcre_uchar first_char2 = 0;
! 6196: pcre_uchar req_char = 0;
! 6197: pcre_uchar req_char2 = 0;
1.1 misho 6198: match_data match_block;
6199: match_data *md = &match_block;
1.1.1.2 ! misho 6200: const pcre_uint8 *tables;
! 6201: const pcre_uint8 *start_bits = NULL;
! 6202: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
! 6203: PCRE_PUCHAR end_subject;
! 6204: PCRE_PUCHAR start_partial = NULL;
! 6205: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1 misho 6206:
6207: const pcre_study_data *study;
1.1.1.2 ! misho 6208: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
1.1 misho 6209:
1.1.1.2 ! misho 6210: /* Check for the special magic call that measures the size of the stack used
! 6211: per recursive call of match(). */
! 6212:
! 6213: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
! 6214: start_offset == -999)
! 6215: #ifdef NO_RECURSE
! 6216: return -sizeof(heapframe);
! 6217: #else
! 6218: return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
! 6219: #endif
1.1 misho 6220:
6221: /* Plausibility checks */
6222:
6223: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.1.1.2 ! misho 6224: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
! 6225: return PCRE_ERROR_NULL;
1.1 misho 6226: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6227: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6228:
1.1.1.2 ! misho 6229: /* Check that the first field in the block is the magic number. If it is not,
! 6230: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
! 6231: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
! 6232: means that the pattern is likely compiled with different endianness. */
! 6233:
! 6234: if (re->magic_number != MAGIC_NUMBER)
! 6235: return re->magic_number == REVERSED_MAGIC_NUMBER?
! 6236: PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
! 6237: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
! 6238:
1.1 misho 6239: /* These two settings are used in the code for checking a UTF-8 string that
6240: follows immediately afterwards. Other values in the md block are used only
6241: during "normal" pcre_exec() processing, not when the JIT support is in use,
6242: so they are set up later. */
6243:
1.1.1.2 ! misho 6244: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
! 6245: utf = md->utf = (re->options & PCRE_UTF8) != 0;
1.1 misho 6246: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6247: ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6248:
6249: /* Check a UTF-8 string if required. Pass back the character offset and error
6250: code for an invalid string if a results vector is available. */
6251:
1.1.1.2 ! misho 6252: #ifdef SUPPORT_UTF
! 6253: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
1.1 misho 6254: {
6255: int erroroffset;
1.1.1.2 ! misho 6256: int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
1.1 misho 6257: if (errorcode != 0)
6258: {
6259: if (offsetcount >= 2)
6260: {
6261: offsets[0] = erroroffset;
6262: offsets[1] = errorcode;
6263: }
1.1.1.2 ! misho 6264: #ifdef COMPILE_PCRE16
! 6265: return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
! 6266: PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
! 6267: #else
1.1 misho 6268: return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6269: PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1.1.2 ! misho 6270: #endif
1.1 misho 6271: }
6272:
1.1.1.2 ! misho 6273: /* Check that a start_offset points to the start of a UTF character. */
1.1 misho 6274: if (start_offset > 0 && start_offset < length &&
1.1.1.2 ! misho 6275: NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
1.1 misho 6276: return PCRE_ERROR_BADUTF8_OFFSET;
6277: }
6278: #endif
6279:
6280: /* If the pattern was successfully studied with JIT support, run the JIT
6281: executable instead of the rest of this function. Most options must be set at
6282: compile time for the JIT code to be usable. Fallback to the normal code path if
6283: an unsupported flag is set. In particular, JIT does not support partial
6284: matching. */
6285:
6286: #ifdef SUPPORT_JIT
6287: if (extra_data != NULL
6288: && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6289: && extra_data->executable_jit != NULL
6290: && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6291: && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6292: PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
1.1.1.2 ! misho 6293: return PRIV(jit_exec)(re, extra_data->executable_jit,
! 6294: (const pcre_uchar *)subject, length, start_offset, options,
! 6295: ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
1.1 misho 6296: ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6297: #endif
6298:
6299: /* Carry on with non-JIT matching. This information is for finding all the
6300: numbers associated with a given name, for condition testing. */
6301:
1.1.1.2 ! misho 6302: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.1 misho 6303: md->name_count = re->name_count;
6304: md->name_entry_size = re->name_entry_size;
6305:
6306: /* Fish out the optional data from the extra_data structure, first setting
6307: the default values. */
6308:
6309: study = NULL;
6310: md->match_limit = MATCH_LIMIT;
6311: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6312: md->callout_data = NULL;
6313:
6314: /* The table pointer is always in native byte order. */
6315:
1.1.1.2 ! misho 6316: tables = re->tables;
1.1 misho 6317:
6318: if (extra_data != NULL)
6319: {
6320: register unsigned int flags = extra_data->flags;
6321: if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6322: study = (const pcre_study_data *)extra_data->study_data;
6323: if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6324: md->match_limit = extra_data->match_limit;
6325: if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6326: md->match_limit_recursion = extra_data->match_limit_recursion;
6327: if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6328: md->callout_data = extra_data->callout_data;
6329: if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6330: }
6331:
6332: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6333: is a feature that makes it possible to save compiled regex and re-use them
6334: in other programs later. */
6335:
1.1.1.2 ! misho 6336: if (tables == NULL) tables = PRIV(default_tables);
1.1 misho 6337:
6338: /* Set up other data */
6339:
6340: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6341: startline = (re->flags & PCRE_STARTLINE) != 0;
6342: firstline = (re->options & PCRE_FIRSTLINE) != 0;
6343:
6344: /* The code starts after the real_pcre block and the capture name table. */
6345:
1.1.1.2 ! misho 6346: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1 misho 6347: re->name_count * re->name_entry_size;
6348:
1.1.1.2 ! misho 6349: md->start_subject = (PCRE_PUCHAR)subject;
1.1 misho 6350: md->start_offset = start_offset;
6351: md->end_subject = md->start_subject + length;
6352: end_subject = md->end_subject;
6353:
6354: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6355: md->use_ucp = (re->options & PCRE_UCP) != 0;
6356: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6357: md->ignore_skip_arg = FALSE;
6358:
6359: /* Some options are unpacked into BOOL variables in the hope that testing
6360: them will be faster than individual option bits. */
6361:
6362: md->notbol = (options & PCRE_NOTBOL) != 0;
6363: md->noteol = (options & PCRE_NOTEOL) != 0;
6364: md->notempty = (options & PCRE_NOTEMPTY) != 0;
6365: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6366:
6367: md->hitend = FALSE;
6368: md->mark = md->nomatch_mark = NULL; /* In case never set */
6369:
6370: md->recursive = NULL; /* No recursion at top level */
6371: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6372:
6373: md->lcc = tables + lcc_offset;
1.1.1.2 ! misho 6374: md->fcc = tables + fcc_offset;
1.1 misho 6375: md->ctypes = tables + ctypes_offset;
6376:
6377: /* Handle different \R options. */
6378:
6379: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6380: {
6381: case 0:
6382: if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6383: md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6384: else
6385: #ifdef BSR_ANYCRLF
6386: md->bsr_anycrlf = TRUE;
6387: #else
6388: md->bsr_anycrlf = FALSE;
6389: #endif
6390: break;
6391:
6392: case PCRE_BSR_ANYCRLF:
6393: md->bsr_anycrlf = TRUE;
6394: break;
6395:
6396: case PCRE_BSR_UNICODE:
6397: md->bsr_anycrlf = FALSE;
6398: break;
6399:
6400: default: return PCRE_ERROR_BADNEWLINE;
6401: }
6402:
6403: /* Handle different types of newline. The three bits give eight cases. If
6404: nothing is set at run time, whatever was used at compile time applies. */
6405:
6406: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6407: (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6408: {
6409: case 0: newline = NEWLINE; break; /* Compile-time default */
6410: case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6411: case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6412: case PCRE_NEWLINE_CR+
6413: PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6414: case PCRE_NEWLINE_ANY: newline = -1; break;
6415: case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6416: default: return PCRE_ERROR_BADNEWLINE;
6417: }
6418:
6419: if (newline == -2)
6420: {
6421: md->nltype = NLTYPE_ANYCRLF;
6422: }
6423: else if (newline < 0)
6424: {
6425: md->nltype = NLTYPE_ANY;
6426: }
6427: else
6428: {
6429: md->nltype = NLTYPE_FIXED;
6430: if (newline > 255)
6431: {
6432: md->nllen = 2;
6433: md->nl[0] = (newline >> 8) & 255;
6434: md->nl[1] = newline & 255;
6435: }
6436: else
6437: {
6438: md->nllen = 1;
6439: md->nl[0] = newline;
6440: }
6441: }
6442:
6443: /* Partial matching was originally supported only for a restricted set of
6444: regexes; from release 8.00 there are no restrictions, but the bits are still
6445: defined (though never set). So there's no harm in leaving this code. */
6446:
6447: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6448: return PCRE_ERROR_BADPARTIAL;
6449:
6450: /* If the expression has got more back references than the offsets supplied can
6451: hold, we get a temporary chunk of working store to use during the matching.
6452: Otherwise, we can use the vector supplied, rounding down its size to a multiple
6453: of 3. */
6454:
6455: ocount = offsetcount - (offsetcount % 3);
6456: arg_offset_max = (2*ocount)/3;
6457:
6458: if (re->top_backref > 0 && re->top_backref >= ocount/3)
6459: {
6460: ocount = re->top_backref * 3 + 3;
1.1.1.2 ! misho 6461: md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1 misho 6462: if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6463: using_temporary_offsets = TRUE;
6464: DPRINTF(("Got memory to hold back references\n"));
6465: }
6466: else md->offset_vector = offsets;
6467:
6468: md->offset_end = ocount;
6469: md->offset_max = (2*ocount)/3;
6470: md->offset_overflow = FALSE;
6471: md->capture_last = -1;
6472:
6473: /* Reset the working variable associated with each extraction. These should
6474: never be used unless previously set, but they get saved and restored, and so we
6475: initialize them to avoid reading uninitialized locations. Also, unset the
6476: offsets for the matched string. This is really just for tidiness with callouts,
6477: in case they inspect these fields. */
6478:
6479: if (md->offset_vector != NULL)
6480: {
6481: register int *iptr = md->offset_vector + ocount;
6482: register int *iend = iptr - re->top_bracket;
6483: if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6484: while (--iptr >= iend) *iptr = -1;
6485: md->offset_vector[0] = md->offset_vector[1] = -1;
6486: }
6487:
1.1.1.2 ! misho 6488: /* Set up the first character to match, if available. The first_char value is
1.1 misho 6489: never set for an anchored regular expression, but the anchoring may be forced
6490: at run time, so we have to test for anchoring. The first char may be unset for
6491: an unanchored pattern, of course. If there's no first char and the pattern was
6492: studied, there may be a bitmap of possible first characters. */
6493:
6494: if (!anchored)
6495: {
6496: if ((re->flags & PCRE_FIRSTSET) != 0)
6497: {
1.1.1.2 ! misho 6498: has_first_char = TRUE;
! 6499: first_char = first_char2 = (pcre_uchar)(re->first_char);
! 6500: if ((re->flags & PCRE_FCH_CASELESS) != 0)
! 6501: {
! 6502: first_char2 = TABLE_GET(first_char, md->fcc, first_char);
! 6503: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
! 6504: if (utf && first_char > 127)
! 6505: first_char2 = UCD_OTHERCASE(first_char);
! 6506: #endif
! 6507: }
1.1 misho 6508: }
6509: else
6510: if (!startline && study != NULL &&
6511: (study->flags & PCRE_STUDY_MAPPED) != 0)
6512: start_bits = study->start_bits;
6513: }
6514:
6515: /* For anchored or unanchored matches, there may be a "last known required
6516: character" set. */
6517:
6518: if ((re->flags & PCRE_REQCHSET) != 0)
6519: {
1.1.1.2 ! misho 6520: has_req_char = TRUE;
! 6521: req_char = req_char2 = (pcre_uchar)(re->req_char);
! 6522: if ((re->flags & PCRE_RCH_CASELESS) != 0)
! 6523: {
! 6524: req_char2 = TABLE_GET(req_char, md->fcc, req_char);
! 6525: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
! 6526: if (utf && req_char > 127)
! 6527: req_char2 = UCD_OTHERCASE(req_char);
! 6528: #endif
! 6529: }
1.1 misho 6530: }
6531:
6532:
6533: /* ==========================================================================*/
6534:
6535: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6536: the loop runs just once. */
6537:
6538: for(;;)
6539: {
1.1.1.2 ! misho 6540: PCRE_PUCHAR save_end_subject = end_subject;
! 6541: PCRE_PUCHAR new_start_match;
1.1 misho 6542:
6543: /* If firstline is TRUE, the start of the match is constrained to the first
6544: line of a multiline string. That is, the match must be before or at the first
6545: newline. Implement this by temporarily adjusting end_subject so that we stop
6546: scanning at a newline. If the match fails at the newline, later code breaks
6547: this loop. */
6548:
6549: if (firstline)
6550: {
1.1.1.2 ! misho 6551: PCRE_PUCHAR t = start_match;
! 6552: #ifdef SUPPORT_UTF
! 6553: if (utf)
1.1 misho 6554: {
6555: while (t < md->end_subject && !IS_NEWLINE(t))
6556: {
6557: t++;
1.1.1.2 ! misho 6558: ACROSSCHAR(t < end_subject, *t, t++);
1.1 misho 6559: }
6560: }
6561: else
6562: #endif
6563: while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6564: end_subject = t;
6565: }
6566:
6567: /* There are some optimizations that avoid running the match if a known
6568: starting point is not found, or if a known later character is not present.
6569: However, there is an option that disables these, for testing and for ensuring
6570: that all callouts do actually occur. The option can be set in the regex by
6571: (*NO_START_OPT) or passed in match-time options. */
6572:
6573: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6574: {
1.1.1.2 ! misho 6575: /* Advance to a unique first char if there is one. */
1.1 misho 6576:
1.1.1.2 ! misho 6577: if (has_first_char)
1.1 misho 6578: {
1.1.1.2 ! misho 6579: if (first_char != first_char2)
! 6580: while (start_match < end_subject &&
! 6581: *start_match != first_char && *start_match != first_char2)
1.1 misho 6582: start_match++;
6583: else
1.1.1.2 ! misho 6584: while (start_match < end_subject && *start_match != first_char)
1.1 misho 6585: start_match++;
6586: }
6587:
6588: /* Or to just after a linebreak for a multiline match */
6589:
6590: else if (startline)
6591: {
6592: if (start_match > md->start_subject + start_offset)
6593: {
1.1.1.2 ! misho 6594: #ifdef SUPPORT_UTF
! 6595: if (utf)
1.1 misho 6596: {
6597: while (start_match < end_subject && !WAS_NEWLINE(start_match))
6598: {
6599: start_match++;
1.1.1.2 ! misho 6600: ACROSSCHAR(start_match < end_subject, *start_match,
! 6601: start_match++);
1.1 misho 6602: }
6603: }
6604: else
6605: #endif
6606: while (start_match < end_subject && !WAS_NEWLINE(start_match))
6607: start_match++;
6608:
6609: /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6610: and we are now at a LF, advance the match position by one more character.
6611: */
6612:
6613: if (start_match[-1] == CHAR_CR &&
6614: (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6615: start_match < end_subject &&
6616: *start_match == CHAR_NL)
6617: start_match++;
6618: }
6619: }
6620:
6621: /* Or to a non-unique first byte after study */
6622:
6623: else if (start_bits != NULL)
6624: {
6625: while (start_match < end_subject)
6626: {
6627: register unsigned int c = *start_match;
1.1.1.2 ! misho 6628: #ifndef COMPILE_PCRE8
! 6629: if (c > 255) c = 255;
! 6630: #endif
1.1 misho 6631: if ((start_bits[c/8] & (1 << (c&7))) == 0)
6632: {
6633: start_match++;
1.1.1.2 ! misho 6634: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
! 6635: /* In non 8-bit mode, the iteration will stop for
! 6636: characters > 255 at the beginning or not stop at all. */
! 6637: if (utf)
! 6638: ACROSSCHAR(start_match < end_subject, *start_match,
! 6639: start_match++);
1.1 misho 6640: #endif
6641: }
6642: else break;
6643: }
6644: }
6645: } /* Starting optimizations */
6646:
6647: /* Restore fudged end_subject */
6648:
6649: end_subject = save_end_subject;
6650:
6651: /* The following two optimizations are disabled for partial matching or if
6652: disabling is explicitly requested. */
6653:
6654: if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6655: {
6656: /* If the pattern was studied, a minimum subject length may be set. This is
6657: a lower bound; no actual string of that length may actually match the
6658: pattern. Although the value is, strictly, in characters, we treat it as
6659: bytes to avoid spending too much time in this optimization. */
6660:
6661: if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6662: (pcre_uint32)(end_subject - start_match) < study->minlength)
6663: {
6664: rc = MATCH_NOMATCH;
6665: break;
6666: }
6667:
1.1.1.2 ! misho 6668: /* If req_char is set, we know that that character must appear in the
! 6669: subject for the match to succeed. If the first character is set, req_char
1.1 misho 6670: must be later in the subject; otherwise the test starts at the match point.
6671: This optimization can save a huge amount of backtracking in patterns with
6672: nested unlimited repeats that aren't going to match. Writing separate code
6673: for cased/caseless versions makes it go faster, as does using an
6674: autoincrement and backing off on a match.
6675:
6676: HOWEVER: when the subject string is very, very long, searching to its end
6677: can take a long time, and give bad performance on quite ordinary patterns.
6678: This showed up when somebody was matching something like /^\d+C/ on a
6679: 32-megabyte string... so we don't do this when the string is sufficiently
6680: long. */
6681:
1.1.1.2 ! misho 6682: if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1 misho 6683: {
1.1.1.2 ! misho 6684: register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.1 misho 6685:
6686: /* We don't need to repeat the search if we haven't yet reached the
6687: place we found it at last time. */
6688:
1.1.1.2 ! misho 6689: if (p > req_char_ptr)
1.1 misho 6690: {
1.1.1.2 ! misho 6691: if (req_char != req_char2)
1.1 misho 6692: {
6693: while (p < end_subject)
6694: {
6695: register int pp = *p++;
1.1.1.2 ! misho 6696: if (pp == req_char || pp == req_char2) { p--; break; }
1.1 misho 6697: }
6698: }
6699: else
6700: {
6701: while (p < end_subject)
6702: {
1.1.1.2 ! misho 6703: if (*p++ == req_char) { p--; break; }
1.1 misho 6704: }
6705: }
6706:
6707: /* If we can't find the required character, break the matching loop,
6708: forcing a match failure. */
6709:
6710: if (p >= end_subject)
6711: {
6712: rc = MATCH_NOMATCH;
6713: break;
6714: }
6715:
6716: /* If we have found the required character, save the point where we
6717: found it, so that we don't search again next time round the loop if
6718: the start hasn't passed this character yet. */
6719:
1.1.1.2 ! misho 6720: req_char_ptr = p;
1.1 misho 6721: }
6722: }
6723: }
6724:
6725: #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6726: printf(">>>> Match against: ");
6727: pchars(start_match, end_subject - start_match, TRUE, md);
6728: printf("\n");
6729: #endif
6730:
6731: /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6732: first starting point for which a partial match was found. */
6733:
6734: md->start_match_ptr = start_match;
6735: md->start_used_ptr = start_match;
6736: md->match_call_count = 0;
6737: md->match_function_type = 0;
6738: md->end_offset_top = 0;
6739: rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6740: if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6741:
6742: switch(rc)
6743: {
6744: /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6745: the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6746: entirely. The only way we can do that is to re-do the match at the same
6747: point, with a flag to force SKIP with an argument to be ignored. Just
6748: treating this case as NOMATCH does not work because it does not check other
6749: alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6750:
6751: case MATCH_SKIP_ARG:
6752: new_start_match = start_match;
6753: md->ignore_skip_arg = TRUE;
6754: break;
6755:
6756: /* SKIP passes back the next starting point explicitly, but if it is the
6757: same as the match we have just done, treat it as NOMATCH. */
6758:
6759: case MATCH_SKIP:
6760: if (md->start_match_ptr != start_match)
6761: {
6762: new_start_match = md->start_match_ptr;
6763: break;
6764: }
6765: /* Fall through */
6766:
6767: /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6768: exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6769:
6770: case MATCH_NOMATCH:
6771: case MATCH_PRUNE:
6772: case MATCH_THEN:
6773: md->ignore_skip_arg = FALSE;
6774: new_start_match = start_match + 1;
1.1.1.2 ! misho 6775: #ifdef SUPPORT_UTF
! 6776: if (utf)
! 6777: ACROSSCHAR(new_start_match < end_subject, *new_start_match,
! 6778: new_start_match++);
1.1 misho 6779: #endif
6780: break;
6781:
6782: /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6783:
6784: case MATCH_COMMIT:
6785: rc = MATCH_NOMATCH;
6786: goto ENDLOOP;
6787:
6788: /* Any other return is either a match, or some kind of error. */
6789:
6790: default:
6791: goto ENDLOOP;
6792: }
6793:
6794: /* Control reaches here for the various types of "no match at this point"
6795: result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6796:
6797: rc = MATCH_NOMATCH;
6798:
6799: /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6800: newline in the subject (though it may continue over the newline). Therefore,
6801: if we have just failed to match, starting at a newline, do not continue. */
6802:
6803: if (firstline && IS_NEWLINE(start_match)) break;
6804:
6805: /* Advance to new matching position */
6806:
6807: start_match = new_start_match;
6808:
6809: /* Break the loop if the pattern is anchored or if we have passed the end of
6810: the subject. */
6811:
6812: if (anchored || start_match > end_subject) break;
6813:
6814: /* If we have just passed a CR and we are now at a LF, and the pattern does
6815: not contain any explicit matches for \r or \n, and the newline option is CRLF
1.1.1.2 ! misho 6816: or ANY or ANYCRLF, advance the match position by one more character. In
! 6817: normal matching start_match will always be greater than the first position at
! 6818: this stage, but a failed *SKIP can cause a return at the same point, which is
! 6819: why the first test exists. */
1.1 misho 6820:
1.1.1.2 ! misho 6821: if (start_match > (PCRE_PUCHAR)subject + start_offset &&
! 6822: start_match[-1] == CHAR_CR &&
1.1 misho 6823: start_match < end_subject &&
6824: *start_match == CHAR_NL &&
6825: (re->flags & PCRE_HASCRORLF) == 0 &&
6826: (md->nltype == NLTYPE_ANY ||
6827: md->nltype == NLTYPE_ANYCRLF ||
6828: md->nllen == 2))
6829: start_match++;
6830:
6831: md->mark = NULL; /* Reset for start of next match attempt */
6832: } /* End of for(;;) "bumpalong" loop */
6833:
6834: /* ==========================================================================*/
6835:
6836: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6837: conditions is true:
6838:
6839: (1) The pattern is anchored or the match was failed by (*COMMIT);
6840:
6841: (2) We are past the end of the subject;
6842:
6843: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6844: this option requests that a match occur at or before the first newline in
6845: the subject.
6846:
6847: When we have a match and the offset vector is big enough to deal with any
6848: backreferences, captured substring offsets will already be set up. In the case
6849: where we had to get some local store to hold offsets for backreference
6850: processing, copy those that we can. In this case there need not be overflow if
6851: certain parts of the pattern were not used, even though there are more
6852: capturing parentheses than vector slots. */
6853:
6854: ENDLOOP:
6855:
6856: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6857: {
6858: if (using_temporary_offsets)
6859: {
6860: if (arg_offset_max >= 4)
6861: {
6862: memcpy(offsets + 2, md->offset_vector + 2,
6863: (arg_offset_max - 2) * sizeof(int));
6864: DPRINTF(("Copied offsets from temporary memory\n"));
6865: }
6866: if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6867: DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho 6868: (PUBL(free))(md->offset_vector);
1.1 misho 6869: }
6870:
6871: /* Set the return code to the number of captured strings, or 0 if there were
6872: too many to fit into the vector. */
6873:
6874: rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6875: 0 : md->end_offset_top/2;
6876:
6877: /* If there is space in the offset vector, set any unused pairs at the end of
6878: the pattern to -1 for backwards compatibility. It is documented that this
6879: happens. In earlier versions, the whole set of potential capturing offsets
6880: was set to -1 each time round the loop, but this is handled differently now.
6881: "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6882: those at the end that need unsetting here. We can't just unset them all at
6883: the start of the whole thing because they may get set in one branch that is
6884: not the final matching branch. */
6885:
6886: if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6887: {
6888: register int *iptr, *iend;
6889: int resetcount = 2 + re->top_bracket * 2;
6890: if (resetcount > offsetcount) resetcount = ocount;
6891: iptr = offsets + md->end_offset_top;
6892: iend = offsets + resetcount;
6893: while (iptr < iend) *iptr++ = -1;
6894: }
6895:
6896: /* If there is space, set up the whole thing as substring 0. The value of
6897: md->start_match_ptr might be modified if \K was encountered on the success
6898: matching path. */
6899:
6900: if (offsetcount < 2) rc = 0; else
6901: {
6902: offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6903: offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6904: }
6905:
6906: /* Return MARK data if requested */
6907:
6908: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2 ! misho 6909: *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1 misho 6910: DPRINTF((">>>> returning %d\n", rc));
6911: return rc;
6912: }
6913:
6914: /* Control gets here if there has been an error, or if the overall match
6915: attempt has failed at all permitted starting positions. */
6916:
6917: if (using_temporary_offsets)
6918: {
6919: DPRINTF(("Freeing temporary memory\n"));
1.1.1.2 ! misho 6920: (PUBL(free))(md->offset_vector);
1.1 misho 6921: }
6922:
6923: /* For anything other than nomatch or partial match, just return the code. */
6924:
6925: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6926: {
6927: DPRINTF((">>>> error: returning %d\n", rc));
6928: return rc;
6929: }
6930:
6931: /* Handle partial matches - disable any mark data */
6932:
6933: if (start_partial != NULL)
6934: {
6935: DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6936: md->mark = NULL;
6937: if (offsetcount > 1)
6938: {
1.1.1.2 ! misho 6939: offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
! 6940: offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.1 misho 6941: }
6942: rc = PCRE_ERROR_PARTIAL;
6943: }
6944:
6945: /* This is the classic nomatch case */
6946:
6947: else
6948: {
6949: DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6950: rc = PCRE_ERROR_NOMATCH;
6951: }
6952:
6953: /* Return the MARK data if it has been requested. */
6954:
6955: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.1.1.2 ! misho 6956: *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
1.1 misho 6957: return rc;
6958: }
6959:
6960: /* End of pcre_exec.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>