Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.3
1.1 misho 1: /*************************************************
2: * PCRE testing program *
3: *************************************************/
4:
5: /* This program was hacked up as a tester for PCRE. I really should have
6: written it more tidily in the first place. Will I ever learn? It has grown and
1.1.1.2 misho 7: been extended and consequently is now rather, er, *very* untidy in places. The
8: addition of 16-bit support has made it even worse. :-(
1.1 misho 9:
10: -----------------------------------------------------------------------------
11: Redistribution and use in source and binary forms, with or without
12: modification, are permitted provided that the following conditions are met:
13:
14: * Redistributions of source code must retain the above copyright notice,
15: this list of conditions and the following disclaimer.
16:
17: * Redistributions in binary form must reproduce the above copyright
18: notice, this list of conditions and the following disclaimer in the
19: documentation and/or other materials provided with the distribution.
20:
21: * Neither the name of the University of Cambridge nor the names of its
22: contributors may be used to endorse or promote products derived from
23: this software without specific prior written permission.
24:
25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: POSSIBILITY OF SUCH DAMAGE.
36: -----------------------------------------------------------------------------
37: */
38:
1.1.1.2 misho 39: /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40: libraries in a single program. This is different from the modules such as
41: pcre_compile.c in the library itself, which are compiled separately for each
42: mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43: (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44: compiled only once. Therefore, it must not make use of any of the macros from
45: pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46: however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47: only supported library functions. */
48:
1.1 misho 49: #ifdef HAVE_CONFIG_H
50: #include "config.h"
51: #endif
52:
53: #include <ctype.h>
54: #include <stdio.h>
55: #include <string.h>
56: #include <stdlib.h>
57: #include <time.h>
58: #include <locale.h>
59: #include <errno.h>
60:
1.1.1.3 ! misho 61: /* Both libreadline and libedit are optionally supported. The user-supplied
! 62: original patch uses readline/readline.h for libedit, but in at least one system
! 63: it is installed as editline/readline.h, so the configuration code now looks for
! 64: that first, falling back to readline/readline.h. */
! 65:
! 66: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 67: #ifdef HAVE_UNISTD_H
68: #include <unistd.h>
69: #endif
1.1.1.3 ! misho 70: #if defined(SUPPORT_LIBREADLINE)
1.1 misho 71: #include <readline/readline.h>
72: #include <readline/history.h>
1.1.1.3 ! misho 73: #else
! 74: #if defined(HAVE_EDITLINE_READLINE_H)
! 75: #include <editline/readline.h>
! 76: #else
! 77: #include <readline/readline.h>
! 78: #endif
! 79: #endif
1.1 misho 80: #endif
81:
82: /* A number of things vary for Windows builds. Originally, pcretest opened its
83: input and output without "b"; then I was told that "b" was needed in some
84: environments, so it was added for release 5.0 to both the input and output. (It
85: makes no difference on Unix-like systems.) Later I was told that it is wrong
86: for the input on Windows. I've now abstracted the modes into two macros that
87: are set here, to make it easier to fiddle with them, and removed "b" from the
88: input mode under Windows. */
89:
90: #if defined(_WIN32) || defined(WIN32)
91: #include <io.h> /* For _setmode() */
92: #include <fcntl.h> /* For _O_BINARY */
93: #define INPUT_MODE "r"
94: #define OUTPUT_MODE "wb"
95:
96: #ifndef isatty
97: #define isatty _isatty /* This is what Windows calls them, I'm told, */
98: #endif /* though in some environments they seem to */
99: /* be already defined, hence the #ifndefs. */
100: #ifndef fileno
101: #define fileno _fileno
102: #endif
103:
104: /* A user sent this fix for Borland Builder 5 under Windows. */
105:
106: #ifdef __BORLANDC__
107: #define _setmode(handle, mode) setmode(handle, mode)
108: #endif
109:
110: /* Not Windows */
111:
112: #else
113: #include <sys/time.h> /* These two includes are needed */
114: #include <sys/resource.h> /* for setrlimit(). */
115: #define INPUT_MODE "rb"
116: #define OUTPUT_MODE "wb"
117: #endif
118:
1.1.1.2 misho 119: #define PRIV(name) name
1.1 misho 120:
121: /* We have to include pcre_internal.h because we need the internal info for
122: displaying the results of pcre_study() and we also need to know about the
123: internal macros, structures, and other internal data values; pcretest has
124: "inside information" compared to a program that strictly follows the PCRE API.
125:
126: Although pcre_internal.h does itself include pcre.h, we explicitly include it
127: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128: appropriately for an application, not for building PCRE. */
129:
130: #include "pcre.h"
1.1.1.2 misho 131:
132: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133: /* Configure internal macros to 16 bit mode. */
134: #define COMPILE_PCRE16
135: #endif
136:
1.1 misho 137: #include "pcre_internal.h"
138:
1.1.1.2 misho 139: /* The pcre_printint() function, which prints the internal form of a compiled
140: regex, is held in a separate file so that (a) it can be compiled in either
141: 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142: when that is compiled in debug mode. */
143:
144: #ifdef SUPPORT_PCRE8
145: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146: #endif
147: #ifdef SUPPORT_PCRE16
148: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149: #endif
150:
1.1 misho 151: /* We need access to some of the data tables that PCRE uses. So as not to have
152: to keep two copies, we include the source file here, changing the names of the
153: external symbols to prevent clashes. */
154:
1.1.1.2 misho 155: #define PCRE_INCLUDED
1.1 misho 156:
157: #include "pcre_tables.c"
158:
159: /* The definition of the macro PRINTABLE, which determines whether to print an
160: output character as-is or as a hex value when showing compiled patterns, is
1.1.1.2 misho 161: the same as in the printint.src file. We use it here in cases when the locale
162: has not been explicitly changed, so as to get consistent output from systems
163: that differ in their output from isprint() even in the "C" locale. */
1.1 misho 164:
/* PRINTABLE(c) is a fixed range test: 64..254 when EBCDIC is defined, 32..126
otherwise. PRINTOK(c) defers to isprint() only when locale_set is non-zero and
falls back to PRINTABLE(c) otherwise.
NOTE(review): isprint() is only defined for EOF and for values representable
as unsigned char - confirm that callers never pass a wider 16-bit code unit
while locale_set is non-zero. */
1.1.1.2 misho 165: #ifdef EBCDIC
166: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167: #else
168: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169: #endif
170:
171: #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172:
173: /* Posix support is disabled in 16 bit only mode. */
174: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175: #define NOPOSIX
176: #endif
1.1 misho 177:
178: /* It is possible to compile this test program without including support for
179: testing the POSIX interface, though this is not available via the standard
180: Makefile. */
181:
182: #if !defined NOPOSIX
183: #include "pcreposix.h"
184: #endif
185:
1.1.1.2 misho 186: /* It is also possible, originally for the benefit of a version that was
187: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188: NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
189: automatically cut out the UTF support if PCRE is built without it. */
190:
191: #ifndef SUPPORT_UTF
192: #ifndef NOUTF
193: #define NOUTF
1.1 misho 194: #endif
195: #endif
196:
1.1.1.2 misho 197: /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198: for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199: only from one place and is handled differently). I couldn't dream up any way of
200: using a single macro to do this in a generic way, because of the many different
201: argument requirements. We know that at least one of SUPPORT_PCRE8 and
202: SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203: use these in the definitions of generic macros.
204:
205: **** Special note about the PCHARSxxx macros: the address of the string to be
206: printed is always given as two arguments: a base address followed by an offset.
207: The base address is cast to the correct data size for 8 or 16 bit data; the
208: offset is in units of this size. If the string were given as base+offset in one
209: argument, the casting might be incorrectly applied. */
210:
/* 8-bit call wrappers. Each macro forwards to the matching pcre_xxx() function
(or to the local pchars() / read_capture_name8() helpers), casting string
arguments to char *. Macros whose first parameter is lv, p, re, rc, count, n or
extra expand to an assignment to that parameter; PCRE_JIT_STACK_ALLOC8 is a
plain expression and the remaining macros are plain calls. */
211: #ifdef SUPPORT_PCRE8
212:
213: #define PCHARS8(lv, p, offset, len, f) \
214: lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215:
216: #define PCHARSV8(p, offset, len, f) \
217: (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218:
/* cn16 is accepted so that the parameter list matches READ_CAPTURE_NAME16; it
is not used in this expansion. */
219: #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220: p = read_capture_name8(p, cn8, re)
221:
222: #define STRLEN8(p) ((int)strlen((char *)p))
223:
224: #define SET_PCRE_CALLOUT8(callout) \
225: pcre_callout = callout
226:
227: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228: pcre_assign_jit_stack(extra, callback, userdata)
229:
230: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231: re = pcre_compile((char *)pat, options, error, erroffset, tables)
232:
233: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234: namesptr, cbuffer, size) \
235: rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236: (char *)namesptr, cbuffer, size)
237:
238: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239: rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240:
241: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242: offsets, size_offsets, workspace, size_workspace) \
243: count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244: offsets, size_offsets, workspace, size_workspace)
245:
246: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247: offsets, size_offsets) \
248: count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249: offsets, size_offsets)
250:
251: #define PCRE_FREE_STUDY8(extra) \
252: pcre_free_study(extra)
253:
254: #define PCRE_FREE_SUBSTRING8(substring) \
255: pcre_free_substring(substring)
256:
257: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258: pcre_free_substring_list(listptr)
259:
260: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261: getnamesptr, subsptr) \
262: rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263: (char *)getnamesptr, subsptr)
264:
/* NOTE(review): the rc parameter is not used by this expansion; the call
refers to an identifier named re, which therefore has to be in scope at every
place the macro is used. Confirm against the call sites before renaming. */
265: #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266: n = pcre_get_stringnumber(re, (char *)ptr)
267:
268: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269: rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270:
271: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272: rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273:
274: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275: rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276:
277: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278: pcre_printint(re, outfile, debug_lengths)
279:
280: #define PCRE_STUDY8(extra, re, options, error) \
281: extra = pcre_study(re, options, error)
282:
283: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284: pcre_jit_stack_alloc(startsize, maxsize)
285:
286: #define PCRE_JIT_STACK_FREE8(stack) \
287: pcre_jit_stack_free(stack)
288:
289: #endif /* SUPPORT_PCRE8 */
290:
291: /* -----------------------------------------------------------*/
292:
/* 16-bit call wrappers. They mirror the 8-bit macros but call the pcre16_xxx()
functions (or the local pchars16() / read_capture_name16() / strlen16()
helpers). The pcre, pcre_extra and pcre_jit_stack pointers and the byte buffers
are cast to their 16-bit counterparts on the way in, and results are cast back,
so that callers can keep a single set of variables for both modes. */
293: #ifdef SUPPORT_PCRE16
294:
295: #define PCHARS16(lv, p, offset, len, f) \
296: lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297:
298: #define PCHARSV16(p, offset, len, f) \
299: (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300:
/* cn8 is accepted so that the parameter list matches READ_CAPTURE_NAME8; it is
not used in this expansion. */
301: #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302: p = read_capture_name16(p, cn16, re)
303:
304: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305:
306: #define SET_PCRE_CALLOUT16(callout) \
307: pcre16_callout = (int (*)(pcre16_callout_block *))callout
308:
309: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310: pcre16_assign_jit_stack((pcre16_extra *)extra, \
311: (pcre16_jit_callback)callback, userdata)
312:
313: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314: re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315: tables)
316:
/* The buffer size is halved before it is handed to the library - presumably
the caller supplies a byte count and pcre16 expects a count of 16-bit units
(TODO confirm at the call sites). The size argument is parenthesized so that an
expression such as a + b is halved as a whole instead of expanding to
a + b/2. */
317: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318: namesptr, cbuffer, size) \
319: rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320: count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)
321:
/* As for the named variant: size is parenthesized before being halved. */
322: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323: rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324: (PCRE_UCHAR16 *)cbuffer, (size)/2)
325:
326: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327: offsets, size_offsets, workspace, size_workspace) \
328: count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329: (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330: workspace, size_workspace)
331:
332: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333: offsets, size_offsets) \
334: count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335: len, start_offset, options, offsets, size_offsets)
336:
337: #define PCRE_FREE_STUDY16(extra) \
338: pcre16_free_study((pcre16_extra *)extra)
339:
340: #define PCRE_FREE_SUBSTRING16(substring) \
341: pcre16_free_substring((PCRE_SPTR16)substring)
342:
343: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344: pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345:
346: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347: getnamesptr, subsptr) \
348: rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349: count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350:
/* NOTE(review): the rc parameter is not used by this expansion; the call
refers to an identifier named re taken from the call site, and - unlike the
neighbouring macros - passes it without a (pcre16 *) cast. Left unchanged
because the call sites are not visible here; confirm before altering. */
351: #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352: n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353:
354: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355: rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356: (PCRE_SPTR16 *)(void*)subsptr)
357:
358: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359: rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360: (PCRE_SPTR16 **)(void*)listptr)
361:
362: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363: rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364: tables)
365:
/* No cast is applied here: pcre16_printint() is declared earlier in this file
as taking a pcre * argument. */
366: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367: pcre16_printint(re, outfile, debug_lengths)
368:
369: #define PCRE_STUDY16(extra, re, options, error) \
370: extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371:
372: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373: (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374:
375: #define PCRE_JIT_STACK_FREE16(stack) \
376: pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377:
378: #endif /* SUPPORT_PCRE16 */
379:
380:
381: /* ----- Both modes are supported; a runtime test is needed, except for
382: pcre_config(), and the JIT stack functions, when it doesn't matter which
383: version is called. ----- */
384:
/* Generic wrappers used by the rest of the program. With both libraries
compiled in, every statement-like macro below expands to a bare
if (use_pcre16) ... else ... pair that selects the 16-bit or 8-bit wrapper at
run time, and the value-like ones (CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC,
PCRE_MAKETABLES) expand to a conditional expression. With only one library
compiled in they are plain aliases for that library's wrappers.
NOTE(review): the if/else expansions are not wrapped in do { } while (0); use
them only as complete statements terminated by a semicolon, never inside an
expression. */
385: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386:
387: #define CHAR_SIZE (use_pcre16? 2:1)
388:
389: #define PCHARS(lv, p, offset, len, f) \
390: if (use_pcre16) \
391: PCHARS16(lv, p, offset, len, f); \
392: else \
393: PCHARS8(lv, p, offset, len, f)
394:
395: #define PCHARSV(p, offset, len, f) \
396: if (use_pcre16) \
397: PCHARSV16(p, offset, len, f); \
398: else \
399: PCHARSV8(p, offset, len, f)
400:
401: #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402: if (use_pcre16) \
403: READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404: else \
405: READ_CAPTURE_NAME8(p, cn8, cn16, re)
406:
407: #define SET_PCRE_CALLOUT(callout) \
408: if (use_pcre16) \
409: SET_PCRE_CALLOUT16(callout); \
410: else \
411: SET_PCRE_CALLOUT8(callout)
412:
413: #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414:
415: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416: if (use_pcre16) \
417: PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418: else \
419: PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420:
421: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422: if (use_pcre16) \
423: PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424: else \
425: PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426:
/* No run-time test here: the 8-bit pcre_config is used even when use_pcre16 is
set. */
427: #define PCRE_CONFIG pcre_config
428:
429: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430: namesptr, cbuffer, size) \
431: if (use_pcre16) \
432: PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433: namesptr, cbuffer, size); \
434: else \
435: PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436: namesptr, cbuffer, size)
437:
438: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439: if (use_pcre16) \
440: PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441: else \
442: PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443:
444: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445: offsets, size_offsets, workspace, size_workspace) \
446: if (use_pcre16) \
447: PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448: offsets, size_offsets, workspace, size_workspace); \
449: else \
450: PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451: offsets, size_offsets, workspace, size_workspace)
452:
453: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454: offsets, size_offsets) \
455: if (use_pcre16) \
456: PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457: offsets, size_offsets); \
458: else \
459: PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460: offsets, size_offsets)
461:
462: #define PCRE_FREE_STUDY(extra) \
463: if (use_pcre16) \
464: PCRE_FREE_STUDY16(extra); \
465: else \
466: PCRE_FREE_STUDY8(extra)
467:
468: #define PCRE_FREE_SUBSTRING(substring) \
469: if (use_pcre16) \
470: PCRE_FREE_SUBSTRING16(substring); \
471: else \
472: PCRE_FREE_SUBSTRING8(substring)
473:
474: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475: if (use_pcre16) \
476: PCRE_FREE_SUBSTRING_LIST16(listptr); \
477: else \
478: PCRE_FREE_SUBSTRING_LIST8(listptr)
479:
480: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481: getnamesptr, subsptr) \
482: if (use_pcre16) \
483: PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484: getnamesptr, subsptr); \
485: else \
486: PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487: getnamesptr, subsptr)
488:
489: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490: if (use_pcre16) \
491: PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492: else \
493: PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494:
495: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496: if (use_pcre16) \
497: PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498: else \
499: PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500:
501: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502: if (use_pcre16) \
503: PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504: else \
505: PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506:
507: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508: (use_pcre16 ? \
509: PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510: :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511:
512: #define PCRE_JIT_STACK_FREE(stack) \
513: if (use_pcre16) \
514: PCRE_JIT_STACK_FREE16(stack); \
515: else \
516: PCRE_JIT_STACK_FREE8(stack)
517:
518: #define PCRE_MAKETABLES \
519: (use_pcre16? pcre16_maketables() : pcre_maketables())
520:
521: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522: if (use_pcre16) \
523: PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524: else \
525: PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526:
527: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528: if (use_pcre16) \
529: PCRE_PRINTINT16(re, outfile, debug_lengths); \
530: else \
531: PCRE_PRINTINT8(re, outfile, debug_lengths)
532:
533: #define PCRE_STUDY(extra, re, options, error) \
534: if (use_pcre16) \
535: PCRE_STUDY16(extra, re, options, error); \
536: else \
537: PCRE_STUDY8(extra, re, options, error)
538:
539: /* ----- Only 8-bit mode is supported ----- */
540:
541: #elif defined SUPPORT_PCRE8
542: #define CHAR_SIZE 1
543: #define PCHARS PCHARS8
544: #define PCHARSV PCHARSV8
545: #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
546: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
547: #define STRLEN STRLEN8
548: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
549: #define PCRE_COMPILE PCRE_COMPILE8
550: #define PCRE_CONFIG pcre_config
551: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
553: #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
554: #define PCRE_EXEC PCRE_EXEC8
555: #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
556: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
557: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
558: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
559: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
560: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
561: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
562: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
563: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
564: #define PCRE_MAKETABLES pcre_maketables()
565: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566: #define PCRE_PRINTINT PCRE_PRINTINT8
567: #define PCRE_STUDY PCRE_STUDY8
568:
569: /* ----- Only 16-bit mode is supported ----- */
570:
571: #else
572: #define CHAR_SIZE 2
573: #define PCHARS PCHARS16
574: #define PCHARSV PCHARSV16
575: #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
576: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
577: #define STRLEN STRLEN16
578: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
579: #define PCRE_COMPILE PCRE_COMPILE16
580: #define PCRE_CONFIG pcre16_config
581: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
583: #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
584: #define PCRE_EXEC PCRE_EXEC16
585: #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
586: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
587: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
588: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
589: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
590: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
591: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
592: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
593: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
594: #define PCRE_MAKETABLES pcre16_maketables()
595: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596: #define PCRE_PRINTINT PCRE_PRINTINT16
597: #define PCRE_STUDY PCRE_STUDY16
598: #endif
599:
600: /* ----- End of mode-specific function call macros ----- */
601:
1.1 misho 602:
603: /* Other parameters */
604:
/* Fallback for environments where CLOCKS_PER_SEC is not already defined: use
CLK_TCK when that is available, otherwise assume 100 clock ticks per second. */
605: #ifndef CLOCKS_PER_SEC
606: #ifdef CLK_TCK
607: #define CLOCKS_PER_SEC CLK_TCK
608: #else
609: #define CLOCKS_PER_SEC 100
610: #endif
611: #endif
612:
/* Only defined when the DFA interface is compiled in (NODFA not defined).
Presumably the number of elements in the DFA matching workspace - TODO confirm
at the point of use. */
1.1.1.3 ! misho 613: #if !defined NODFA
! 614: #define DFA_WS_DIMENSION 1000
! 615: #endif
! 616:
1.1 misho 617: /* This is the default loop count for timing. */
618:
619: #define LOOPREPEAT 500000
620:
621: /* Static variables */
622:
/* File-scope program state. Variables declared without an initializer have
static storage duration and therefore start out as zero / NULL. locale_set is
the flag consulted by PRINTOK() to choose between isprint() and PRINTABLE().
The roles of the remaining flags and counters are set where they are used,
outside this block. */
623: static FILE *outfile;
624: static int log_store = 0;
625: static int callout_count;
626: static int callout_extra;
627: static int callout_fail_count;
628: static int callout_fail_id;
629: static int debug_lengths;
630: static int first_callout;
1.1.1.3 ! misho 631: static int jit_was_used;
1.1 misho 632: static int locale_set = 0;
633: static int show_malloc;
1.1.1.2 misho 634: static int use_utf;
1.1 misho 635: static size_t gotten_store;
636: static size_t first_gotten_store = 0;
637: static const unsigned char *last_callout_mark = NULL;
638:
639: /* The buffers grow automatically if very long input lines are encountered. */
640:
/* buffer_size is the initial size; the three buffer pointers start out NULL
and are presumably allocated at start-up with buffer_size bytes each - TODO
confirm in the allocation code. */
641: static int buffer_size = 50000;
1.1.1.2 misho 642: static pcre_uint8 *buffer = NULL;
643: static pcre_uint8 *dbuffer = NULL;
644: static pcre_uint8 *pbuffer = NULL;
645:
646: /* Another buffer is needed for translation to 16-bit character strings. It
647: will be obtained and extended as required. */
648:
/* 16-bit translation buffer (initially empty) and the opcode length table for
16-bit compiled patterns. The order of the directives below matters: LINK_SIZE
and IMM2_SIZE must be redefined before OP_LENGTHS is expanded. */
649: #ifdef SUPPORT_PCRE16
650: static int buffer16_size = 0;
651: static pcre_uint16 *buffer16 = NULL;
652:
653: #ifdef SUPPORT_PCRE8
654:
655: /* We need the table of operator lengths that is used for 16-bit compiling, in
656: order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657: data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658: appropriately for the 16-bit world. Just as a safety check, make sure that
659: COMPILE_PCRE16 is *not* set. */
660:
661: #ifdef COMPILE_PCRE16
662: #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663: #endif
664:
/* Remap LINK_SIZE (2 -> 1, 3 or 4 -> 2) and force IMM2_SIZE to 1. Presumably
this turns sizes counted in bytes into sizes counted in 16-bit units - TODO
confirm against pcre_internal.h. The macros are not restored afterwards within
this block, so code following it sees the remapped values. */
665: #if LINK_SIZE == 2
666: #undef LINK_SIZE
667: #define LINK_SIZE 1
668: #elif LINK_SIZE == 3 || LINK_SIZE == 4
669: #undef LINK_SIZE
670: #define LINK_SIZE 2
671: #else
672: #error LINK_SIZE must be either 2, 3, or 4
673: #endif
674:
675: #undef IMM2_SIZE
676: #define IMM2_SIZE 1
677:
678: #endif /* SUPPORT_PCRE8 */
679:
/* OP_LENGTHS is expanded here, after any remapping above has taken effect. */
680: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681: #endif /* SUPPORT_PCRE16 */
682:
683: /* If we have 8-bit support, default use_pcre16 to false; if there is also
684: 16-bit support, it can be changed by an option. If there is no 8-bit support,
685: there must be 16-bit support, so default it to 1. */
686:
/* Run-time mode flag: non-zero selects the 16-bit wrappers in the generic
PCHARS / PCRE_xxx macros when both libraries are compiled in. */
687: #ifdef SUPPORT_PCRE8
688: static int use_pcre16 = 0;
689: #else
690: static int use_pcre16 = 1;
691: #endif /* SUPPORT_PCRE8 */
1.1 misho 692:
1.1.1.3 ! misho 693: /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
! 694:
/* Seven entries covering every non-empty combination of the three JIT study
flags, laid out like a 3-bit counter: bit 0 = JIT_COMPILE, bit 1 =
JIT_PARTIAL_SOFT_COMPILE, bit 2 = JIT_PARTIAL_HARD_COMPILE. Presumably indexed
by n - 1 for the option digit n - TODO confirm where the digit is parsed. */
! 695: static int jit_study_bits[] =
! 696: {
! 697: PCRE_STUDY_JIT_COMPILE, /* n = 1 */
! 698: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 2 */
! 699: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* n = 3 */
! 700: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 4 */
! 701: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 5 */
! 702: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* n = 6 */
! 703: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
! 704: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE /* n = 7 */
! 705: };
! 706:
1.1 misho 707: /* Textual explanations for runtime error codes */
708:
/* Table of 31 message strings (indices 0 to 30). NULL marks a slot that has no
generic text because, per the entry comments, the code is handled specially or
is never returned by the matching functions. The entries appear to be ordered
by the magnitude of the negative PCRE_ERROR_xxx codes (slot 1 is annotated
NOMATCH) - TODO confirm against pcre.h and the lookup site, and keep the table
length in step with any bounds check there. */
709: static const char *errtexts[] = {
710: NULL, /* 0 is no error */
711: NULL, /* NOMATCH is handled specially */
712: "NULL argument passed",
713: "bad option value",
714: "magic number missing",
715: "unknown opcode - pattern overwritten?",
716: "no more memory",
717: NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
718: "match limit exceeded",
719: "callout error code",
1.1.1.2 misho 720: NULL, /* BADUTF8/16 is handled specially */
721: NULL, /* BADUTF8/16 offset is handled specially */
1.1 misho 722: NULL, /* PARTIAL is handled specially */
723: "not used - internal error",
724: "internal error - pattern overwritten?",
725: "bad count value",
726: "item unsupported for DFA matching",
727: "backreference condition or recursion test not supported for DFA matching",
728: "match limit not supported for DFA matching",
729: "workspace size exceeded in DFA matching",
730: "too much recursion for DFA matching",
731: "recursion limit exceeded",
732: "not used - internal error",
733: "invalid combination of newline options",
734: "bad offset value",
1.1.1.2 misho 735: NULL, /* SHORTUTF8/16 is handled specially */
1.1 misho 736: "nested recursion at the same subject position",
1.1.1.2 misho 737: "JIT stack limit reached",
1.1.1.3 ! misho 738: "pattern compiled in wrong mode: 8-bit/16-bit error",
! 739: "pattern compiled with other endianness",
! 740: "invalid data in workspace for DFA restart"
1.1 misho 741: };
742:
743:
744: /*************************************************
745: * Alternate character tables *
746: *************************************************/
747:
748: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749: using the default tables of the library. However, the T option can be used to
750: select alternate sets of tables, for different kinds of testing. Note also that
751: the L (locale) option also adjusts the tables. */
752:
753: /* This is the set of tables distributed as default with PCRE. It recognizes
754: only ASCII characters. */
755:
1.1.1.2 misho 756: static const pcre_uint8 tables0[] = {
/* Layout (1088 bytes in total): 256-byte lower-casing table, 256-byte
case-flipping table, ten 32-byte character-class bit maps (320 bytes), and a
256-byte table of per-character type bits. */
1.1 misho 757:
758: /* This table is a lower casing table. */
759:
760: 0, 1, 2, 3, 4, 5, 6, 7,
761: 8, 9, 10, 11, 12, 13, 14, 15,
762: 16, 17, 18, 19, 20, 21, 22, 23,
763: 24, 25, 26, 27, 28, 29, 30, 31,
764: 32, 33, 34, 35, 36, 37, 38, 39,
765: 40, 41, 42, 43, 44, 45, 46, 47,
766: 48, 49, 50, 51, 52, 53, 54, 55,
767: 56, 57, 58, 59, 60, 61, 62, 63,
768: 64, 97, 98, 99,100,101,102,103,
769: 104,105,106,107,108,109,110,111,
770: 112,113,114,115,116,117,118,119,
771: 120,121,122, 91, 92, 93, 94, 95,
772: 96, 97, 98, 99,100,101,102,103,
773: 104,105,106,107,108,109,110,111,
774: 112,113,114,115,116,117,118,119,
775: 120,121,122,123,124,125,126,127,
776: 128,129,130,131,132,133,134,135,
777: 136,137,138,139,140,141,142,143,
778: 144,145,146,147,148,149,150,151,
779: 152,153,154,155,156,157,158,159,
780: 160,161,162,163,164,165,166,167,
781: 168,169,170,171,172,173,174,175,
782: 176,177,178,179,180,181,182,183,
783: 184,185,186,187,188,189,190,191,
784: 192,193,194,195,196,197,198,199,
785: 200,201,202,203,204,205,206,207,
786: 208,209,210,211,212,213,214,215,
787: 216,217,218,219,220,221,222,223,
788: 224,225,226,227,228,229,230,231,
789: 232,233,234,235,236,237,238,239,
790: 240,241,242,243,244,245,246,247,
791: 248,249,250,251,252,253,254,255,
792:
793: /* This table is a case flipping table. */
794:
795: 0, 1, 2, 3, 4, 5, 6, 7,
796: 8, 9, 10, 11, 12, 13, 14, 15,
797: 16, 17, 18, 19, 20, 21, 22, 23,
798: 24, 25, 26, 27, 28, 29, 30, 31,
799: 32, 33, 34, 35, 36, 37, 38, 39,
800: 40, 41, 42, 43, 44, 45, 46, 47,
801: 48, 49, 50, 51, 52, 53, 54, 55,
802: 56, 57, 58, 59, 60, 61, 62, 63,
803: 64, 97, 98, 99,100,101,102,103,
804: 104,105,106,107,108,109,110,111,
805: 112,113,114,115,116,117,118,119,
806: 120,121,122, 91, 92, 93, 94, 95,
807: 96, 65, 66, 67, 68, 69, 70, 71,
808: 72, 73, 74, 75, 76, 77, 78, 79,
809: 80, 81, 82, 83, 84, 85, 86, 87,
810: 88, 89, 90,123,124,125,126,127,
811: 128,129,130,131,132,133,134,135,
812: 136,137,138,139,140,141,142,143,
813: 144,145,146,147,148,149,150,151,
814: 152,153,154,155,156,157,158,159,
815: 160,161,162,163,164,165,166,167,
816: 168,169,170,171,172,173,174,175,
817: 176,177,178,179,180,181,182,183,
818: 184,185,186,187,188,189,190,191,
819: 192,193,194,195,196,197,198,199,
820: 200,201,202,203,204,205,206,207,
821: 208,209,210,211,212,213,214,215,
822: 216,217,218,219,220,221,222,223,
823: 224,225,226,227,228,229,230,231,
824: 232,233,234,235,236,237,238,239,
825: 240,241,242,243,244,245,246,247,
826: 248,249,250,251,252,253,254,255,
827:
828: /* This table contains bit maps for various character classes. Each map is 32
829: bytes long and the bits run from the least significant end of each byte. The
830: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
831: graph, print, punct, and cntrl. Other classes are built from combinations. */
832:
/* space: characters 9-13 and 32 */
833: 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
834: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837:
/* xdigit: 0-9, A-F, a-f */
838: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839: 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
840: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842:
/* digit: 0-9 */
843: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
844: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847:
/* upper: A-Z */
848: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849: 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
850: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852:
/* lower: a-z */
853: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
855: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857:
/* word: 0-9, A-Z, '_' (the 0x80 bit of 0x87), a-z */
858: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
859: 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
860: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862:
/* graph: characters 33-126 */
863: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
864: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867:
/* print: characters 32-126 */
868: 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
869: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
870: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872:
/* punct: 33-47, 58-64, 91-96, 123-126 */
873: 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
874: 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
875: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877:
/* cntrl: characters 0-31 and 127 */
878: 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
879: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
880: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
882:
883: /* This table identifies various classes of character by individual bits:
884: 0x01 white space character
885: 0x02 letter
886: 0x04 decimal digit
887: 0x08 hexadecimal digit
888: 0x10 alphanumeric or '_'
889: 0x80 regular expression metacharacter or binary zero
890: */
891:
892: 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
893: 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
894: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
895: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
896: 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
897: 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
898: 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
899: 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
900: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
901: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
902: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
903: 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
904: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
905: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
906: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
907: 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
908: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
909: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
910: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
911: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
912: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
913: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
914: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
915: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
916: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
917: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
918: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
919: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
920: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
921: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
922: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
923: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924:
925: /* This is a set of tables that came originally from a Windows user. It seems to
926: be at least an approximation of ISO 8859. In particular, there are characters
927: greater than 128 that are marked as spaces, letters, etc. */
928:
1.1.1.2 misho 929: static const pcre_uint8 tables1[] = {
/* The data below is 1088 bytes, the same size as tables0, and is assumed to
follow the same layout: lower-casing table, case-flipping table, ten 32-byte
class bit maps, then the character-type table. The section markers are derived
from that layout. */

/* Lower-casing table (256 bytes). Unlike tables0, values 192-222 (except 215)
are mapped to 224-254. */
1.1 misho 930: 0,1,2,3,4,5,6,7,
931: 8,9,10,11,12,13,14,15,
932: 16,17,18,19,20,21,22,23,
933: 24,25,26,27,28,29,30,31,
934: 32,33,34,35,36,37,38,39,
935: 40,41,42,43,44,45,46,47,
936: 48,49,50,51,52,53,54,55,
937: 56,57,58,59,60,61,62,63,
938: 64,97,98,99,100,101,102,103,
939: 104,105,106,107,108,109,110,111,
940: 112,113,114,115,116,117,118,119,
941: 120,121,122,91,92,93,94,95,
942: 96,97,98,99,100,101,102,103,
943: 104,105,106,107,108,109,110,111,
944: 112,113,114,115,116,117,118,119,
945: 120,121,122,123,124,125,126,127,
946: 128,129,130,131,132,133,134,135,
947: 136,137,138,139,140,141,142,143,
948: 144,145,146,147,148,149,150,151,
949: 152,153,154,155,156,157,158,159,
950: 160,161,162,163,164,165,166,167,
951: 168,169,170,171,172,173,174,175,
952: 176,177,178,179,180,181,182,183,
953: 184,185,186,187,188,189,190,191,
954: 224,225,226,227,228,229,230,231,
955: 232,233,234,235,236,237,238,239,
956: 240,241,242,243,244,245,246,215,
957: 248,249,250,251,252,253,254,223,
958: 224,225,226,227,228,229,230,231,
959: 232,233,234,235,236,237,238,239,
960: 240,241,242,243,244,245,246,247,
961: 248,249,250,251,252,253,254,255,
/* Case-flipping table (256 bytes). */
962: 0,1,2,3,4,5,6,7,
963: 8,9,10,11,12,13,14,15,
964: 16,17,18,19,20,21,22,23,
965: 24,25,26,27,28,29,30,31,
966: 32,33,34,35,36,37,38,39,
967: 40,41,42,43,44,45,46,47,
968: 48,49,50,51,52,53,54,55,
969: 56,57,58,59,60,61,62,63,
970: 64,97,98,99,100,101,102,103,
971: 104,105,106,107,108,109,110,111,
972: 112,113,114,115,116,117,118,119,
973: 120,121,122,91,92,93,94,95,
974: 96,65,66,67,68,69,70,71,
975: 72,73,74,75,76,77,78,79,
976: 80,81,82,83,84,85,86,87,
977: 88,89,90,123,124,125,126,127,
978: 128,129,130,131,132,133,134,135,
979: 136,137,138,139,140,141,142,143,
980: 144,145,146,147,148,149,150,151,
981: 152,153,154,155,156,157,158,159,
982: 160,161,162,163,164,165,166,167,
983: 168,169,170,171,172,173,174,175,
984: 176,177,178,179,180,181,182,183,
985: 184,185,186,187,188,189,190,191,
986: 224,225,226,227,228,229,230,231,
987: 232,233,234,235,236,237,238,239,
988: 240,241,242,243,244,245,246,215,
989: 248,249,250,251,252,253,254,223,
990: 192,193,194,195,196,197,198,199,
991: 200,201,202,203,204,205,206,207,
992: 208,209,210,211,212,213,214,247,
993: 216,217,218,219,220,221,222,255,
/* Character-class bit maps: ten maps of 32 bytes each, presumably in the same
order as tables0 (space, xdigit, digit, upper, lower, word, graph, print,
punct, cntrl). */
/* space */
994: 0,62,0,0,1,0,0,0,
995: 0,0,0,0,0,0,0,0,
996: 32,0,0,0,1,0,0,0,
997: 0,0,0,0,0,0,0,0,
/* xdigit */
998: 0,0,0,0,0,0,255,3,
999: 126,0,0,0,126,0,0,0,
1000: 0,0,0,0,0,0,0,0,
1001: 0,0,0,0,0,0,0,0,
/* digit */
1002: 0,0,0,0,0,0,255,3,
1003: 0,0,0,0,0,0,0,0,
1004: 0,0,0,0,0,0,12,2,
1005: 0,0,0,0,0,0,0,0,
/* upper */
1006: 0,0,0,0,0,0,0,0,
1007: 254,255,255,7,0,0,0,0,
1008: 0,0,0,0,0,0,0,0,
1009: 255,255,127,127,0,0,0,0,
/* lower */
1010: 0,0,0,0,0,0,0,0,
1011: 0,0,0,0,254,255,255,7,
1012: 0,0,0,0,0,4,32,4,
1013: 0,0,0,128,255,255,127,255,
/* word */
1014: 0,0,0,0,0,0,255,3,
1015: 254,255,255,135,254,255,255,7,
1016: 0,0,0,0,0,4,44,6,
1017: 255,255,127,255,255,255,127,255,
/* graph */
1018: 0,0,0,0,254,255,255,255,
1019: 255,255,255,255,255,255,255,127,
1020: 0,0,0,0,254,255,255,255,
1021: 255,255,255,255,255,255,255,255,
/* print */
1022: 0,2,0,0,255,255,255,255,
1023: 255,255,255,255,255,255,255,127,
1024: 0,0,0,0,255,255,255,255,
1025: 255,255,255,255,255,255,255,255,
/* punct */
1026: 0,0,0,0,254,255,0,252,
1027: 1,0,0,248,1,0,0,120,
1028: 0,0,0,0,254,255,255,255,
1029: 0,0,128,0,0,0,128,0,
/* cntrl */
1030: 255,255,255,255,0,0,0,0,
1031: 0,0,0,0,0,0,0,128,
1032: 255,255,255,255,0,0,0,0,
1033: 0,0,0,0,0,0,0,0,
/* Character-type table (256 bytes), one entry per character value. The bit
meanings are presumably those documented for tables0 (0x01 space, 0x02 letter,
0x04 digit, 0x08 hex digit, 0x10 word character, 0x80 metacharacter or zero). */
1034: 128,0,0,0,0,0,0,0,
1035: 0,1,1,0,1,1,0,0,
1036: 0,0,0,0,0,0,0,0,
1037: 0,0,0,0,0,0,0,0,
1038: 1,0,0,0,128,0,0,0,
1039: 128,128,128,128,0,0,128,0,
1040: 28,28,28,28,28,28,28,28,
1041: 28,28,0,0,0,0,0,128,
1042: 0,26,26,26,26,26,26,18,
1043: 18,18,18,18,18,18,18,18,
1044: 18,18,18,18,18,18,18,18,
1045: 18,18,18,128,128,0,128,16,
1046: 0,26,26,26,26,26,26,18,
1047: 18,18,18,18,18,18,18,18,
1048: 18,18,18,18,18,18,18,18,
1049: 18,18,18,128,128,0,0,0,
1050: 0,0,0,0,0,1,0,0,
1051: 0,0,0,0,0,0,0,0,
1052: 0,0,0,0,0,0,0,0,
1053: 0,0,0,0,0,0,0,0,
1054: 1,0,0,0,0,0,0,0,
1055: 0,0,18,0,0,0,0,0,
1056: 0,0,20,20,0,18,0,0,
1057: 0,20,18,0,0,0,0,0,
1058: 18,18,18,18,18,18,18,18,
1059: 18,18,18,18,18,18,18,18,
1060: 18,18,18,18,18,18,18,0,
1061: 18,18,18,18,18,18,18,18,
1062: 18,18,18,18,18,18,18,18,
1063: 18,18,18,18,18,18,18,18,
1064: 18,18,18,18,18,18,18,0,
1065: 18,18,18,18,18,18,18,18
1066: };
1067:
1068:
1069:
1070:
1071: #ifndef HAVE_STRERROR
1072: /*************************************************
1073: * Provide strerror() for non-ANSI libraries *
1074: *************************************************/
1075:
1076: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1077: in their libraries, but can provide the same facility by this simple
1078: alternative function. */
1079:
1080: extern int sys_nerr;
1081: extern char *sys_errlist[];
1082:
1083: char *
1084: strerror(int n)
1085: {
1086: if (n < 0 || n >= sys_nerr) return "unknown error number";
1087: return sys_errlist[n];
1088: }
1089: #endif /* HAVE_STRERROR */
1090:
1091:
1092: /*************************************************
1093: * JIT memory callback *
1094: *************************************************/
1095:
1096: static pcre_jit_stack* jit_callback(void *arg)
1097: {
1.1.1.3 ! misho 1098: jit_was_used = TRUE;
1.1 misho 1099: return (pcre_jit_stack *)arg;
1100: }
1101:
1102:
1.1.1.2 misho 1103: #if !defined NOUTF || defined SUPPORT_PCRE16
1104: /*************************************************
1105: * Convert UTF-8 string to value *
1106: *************************************************/
1107:
1108: /* This function takes one or more bytes that represents a UTF-8 character,
1109: and returns the value of the character.
1110:
1111: Argument:
1112: utf8bytes a pointer to the byte vector
1113: vptr a pointer to an int to receive the value
1114:
1115: Returns: > 0 => the number of bytes consumed
1116: -6 to 0 => malformed UTF-8 character at offset = (-return)
1117: */
1118:
1119: static int
1120: utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121: {
1122: int c = *utf8bytes++;
1123: int d = c;
1124: int i, j, s;
1125:
1126: for (i = -1; i < 6; i++) /* i is number of additional bytes */
1127: {
1128: if ((d & 0x80) == 0) break;
1129: d <<= 1;
1130: }
1131:
1132: if (i == -1) { *vptr = c; return 1; } /* ascii character */
1133: if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1134:
1135: /* i now has a value in the range 1-5 */
1136:
1137: s = 6*i;
1138: d = (c & utf8_table3[i]) << s;
1139:
1140: for (j = 0; j < i; j++)
1141: {
1142: c = *utf8bytes++;
1143: if ((c & 0xc0) != 0x80) return -(j+1);
1144: s -= 6;
1145: d |= (c & 0x3f) << s;
1146: }
1147:
1148: /* Check that encoding was the correct unique one */
1149:
1150: for (j = 0; j < utf8_table1_size; j++)
1151: if (d <= utf8_table1[j]) break;
1152: if (j != i) return -(i+1);
1153:
1154: /* Valid value */
1155:
1156: *vptr = d;
1157: return i+1;
1158: }
1159: #endif /* NOUTF || SUPPORT_PCRE16 */
1160:
1161:
1162:
1163: #if !defined NOUTF || defined SUPPORT_PCRE16
1164: /*************************************************
1165: * Convert character value to UTF-8 *
1166: *************************************************/
1167:
1168: /* This function takes an integer value in the range 0 - 0x7fffffff
1169: and encodes it as a UTF-8 character in 1 to 6 bytes.
1170:
1171: Arguments:
1172: cvalue the character value
1173: utf8bytes pointer to buffer for result - at least 6 bytes long
1174:
1175: Returns: number of bytes placed in the buffer
1176: */
1177:
1178: static int
1179: ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180: {
1181: register int i, j;
1182: for (i = 0; i < utf8_table1_size; i++)
1183: if (cvalue <= utf8_table1[i]) break;
1184: utf8bytes += i;
1185: for (j = i; j > 0; j--)
1186: {
1187: *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188: cvalue >>= 6;
1189: }
1190: *utf8bytes = utf8_table2[i] | cvalue;
1191: return i + 1;
1192: }
1193: #endif
1194:
1195:
1196: #ifdef SUPPORT_PCRE16
1197: /*************************************************
1198: * Convert a string to 16-bit *
1199: *************************************************/
1200:
1201: /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1202: 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203: double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1204: in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1205: result is always left in buffer16.
1206:
1207: Note that this function does not object to surrogate values. This is
1208: deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209: for the purpose of testing that they are correctly faulted.
1210:
1211: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212: in UTF-8 so that values greater than 255 can be handled.
1213:
1214: Arguments:
1215: data TRUE if converting a data line; FALSE for a regex
1216: p points to a byte string
1217: utf true if UTF-8 (to be converted to UTF-16)
1218: len number of bytes in the string (excluding trailing zero)
1219:
1220: Returns: number of 16-bit data items used (excluding trailing zero)
1221: OR -1 if a UTF-8 string is malformed
1222: OR -2 if a value > 0x10ffff is encountered
1223: OR -3 if a value > 0xffff is encountered when not in UTF mode
1224: */
1225:
1226: static int
1227: to16(int data, pcre_uint8 *p, int utf, int len)
1228: {
1229: pcre_uint16 *pp;
1230:
1231: if (buffer16_size < 2*len + 2)
1232: {
1233: if (buffer16 != NULL) free(buffer16);
1234: buffer16_size = 2*len + 2;
1235: buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236: if (buffer16 == NULL)
1237: {
1238: fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239: exit(1);
1240: }
1241: }
1242:
1243: pp = buffer16;
1244:
1245: if (!utf && !data)
1246: {
1247: while (len-- > 0) *pp++ = *p++;
1248: }
1249:
1250: else
1251: {
1252: int c = 0;
1253: while (len > 0)
1254: {
1255: int chlen = utf82ord(p, &c);
1256: if (chlen <= 0) return -1;
1257: if (c > 0x10ffff) return -2;
1258: p += chlen;
1259: len -= chlen;
1260: if (c < 0x10000) *pp++ = c; else
1261: {
1262: if (!utf) return -3;
1263: c -= 0x10000;
1264: *pp++ = 0xD800 | (c >> 10);
1265: *pp++ = 0xDC00 | (c & 0x3ff);
1266: }
1267: }
1268: }
1269:
1270: *pp = 0;
1271: return pp - buffer16;
1272: }
1273: #endif
1274:
1275:
1.1 misho 1276: /*************************************************
1277: * Read or extend an input line *
1278: *************************************************/
1279:
1280: /* Input lines are read into buffer, but both patterns and data lines can be
1281: continued over multiple input lines. In addition, if the buffer fills up, we
1282: want to automatically expand it so as to be able to handle extremely large
1283: lines that are needed for certain stress tests. When the input buffer is
1284: expanded, the other two buffers must also be expanded likewise, and the
1285: contents of pbuffer, which are a copy of the input for callouts, must be
1286: preserved (for when expansion happens for a data line). This is not the most
1287: optimal way of handling this, but hey, this is just a test program!
1288:
1289: Arguments:
1290: f the file to read
1291: start where in buffer to start (this *must* be within buffer)
1292: prompt for stdin or readline()
1293:
1294: Returns: pointer to the start of new data
1295: could be a copy of start, or could be moved
1296: NULL if no data read and EOF reached
1297: */
1298:
1.1.1.2 misho 1299: static pcre_uint8 *
1300: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1.1 misho 1301: {
1.1.1.2 misho 1302: pcre_uint8 *here = start;
1.1 misho 1303:
1304: for (;;)
1305: {
1.1.1.2 misho 1306: size_t rlen = (size_t)(buffer_size - (here - buffer));
1.1 misho 1307:
1308: if (rlen > 1000)
1309: {
1310: int dlen;
1311:
1.1.1.3 ! misho 1312: /* If libreadline or libedit support is required, use readline() to read a
! 1313: line if the input is a terminal. Note that readline() removes the trailing
! 1314: newline, so we must put it back again, to be compatible with fgets(). */
1.1 misho 1315:
1.1.1.3 ! misho 1316: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 1317: if (isatty(fileno(f)))
1318: {
1319: size_t len;
1320: char *s = readline(prompt);
1321: if (s == NULL) return (here == start)? NULL : start;
1322: len = strlen(s);
1323: if (len > 0) add_history(s);
1324: if (len > rlen - 1) len = rlen - 1;
1325: memcpy(here, s, len);
1326: here[len] = '\n';
1327: here[len+1] = 0;
1328: free(s);
1329: }
1330: else
1331: #endif
1332:
1333: /* Read the next line by normal means, prompting if the file is stdin. */
1334:
1335: {
1336: if (f == stdin) printf("%s", prompt);
1337: if (fgets((char *)here, rlen, f) == NULL)
1338: return (here == start)? NULL : start;
1339: }
1340:
1341: dlen = (int)strlen((char *)here);
1342: if (dlen > 0 && here[dlen - 1] == '\n') return start;
1343: here += dlen;
1344: }
1345:
1346: else
1347: {
1348: int new_buffer_size = 2*buffer_size;
1.1.1.2 misho 1349: pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350: pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351: pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1.1 misho 1352:
1353: if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1354: {
1355: fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1356: exit(1);
1357: }
1358:
1359: memcpy(new_buffer, buffer, buffer_size);
1360: memcpy(new_pbuffer, pbuffer, buffer_size);
1361:
1362: buffer_size = new_buffer_size;
1363:
1364: start = new_buffer + (start - buffer);
1365: here = new_buffer + (here - buffer);
1366:
1367: free(buffer);
1368: free(dbuffer);
1369: free(pbuffer);
1370:
1371: buffer = new_buffer;
1372: dbuffer = new_dbuffer;
1373: pbuffer = new_pbuffer;
1374: }
1375: }
1376:
1377: return NULL; /* Control never gets here */
1378: }
1379:
1380:
1381:
1382: /*************************************************
1383: * Read number from string *
1384: *************************************************/
1385:
1386: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1387: around with conditional compilation, just do the job by hand. It is only used
1388: for unpicking arguments, so just keep it simple.
1389:
1390: Arguments:
1391: str string to be converted
1392: endptr where to put the end pointer
1393:
1394: Returns: the value, as an int
1395: */
1396:
1397: static int
1.1.1.2 misho 1398: get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1.1 misho 1399: {
1400: int result = 0;
1401: while(*str != 0 && isspace(*str)) str++;
1402: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1403: *endptr = str;
1404: return(result);
1405: }
1406:
1407:
1408:
1409: /*************************************************
1.1.1.2 misho 1410: * Print one character *
1.1 misho 1411: *************************************************/
1412:
1.1.1.2 misho 1413: /* Print a single character either literally, or as a hex escape. */
1.1 misho 1414:
1.1.1.2 misho 1415: static int pchar(int c, FILE *f)
1.1 misho 1416: {
1.1.1.2 misho 1417: if (PRINTOK(c))
1418: {
1419: if (f != NULL) fprintf(f, "%c", c);
1420: return 1;
1421: }
1.1 misho 1422:
1.1.1.2 misho 1423: if (c < 0x100)
1.1 misho 1424: {
1.1.1.2 misho 1425: if (use_utf)
1426: {
1427: if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428: return 6;
1429: }
1430: else
1431: {
1432: if (f != NULL) fprintf(f, "\\x%02x", c);
1433: return 4;
1434: }
1.1 misho 1435: }
1436:
1.1.1.2 misho 1437: if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438: return (c <= 0x000000ff)? 6 :
1439: (c <= 0x00000fff)? 7 :
1440: (c <= 0x0000ffff)? 8 :
1441: (c <= 0x000fffff)? 9 : 10;
1442: }
1.1 misho 1443:
1444:
1445:
1.1.1.2 misho 1446: #ifdef SUPPORT_PCRE8
1447: /*************************************************
1448: * Print 8-bit character string *
1449: *************************************************/
1.1 misho 1450:
1.1.1.2 misho 1451: /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452: If handed a NULL file, just counts chars without printing. */
1.1 misho 1453:
1.1.1.2 misho 1454: static int pchars(pcre_uint8 *p, int length, FILE *f)
1455: {
1456: int c = 0;
1457: int yield = 0;
1.1 misho 1458:
1.1.1.2 misho 1459: if (length < 0)
1460: length = strlen((char *)p);
1.1 misho 1461:
1.1.1.2 misho 1462: while (length-- > 0)
1463: {
1464: #if !defined NOUTF
1465: if (use_utf)
1466: {
1467: int rc = utf82ord(p, &c);
1468: if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1469: {
1470: length -= rc - 1;
1471: p += rc;
1472: yield += pchar(c, f);
1473: continue;
1474: }
1475: }
1476: #endif
1477: c = *p++;
1478: yield += pchar(c, f);
1479: }
1.1 misho 1480:
1.1.1.2 misho 1481: return yield;
1482: }
1.1 misho 1483: #endif
1484:
1485:
1486:
1.1.1.2 misho 1487: #ifdef SUPPORT_PCRE16
1.1 misho 1488: /*************************************************
1.1.1.2 misho 1489: * Find length of 0-terminated 16-bit string *
1.1 misho 1490: *************************************************/
1491:
1.1.1.2 misho 1492: static int strlen16(PCRE_SPTR16 p)
1.1 misho 1493: {
1.1.1.2 misho 1494: int len = 0;
1495: while (*p++ != 0) len++;
1496: return len;
1.1 misho 1497: }
1.1.1.2 misho 1498: #endif /* SUPPORT_PCRE16 */
1.1 misho 1499:
1500:
1.1.1.2 misho 1501: #ifdef SUPPORT_PCRE16
1.1 misho 1502: /*************************************************
1.1.1.2 misho 1503: * Print 16-bit character string *
1.1 misho 1504: *************************************************/
1505:
1.1.1.2 misho 1506: /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507: If handed a NULL file, just counts chars without printing. */
1.1 misho 1508:
1.1.1.2 misho 1509: static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1.1 misho 1510: {
1511: int yield = 0;
1512:
1.1.1.2 misho 1513: if (length < 0)
1514: length = strlen16(p);
1515:
1.1 misho 1516: while (length-- > 0)
1517: {
1.1.1.2 misho 1518: int c = *p++ & 0xffff;
1519: #if !defined NOUTF
1520: if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1.1 misho 1521: {
1.1.1.2 misho 1522: int d = *p & 0xffff;
1523: if (d >= 0xDC00 && d < 0xDFFF)
1.1 misho 1524: {
1.1.1.2 misho 1525: c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526: length--;
1527: p++;
1.1 misho 1528: }
1529: }
1530: #endif
1.1.1.2 misho 1531: yield += pchar(c, f);
1532: }
1533:
1534: return yield;
1535: }
1536: #endif /* SUPPORT_PCRE16 */
1.1 misho 1537:
1538:
1.1.1.2 misho 1539:
1540: #ifdef SUPPORT_PCRE8
1541: /*************************************************
1542: * Read a capture name (8-bit) and check it *
1543: *************************************************/
1544:
1545: static pcre_uint8 *
1546: read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547: {
1548: pcre_uint8 *npp = *pp;
1549: while (isalnum(*p)) *npp++ = *p++;
1550: *npp++ = 0;
1551: *npp = 0;
1552: if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553: {
1554: fprintf(outfile, "no parentheses with name \"");
1555: PCHARSV(*pp, 0, -1, outfile);
1556: fprintf(outfile, "\"\n");
1.1 misho 1557: }
1558:
1.1.1.2 misho 1559: *pp = npp;
1560: return p;
1.1 misho 1561: }
1.1.1.2 misho 1562: #endif /* SUPPORT_PCRE8 */
1563:
1564:
1565:
1566: #ifdef SUPPORT_PCRE16
1567: /*************************************************
1568: * Read a capture name (16-bit) and check it *
1569: *************************************************/
1570:
1571: /* Note that the text being read is 8-bit. */
1572:
1573: static pcre_uint8 *
1574: read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575: {
1576: pcre_uint16 *npp = *pp;
1577: while (isalnum(*p)) *npp++ = *p++;
1578: *npp++ = 0;
1579: *npp = 0;
1580: if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581: {
1582: fprintf(outfile, "no parentheses with name \"");
1583: PCHARSV(*pp, 0, -1, outfile);
1584: fprintf(outfile, "\"\n");
1585: }
1586: *pp = npp;
1587: return p;
1588: }
1589: #endif /* SUPPORT_PCRE16 */
1.1 misho 1590:
1591:
1592:
1593: /*************************************************
1594: * Callout function *
1595: *************************************************/
1596:
1597: /* Called from PCRE as a result of the (?C) item. We print out where we are in
1598: the match. Yield zero unless more callouts than the fail count, or the callout
1599: data is not zero. */
1600:
/* Argument:
  cb   the callout block passed in by PCRE

Returns: the non-zero callout data value if one was supplied; otherwise 1 when
         this callout's number equals callout_fail_id and the running count of
         such callouts has reached callout_fail_count; otherwise 0
*/
1601: static int callout(pcre_callout_block *cb)
1602: {
/* f is the output file only on the first callout of a match or when extra
detail was requested; otherwise it is NULL. */
1603: FILE *f = (first_callout | callout_extra)? outfile : NULL;
1604: int i, pre_start, post_start, subject_length;
1605:
/* With callout_extra set, list the current contents of every capture slot. */
1606: if (callout_extra)
1607: {
1608: fprintf(f, "Callout %d: last capture = %d\n",
1609: cb->callout_number, cb->capture_last);
1610:
1611: for (i = 0; i < cb->capture_top * 2; i += 2)
1612: {
1613: if (cb->offset_vector[i] < 0)
1614: fprintf(f, "%2d: <unset>\n", i/2);
1615: else
1616: {
1617: fprintf(f, "%2d: ", i/2);
1.1.1.2 misho 1618: PCHARSV(cb->subject, cb->offset_vector[i],
1.1 misho 1619: cb->offset_vector[i+1] - cb->offset_vector[i], f);
1620: fprintf(f, "\n");
1621: }
1622: }
1623: }
1624:
1625: /* Re-print the subject in canonical form, the first time or if giving full
1626: details. On subsequent calls in the same match, we use pchars just to find the
1627: printed lengths of the substrings. */
1628:
1629: if (f != NULL) fprintf(f, "--->");
1630:
/* NOTE(review): PCHARS and PCHARSV are macros defined elsewhere in the file.
PCHARS presumably stores the printed width in its first argument. */
1.1.1.2 misho 1631: PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1632: PCHARS(post_start, cb->subject, cb->start_match,
1.1 misho 1633: cb->current_position - cb->start_match, f);
1634:
/* A NULL file here means nothing is printed for this call. */
1.1.1.2 misho 1635: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1.1 misho 1636:
1.1.1.2 misho 1637: PCHARSV(cb->subject, cb->current_position,
1.1 misho 1638: cb->subject_length - cb->current_position, f);
1639:
1640: if (f != NULL) fprintf(f, "\n");
1641:
1642: /* Always print appropriate indicators, with callout number if not already
1643: shown. For automatic callouts, show the pattern offset. */
1644:
1645: if (cb->callout_number == 255)
1646: {
1647: fprintf(outfile, "%+3d ", cb->pattern_position);
1648: if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1649: }
1650: else
1651: {
1652: if (callout_extra) fprintf(outfile, " ");
1653: else fprintf(outfile, "%3d ", cb->callout_number);
1654: }
1655:
/* Put one caret under the match start and, if the current position is beyond
it, a second caret post_start columns further on. */
1656: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1657: fprintf(outfile, "^");
1658:
1659: if (post_start > 0)
1660: {
1661: for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1662: fprintf(outfile, "^");
1663: }
1664:
/* Pad past the end of the printed subject before showing the pattern item. */
1665: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1666: fprintf(outfile, " ");
1667:
/* Show the next pattern item, taken from the copy of the pattern in pbuffer;
one character is shown when next_item_length is zero. */
1668: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1669: pbuffer + cb->pattern_position);
1670:
1671: fprintf(outfile, "\n");
1672: first_callout = 0;
1673:
/* Report the mark only when its pointer differs from the one last reported. */
1674: if (cb->mark != last_callout_mark)
1675: {
1.1.1.2 misho 1676: if (cb->mark == NULL)
1677: fprintf(outfile, "Latest Mark: <unset>\n");
1678: else
1679: {
1680: fprintf(outfile, "Latest Mark: ");
1681: PCHARSV(cb->mark, 0, -1, outfile);
1682: putc('\n', outfile);
1683: }
1.1 misho 1684: last_callout_mark = cb->mark;
1685: }
1686:
/* A non-zero int behind callout_data is printed and becomes the return value. */
1687: if (cb->callout_data != NULL)
1688: {
1689: int callout_data = *((int *)(cb->callout_data));
1690: if (callout_data != 0)
1691: {
1692: fprintf(outfile, "Callout data = %d\n", callout_data);
1693: return callout_data;
1694: }
1695: }
1696:
/* callout_count is incremented only for callouts whose number equals
callout_fail_id. */
1697: return (cb->callout_number != callout_fail_id)? 0 :
1698: (++callout_count >= callout_fail_count)? 1 : 0;
1699: }
1700:
1701:
1702: /*************************************************
1703: * Local malloc functions *
1704: *************************************************/
1705:
1706: /* Alternative malloc function, to test functionality and save the size of a
1707: compiled re, which is the first store request that pcre_compile() makes. The
1708: show_malloc variable is set only during matching. */
1709:
1710: static void *new_malloc(size_t size)
1711: {
1712: void *block = malloc(size);
1713: gotten_store = size;
1714: if (first_gotten_store == 0) first_gotten_store = size;
1715: if (show_malloc)
1716: fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1717: return block;
1718: }
1719:
1720: static void new_free(void *block)
1721: {
1722: if (show_malloc)
1723: fprintf(outfile, "free %p\n", block);
1724: free(block);
1725: }
1726:
1727: /* For recursion malloc/free, to test stacking calls */
1728:
1729: static void *stack_malloc(size_t size)
1730: {
1731: void *block = malloc(size);
1732: if (show_malloc)
1733: fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1734: return block;
1735: }
1736:
1737: static void stack_free(void *block)
1738: {
1739: if (show_malloc)
1740: fprintf(outfile, "stack_free %p\n", block);
1741: free(block);
1742: }
1743:
1744:
1.1.1.2 misho 1745: /*************************************************
1746: * Call pcre_fullinfo() *
1747: *************************************************/
1748:
1749: /* Get one piece of information from the pcre_fullinfo() function. When only
1750: one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751: value, but the code is defensive.
1752:
1753: Arguments:
1754: re compiled regex
1755: study study data
1756: option PCRE_INFO_xxx option
1757: ptr where to put the data
1758:
1759: Returns: 0 when OK, < 0 on error
1760: */
1761:
1762: static int
1763: new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764: {
1765: int rc;
1766:
1767: if (use_pcre16)
1768: #ifdef SUPPORT_PCRE16
1769: rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770: #else
1771: rc = PCRE_ERROR_BADMODE;
1772: #endif
1773: else
1774: #ifdef SUPPORT_PCRE8
1775: rc = pcre_fullinfo(re, study, option, ptr);
1776: #else
1777: rc = PCRE_ERROR_BADMODE;
1778: #endif
1779:
1780: if (rc < 0)
1781: {
1782: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783: use_pcre16? "16" : "", option);
1784: if (rc == PCRE_ERROR_BADMODE)
1785: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787: }
1788:
1789: return rc;
1790: }
1791:
1792:
1793:
1794: /*************************************************
1795: * Swap byte functions *
1796: *************************************************/
1797:
1798: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799: value, respectively.
1800:
1801: Arguments:
1802: value any number
1803:
1804: Returns: the byte swapped value
1805: */
1806:
static pcre_uint32
swap_uint32(pcre_uint32 value)
{
/* Isolate each of the four bytes in place, then move every one of them to
the mirror-image position. */

pcre_uint32 byte0 = value & 0x000000ff;   /* lowest byte */
pcre_uint32 byte1 = value & 0x0000ff00;
pcre_uint32 byte2 = value & 0x00ff0000;
pcre_uint32 byte3 = value >> 24;          /* highest byte, already shifted down */

return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1815:
static pcre_uint16
swap_uint16(pcre_uint16 value)
{
/* The old high byte moves down and the old low byte moves up; the casts
discard anything shifted beyond 16 bits. */

pcre_uint16 new_low = (pcre_uint16)(value >> 8);
pcre_uint16 new_high = (pcre_uint16)(value << 8);

return (pcre_uint16)(new_low | new_high);
}
1821:
1822:
1823:
1824: /*************************************************
1825: * Flip bytes in a compiled pattern *
1826: *************************************************/
1827:
/* This function is called if the 'F' option was present on a pattern that is
to be written to a file. We flip the bytes of all the integer fields in the
regex data block and the study block. In 16-bit mode this also flips relevant
bytes in the pattern itself. This is to make it possible to test PCRE's
ability to reload byte-flipped patterns, e.g. those compiled on a different
architecture.

The data is modified in place; after the call the blocks are no longer in
host byte order.

Arguments:
  ere       the compiled pattern (treated as a REAL_PCRE block)
  extra     the extra block whose study_data is to be flipped, or NULL

Returns:    nothing
*/

static void
regexflip(pcre *ere, pcre_extra *extra)
{
REAL_PCRE *re = (REAL_PCRE *)ere;
#ifdef SUPPORT_PCRE16
int op;

/* These two values are taken from the header here, in the initializers,
because the header fields they depend on are byte-swapped further down. The
pointer is positioned name_table_offset 16-bit units from the start of the
block, and length is the number of 16-bit units the first pass of the loop
below will swap (name_count entries of name_entry_size units each). */

pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
BOOL utf = (re->options & PCRE_UTF16) != 0;

/* Set when the opcode just processed is one of the character-carrying
opcodes listed in the switch below and the pattern is in UTF-16 mode. */

BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

/* Always flip the bytes in the main data block and study blocks. */

re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);

/* NOTE(review): study_data is dereferenced whenever extra is non-NULL; this
assumes every extra block passed here carries study data - confirm against
the callers. */

if (extra != NULL)
  {
  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
  rsd->size = swap_uint32(rsd->size);
  rsd->flags = swap_uint32(rsd->flags);
  rsd->minlength = swap_uint32(rsd->minlength);
  }

/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
if (!use_pcre16) return;

/* Each pass of this loop first swaps the "length" units left over from the
previous item (on the first pass, the name table), then reads the next opcode
and works out how many following units belong to it. The only exit is the
return on OP_END. */

while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
    *ptr = swap_uint16(*ptr);
    ptr++;
    }
#ifdef SUPPORT_UTF
  if (utf16_char)
    {
    /* The unit just swapped is now in flipped order, so the test is applied
    to the flipped value. NOTE(review): presumably this is meant to detect a
    high surrogate that is followed by one more unit - confirm that testing
    the already-swapped value is intended. */

    if ((ptr[-1] & 0xfc00) == 0xd800)
      {
      /* We know that there is only one extra character in UTF-16. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      }
    }
  utf16_char = FALSE;
#endif /* SUPPORT_UTF */

  /* Get next opcode. */

  length = 0;
  op = *ptr;                 /* keep the host-order value for the switch */
  *ptr++ = swap_uint16(op);

  switch (op)
    {
    case OP_END:
    return;

#ifdef SUPPORT_UTF
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    /* Remember that an extra unit may follow this item; it is handled at the
    top of the next pass, after the fixed-length part has been swapped. */
    if (utf) utf16_char = TRUE;
#endif
    /* Fall through. */

    default:
    /* The table gives the item length including the opcode unit, which has
    already been swapped above. */
    length = OP_lengths16[op] - 1;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* Skip the character bit map. */
    ptr += 32/sizeof(pcre_uint16);
    length = 0;
    break;

    case OP_XCLASS:
    /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
    /* The size is read in host order before it is swapped below. The amount
    subtracted covers the opcode unit, the LINK_SIZE size units and the one
    further unit that is read into op and swapped separately below. */
    if (LINK_SIZE > 1)
      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
        - (1 + LINK_SIZE + 1));
    else
      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

    /* Reverse the size of the XCLASS instance. */
    *ptr = swap_uint16(*ptr);
    ptr++;
    if (LINK_SIZE > 1)
      {
      *ptr = swap_uint16(*ptr);
      ptr++;
      }

    /* op is reused here for the unit that follows the size; only its XCL_MAP
    bit is examined. */
    op = *ptr;
    *ptr = swap_uint16(op);
    ptr++;
    if ((op & XCL_MAP) != 0)
      {
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length -= 32/sizeof(pcre_uint16);
      }
    break;
    }
  }
/* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
2013:
2014:
2015:
2016: /*************************************************
2017: * Check match or recursion limit *
2018: *************************************************/
2019:
2020: static int
1.1.1.2 misho 2021: check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1.1 misho 2022: int start_offset, int options, int *use_offsets, int use_size_offsets,
2023: int flag, unsigned long int *limit, int errnumber, const char *msg)
2024: {
2025: int count;
2026: int min = 0;
2027: int mid = 64;
2028: int max = -1;
2029:
2030: extra->flags |= flag;
2031:
2032: for (;;)
2033: {
2034: *limit = mid;
2035:
1.1.1.2 misho 2036: PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1.1 misho 2037: use_offsets, use_size_offsets);
2038:
2039: if (count == errnumber)
2040: {
2041: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2042: min = mid;
2043: mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2044: }
2045:
2046: else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2047: count == PCRE_ERROR_PARTIAL)
2048: {
2049: if (mid == min + 1)
2050: {
2051: fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2052: break;
2053: }
2054: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2055: max = mid;
2056: mid = (min + mid)/2;
2057: }
2058: else break; /* Some other error */
2059: }
2060:
2061: extra->flags &= ~flag;
2062: return count;
2063: }
2064:
2065:
2066:
2067: /*************************************************
2068: * Case-independent strncmp() function *
2069: *************************************************/
2070:
/* Compare at most n characters of two zero-terminated strings, ignoring case
as defined by tolower(). Like the C library's strncmp(), the comparison stops
at the terminating zero, so two equal strings that are shorter than n compare
equal and nothing beyond their terminators is read. A count of zero or less
compares nothing and yields 0.

Arguments:
  s         first string
  t         second string
  n         maximum number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;

  /* The characters are equal here, so reaching the end of s means the end
  of t has been reached as well: the strings are identical. */

  if (*s == 0) break;
  s++;
  t++;
  }
return 0;
}
2090:
2091:
2092:
2093: /*************************************************
2094: * Check newline indicator *
2095: *************************************************/
2096:
2097: /* This is used both at compile and run-time to check for <xxx> escapes. Print
2098: a message and return 0 if there is no match.
2099:
2100: Arguments:
2101: p points after the leading '<'
2102: f file for error message
2103:
2104: Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2105: */
2106:
2107: static int
1.1.1.2 misho 2108: check_newline(pcre_uint8 *p, FILE *f)
1.1 misho 2109: {
1.1.1.2 misho 2110: if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111: if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112: if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113: if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114: if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115: if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116: if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1.1 misho 2117: fprintf(f, "Unknown newline type at: <%s\n", p);
2118: return 0;
2119: }
2120:
2121:
2122:
2123: /*************************************************
2124: * Usage function *
2125: *************************************************/
2126:
/* Write the command-line help text to stdout. Lines that describe features
which can be compiled out (16-bit support, DFA matching, the POSIX interface)
are included only when the feature is part of this build, and the readline
note reflects whether a line-editing library was linked in. */

static void
usage(void)
{
static const char * const text[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option\n",
  " and exit with its value. The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -s++ ditto, verifying when JIT was actually used\n",
  " -s+n force each pattern to be studied, using JIT if available,\n",
  " where 1 <= n <= 7 selects JIT options\n",
  " -s++n ditto, verifying when JIT was actually used\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n"
  };

int i;

/* None of the lines contains a conversion specification, so writing them
verbatim produces the same output as passing each one to printf(). */

for (i = 0; i < (int)(sizeof(text)/sizeof(text[0])); i++)
  fputs(text[i], stdout);
}
2177:
2178:
2179:
2180: /*************************************************
2181: * Main Program *
2182: *************************************************/
2183:
2184: /* Read lines from named file or stdin and write to named file or stdout; lines
2185: consist of a regular expression, in delimiters and optionally followed by
2186: options, followed by a set of test data, terminated by an empty line. */
2187:
2188: int main(int argc, char **argv)
2189: {
2190: FILE *infile = stdin;
1.1.1.2 misho 2191: const char *version;
1.1 misho 2192: int options = 0;
2193: int study_options = 0;
2194: int default_find_match_limit = FALSE;
2195: int op = 1;
2196: int timeit = 0;
2197: int timeitm = 0;
2198: int showinfo = 0;
2199: int showstore = 0;
2200: int force_study = -1;
2201: int force_study_options = 0;
2202: int quiet = 0;
2203: int size_offsets = 45;
2204: int size_offsets_max;
2205: int *offsets = NULL;
2206: int debug = 0;
2207: int done = 0;
2208: int all_use_dfa = 0;
1.1.1.3 ! misho 2209: int verify_jit = 0;
1.1 misho 2210: int yield = 0;
2211: int stack_size;
2212:
1.1.1.3 ! misho 2213: #if !defined NOPOSIX
! 2214: int posix = 0;
! 2215: #endif
! 2216: #if !defined NODFA
! 2217: int *dfa_workspace = NULL;
! 2218: #endif
! 2219:
1.1 misho 2220: pcre_jit_stack *jit_stack = NULL;
2221:
1.1.1.2 misho 2222: /* These vectors store, end-to-end, a list of zero-terminated captured
2223: substring names, each list itself being terminated by an empty name. Assume
2224: that 1024 is plenty long enough for the few names we'll be testing. It is
2225: easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226: for the actual memory, to ensure alignment. */
2227:
2228: pcre_uint16 copynames[1024];
2229: pcre_uint16 getnames[1024];
2230:
2231: #ifdef SUPPORT_PCRE16
2232: pcre_uint16 *cn16ptr;
2233: pcre_uint16 *gn16ptr;
2234: #endif
1.1 misho 2235:
1.1.1.2 misho 2236: #ifdef SUPPORT_PCRE8
2237: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239: pcre_uint8 *cn8ptr;
2240: pcre_uint8 *gn8ptr;
2241: #endif
1.1 misho 2242:
1.1.1.2 misho 2243: /* Get buffers from malloc() so that valgrind will check their misuse when
2244: debugging. They grow automatically when very long lines are read. The 16-bit
2245: buffer (buffer16) is obtained only if needed. */
2246:
2247: buffer = (pcre_uint8 *)malloc(buffer_size);
2248: dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249: pbuffer = (pcre_uint8 *)malloc(buffer_size);
1.1 misho 2250:
2251: /* The outfile variable is static so that new_malloc can use it. */
2252:
2253: outfile = stdout;
2254:
2255: /* The following _setmode() stuff is some Windows magic that tells its runtime
2256: library to translate CRLF into a single LF character. At least, that's what
2257: I've been told: never having used Windows I take this all on trust. Originally
2258: it set 0x8000, but then I was advised that _O_BINARY was better. */
2259:
2260: #if defined(_WIN32) || defined(WIN32)
2261: _setmode( _fileno( stdout ), _O_BINARY );
2262: #endif
2263:
1.1.1.2 misho 2264: /* Get the version number: both pcre_version() and pcre16_version() give the
2265: same answer. We just need to ensure that we call one that is available. */
2266:
2267: #ifdef SUPPORT_PCRE8
2268: version = pcre_version();
2269: #else
2270: version = pcre16_version();
2271: #endif
2272:
1.1 misho 2273: /* Scan options */
2274:
2275: while (argc > 1 && argv[op][0] == '-')
2276: {
1.1.1.2 misho 2277: pcre_uint8 *endptr;
1.1.1.3 ! misho 2278: char *arg = argv[op];
1.1 misho 2279:
1.1.1.3 ! misho 2280: if (strcmp(arg, "-m") == 0) showstore = 1;
! 2281: else if (strcmp(arg, "-s") == 0) force_study = 0;
! 2282:
! 2283: else if (strncmp(arg, "-s+", 3) == 0)
1.1 misho 2284: {
1.1.1.3 ! misho 2285: arg += 3;
! 2286: if (*arg == '+') { arg++; verify_jit = TRUE; }
1.1 misho 2287: force_study = 1;
1.1.1.3 ! misho 2288: if (*arg == 0)
! 2289: force_study_options = jit_study_bits[6];
! 2290: else if (*arg >= '1' && *arg <= '7')
! 2291: force_study_options = jit_study_bits[*arg - '1'];
! 2292: else goto BAD_ARG;
1.1 misho 2293: }
1.1.1.3 ! misho 2294: else if (strcmp(arg, "-16") == 0)
1.1.1.2 misho 2295: {
2296: #ifdef SUPPORT_PCRE16
2297: use_pcre16 = 1;
2298: #else
2299: printf("** This version of PCRE was built without 16-bit support\n");
2300: exit(1);
2301: #endif
2302: }
1.1.1.3 ! misho 2303: else if (strcmp(arg, "-q") == 0) quiet = 1;
! 2304: else if (strcmp(arg, "-b") == 0) debug = 1;
! 2305: else if (strcmp(arg, "-i") == 0) showinfo = 1;
! 2306: else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
! 2307: else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
1.1 misho 2308: #if !defined NODFA
1.1.1.3 ! misho 2309: else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
1.1 misho 2310: #endif
1.1.1.3 ! misho 2311: else if (strcmp(arg, "-o") == 0 && argc > 2 &&
1.1.1.2 misho 2312: ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 2313: *endptr == 0))
2314: {
2315: op++;
2316: argc--;
2317: }
1.1.1.3 ! misho 2318: else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
1.1 misho 2319: {
1.1.1.3 ! misho 2320: int both = arg[2] == 0;
1.1 misho 2321: int temp;
1.1.1.2 misho 2322: if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1.1 misho 2323: *endptr == 0))
2324: {
2325: timeitm = temp;
2326: op++;
2327: argc--;
2328: }
2329: else timeitm = LOOPREPEAT;
2330: if (both) timeit = timeitm;
2331: }
1.1.1.3 ! misho 2332: else if (strcmp(arg, "-S") == 0 && argc > 2 &&
1.1.1.2 misho 2333: ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 2334: *endptr == 0))
2335: {
2336: #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337: printf("PCRE: -S not supported on this OS\n");
2338: exit(1);
2339: #else
2340: int rc;
2341: struct rlimit rlim;
2342: getrlimit(RLIMIT_STACK, &rlim);
2343: rlim.rlim_cur = stack_size * 1024 * 1024;
2344: rc = setrlimit(RLIMIT_STACK, &rlim);
2345: if (rc != 0)
2346: {
2347: printf("PCRE: setrlimit() failed with error %d\n", rc);
2348: exit(1);
2349: }
2350: op++;
2351: argc--;
2352: #endif
2353: }
2354: #if !defined NOPOSIX
1.1.1.3 ! misho 2355: else if (strcmp(arg, "-p") == 0) posix = 1;
1.1 misho 2356: #endif
1.1.1.3 ! misho 2357: else if (strcmp(arg, "-C") == 0)
1.1 misho 2358: {
2359: int rc;
2360: unsigned long int lrc;
1.1.1.2 misho 2361:
2362: if (argc > 2)
2363: {
2364: if (strcmp(argv[op + 1], "linksize") == 0)
2365: {
2366: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367: printf("%d\n", rc);
2368: yield = rc;
2369: goto EXIT;
2370: }
2371: if (strcmp(argv[op + 1], "pcre8") == 0)
2372: {
2373: #ifdef SUPPORT_PCRE8
2374: printf("1\n");
2375: yield = 1;
2376: #else
2377: printf("0\n");
2378: yield = 0;
2379: #endif
2380: goto EXIT;
2381: }
2382: if (strcmp(argv[op + 1], "pcre16") == 0)
2383: {
2384: #ifdef SUPPORT_PCRE16
2385: printf("1\n");
2386: yield = 1;
2387: #else
2388: printf("0\n");
2389: yield = 0;
2390: #endif
2391: goto EXIT;
2392: }
2393: if (strcmp(argv[op + 1], "utf") == 0)
2394: {
2395: #ifdef SUPPORT_PCRE8
2396: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397: printf("%d\n", rc);
2398: yield = rc;
2399: #else
2400: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401: printf("%d\n", rc);
2402: yield = rc;
2403: #endif
2404: goto EXIT;
2405: }
2406: if (strcmp(argv[op + 1], "ucp") == 0)
2407: {
2408: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409: printf("%d\n", rc);
2410: yield = rc;
2411: goto EXIT;
2412: }
2413: if (strcmp(argv[op + 1], "jit") == 0)
2414: {
2415: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416: printf("%d\n", rc);
2417: yield = rc;
2418: goto EXIT;
2419: }
2420: if (strcmp(argv[op + 1], "newline") == 0)
2421: {
2422: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423: /* Note that these values are always the ASCII values, even
2424: in EBCDIC environments. CR is 13 and NL is 10. */
2425: printf("%s\n", (rc == 13)? "CR" :
2426: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427: (rc == -2)? "ANYCRLF" :
2428: (rc == -1)? "ANY" : "???");
2429: goto EXIT;
2430: }
2431: printf("Unknown -C option: %s\n", argv[op + 1]);
2432: goto EXIT;
2433: }
2434:
2435: printf("PCRE version %s\n", version);
1.1 misho 2436: printf("Compiled with\n");
1.1.1.2 misho 2437:
2438: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439: are set, either both UTFs are supported or both are not supported. */
2440:
2441: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442: printf(" 8-bit and 16-bit support\n");
2443: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444: if (rc)
2445: printf(" UTF-8 and UTF-16 support\n");
2446: else
2447: printf(" No UTF-8 or UTF-16 support\n");
2448: #elif defined SUPPORT_PCRE8
2449: printf(" 8-bit support only\n");
1.1 misho 2450: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451: printf(" %sUTF-8 support\n", rc? "" : "No ");
1.1.1.2 misho 2452: #else
2453: printf(" 16-bit support only\n");
2454: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455: printf(" %sUTF-16 support\n", rc? "" : "No ");
2456: #endif
2457:
2458: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1.1 misho 2459: printf(" %sUnicode properties support\n", rc? "" : "No ");
1.1.1.2 misho 2460: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
1.1 misho 2461: if (rc)
1.1.1.2 misho 2462: {
2463: const char *arch;
2464: (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465: printf(" Just-in-time compiler support: %s\n", arch);
2466: }
1.1 misho 2467: else
2468: printf(" No just-in-time compiler support\n");
1.1.1.2 misho 2469: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
1.1 misho 2470: /* Note that these values are always the ASCII values, even
2471: in EBCDIC environments. CR is 13 and NL is 10. */
2472: printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2473: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474: (rc == -2)? "ANYCRLF" :
2475: (rc == -1)? "ANY" : "???");
1.1.1.2 misho 2476: (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
1.1 misho 2477: printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478: "all Unicode newlines");
1.1.1.2 misho 2479: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
1.1 misho 2480: printf(" Internal link size = %d\n", rc);
1.1.1.2 misho 2481: (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1.1 misho 2482: printf(" POSIX malloc threshold = %d\n", rc);
1.1.1.2 misho 2483: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1.1 misho 2484: printf(" Default match limit = %ld\n", lrc);
1.1.1.2 misho 2485: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1.1 misho 2486: printf(" Default recursion depth limit = %ld\n", lrc);
1.1.1.2 misho 2487: (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488: printf(" Match recursion uses %s", rc? "stack" : "heap");
2489: if (showstore)
2490: {
2491: PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492: printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493: }
2494: printf("\n");
1.1 misho 2495: goto EXIT;
2496: }
1.1.1.3 ! misho 2497: else if (strcmp(arg, "-help") == 0 ||
! 2498: strcmp(arg, "--help") == 0)
1.1 misho 2499: {
2500: usage();
2501: goto EXIT;
2502: }
2503: else
2504: {
1.1.1.3 ! misho 2505: BAD_ARG:
! 2506: printf("** Unknown or malformed option %s\n", arg);
1.1 misho 2507: usage();
2508: yield = 1;
2509: goto EXIT;
2510: }
2511: op++;
2512: argc--;
2513: }
2514:
2515: /* Get the store for the offsets vector, and remember what it was */
2516:
2517: size_offsets_max = size_offsets;
2518: offsets = (int *)malloc(size_offsets_max * sizeof(int));
2519: if (offsets == NULL)
2520: {
2521: printf("** Failed to get %d bytes of memory for offsets vector\n",
2522: (int)(size_offsets_max * sizeof(int)));
2523: yield = 1;
2524: goto EXIT;
2525: }
2526:
2527: /* Sort out the input and output files */
2528:
2529: if (argc > 1)
2530: {
2531: infile = fopen(argv[op], INPUT_MODE);
2532: if (infile == NULL)
2533: {
2534: printf("** Failed to open %s\n", argv[op]);
2535: yield = 1;
2536: goto EXIT;
2537: }
2538: }
2539:
2540: if (argc > 2)
2541: {
2542: outfile = fopen(argv[op+1], OUTPUT_MODE);
2543: if (outfile == NULL)
2544: {
2545: printf("** Failed to open %s\n", argv[op+1]);
2546: yield = 1;
2547: goto EXIT;
2548: }
2549: }
2550:
2551: /* Set alternative malloc function */
2552:
1.1.1.2 misho 2553: #ifdef SUPPORT_PCRE8
1.1 misho 2554: pcre_malloc = new_malloc;
2555: pcre_free = new_free;
2556: pcre_stack_malloc = stack_malloc;
2557: pcre_stack_free = stack_free;
1.1.1.2 misho 2558: #endif
2559:
2560: #ifdef SUPPORT_PCRE16
2561: pcre16_malloc = new_malloc;
2562: pcre16_free = new_free;
2563: pcre16_stack_malloc = stack_malloc;
2564: pcre16_stack_free = stack_free;
2565: #endif
1.1 misho 2566:
2567: /* Heading line unless quiet, then prompt for first regex if stdin */
2568:
1.1.1.2 misho 2569: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
1.1 misho 2570:
2571: /* Main loop */
2572:
2573: while (!done)
2574: {
2575: pcre *re = NULL;
2576: pcre_extra *extra = NULL;
2577:
2578: #if !defined NOPOSIX /* There are still compilers that require no indent */
2579: regex_t preg;
2580: int do_posix = 0;
2581: #endif
2582:
2583: const char *error;
1.1.1.2 misho 2584: pcre_uint8 *markptr;
2585: pcre_uint8 *p, *pp, *ppp;
2586: pcre_uint8 *to_file = NULL;
2587: const pcre_uint8 *tables = NULL;
2588: unsigned long int get_options;
1.1 misho 2589: unsigned long int true_size, true_study_size = 0;
2590: size_t size, regex_gotten_store;
2591: int do_allcaps = 0;
2592: int do_mark = 0;
2593: int do_study = 0;
2594: int no_force_study = 0;
2595: int do_debug = debug;
2596: int do_G = 0;
2597: int do_g = 0;
2598: int do_showinfo = showinfo;
2599: int do_showrest = 0;
2600: int do_showcaprest = 0;
2601: int do_flip = 0;
2602: int erroroffset, len, delimiter, poffset;
2603:
1.1.1.3 ! misho 2604: #if !defined NODFA
! 2605: int dfa_matched = 0;
! 2606: #endif
! 2607:
1.1.1.2 misho 2608: use_utf = 0;
1.1 misho 2609: debug_lengths = 1;
2610:
2611: if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2612: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2613: fflush(outfile);
2614:
2615: p = buffer;
2616: while (isspace(*p)) p++;
2617: if (*p == 0) continue;
2618:
2619: /* See if the pattern is to be loaded pre-compiled from a file. */
2620:
2621: if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622: {
1.1.1.2 misho 2623: pcre_uint32 magic;
2624: pcre_uint8 sbuf[8];
1.1 misho 2625: FILE *f;
2626:
2627: p++;
1.1.1.2 misho 2628: if (*p == '!')
2629: {
2630: do_debug = TRUE;
2631: do_showinfo = TRUE;
2632: p++;
2633: }
2634:
1.1 misho 2635: pp = p + (int)strlen((char *)p);
2636: while (isspace(pp[-1])) pp--;
2637: *pp = 0;
2638:
2639: f = fopen((char *)p, "rb");
2640: if (f == NULL)
2641: {
2642: fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2643: continue;
2644: }
2645:
1.1.1.2 misho 2646: first_gotten_store = 0;
1.1 misho 2647: if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648:
2649: true_size =
2650: (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2651: true_study_size =
2652: (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653:
1.1.1.2 misho 2654: re = (pcre *)new_malloc(true_size);
1.1 misho 2655: regex_gotten_store = first_gotten_store;
2656:
2657: if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2658:
1.1.1.2 misho 2659: magic = ((REAL_PCRE *)re)->magic_number;
1.1 misho 2660: if (magic != MAGIC_NUMBER)
2661: {
1.1.1.2 misho 2662: if (swap_uint32(magic) == MAGIC_NUMBER)
1.1 misho 2663: {
2664: do_flip = 1;
2665: }
2666: else
2667: {
2668: fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2669: fclose(f);
2670: continue;
2671: }
2672: }
2673:
1.1.1.2 misho 2674: /* We hide the byte-invert info for little and big endian tests. */
1.1 misho 2675: fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1.1.1.2 misho 2676: do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
1.1 misho 2677:
2678: /* Now see if there is any following study data. */
2679:
2680: if (true_study_size != 0)
2681: {
2682: pcre_study_data *psd;
2683:
2684: extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2685: extra->flags = PCRE_EXTRA_STUDY_DATA;
2686:
2687: psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2688: extra->study_data = psd;
2689:
2690: if (fread(psd, 1, true_study_size, f) != true_study_size)
2691: {
2692: FAIL_READ:
2693: fprintf(outfile, "Failed to read data from %s\n", p);
1.1.1.2 misho 2694: if (extra != NULL)
2695: {
2696: PCRE_FREE_STUDY(extra);
2697: }
1.1 misho 2698: if (re != NULL) new_free(re);
2699: fclose(f);
2700: continue;
2701: }
2702: fprintf(outfile, "Study data loaded from %s\n", p);
2703: do_study = 1; /* To get the data output if requested */
2704: }
2705: else fprintf(outfile, "No study data\n");
2706:
1.1.1.2 misho 2707: /* Flip the necessary bytes. */
2708: if (do_flip)
2709: {
2710: int rc;
2711: PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2712: if (rc == PCRE_ERROR_BADMODE)
2713: {
2714: /* Simulate the result of the function call below. */
2715: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2716: use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2717: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2718: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2719: continue;
2720: }
2721: }
2722:
2723: /* Need to know if UTF-8 for printing data strings. */
2724:
2725: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2726: use_utf = (get_options & PCRE_UTF8) != 0;
2727:
1.1 misho 2728: fclose(f);
2729: goto SHOW_INFO;
2730: }
2731:
2732: /* In-line pattern (the usual case). Get the delimiter and seek the end of
1.1.1.2 misho 2733: the pattern; if it isn't complete, read more. */
1.1 misho 2734:
2735: delimiter = *p++;
2736:
2737: if (isalnum(delimiter) || delimiter == '\\')
2738: {
2739: fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2740: goto SKIP_DATA;
2741: }
2742:
2743: pp = p;
2744: poffset = (int)(p - buffer);
2745:
2746: for(;;)
2747: {
2748: while (*pp != 0)
2749: {
2750: if (*pp == '\\' && pp[1] != 0) pp++;
2751: else if (*pp == delimiter) break;
2752: pp++;
2753: }
2754: if (*pp != 0) break;
2755: if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2756: {
2757: fprintf(outfile, "** Unexpected EOF\n");
2758: done = 1;
2759: goto CONTINUE;
2760: }
2761: if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2762: }
2763:
2764: /* The buffer may have moved while being extended; reset the start of data
2765: pointer to the correct relative point in the buffer. */
2766:
2767: p = buffer + poffset;
2768:
2769: /* If the first character after the delimiter is backslash, make
2770: the pattern end with backslash. This is purely to provide a way
2771: of testing for the error message when a pattern ends with backslash. */
2772:
2773: if (pp[1] == '\\') *pp++ = '\\';
2774:
2775: /* Terminate the pattern at the delimiter, and save a copy of the pattern
2776: for callouts. */
2777:
2778: *pp++ = 0;
2779: strcpy((char *)pbuffer, (char *)p);
2780:
2781: /* Look for options after final delimiter */
2782:
2783: options = 0;
2784: study_options = 0;
2785: log_store = showstore; /* default from command line */
2786:
2787: while (*pp != 0)
2788: {
2789: switch (*pp++)
2790: {
2791: case 'f': options |= PCRE_FIRSTLINE; break;
2792: case 'g': do_g = 1; break;
2793: case 'i': options |= PCRE_CASELESS; break;
2794: case 'm': options |= PCRE_MULTILINE; break;
2795: case 's': options |= PCRE_DOTALL; break;
2796: case 'x': options |= PCRE_EXTENDED; break;
2797:
2798: case '+':
2799: if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2800: break;
2801:
2802: case '=': do_allcaps = 1; break;
2803: case 'A': options |= PCRE_ANCHORED; break;
2804: case 'B': do_debug = 1; break;
2805: case 'C': options |= PCRE_AUTO_CALLOUT; break;
2806: case 'D': do_debug = do_showinfo = 1; break;
2807: case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2808: case 'F': do_flip = 1; break;
2809: case 'G': do_G = 1; break;
2810: case 'I': do_showinfo = 1; break;
2811: case 'J': options |= PCRE_DUPNAMES; break;
2812: case 'K': do_mark = 1; break;
2813: case 'M': log_store = 1; break;
2814: case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2815:
2816: #if !defined NOPOSIX
2817: case 'P': do_posix = 1; break;
2818: #endif
2819:
2820: case 'S':
2821: if (do_study == 0)
2822: {
2823: do_study = 1;
2824: if (*pp == '+')
2825: {
1.1.1.3 ! misho 2826: if (*(++pp) == '+')
! 2827: {
! 2828: verify_jit = TRUE;
! 2829: pp++;
! 2830: }
! 2831: if (*pp >= '1' && *pp <= '7')
! 2832: study_options |= jit_study_bits[*pp++ - '1'];
! 2833: else
! 2834: study_options |= jit_study_bits[6];
1.1 misho 2835: }
2836: }
2837: else
2838: {
2839: do_study = 0;
2840: no_force_study = 1;
2841: }
2842: break;
2843:
2844: case 'U': options |= PCRE_UNGREEDY; break;
2845: case 'W': options |= PCRE_UCP; break;
2846: case 'X': options |= PCRE_EXTRA; break;
2847: case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2848: case 'Z': debug_lengths = 0; break;
1.1.1.2 misho 2849: case '8': options |= PCRE_UTF8; use_utf = 1; break;
1.1 misho 2850: case '?': options |= PCRE_NO_UTF8_CHECK; break;
2851:
2852: case 'T':
2853: switch (*pp++)
2854: {
2855: case '0': tables = tables0; break;
2856: case '1': tables = tables1; break;
2857:
2858: case '\r':
2859: case '\n':
2860: case ' ':
2861: case 0:
2862: fprintf(outfile, "** Missing table number after /T\n");
2863: goto SKIP_DATA;
2864:
2865: default:
2866: fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2867: goto SKIP_DATA;
2868: }
2869: break;
2870:
2871: case 'L':
2872: ppp = pp;
2873: /* The '\r' test here is so that it works on Windows. */
2874: /* The zero-byte (NUL) test is just in case this is an unterminated line. */
2875: while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2876: *ppp = 0;
2877: if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2878: {
2879: fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2880: goto SKIP_DATA;
2881: }
2882: locale_set = 1;
1.1.1.2 misho 2883: tables = PCRE_MAKETABLES;
1.1 misho 2884: pp = ppp;
2885: break;
2886:
2887: case '>':
2888: to_file = pp;
2889: while (*pp != 0) pp++;
2890: while (isspace(pp[-1])) pp--;
2891: *pp = 0;
2892: break;
2893:
2894: case '<':
2895: {
1.1.1.2 misho 2896: if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1.1 misho 2897: {
2898: options |= PCRE_JAVASCRIPT_COMPAT;
2899: pp += 3;
2900: }
2901: else
2902: {
2903: int x = check_newline(pp, outfile);
2904: if (x == 0) goto SKIP_DATA;
2905: options |= x;
2906: while (*pp++ != '>');
2907: }
2908: }
2909: break;
2910:
2911: case '\r': /* So that it works in Windows */
2912: case '\n':
2913: case ' ':
2914: break;
2915:
2916: default:
2917: fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2918: goto SKIP_DATA;
2919: }
2920: }
2921:
2922: /* Handle compiling via the POSIX interface, which doesn't support the
2923: timing, showing, or debugging options, nor the ability to pass over
1.1.1.2 misho 2924: local character tables. Neither does it have 16-bit support. */
1.1 misho 2925:
2926: #if !defined NOPOSIX
2927: if (posix || do_posix)
2928: {
2929: int rc;
2930: int cflags = 0;
2931:
2932: if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2933: if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2934: if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2935: if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2936: if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2937: if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2938: if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2939:
2940: first_gotten_store = 0;
2941: rc = regcomp(&preg, (char *)p, cflags);
2942:
2943: /* Compilation failed; go back for another re, skipping to blank line
2944: if non-interactive. */
2945:
2946: if (rc != 0)
2947: {
2948: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2949: fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2950: goto SKIP_DATA;
2951: }
2952: }
2953:
2954: /* Handle compiling via the native interface */
2955:
2956: else
2957: #endif /* !defined NOPOSIX */
2958:
2959: {
1.1.1.2 misho 2960: /* In 16-bit mode, convert the input. */
2961:
2962: #ifdef SUPPORT_PCRE16
2963: if (use_pcre16)
2964: {
2965: switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2966: {
2967: case -1:
2968: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2969: "converted to UTF-16\n");
2970: goto SKIP_DATA;
2971:
2972: case -2:
2973: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2974: "cannot be converted to UTF-16\n");
2975: goto SKIP_DATA;
2976:
2977: case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2978: fprintf(outfile, "**Failed: character value greater than 0xffff "
2979: "cannot be converted to 16-bit in non-UTF mode\n");
2980: goto SKIP_DATA;
2981:
2982: default:
2983: break;
2984: }
2985: p = (pcre_uint8 *)buffer16;
2986: }
2987: #endif
2988:
2989: /* Compile many times when timing */
1.1 misho 2990:
2991: if (timeit > 0)
2992: {
2993: register int i;
2994: clock_t time_taken;
2995: clock_t start_time = clock();
2996: for (i = 0; i < timeit; i++)
2997: {
1.1.1.2 misho 2998: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 2999: if (re != NULL) free(re);
3000: }
3001: time_taken = clock() - start_time;
3002: fprintf(outfile, "Compile time %.4f milliseconds\n",
3003: (((double)time_taken * 1000.0) / (double)timeit) /
3004: (double)CLOCKS_PER_SEC);
3005: }
3006:
3007: first_gotten_store = 0;
1.1.1.2 misho 3008: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 3009:
3010: /* Compilation failed; go back for another re, skipping to blank line
3011: if non-interactive. */
3012:
3013: if (re == NULL)
3014: {
3015: fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3016: SKIP_DATA:
3017: if (infile != stdin)
3018: {
3019: for (;;)
3020: {
3021: if (extend_inputline(infile, buffer, NULL) == NULL)
3022: {
3023: done = 1;
3024: goto CONTINUE;
3025: }
3026: len = (int)strlen((char *)buffer);
3027: while (len > 0 && isspace(buffer[len-1])) len--;
3028: if (len == 0) break;
3029: }
3030: fprintf(outfile, "\n");
3031: }
3032: goto CONTINUE;
3033: }
3034:
3035: /* Compilation succeeded. It is now possible to set the UTF-8 option from
3036: within the regex; check for this so that we know how to process the data
3037: lines. */
3038:
1.1.1.2 misho 3039: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3040: goto SKIP_DATA;
3041: if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
1.1 misho 3042:
3043: /* Extract the size for possible writing before possibly flipping it,
3044: and remember the store that was got. */
3045:
1.1.1.2 misho 3046: true_size = ((REAL_PCRE *)re)->size;
1.1 misho 3047: regex_gotten_store = first_gotten_store;
3048:
3049: /* Output code size information if requested */
3050:
3051: if (log_store)
3052: fprintf(outfile, "Memory allocation (code space): %d\n",
3053: (int)(first_gotten_store -
1.1.1.2 misho 3054: sizeof(REAL_PCRE) -
3055: ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
1.1 misho 3056:
3057: /* If -s or /S was present, study the regex to generate additional info to
3058: help with the matching, unless the pattern has the SS option, which
3059: suppresses the effect of /S (used for a few test patterns where studying is
3060: never sensible). */
3061:
3062: if (do_study || (force_study >= 0 && !no_force_study))
3063: {
3064: if (timeit > 0)
3065: {
3066: register int i;
3067: clock_t time_taken;
3068: clock_t start_time = clock();
3069: for (i = 0; i < timeit; i++)
1.1.1.2 misho 3070: {
3071: PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3072: }
1.1 misho 3073: time_taken = clock() - start_time;
1.1.1.2 misho 3074: if (extra != NULL)
3075: {
3076: PCRE_FREE_STUDY(extra);
3077: }
1.1 misho 3078: fprintf(outfile, " Study time %.4f milliseconds\n",
3079: (((double)time_taken * 1000.0) / (double)timeit) /
3080: (double)CLOCKS_PER_SEC);
3081: }
1.1.1.2 misho 3082: PCRE_STUDY(extra, re, study_options | force_study_options, &error);
1.1 misho 3083: if (error != NULL)
3084: fprintf(outfile, "Failed to study: %s\n", error);
3085: else if (extra != NULL)
3086: {
3087: true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3088: if (log_store)
3089: {
3090: size_t jitsize;
1.1.1.2 misho 3091: if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3092: jitsize != 0)
3093: fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
1.1 misho 3094: }
3095: }
3096: }
3097:
3098: /* If /K was present, we set up for handling MARK data. */
3099:
3100: if (do_mark)
3101: {
3102: if (extra == NULL)
3103: {
3104: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3105: extra->flags = 0;
3106: }
3107: extra->mark = &markptr;
3108: extra->flags |= PCRE_EXTRA_MARK;
3109: }
3110:
1.1.1.2 misho 3111: /* Extract and display information from the compiled data if required. */
1.1 misho 3112:
3113: SHOW_INFO:
3114:
3115: if (do_debug)
3116: {
3117: fprintf(outfile, "------------------------------------------------------------------\n");
1.1.1.2 misho 3118: PCRE_PRINTINT(re, outfile, debug_lengths);
1.1 misho 3119: }
3120:
3121: /* We already have the options in get_options (see above) */
3122:
3123: if (do_showinfo)
3124: {
3125: unsigned long int all_options;
3126: int count, backrefmax, first_char, need_char, okpartial, jchanged,
1.1.1.3 ! misho 3127: hascrorlf, maxlookbehind;
1.1 misho 3128: int nameentrysize, namecount;
1.1.1.2 misho 3129: const pcre_uint8 *nametable;
1.1 misho 3130:
1.1.1.2 misho 3131: if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3132: new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3133: new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3134: new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3135: new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3136: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3137: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3138: new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3139: new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3140: new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
1.1.1.3 ! misho 3141: new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
! 3142: new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
1.1.1.2 misho 3143: != 0)
3144: goto SKIP_DATA;
1.1 misho 3145:
3146: if (size != regex_gotten_store) fprintf(outfile,
3147: "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3148: (int)size, (int)regex_gotten_store);
3149:
3150: fprintf(outfile, "Capturing subpattern count = %d\n", count);
3151: if (backrefmax > 0)
3152: fprintf(outfile, "Max back reference = %d\n", backrefmax);
3153:
3154: if (namecount > 0)
3155: {
3156: fprintf(outfile, "Named capturing subpatterns:\n");
3157: while (namecount-- > 0)
3158: {
1.1.1.2 misho 3159: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3160: int imm2_size = use_pcre16 ? 1 : 2;
3161: #else
3162: int imm2_size = IMM2_SIZE;
3163: #endif
3164: int length = (int)STRLEN(nametable + imm2_size);
3165: fprintf(outfile, " ");
3166: PCHARSV(nametable, imm2_size, length, outfile);
3167: while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3168: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3169: fprintf(outfile, "%3d\n", use_pcre16?
3170: (int)(((PCRE_SPTR16)nametable)[0])
3171: :((int)nametable[0] << 8) | (int)nametable[1]);
3172: nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3173: #else
3174: fprintf(outfile, "%3d\n", GET2(nametable, 0));
3175: #ifdef SUPPORT_PCRE8
1.1 misho 3176: nametable += nameentrysize;
1.1.1.2 misho 3177: #else
3178: nametable += nameentrysize * 2;
3179: #endif
3180: #endif
1.1 misho 3181: }
3182: }
3183:
3184: if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3185: if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3186:
1.1.1.2 misho 3187: all_options = ((REAL_PCRE *)re)->options;
3188: if (do_flip) all_options = swap_uint32(all_options);
1.1 misho 3189:
3190: if (get_options == 0) fprintf(outfile, "No options\n");
3191: else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3192: ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3193: ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3194: ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3195: ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3196: ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3197: ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3198: ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3199: ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3200: ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3201: ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3202: ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3203: ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1.1.1.2 misho 3204: ((get_options & PCRE_UTF8) != 0)? " utf" : "",
1.1 misho 3205: ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1.1.1.2 misho 3206: ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
1.1 misho 3207: ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3208: ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3209:
3210: if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3211:
3212: switch (get_options & PCRE_NEWLINE_BITS)
3213: {
3214: case PCRE_NEWLINE_CR:
3215: fprintf(outfile, "Forced newline sequence: CR\n");
3216: break;
3217:
3218: case PCRE_NEWLINE_LF:
3219: fprintf(outfile, "Forced newline sequence: LF\n");
3220: break;
3221:
3222: case PCRE_NEWLINE_CRLF:
3223: fprintf(outfile, "Forced newline sequence: CRLF\n");
3224: break;
3225:
3226: case PCRE_NEWLINE_ANYCRLF:
3227: fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3228: break;
3229:
3230: case PCRE_NEWLINE_ANY:
3231: fprintf(outfile, "Forced newline sequence: ANY\n");
3232: break;
3233:
3234: default:
3235: break;
3236: }
3237:
3238: if (first_char == -1)
3239: {
3240: fprintf(outfile, "First char at start or follows newline\n");
3241: }
3242: else if (first_char < 0)
3243: {
3244: fprintf(outfile, "No first char\n");
3245: }
3246: else
3247: {
1.1.1.2 misho 3248: const char *caseless =
3249: ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
1.1 misho 3250: "" : " (caseless)";
1.1.1.2 misho 3251:
3252: if (PRINTOK(first_char))
3253: fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
1.1 misho 3254: else
1.1.1.2 misho 3255: {
3256: fprintf(outfile, "First char = ");
3257: pchar(first_char, outfile);
3258: fprintf(outfile, "%s\n", caseless);
3259: }
1.1 misho 3260: }
3261:
3262: if (need_char < 0)
3263: {
3264: fprintf(outfile, "No need char\n");
3265: }
3266: else
3267: {
1.1.1.2 misho 3268: const char *caseless =
3269: ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
1.1 misho 3270: "" : " (caseless)";
1.1.1.2 misho 3271:
3272: if (PRINTOK(need_char))
3273: fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
1.1 misho 3274: else
1.1.1.2 misho 3275: {
3276: fprintf(outfile, "Need char = ");
3277: pchar(need_char, outfile);
3278: fprintf(outfile, "%s\n", caseless);
3279: }
1.1 misho 3280: }
3281:
1.1.1.3 ! misho 3282: if (maxlookbehind > 0)
! 3283: fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
! 3284:
1.1 misho 3285: /* Don't output study size; at present it is in any case a fixed
3286: value, but it varies, depending on the computer architecture, and
3287: so messes up the test suite. (And with the /F option, it might be
3288: flipped.) If study was forced by an external -s, don't show this
3289: information unless -i or -d was also present. This means that, except
3290: when auto-callouts are involved, the output from runs with and without
3291: -s should be identical. */
3292:
3293: if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3294: {
3295: if (extra == NULL)
3296: fprintf(outfile, "Study returned NULL\n");
3297: else
3298: {
1.1.1.2 misho 3299: pcre_uint8 *start_bits = NULL;
1.1 misho 3300: int minlength;
3301:
1.1.1.2 misho 3302: if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3303: fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1.1 misho 3304:
1.1.1.2 misho 3305: if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
1.1 misho 3306: {
1.1.1.2 misho 3307: if (start_bits == NULL)
3308: fprintf(outfile, "No set of starting bytes\n");
3309: else
1.1 misho 3310: {
1.1.1.2 misho 3311: int i;
3312: int c = 24;
3313: fprintf(outfile, "Starting byte set: ");
3314: for (i = 0; i < 256; i++)
1.1 misho 3315: {
1.1.1.2 misho 3316: if ((start_bits[i/8] & (1<<(i&7))) != 0)
1.1 misho 3317: {
1.1.1.2 misho 3318: if (c > 75)
3319: {
3320: fprintf(outfile, "\n ");
3321: c = 2;
3322: }
3323: if (PRINTOK(i) && i != ' ')
3324: {
3325: fprintf(outfile, "%c ", i);
3326: c += 2;
3327: }
3328: else
3329: {
3330: fprintf(outfile, "\\x%02x ", i);
3331: c += 5;
3332: }
1.1 misho 3333: }
3334: }
1.1.1.2 misho 3335: fprintf(outfile, "\n");
1.1 misho 3336: }
3337: }
3338: }
3339:
3340: /* Show this only if the JIT was set by /S, not by -s. */
3341:
3342: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3343: {
3344: int jit;
1.1.1.2 misho 3345: if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3346: {
3347: if (jit)
3348: fprintf(outfile, "JIT study was successful\n");
3349: else
1.1 misho 3350: #ifdef SUPPORT_JIT
1.1.1.2 misho 3351: fprintf(outfile, "JIT study was not successful\n");
1.1 misho 3352: #else
1.1.1.2 misho 3353: fprintf(outfile, "JIT support is not available in this version of PCRE\n");
1.1 misho 3354: #endif
1.1.1.2 misho 3355: }
1.1 misho 3356: }
3357: }
3358: }
3359:
3360: /* If the '>' option was present, we write out the regex to a file, and
3361: that is all. The first 8 bytes of the file are the regex length and then
3362: the study length, in big-endian order. */
3363:
3364: if (to_file != NULL)
3365: {
3366: FILE *f = fopen((char *)to_file, "wb");
3367: if (f == NULL)
3368: {
3369: fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3370: }
3371: else
3372: {
1.1.1.2 misho 3373: pcre_uint8 sbuf[8];
3374:
3375: if (do_flip) regexflip(re, extra);
3376: sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3377: sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3378: sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3379: sbuf[3] = (pcre_uint8)((true_size) & 255);
3380: sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3381: sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3382: sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3383: sbuf[7] = (pcre_uint8)((true_study_size) & 255);
1.1 misho 3384:
3385: if (fwrite(sbuf, 1, 8, f) < 8 ||
3386: fwrite(re, 1, true_size, f) < true_size)
3387: {
3388: fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3389: }
3390: else
3391: {
3392: fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3393:
3394: /* If there is study data, write it. */
3395:
3396: if (extra != NULL)
3397: {
3398: if (fwrite(extra->study_data, 1, true_study_size, f) <
3399: true_study_size)
3400: {
3401: fprintf(outfile, "Write error on %s: %s\n", to_file,
3402: strerror(errno));
3403: }
3404: else fprintf(outfile, "Study data written to %s\n", to_file);
3405: }
3406: }
3407: fclose(f);
3408: }
3409:
3410: new_free(re);
1.1.1.2 misho 3411: if (extra != NULL)
3412: {
3413: PCRE_FREE_STUDY(extra);
3414: }
1.1 misho 3415: if (locale_set)
3416: {
3417: new_free((void *)tables);
3418: setlocale(LC_CTYPE, "C");
3419: locale_set = 0;
3420: }
3421: continue; /* With next regex */
3422: }
3423: } /* End of non-POSIX compile */
3424:
3425: /* Read data lines and test them */
3426:
3427: for (;;)
3428: {
1.1.1.2 misho 3429: pcre_uint8 *q;
3430: pcre_uint8 *bptr;
1.1 misho 3431: int *use_offsets = offsets;
3432: int use_size_offsets = size_offsets;
3433: int callout_data = 0;
3434: int callout_data_set = 0;
3435: int count, c;
3436: int copystrings = 0;
3437: int find_match_limit = default_find_match_limit;
3438: int getstrings = 0;
3439: int getlist = 0;
3440: int gmatched = 0;
3441: int start_offset = 0;
3442: int start_offset_sign = 1;
3443: int g_notempty = 0;
3444: int use_dfa = 0;
3445:
3446: *copynames = 0;
3447: *getnames = 0;
3448:
1.1.1.2 misho 3449: #ifdef SUPPORT_PCRE16
3450: cn16ptr = copynames;
3451: gn16ptr = getnames;
3452: #endif
3453: #ifdef SUPPORT_PCRE8
3454: cn8ptr = copynames8;
3455: gn8ptr = getnames8;
3456: #endif
1.1 misho 3457:
1.1.1.2 misho 3458: SET_PCRE_CALLOUT(callout);
1.1 misho 3459: first_callout = 1;
3460: last_callout_mark = NULL;
3461: callout_extra = 0;
3462: callout_count = 0;
3463: callout_fail_count = 999999;
3464: callout_fail_id = -1;
3465: show_malloc = 0;
1.1.1.2 misho 3466: options = 0;
1.1 misho 3467:
3468: if (extra != NULL) extra->flags &=
3469: ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3470:
3471: len = 0;
3472: for (;;)
3473: {
3474: if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3475: {
3476: if (len > 0) /* Reached EOF without hitting a newline */
3477: {
3478: fprintf(outfile, "\n");
3479: break;
3480: }
3481: done = 1;
3482: goto CONTINUE;
3483: }
3484: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3485: len = (int)strlen((char *)buffer);
3486: if (buffer[len-1] == '\n') break;
3487: }
3488:
3489: while (len > 0 && isspace(buffer[len-1])) len--;
3490: buffer[len] = 0;
3491: if (len == 0) break;
3492:
3493: p = buffer;
3494: while (isspace(*p)) p++;
3495:
3496: bptr = q = dbuffer;
3497: while ((c = *p++) != 0)
3498: {
3499: int i = 0;
3500: int n = 0;
3501:
1.1.1.2 misho 3502: /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3503: In non-UTF mode, allow the value of the byte to fall through to later,
3504: where values greater than 127 are turned into UTF-8 when running in
3505: 16-bit mode. */
3506:
3507: if (c != '\\')
3508: {
3509: if (use_utf)
3510: {
3511: *q++ = c;
3512: continue;
3513: }
3514: }
3515:
3516: /* Handle backslash escapes */
3517:
3518: else switch ((c = *p++))
1.1 misho 3519: {
3520: case 'a': c = 7; break;
3521: case 'b': c = '\b'; break;
3522: case 'e': c = 27; break;
3523: case 'f': c = '\f'; break;
3524: case 'n': c = '\n'; break;
3525: case 'r': c = '\r'; break;
3526: case 't': c = '\t'; break;
3527: case 'v': c = '\v'; break;
3528:
3529: case '0': case '1': case '2': case '3':
3530: case '4': case '5': case '6': case '7':
3531: c -= '0';
3532: while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3533: c = c * 8 + *p++ - '0';
3534: break;
3535:
3536: case 'x':
3537: if (*p == '{')
3538: {
1.1.1.2 misho 3539: pcre_uint8 *pt = p;
1.1 misho 3540: c = 0;
3541:
3542: /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3543: when isxdigit() is a macro that refers to its argument more than
3544: once. This is banned by the C Standard, but apparently happens in at
3545: least one MacOS environment. */
3546:
3547: for (pt++; isxdigit(*pt); pt++)
1.1.1.2 misho 3548: {
3549: if (++i == 9)
3550: fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3551: "using only the first eight.\n");
3552: else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3553: }
1.1 misho 3554: if (*pt == '}')
3555: {
3556: p = pt + 1;
3557: break;
3558: }
1.1.1.2 misho 3559: /* Not correct form for \x{...}; fall through */
1.1 misho 3560: }
3561:
1.1.1.2 misho 3562: /* \x without {} always defines just one byte in 8-bit mode. This
3563: allows UTF-8 characters to be constructed byte by byte, and also allows
3564: invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3565: Otherwise, pass it down to later code so that it can be turned into
3566: UTF-8 when running in 16-bit mode. */
1.1 misho 3567:
3568: c = 0;
3569: while (i++ < 2 && isxdigit(*p))
3570: {
3571: c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3572: p++;
3573: }
1.1.1.2 misho 3574: if (use_utf)
3575: {
3576: *q++ = c;
3577: continue;
3578: }
1.1 misho 3579: break;
3580:
3581: case 0: /* \ followed by EOF allows for an empty line */
3582: p--;
3583: continue;
3584:
3585: case '>':
3586: if (*p == '-')
3587: {
3588: start_offset_sign = -1;
3589: p++;
3590: }
3591: while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3592: start_offset *= start_offset_sign;
3593: continue;
3594:
3595: case 'A': /* Option setting */
3596: options |= PCRE_ANCHORED;
3597: continue;
3598:
3599: case 'B':
3600: options |= PCRE_NOTBOL;
3601: continue;
3602:
3603: case 'C':
3604: if (isdigit(*p)) /* Set copy string */
3605: {
3606: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3607: copystrings |= 1 << n;
3608: }
3609: else if (isalnum(*p))
3610: {
1.1.1.2 misho 3611: READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
1.1 misho 3612: }
3613: else if (*p == '+')
3614: {
3615: callout_extra = 1;
3616: p++;
3617: }
3618: else if (*p == '-')
3619: {
1.1.1.2 misho 3620: SET_PCRE_CALLOUT(NULL);
1.1 misho 3621: p++;
3622: }
3623: else if (*p == '!')
3624: {
3625: callout_fail_id = 0;
3626: p++;
3627: while(isdigit(*p))
3628: callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3629: callout_fail_count = 0;
3630: if (*p == '!')
3631: {
3632: p++;
3633: while(isdigit(*p))
3634: callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3635: }
3636: }
3637: else if (*p == '*')
3638: {
3639: int sign = 1;
3640: callout_data = 0;
3641: if (*(++p) == '-') { sign = -1; p++; }
3642: while(isdigit(*p))
3643: callout_data = callout_data * 10 + *p++ - '0';
3644: callout_data *= sign;
3645: callout_data_set = 1;
3646: }
3647: continue;
3648:
3649: #if !defined NODFA
3650: case 'D':
3651: #if !defined NOPOSIX
3652: if (posix || do_posix)
3653: printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3654: else
3655: #endif
3656: use_dfa = 1;
3657: continue;
3658: #endif
3659:
3660: #if !defined NODFA
3661: case 'F':
3662: options |= PCRE_DFA_SHORTEST;
3663: continue;
3664: #endif
3665:
3666: case 'G':
3667: if (isdigit(*p))
3668: {
3669: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3670: getstrings |= 1 << n;
3671: }
3672: else if (isalnum(*p))
3673: {
1.1.1.2 misho 3674: READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
1.1 misho 3675: }
3676: continue;
3677:
3678: case 'J':
3679: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3680: if (extra != NULL
3681: && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3682: && extra->executable_jit != NULL)
3683: {
1.1.1.2 misho 3684: if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3685: jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3686: PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
1.1 misho 3687: }
3688: continue;
3689:
3690: case 'L':
3691: getlist = 1;
3692: continue;
3693:
3694: case 'M':
3695: find_match_limit = 1;
3696: continue;
3697:
3698: case 'N':
3699: if ((options & PCRE_NOTEMPTY) != 0)
3700: options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3701: else
3702: options |= PCRE_NOTEMPTY;
3703: continue;
3704:
3705: case 'O':
3706: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3707: if (n > size_offsets_max)
3708: {
3709: size_offsets_max = n;
3710: free(offsets);
3711: use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3712: if (offsets == NULL)
3713: {
3714: printf("** Failed to get %d bytes of memory for offsets vector\n",
3715: (int)(size_offsets_max * sizeof(int)));
3716: yield = 1;
3717: goto EXIT;
3718: }
3719: }
3720: use_size_offsets = n;
3721: if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1.1.1.3 ! misho 3722: else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
1.1 misho 3723: continue;
3724:
3725: case 'P':
3726: options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3727: PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3728: continue;
3729:
3730: case 'Q':
3731: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3732: if (extra == NULL)
3733: {
3734: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3735: extra->flags = 0;
3736: }
3737: extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3738: extra->match_limit_recursion = n;
3739: continue;
3740:
3741: case 'q':
3742: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3743: if (extra == NULL)
3744: {
3745: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3746: extra->flags = 0;
3747: }
3748: extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3749: extra->match_limit = n;
3750: continue;
3751:
3752: #if !defined NODFA
3753: case 'R':
3754: options |= PCRE_DFA_RESTART;
3755: continue;
3756: #endif
3757:
3758: case 'S':
3759: show_malloc = 1;
3760: continue;
3761:
3762: case 'Y':
3763: options |= PCRE_NO_START_OPTIMIZE;
3764: continue;
3765:
3766: case 'Z':
3767: options |= PCRE_NOTEOL;
3768: continue;
3769:
3770: case '?':
3771: options |= PCRE_NO_UTF8_CHECK;
3772: continue;
3773:
3774: case '<':
3775: {
3776: int x = check_newline(p, outfile);
3777: if (x == 0) goto NEXT_DATA;
3778: options |= x;
3779: while (*p++ != '>');
3780: }
3781: continue;
3782: }
1.1.1.2 misho 3783:
3784: /* We now have a character value in c that may be greater than 255. In
3785: 16-bit mode, we always convert characters to UTF-8 so that values greater
3786: than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3787: convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3788: mode must have come from \x{...} or octal constructs because values from
3789: \x.. get this far only in non-UTF mode. */
3790:
3791: #if !defined NOUTF || defined SUPPORT_PCRE16
3792: if (use_pcre16 || use_utf)
3793: {
3794: pcre_uint8 buff8[8];
3795: int ii, utn;
3796: utn = ord2utf8(c, buff8);
3797: for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3798: }
3799: else
3800: #endif
3801: {
3802: if (c > 255)
3803: {
3804: fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3805: "and UTF-8 mode is not enabled.\n", c);
3806: fprintf(outfile, "** Truncation will probably give the wrong "
3807: "result.\n");
3808: }
3809: *q++ = c;
3810: }
1.1 misho 3811: }
1.1.1.2 misho 3812:
3813: /* Reached end of subject string */
3814:
1.1 misho 3815: *q = 0;
3816: len = (int)(q - dbuffer);
3817:
3818: /* Move the data to the end of the buffer so that a read over the end of
3819: the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3820: we are using the POSIX interface, we must include the terminating zero. */
3821:
3822: #if !defined NOPOSIX
3823: if (posix || do_posix)
3824: {
3825: memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3826: bptr += buffer_size - len - 1;
3827: }
3828: else
3829: #endif
3830: {
3831: memmove(bptr + buffer_size - len, bptr, len);
3832: bptr += buffer_size - len;
3833: }
3834:
3835: if ((all_use_dfa || use_dfa) && find_match_limit)
3836: {
3837: printf("**Match limit not relevant for DFA matching: ignored\n");
3838: find_match_limit = 0;
3839: }
3840:
3841: /* Handle matching via the POSIX interface, which does not
3842: support timing or playing with the match limit or callout data. */
3843:
3844: #if !defined NOPOSIX
3845: if (posix || do_posix)
3846: {
3847: int rc;
3848: int eflags = 0;
3849: regmatch_t *pmatch = NULL;
3850: if (use_size_offsets > 0)
3851: pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3852: if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3853: if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3854: if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3855:
3856: rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3857:
3858: if (rc != 0)
3859: {
3860: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3861: fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3862: }
3863: else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3864: != 0)
3865: {
3866: fprintf(outfile, "Matched with REG_NOSUB\n");
3867: }
3868: else
3869: {
3870: size_t i;
3871: for (i = 0; i < (size_t)use_size_offsets; i++)
3872: {
3873: if (pmatch[i].rm_so >= 0)
3874: {
3875: fprintf(outfile, "%2d: ", (int)i);
1.1.1.2 misho 3876: PCHARSV(dbuffer, pmatch[i].rm_so,
1.1 misho 3877: pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3878: fprintf(outfile, "\n");
3879: if (do_showcaprest || (i == 0 && do_showrest))
3880: {
3881: fprintf(outfile, "%2d+ ", (int)i);
1.1.1.2 misho 3882: PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1.1 misho 3883: outfile);
3884: fprintf(outfile, "\n");
3885: }
3886: }
3887: }
3888: }
3889: free(pmatch);
1.1.1.2 misho 3890: goto NEXT_DATA;
1.1 misho 3891: }
3892:
1.1.1.2 misho 3893: #endif /* !defined NOPOSIX */
3894:
1.1 misho 3895: /* Handle matching via the native interface - repeats for /g and /G */
3896:
1.1.1.2 misho 3897: #ifdef SUPPORT_PCRE16
3898: if (use_pcre16)
3899: {
3900: len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3901: switch(len)
3902: {
3903: case -1:
3904: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3905: "converted to UTF-16\n");
3906: goto NEXT_DATA;
3907:
3908: case -2:
3909: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3910: "cannot be converted to UTF-16\n");
3911: goto NEXT_DATA;
3912:
3913: case -3:
3914: fprintf(outfile, "**Failed: character value greater than 0xffff "
3915: "cannot be converted to 16-bit in non-UTF mode\n");
3916: goto NEXT_DATA;
3917:
3918: default:
3919: break;
3920: }
3921: bptr = (pcre_uint8 *)buffer16;
3922: }
3923: #endif
1.1 misho 3924:
1.1.1.3 ! misho 3925: /* Ensure that there is a JIT callback if we want to verify that JIT was
! 3926: actually used. If jit_stack == NULL, no stack has yet been assigned. */
! 3927:
! 3928: if (verify_jit && jit_stack == NULL && extra != NULL)
! 3929: { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
! 3930:
1.1 misho 3931: for (;; gmatched++) /* Loop for /g or /G */
3932: {
3933: markptr = NULL;
1.1.1.3 ! misho 3934: jit_was_used = FALSE;
1.1 misho 3935:
3936: if (timeitm > 0)
3937: {
3938: register int i;
3939: clock_t time_taken;
3940: clock_t start_time = clock();
3941:
3942: #if !defined NODFA
3943: if (all_use_dfa || use_dfa)
3944: {
1.1.1.3 ! misho 3945: if ((options & PCRE_DFA_RESTART) != 0)
! 3946: {
! 3947: fprintf(outfile, "Timing DFA restarts is not supported\n");
! 3948: break;
! 3949: }
! 3950: if (dfa_workspace == NULL)
! 3951: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
1.1 misho 3952: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 3953: {
3954: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 ! misho 3955: (options | g_notempty), use_offsets, use_size_offsets,
! 3956: dfa_workspace, DFA_WS_DIMENSION);
1.1.1.2 misho 3957: }
1.1 misho 3958: }
3959: else
3960: #endif
3961:
3962: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 3963: {
3964: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3965: (options | g_notempty), use_offsets, use_size_offsets);
3966: }
1.1 misho 3967: time_taken = clock() - start_time;
3968: fprintf(outfile, "Execute time %.4f milliseconds\n",
3969: (((double)time_taken * 1000.0) / (double)timeitm) /
3970: (double)CLOCKS_PER_SEC);
3971: }
3972:
3973: /* If find_match_limit is set, we want to do repeated matches with
3974: varying limits in order to find the minimum value for the match limit and
3975: for the recursion limit. The match limits are relevant only to the normal
3976: running of pcre_exec(), so disable the JIT optimization. This makes it
3977: possible to run the same set of tests with and without JIT externally
3978: requested. */
3979:
3980: if (find_match_limit)
3981: {
3982: if (extra == NULL)
3983: {
3984: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3985: extra->flags = 0;
3986: }
3987: else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3988:
3989: (void)check_match_limit(re, extra, bptr, len, start_offset,
3990: options|g_notempty, use_offsets, use_size_offsets,
3991: PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3992: PCRE_ERROR_MATCHLIMIT, "match()");
3993:
3994: count = check_match_limit(re, extra, bptr, len, start_offset,
3995: options|g_notempty, use_offsets, use_size_offsets,
3996: PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3997: PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3998: }
3999:
4000: /* If callout_data is set, use the interface with additional data */
4001:
4002: else if (callout_data_set)
4003: {
4004: if (extra == NULL)
4005: {
4006: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4007: extra->flags = 0;
4008: }
4009: extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4010: extra->callout_data = &callout_data;
1.1.1.2 misho 4011: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
1.1 misho 4012: options | g_notempty, use_offsets, use_size_offsets);
4013: extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4014: }
4015:
4016: /* The normal case is just to do the match once, with the default
4017: value of match_limit. */
4018:
4019: #if !defined NODFA
4020: else if (all_use_dfa || use_dfa)
4021: {
1.1.1.3 ! misho 4022: if (dfa_workspace == NULL)
! 4023: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
! 4024: if (dfa_matched++ == 0)
! 4025: dfa_workspace[0] = -1; /* To catch bad restart */
1.1.1.2 misho 4026: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 ! misho 4027: (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
! 4028: DFA_WS_DIMENSION);
1.1 misho 4029: if (count == 0)
4030: {
4031: fprintf(outfile, "Matched, but too many subsidiary matches\n");
4032: count = use_size_offsets/2;
4033: }
4034: }
4035: #endif
4036:
4037: else
4038: {
1.1.1.2 misho 4039: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4040: options | g_notempty, use_offsets, use_size_offsets);
1.1 misho 4041: if (count == 0)
4042: {
4043: fprintf(outfile, "Matched, but too many substrings\n");
4044: count = use_size_offsets/3;
4045: }
4046: }
4047:
4048: /* Matched */
4049:
4050: if (count >= 0)
4051: {
4052: int i, maxcount;
1.1.1.2 misho 4053: void *cnptr, *gnptr;
1.1 misho 4054:
4055: #if !defined NODFA
4056: if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4057: #endif
4058: maxcount = use_size_offsets/3;
4059:
4060: /* This is a check against a lunatic return value. */
4061:
4062: if (count > maxcount)
4063: {
4064: fprintf(outfile,
4065: "** PCRE error: returned count %d is too big for offset size %d\n",
4066: count, use_size_offsets);
4067: count = use_size_offsets/3;
4068: if (do_g || do_G)
4069: {
4070: fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4071: do_g = do_G = FALSE; /* Break g/G loop */
4072: }
4073: }
4074:
4075: /* do_allcaps requests showing of all captures in the pattern, to check
4076: unset ones at the end. */
4077:
4078: if (do_allcaps)
4079: {
1.1.1.2 misho 4080: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4081: goto SKIP_DATA;
1.1 misho 4082: count++; /* Allow for full match */
4083: if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4084: }
4085:
4086: /* Output the captured substrings */
4087:
4088: for (i = 0; i < count * 2; i += 2)
4089: {
4090: if (use_offsets[i] < 0)
4091: {
4092: if (use_offsets[i] != -1)
4093: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4094: use_offsets[i], i);
4095: if (use_offsets[i+1] != -1)
4096: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4097: use_offsets[i+1], i+1);
4098: fprintf(outfile, "%2d: <unset>\n", i/2);
4099: }
4100: else
4101: {
4102: fprintf(outfile, "%2d: ", i/2);
1.1.1.2 misho 4103: PCHARSV(bptr, use_offsets[i],
1.1 misho 4104: use_offsets[i+1] - use_offsets[i], outfile);
1.1.1.3 ! misho 4105: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 4106: fprintf(outfile, "\n");
4107: if (do_showcaprest || (i == 0 && do_showrest))
4108: {
4109: fprintf(outfile, "%2d+ ", i/2);
1.1.1.2 misho 4110: PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
1.1 misho 4111: outfile);
4112: fprintf(outfile, "\n");
4113: }
4114: }
4115: }
4116:
1.1.1.2 misho 4117: if (markptr != NULL)
4118: {
4119: fprintf(outfile, "MK: ");
4120: PCHARSV(markptr, 0, -1, outfile);
4121: fprintf(outfile, "\n");
4122: }
1.1 misho 4123:
4124: for (i = 0; i < 32; i++)
4125: {
4126: if ((copystrings & (1 << i)) != 0)
4127: {
1.1.1.2 misho 4128: int rc;
1.1 misho 4129: char copybuffer[256];
1.1.1.2 misho 4130: PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4131: copybuffer, sizeof(copybuffer));
1.1 misho 4132: if (rc < 0)
4133: fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4134: else
1.1.1.2 misho 4135: {
4136: fprintf(outfile, "%2dC ", i);
4137: PCHARSV(copybuffer, 0, rc, outfile);
4138: fprintf(outfile, " (%d)\n", rc);
4139: }
1.1 misho 4140: }
4141: }
4142:
1.1.1.2 misho 4143: cnptr = copynames;
4144: for (;;)
1.1 misho 4145: {
1.1.1.2 misho 4146: int rc;
1.1 misho 4147: char copybuffer[256];
1.1.1.2 misho 4148:
4149: if (use_pcre16)
4150: {
4151: if (*(pcre_uint16 *)cnptr == 0) break;
4152: }
4153: else
4154: {
4155: if (*(pcre_uint8 *)cnptr == 0) break;
4156: }
4157:
4158: PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4159: cnptr, copybuffer, sizeof(copybuffer));
4160:
1.1 misho 4161: if (rc < 0)
1.1.1.2 misho 4162: {
4163: fprintf(outfile, "copy substring ");
4164: PCHARSV(cnptr, 0, -1, outfile);
4165: fprintf(outfile, " failed %d\n", rc);
4166: }
1.1 misho 4167: else
1.1.1.2 misho 4168: {
4169: fprintf(outfile, " C ");
4170: PCHARSV(copybuffer, 0, rc, outfile);
4171: fprintf(outfile, " (%d) ", rc);
4172: PCHARSV(cnptr, 0, -1, outfile);
4173: putc('\n', outfile);
4174: }
4175:
4176: cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
1.1 misho 4177: }
4178:
4179: for (i = 0; i < 32; i++)
4180: {
4181: if ((getstrings & (1 << i)) != 0)
4182: {
1.1.1.2 misho 4183: int rc;
1.1 misho 4184: const char *substring;
1.1.1.2 misho 4185: PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
1.1 misho 4186: if (rc < 0)
4187: fprintf(outfile, "get substring %d failed %d\n", i, rc);
4188: else
4189: {
1.1.1.2 misho 4190: fprintf(outfile, "%2dG ", i);
4191: PCHARSV(substring, 0, rc, outfile);
4192: fprintf(outfile, " (%d)\n", rc);
4193: PCRE_FREE_SUBSTRING(substring);
1.1 misho 4194: }
4195: }
4196: }
4197:
1.1.1.2 misho 4198: gnptr = getnames;
4199: for (;;)
1.1 misho 4200: {
1.1.1.2 misho 4201: int rc;
1.1 misho 4202: const char *substring;
1.1.1.2 misho 4203:
4204: if (use_pcre16)
4205: {
4206: if (*(pcre_uint16 *)gnptr == 0) break;
4207: }
4208: else
4209: {
4210: if (*(pcre_uint8 *)gnptr == 0) break;
4211: }
4212:
4213: PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4214: gnptr, &substring);
1.1 misho 4215: if (rc < 0)
1.1.1.2 misho 4216: {
4217: fprintf(outfile, "get substring ");
4218: PCHARSV(gnptr, 0, -1, outfile);
4219: fprintf(outfile, " failed %d\n", rc);
4220: }
1.1 misho 4221: else
4222: {
1.1.1.2 misho 4223: fprintf(outfile, " G ");
4224: PCHARSV(substring, 0, rc, outfile);
4225: fprintf(outfile, " (%d) ", rc);
4226: PCHARSV(gnptr, 0, -1, outfile);
4227: PCRE_FREE_SUBSTRING(substring);
4228: putc('\n', outfile);
1.1 misho 4229: }
1.1.1.2 misho 4230:
4231: gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
1.1 misho 4232: }
4233:
4234: if (getlist)
4235: {
1.1.1.2 misho 4236: int rc;
1.1 misho 4237: const char **stringlist;
1.1.1.2 misho 4238: PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
1.1 misho 4239: if (rc < 0)
4240: fprintf(outfile, "get substring list failed %d\n", rc);
4241: else
4242: {
4243: for (i = 0; i < count; i++)
1.1.1.2 misho 4244: {
4245: fprintf(outfile, "%2dL ", i);
4246: PCHARSV(stringlist[i], 0, -1, outfile);
4247: putc('\n', outfile);
4248: }
1.1 misho 4249: if (stringlist[i] != NULL)
4250: fprintf(outfile, "string list not terminated by NULL\n");
1.1.1.2 misho 4251: PCRE_FREE_SUBSTRING_LIST(stringlist);
1.1 misho 4252: }
4253: }
4254: }
4255:
4256: /* There was a partial match */
4257:
4258: else if (count == PCRE_ERROR_PARTIAL)
4259: {
4260: if (markptr == NULL) fprintf(outfile, "Partial match");
1.1.1.2 misho 4261: else
4262: {
4263: fprintf(outfile, "Partial match, mark=");
4264: PCHARSV(markptr, 0, -1, outfile);
4265: }
1.1 misho 4266: if (use_size_offsets > 1)
4267: {
4268: fprintf(outfile, ": ");
1.1.1.2 misho 4269: PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
1.1 misho 4270: outfile);
4271: }
1.1.1.3 ! misho 4272: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 4273: fprintf(outfile, "\n");
4274: break; /* Out of the /g loop */
4275: }
4276:
4277: /* Failed to match. If this is a /g or /G loop and we previously set
4278: g_notempty after a null match, this is not necessarily the end. We want
4279: to advance the start offset, and continue. We won't be at the end of the
4280: string - that was checked before setting g_notempty.
4281:
4282: Complication arises in the case when the newline convention is "any",
4283: "crlf", or "anycrlf". If the previous match was at the end of a line
4284: terminated by CRLF, an advance of one character just passes the \r,
4285: whereas we should prefer the longer newline sequence, as does the code in
4286: pcre_exec(). Fudge the offset value to achieve this. We check for a
1.1.1.2 misho 4287: newline setting in the pattern; if none was set, use PCRE_CONFIG() to
1.1 misho 4288: find the default.
4289:
4290: Otherwise, in the case of UTF-8 matching, the advance must be one
4291: character, not one byte. */
4292:
4293: else
4294: {
4295: if (g_notempty != 0)
4296: {
4297: int onechar = 1;
1.1.1.2 misho 4298: unsigned int obits = ((REAL_PCRE *)re)->options;
1.1 misho 4299: use_offsets[0] = start_offset;
4300: if ((obits & PCRE_NEWLINE_BITS) == 0)
4301: {
4302: int d;
1.1.1.2 misho 4303: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
1.1 misho 4304: /* Note that these values are always the ASCII ones, even in
4305: EBCDIC environments. CR = 13, NL = 10. */
4306: obits = (d == 13)? PCRE_NEWLINE_CR :
4307: (d == 10)? PCRE_NEWLINE_LF :
4308: (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4309: (d == -2)? PCRE_NEWLINE_ANYCRLF :
4310: (d == -1)? PCRE_NEWLINE_ANY : 0;
4311: }
4312: if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4313: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4314: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4315: &&
4316: start_offset < len - 1 &&
1.1.1.2 misho 4317: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4318: (use_pcre16?
4319: ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4320: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4321: :
4322: bptr[start_offset] == '\r'
4323: && bptr[start_offset + 1] == '\n')
4324: #elif defined SUPPORT_PCRE16
4325: ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4326: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4327: #else
4328: bptr[start_offset] == '\r'
4329: && bptr[start_offset + 1] == '\n'
4330: #endif
4331: )
1.1 misho 4332: onechar++;
1.1.1.2 misho 4333: else if (use_utf)
1.1 misho 4334: {
4335: while (start_offset + onechar < len)
4336: {
4337: if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4338: onechar++;
4339: }
4340: }
4341: use_offsets[1] = start_offset + onechar;
4342: }
4343: else
4344: {
4345: switch(count)
4346: {
4347: case PCRE_ERROR_NOMATCH:
4348: if (gmatched == 0)
4349: {
1.1.1.2 misho 4350: if (markptr == NULL)
4351: {
1.1.1.3 ! misho 4352: fprintf(outfile, "No match");
1.1.1.2 misho 4353: }
4354: else
4355: {
4356: fprintf(outfile, "No match, mark = ");
4357: PCHARSV(markptr, 0, -1, outfile);
4358: }
1.1.1.3 ! misho 4359: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
! 4360: putc('\n', outfile);
1.1 misho 4361: }
4362: break;
4363:
4364: case PCRE_ERROR_BADUTF8:
4365: case PCRE_ERROR_SHORTUTF8:
1.1.1.2 misho 4366: fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4367: (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4368: use_pcre16? "16" : "8");
1.1 misho 4369: if (use_size_offsets >= 2)
4370: fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4371: use_offsets[1]);
4372: fprintf(outfile, "\n");
4373: break;
4374:
1.1.1.2 misho 4375: case PCRE_ERROR_BADUTF8_OFFSET:
4376: fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4377: use_pcre16? "16" : "8");
4378: break;
4379:
1.1 misho 4380: default:
1.1.1.2 misho 4381: if (count < 0 &&
4382: (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
1.1 misho 4383: fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4384: else
4385: fprintf(outfile, "Error %d (Unexpected value)\n", count);
4386: break;
4387: }
4388:
4389: break; /* Out of the /g loop */
4390: }
4391: }
4392:
4393: /* If not /g or /G we are done */
4394:
4395: if (!do_g && !do_G) break;
4396:
4397: /* If we have matched an empty string, first check to see if we are at
4398: the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4399: Perl's /g option does. This turns out to be rather cunning. First we set
4400: PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4401: same point. If this fails (picked up above) we advance to the next
4402: character. */
4403:
4404: g_notempty = 0;
4405:
4406: if (use_offsets[0] == use_offsets[1])
4407: {
4408: if (use_offsets[0] == len) break;
4409: g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4410: }
4411:
4412: /* For /g, update the start offset, leaving the rest alone */
4413:
4414: if (do_g) start_offset = use_offsets[1];
4415:
4416: /* For /G, update the pointer and length */
4417:
4418: else
4419: {
1.1.1.2 misho 4420: bptr += use_offsets[1] * CHAR_SIZE;
1.1 misho 4421: len -= use_offsets[1];
4422: }
4423: } /* End of loop for /g and /G */
4424:
4425: NEXT_DATA: continue;
4426: } /* End of loop for data lines */
4427:
4428: CONTINUE:
4429:
4430: #if !defined NOPOSIX
4431: if (posix || do_posix) regfree(&preg);
4432: #endif
4433:
4434: if (re != NULL) new_free(re);
1.1.1.2 misho 4435: if (extra != NULL)
4436: {
4437: PCRE_FREE_STUDY(extra);
4438: }
1.1 misho 4439: if (locale_set)
4440: {
4441: new_free((void *)tables);
4442: setlocale(LC_CTYPE, "C");
4443: locale_set = 0;
4444: }
4445: if (jit_stack != NULL)
4446: {
1.1.1.2 misho 4447: PCRE_JIT_STACK_FREE(jit_stack);
1.1 misho 4448: jit_stack = NULL;
4449: }
4450: }
4451:
4452: if (infile == stdin) fprintf(outfile, "\n");
4453:
4454: EXIT:
4455:
4456: if (infile != NULL && infile != stdin) fclose(infile);
4457: if (outfile != NULL && outfile != stdout) fclose(outfile);
4458:
4459: free(buffer);
4460: free(dbuffer);
4461: free(pbuffer);
4462: free(offsets);
4463:
1.1.1.2 misho 4464: #ifdef SUPPORT_PCRE16
4465: if (buffer16 != NULL) free(buffer16);
4466: #endif
4467:
1.1 misho 4468: return yield;
4469: }
4470:
4471: /* End of pcretest.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>