Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.5
1.1 misho 1: /*************************************************
2: * PCRE testing program *
3: *************************************************/
4:
5: /* This program was hacked up as a tester for PCRE. I really should have
6: written it more tidily in the first place. Will I ever learn? It has grown and
1.1.1.2 misho 7: been extended and consequently is now rather, er, *very* untidy in places. The
8: addition of 16-bit support has made it even worse. :-(
1.1 misho 9:
10: -----------------------------------------------------------------------------
11: Redistribution and use in source and binary forms, with or without
12: modification, are permitted provided that the following conditions are met:
13:
14: * Redistributions of source code must retain the above copyright notice,
15: this list of conditions and the following disclaimer.
16:
17: * Redistributions in binary form must reproduce the above copyright
18: notice, this list of conditions and the following disclaimer in the
19: documentation and/or other materials provided with the distribution.
20:
21: * Neither the name of the University of Cambridge nor the names of its
22: contributors may be used to endorse or promote products derived from
23: this software without specific prior written permission.
24:
25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: POSSIBILITY OF SUCH DAMAGE.
36: -----------------------------------------------------------------------------
37: */
38:
1.1.1.4 misho 39: /* This program now supports the testing of all of the 8-bit, 16-bit, and
40: 32-bit PCRE libraries in a single program. This is different from the modules
41: such as pcre_compile.c in the library itself, which are compiled separately for
42: each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43: twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44: make use of any of the macros from pcre_internal.h that depend on
45: COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46: SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47: supported library functions. */
1.1.1.2 misho 48:
1.1 misho 49: #ifdef HAVE_CONFIG_H
50: #include "config.h"
51: #endif
52:
53: #include <ctype.h>
54: #include <stdio.h>
55: #include <string.h>
56: #include <stdlib.h>
57: #include <time.h>
58: #include <locale.h>
59: #include <errno.h>
60:
1.1.1.3 misho 61: /* Both libreadline and libedit are optionally supported. The user-supplied
62: original patch uses readline/readline.h for libedit, but in at least one system
63: it is installed as editline/readline.h, so the configuration code now looks for
64: that first, falling back to readline/readline.h. */
65:
66: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 67: #ifdef HAVE_UNISTD_H
68: #include <unistd.h>
69: #endif
1.1.1.3 misho 70: #if defined(SUPPORT_LIBREADLINE)
1.1 misho 71: #include <readline/readline.h>
72: #include <readline/history.h>
1.1.1.3 misho 73: #else
74: #if defined(HAVE_EDITLINE_READLINE_H)
75: #include <editline/readline.h>
76: #else
77: #include <readline/readline.h>
78: #endif
79: #endif
1.1 misho 80: #endif
81:
82: /* A number of things vary for Windows builds. Originally, pcretest opened its
83: input and output without "b"; then I was told that "b" was needed in some
84: environments, so it was added for release 5.0 to both the input and output. (It
85: makes no difference on Unix-like systems.) Later I was told that it is wrong
86: for the input on Windows. I've now abstracted the modes into two macros that
87: are set here, to make it easier to fiddle with them, and removed "b" from the
88: input mode under Windows. */
89:
90: #if defined(_WIN32) || defined(WIN32)
91: #include <io.h> /* For _setmode() */
92: #include <fcntl.h> /* For _O_BINARY */
93: #define INPUT_MODE "r"
94: #define OUTPUT_MODE "wb"
95:
96: #ifndef isatty
97: #define isatty _isatty /* This is what Windows calls them, I'm told, */
98: #endif /* though in some environments they seem to */
99: /* be already defined, hence the #ifndefs. */
100: #ifndef fileno
101: #define fileno _fileno
102: #endif
103:
104: /* A user sent this fix for Borland Builder 5 under Windows. */
105:
106: #ifdef __BORLANDC__
107: #define _setmode(handle, mode) setmode(handle, mode)
108: #endif
109:
110: /* Not Windows */
111:
112: #else
113: #include <sys/time.h> /* These two includes are needed */
114: #include <sys/resource.h> /* for setrlimit(). */
1.1.1.4 misho 115: #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116: #define INPUT_MODE "r"
117: #define OUTPUT_MODE "w"
118: #else
1.1 misho 119: #define INPUT_MODE "rb"
120: #define OUTPUT_MODE "wb"
121: #endif
1.1.1.4 misho 122: #endif
123:
124: #ifdef __VMS
125: #include <ssdef.h>
126: void vms_setsymbol( char *, char *, int );
127: #endif
128:
1.1 misho 129:
1.1.1.2 misho 130: #define PRIV(name) name
1.1 misho 131:
132: /* We have to include pcre_internal.h because we need the internal info for
133: displaying the results of pcre_study() and we also need to know about the
134: internal macros, structures, and other internal data values; pcretest has
135: "inside information" compared to a program that strictly follows the PCRE API.
136:
137: Although pcre_internal.h does itself include pcre.h, we explicitly include it
138: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139: appropriately for an application, not for building PCRE. */
140:
141: #include "pcre.h"
142: #include "pcre_internal.h"
143:
1.1.1.2 misho 144: /* The pcre_printint() function, which prints the internal form of a compiled
145: regex, is held in a separate file so that (a) it can be compiled in either
1.1.1.4 misho 146: 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
1.1.1.2 misho 147: when that is compiled in debug mode. */
148:
149: #ifdef SUPPORT_PCRE8
150: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151: #endif
152: #ifdef SUPPORT_PCRE16
153: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154: #endif
1.1.1.4 misho 155: #ifdef SUPPORT_PCRE32
156: void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
157: #endif
1.1.1.2 misho 158:
1.1 misho 159: /* We need access to some of the data tables that PCRE uses. So as not to have
1.1.1.4 misho 160: to keep two copies, we include the source files here, changing the names of the
1.1 misho 161: external symbols to prevent clashes. */
162:
1.1.1.2 misho 163: #define PCRE_INCLUDED
1.1 misho 164:
165: #include "pcre_tables.c"
1.1.1.4 misho 166: #include "pcre_ucd.c"
1.1 misho 167:
168: /* The definition of the macro PRINTABLE, which determines whether to print an
169: output character as-is or as a hex value when showing compiled patterns, is
1.1.1.2 misho 170: the same as in the printint.src file. We use it here in cases when the locale
171: has not been explicitly changed, so as to get consistent output from systems
172: that differ in their output from isprint() even in the "C" locale. */
1.1 misho 173:
1.1.1.2 misho 174: #ifdef EBCDIC
175: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176: #else
177: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178: #endif
179:
180: #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181:
1.1.1.4 misho 182: /* Posix support is disabled in 16 or 32 bit only mode. */
183: #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
1.1.1.2 misho 184: #define NOPOSIX
185: #endif
1.1 misho 186:
187: /* It is possible to compile this test program without including support for
188: testing the POSIX interface, though this is not available via the standard
189: Makefile. */
190:
191: #if !defined NOPOSIX
192: #include "pcreposix.h"
193: #endif
194:
1.1.1.2 misho 195: /* It is also possible, originally for the benefit of a version that was
196: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197: NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198: automatically cut out the UTF support if PCRE is built without it. */
199:
200: #ifndef SUPPORT_UTF
201: #ifndef NOUTF
202: #define NOUTF
1.1 misho 203: #endif
204: #endif
205:
1.1.1.4 misho 206: /* To make the code a bit tidier for 8/16/32-bit support, we define macros
1.1.1.2 misho 207: for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208: only from one place and is handled differently). I couldn't dream up any way of
209: using a single macro to do this in a generic way, because of the many different
210: argument requirements. We know that at least one of SUPPORT_PCRE8,
211: SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212: individual mode; then use these in the definitions of generic macros.
213:
214: **** Special note about the PCHARSxxx macros: the address of the string to be
215: printed is always given as two arguments: a base address followed by an offset.
216: The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217: offset is in units of this size. If the string were given as base+offset in one
218: argument, the casting might be incorrectly applied. */
219:
220: #ifdef SUPPORT_PCRE8
221:
222: #define PCHARS8(lv, p, offset, len, f) \
223: lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224:
225: #define PCHARSV8(p, offset, len, f) \
226: (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227:
1.1.1.4 misho 228: #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
1.1.1.2 misho 229: p = read_capture_name8(p, cn8, re)
230:
231: #define STRLEN8(p) ((int)strlen((char *)p))
232:
233: #define SET_PCRE_CALLOUT8(callout) \
234: pcre_callout = callout
235:
236: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
237: pcre_assign_jit_stack(extra, callback, userdata)
238:
239: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
240: re = pcre_compile((char *)pat, options, error, erroffset, tables)
241:
242: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
243: namesptr, cbuffer, size) \
244: rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
245: (char *)namesptr, cbuffer, size)
246:
247: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
248: rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
249:
250: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
251: offsets, size_offsets, workspace, size_workspace) \
252: count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
253: offsets, size_offsets, workspace, size_workspace)
254:
255: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
256: offsets, size_offsets) \
257: count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
258: offsets, size_offsets)
259:
260: #define PCRE_FREE_STUDY8(extra) \
261: pcre_free_study(extra)
262:
263: #define PCRE_FREE_SUBSTRING8(substring) \
264: pcre_free_substring(substring)
265:
266: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
267: pcre_free_substring_list(listptr)
268:
269: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
270: getnamesptr, subsptr) \
271: rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
272: (char *)getnamesptr, subsptr)
273:
/* 8-bit wrapper for pcre_get_stringnumber(): look up the name at ptr in the
compiled pattern re and assign the result to n. The pattern is taken from the
second macro argument; previously that parameter was called "rc" and ignored,
so the expansion silently picked up whatever variable named "re" was in scope
at the call site. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
276:
277: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
278: rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
279:
280: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
281: rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
282:
283: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
284: rc = pcre_pattern_to_host_byte_order(re, extra, tables)
285:
286: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
287: pcre_printint(re, outfile, debug_lengths)
288:
289: #define PCRE_STUDY8(extra, re, options, error) \
290: extra = pcre_study(re, options, error)
291:
292: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
293: pcre_jit_stack_alloc(startsize, maxsize)
294:
295: #define PCRE_JIT_STACK_FREE8(stack) \
296: pcre_jit_stack_free(stack)
297:
1.1.1.4 misho 298: #define pcre8_maketables pcre_maketables
299:
1.1.1.2 misho 300: #endif /* SUPPORT_PCRE8 */
301:
302: /* -----------------------------------------------------------*/
303:
304: #ifdef SUPPORT_PCRE16
305:
306: #define PCHARS16(lv, p, offset, len, f) \
307: lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
308:
309: #define PCHARSV16(p, offset, len, f) \
310: (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
311:
1.1.1.4 misho 312: #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
1.1.1.2 misho 313: p = read_capture_name16(p, cn16, re)
314:
315: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
316:
317: #define SET_PCRE_CALLOUT16(callout) \
318: pcre16_callout = (int (*)(pcre16_callout_block *))callout
319:
320: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
321: pcre16_assign_jit_stack((pcre16_extra *)extra, \
322: (pcre16_jit_callback)callback, userdata)
323:
324: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
325: re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
326: tables)
327:
328: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
329: namesptr, cbuffer, size) \
330: rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
331: count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
332:
333: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
334: rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
335: (PCRE_UCHAR16 *)cbuffer, size/2)
336:
337: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338: offsets, size_offsets, workspace, size_workspace) \
339: count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
340: (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
341: workspace, size_workspace)
342:
343: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344: offsets, size_offsets) \
345: count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
346: len, start_offset, options, offsets, size_offsets)
347:
348: #define PCRE_FREE_STUDY16(extra) \
349: pcre16_free_study((pcre16_extra *)extra)
350:
351: #define PCRE_FREE_SUBSTRING16(substring) \
352: pcre16_free_substring((PCRE_SPTR16)substring)
353:
354: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
355: pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
356:
357: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
358: getnamesptr, subsptr) \
359: rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
360: count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
361:
/* 16-bit wrapper for pcre16_get_stringnumber(): look up the name at ptr in the
compiled pattern re and assign the result to n. The pattern now comes from the
second macro argument (it used to be named "rc" and was ignored in favour of a
variable called "re" captured from the caller's scope), and it is cast to
pcre16 * in the same way as in the other 16-bit wrappers. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
364:
365: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
366: rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
367: (PCRE_SPTR16 *)(void*)subsptr)
368:
369: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
370: rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
371: (PCRE_SPTR16 **)(void*)listptr)
372:
373: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
374: rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
375: tables)
376:
377: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
378: pcre16_printint(re, outfile, debug_lengths)
379:
380: #define PCRE_STUDY16(extra, re, options, error) \
381: extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
382:
383: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
384: (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
385:
386: #define PCRE_JIT_STACK_FREE16(stack) \
387: pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388:
389: #endif /* SUPPORT_PCRE16 */
390:
1.1.1.4 misho 391: /* -----------------------------------------------------------*/
392:
393: #ifdef SUPPORT_PCRE32
394:
395: #define PCHARS32(lv, p, offset, len, f) \
396: lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
397:
398: #define PCHARSV32(p, offset, len, f) \
399: (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
400:
401: #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
402: p = read_capture_name32(p, cn32, re)
403:
404: #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
405:
406: #define SET_PCRE_CALLOUT32(callout) \
407: pcre32_callout = (int (*)(pcre32_callout_block *))callout
408:
409: #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
410: pcre32_assign_jit_stack((pcre32_extra *)extra, \
411: (pcre32_jit_callback)callback, userdata)
412:
413: #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
414: re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
415: tables)
416:
/* 32-bit wrappers for the substring-copy functions. The generic callers pass
the buffer size in bytes (the 8-bit wrappers forward it unchanged), whereas the
32-bit library functions are given a PCRE_UCHAR32 buffer, so the byte count
has to be divided by 4 to get the number of 32-bit units. Dividing by 2, as the
16-bit wrappers do, would claim twice the real capacity and let the library
write past the end of the buffer. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
425:
426: #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427: offsets, size_offsets, workspace, size_workspace) \
428: count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
429: (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
430: workspace, size_workspace)
431:
432: #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433: offsets, size_offsets) \
434: count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
435: len, start_offset, options, offsets, size_offsets)
436:
437: #define PCRE_FREE_STUDY32(extra) \
438: pcre32_free_study((pcre32_extra *)extra)
439:
440: #define PCRE_FREE_SUBSTRING32(substring) \
441: pcre32_free_substring((PCRE_SPTR32)substring)
442:
443: #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
444: pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
445:
446: #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
447: getnamesptr, subsptr) \
448: rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
449: count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
450:
/* 32-bit wrapper for pcre32_get_stringnumber(): look up the name at ptr in the
compiled pattern re and assign the result to n. The pattern now comes from the
second macro argument (it used to be named "rc" and was ignored in favour of a
variable called "re" captured from the caller's scope), and it is cast to
pcre32 * in the same way as in the other 32-bit wrappers. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
453:
454: #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
455: rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
456: (PCRE_SPTR32 *)(void*)subsptr)
457:
458: #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
459: rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
460: (PCRE_SPTR32 **)(void*)listptr)
461:
462: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
463: rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
464: tables)
465:
466: #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
467: pcre32_printint(re, outfile, debug_lengths)
468:
469: #define PCRE_STUDY32(extra, re, options, error) \
470: extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
471:
472: #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
473: (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
474:
475: #define PCRE_JIT_STACK_FREE32(stack) \
476: pcre32_jit_stack_free((pcre32_jit_stack *)stack)
477:
478: #endif /* SUPPORT_PCRE32 */
1.1.1.2 misho 479:
1.1.1.4 misho 480:
481: /* ----- More than one mode is supported; a runtime test is needed, except for
1.1.1.2 misho 482: pcre_config(), and the JIT stack functions, when it doesn't matter which
1.1.1.4 misho 483: available version is called. ----- */
484:
485: enum {
486: PCRE8_MODE,
487: PCRE16_MODE,
488: PCRE32_MODE
489: };
490:
491: #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492: defined (SUPPORT_PCRE32)) >= 2
1.1.1.2 misho 493:
1.1.1.4 misho 494: #define CHAR_SIZE (1 << pcre_mode)
1.1.1.2 misho 495:
1.1.1.4 misho 496: /* There doesn't seem to be an easy way of writing these macros that can cope
497: with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498: cases separately. */
499:
500: /* ----- All three modes supported ----- */
501:
502: #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
1.1.1.2 misho 503:
504: #define PCHARS(lv, p, offset, len, f) \
1.1.1.4 misho 505: if (pcre_mode == PCRE32_MODE) \
506: PCHARS32(lv, p, offset, len, f); \
507: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 508: PCHARS16(lv, p, offset, len, f); \
509: else \
510: PCHARS8(lv, p, offset, len, f)
511:
512: #define PCHARSV(p, offset, len, f) \
1.1.1.4 misho 513: if (pcre_mode == PCRE32_MODE) \
514: PCHARSV32(p, offset, len, f); \
515: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 516: PCHARSV16(p, offset, len, f); \
517: else \
518: PCHARSV8(p, offset, len, f)
519:
1.1.1.4 misho 520: #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
521: if (pcre_mode == PCRE32_MODE) \
522: READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
523: else if (pcre_mode == PCRE16_MODE) \
524: READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
1.1.1.2 misho 525: else \
1.1.1.4 misho 526: READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
1.1.1.2 misho 527:
528: #define SET_PCRE_CALLOUT(callout) \
1.1.1.4 misho 529: if (pcre_mode == PCRE32_MODE) \
530: SET_PCRE_CALLOUT32(callout); \
531: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 532: SET_PCRE_CALLOUT16(callout); \
533: else \
534: SET_PCRE_CALLOUT8(callout)
535:
1.1.1.4 misho 536: #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
1.1.1.2 misho 537:
538: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
1.1.1.4 misho 539: if (pcre_mode == PCRE32_MODE) \
540: PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
541: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 542: PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
543: else \
544: PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545:
546: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
1.1.1.4 misho 547: if (pcre_mode == PCRE32_MODE) \
548: PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
549: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 550: PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
551: else \
552: PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
553:
554: #define PCRE_CONFIG pcre_config
555:
556: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
557: namesptr, cbuffer, size) \
1.1.1.4 misho 558: if (pcre_mode == PCRE32_MODE) \
559: PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
560: namesptr, cbuffer, size); \
561: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 562: PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
563: namesptr, cbuffer, size); \
564: else \
565: PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
566: namesptr, cbuffer, size)
567:
568: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
1.1.1.4 misho 569: if (pcre_mode == PCRE32_MODE) \
570: PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
571: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 572: PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
573: else \
574: PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
575:
576: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
577: offsets, size_offsets, workspace, size_workspace) \
1.1.1.4 misho 578: if (pcre_mode == PCRE32_MODE) \
579: PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
580: offsets, size_offsets, workspace, size_workspace); \
581: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 582: PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
583: offsets, size_offsets, workspace, size_workspace); \
584: else \
585: PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
586: offsets, size_offsets, workspace, size_workspace)
587:
588: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
589: offsets, size_offsets) \
1.1.1.4 misho 590: if (pcre_mode == PCRE32_MODE) \
591: PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
592: offsets, size_offsets); \
593: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 594: PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
595: offsets, size_offsets); \
596: else \
597: PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
598: offsets, size_offsets)
599:
600: #define PCRE_FREE_STUDY(extra) \
1.1.1.4 misho 601: if (pcre_mode == PCRE32_MODE) \
602: PCRE_FREE_STUDY32(extra); \
603: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 604: PCRE_FREE_STUDY16(extra); \
605: else \
606: PCRE_FREE_STUDY8(extra)
607:
608: #define PCRE_FREE_SUBSTRING(substring) \
1.1.1.4 misho 609: if (pcre_mode == PCRE32_MODE) \
610: PCRE_FREE_SUBSTRING32(substring); \
611: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 612: PCRE_FREE_SUBSTRING16(substring); \
613: else \
614: PCRE_FREE_SUBSTRING8(substring)
615:
616: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
1.1.1.4 misho 617: if (pcre_mode == PCRE32_MODE) \
618: PCRE_FREE_SUBSTRING_LIST32(listptr); \
619: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 620: PCRE_FREE_SUBSTRING_LIST16(listptr); \
621: else \
622: PCRE_FREE_SUBSTRING_LIST8(listptr)
623:
624: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
625: getnamesptr, subsptr) \
1.1.1.4 misho 626: if (pcre_mode == PCRE32_MODE) \
627: PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
628: getnamesptr, subsptr); \
629: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 630: PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
631: getnamesptr, subsptr); \
632: else \
633: PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
634: getnamesptr, subsptr)
635:
636: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
1.1.1.4 misho 637: if (pcre_mode == PCRE32_MODE) \
638: PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
639: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 640: PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
641: else \
642: PCRE_GET_STRINGNUMBER8(n, rc, ptr)
643:
644: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
1.1.1.4 misho 645: if (pcre_mode == PCRE32_MODE) \
646: PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
647: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 648: PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
649: else \
650: PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
651:
652: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
1.1.1.4 misho 653: if (pcre_mode == PCRE32_MODE) \
654: PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
655: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 656: PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
657: else \
658: PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
659:
660: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
1.1.1.4 misho 661: (pcre_mode == PCRE32_MODE ? \
662: PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
663: : pcre_mode == PCRE16_MODE ? \
664: PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
665: : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
1.1.1.2 misho 666:
667: #define PCRE_JIT_STACK_FREE(stack) \
1.1.1.4 misho 668: if (pcre_mode == PCRE32_MODE) \
669: PCRE_JIT_STACK_FREE32(stack); \
670: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 671: PCRE_JIT_STACK_FREE16(stack); \
672: else \
673: PCRE_JIT_STACK_FREE8(stack)
674:
675: #define PCRE_MAKETABLES \
1.1.1.4 misho 676: (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
1.1.1.2 misho 677:
678: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1.1.1.4 misho 679: if (pcre_mode == PCRE32_MODE) \
680: PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
681: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 682: PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
683: else \
684: PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
685:
686: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1.1.1.4 misho 687: if (pcre_mode == PCRE32_MODE) \
688: PCRE_PRINTINT32(re, outfile, debug_lengths); \
689: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 690: PCRE_PRINTINT16(re, outfile, debug_lengths); \
691: else \
692: PCRE_PRINTINT8(re, outfile, debug_lengths)
693:
694: #define PCRE_STUDY(extra, re, options, error) \
1.1.1.4 misho 695: if (pcre_mode == PCRE32_MODE) \
696: PCRE_STUDY32(extra, re, options, error); \
697: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 698: PCRE_STUDY16(extra, re, options, error); \
699: else \
700: PCRE_STUDY8(extra, re, options, error)
701:
1.1.1.4 misho 702:
703: /* ----- Two out of three modes are supported ----- */
704:
705: #else
706:
707: /* We can use some macro trickery to make a single set of definitions work in
708: the three different cases. */
709:
710: /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711:
712: #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713: #define BITONE 32
714: #define BITTWO 16
715:
716: /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717:
718: #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719: #define BITONE 32
720: #define BITTWO 8
721:
722: /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723:
724: #else
725: #define BITONE 16
726: #define BITTWO 8
727: #endif
728:
729: #define glue(a,b) a##b
730: #define G(a,b) glue(a,b)
731:
732:
733: /* ----- Common macros for two-mode cases ----- */
734:
/* Each statement-form macro below expands to an unbraced if/else that compares
pcre_mode with the BITONE mode constant (PCRE32_MODE or PCRE16_MODE) and
invokes the BITONE variant of the operation, or the BITTWO variant otherwise.
The expansion carries no trailing semicolon, so an invocation has to be written
as a complete statement followed by ';'. */

735: #define PCHARS(lv, p, offset, len, f) \
736: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737: G(PCHARS,BITONE)(lv, p, offset, len, f); \
738: else \
739: G(PCHARS,BITTWO)(lv, p, offset, len, f)
740:
741: #define PCHARSV(p, offset, len, f) \
742: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743: G(PCHARSV,BITONE)(p, offset, len, f); \
744: else \
745: G(PCHARSV,BITTWO)(p, offset, len, f)
746:
747: #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749: G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750: else \
751: G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752:
753: #define SET_PCRE_CALLOUT(callout) \
754: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755: G(SET_PCRE_CALLOUT,BITONE)(callout); \
756: else \
757: G(SET_PCRE_CALLOUT,BITTWO)(callout)
758:
/* STRLEN is different from its neighbours: it is a fully parenthesized
conditional expression, so it yields a value and can be used inside a larger
expression. */

759: #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760: G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761:
762: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764: G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765: else \
766: G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767:
768: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770: G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771: else \
772: G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773:
/* PCRE_CONFIG is not switched on pcre_mode: it always names the config
function of the BITONE library (pcre32_config or pcre16_config). */

774: #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775:
776: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777: namesptr, cbuffer, size) \
778: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779: G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780: namesptr, cbuffer, size); \
781: else \
782: G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783: namesptr, cbuffer, size)
784:
785: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787: G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788: else \
789: G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790:
791: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792: offsets, size_offsets, workspace, size_workspace) \
793: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794: G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795: offsets, size_offsets, workspace, size_workspace); \
796: else \
797: G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798: offsets, size_offsets, workspace, size_workspace)
799:
800: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
801: offsets, size_offsets) \
802: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
803: G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
804: offsets, size_offsets); \
805: else \
806: G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
807: offsets, size_offsets)
808:
809: #define PCRE_FREE_STUDY(extra) \
810: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811: G(PCRE_FREE_STUDY,BITONE)(extra); \
812: else \
813: G(PCRE_FREE_STUDY,BITTWO)(extra)
814:
815: #define PCRE_FREE_SUBSTRING(substring) \
816: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817: G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
818: else \
819: G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
820:
821: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
822: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
823: G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
824: else \
825: G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
826:
827: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
828: getnamesptr, subsptr) \
829: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
830: G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
831: getnamesptr, subsptr); \
832: else \
833: G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
834: getnamesptr, subsptr)
835:
836: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
837: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838: G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
839: else \
840: G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
841:
842: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
843: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844: G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
845: else \
846: G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
847:
848: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
849: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
850: G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
851: else \
852: G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
853:
/* PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES yield a value, so they are written
as conditional expressions rather than if/else statements. Each expansion is
wrapped in an outer pair of parentheses (as STRLEN already is) so that the ?:
operator cannot capture operands from the expression the macro is used in. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))

/* The remaining statement-form macros expand to an unbraced if/else without a
trailing semicolon; invoke them as a complete statement followed by ';'. */

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  else \
    G(PCRE_JIT_STACK_FREE,BITTWO)(stack)

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  else \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  else \
    G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  else \
    G(PCRE_STUDY,BITTWO)(extra, re, options, error)
886:
887: #endif /* Two out of three modes */
888:
889: /* ----- End of cases where more than one mode is supported ----- */
890:
891:
1.1.1.2 misho 892: /* ----- Only 8-bit mode is supported ----- */
893:
894: #elif defined SUPPORT_PCRE8
/* In each single-mode case every generic macro is a plain alias for the
width-specific macro or library function, so no test of pcre_mode is involved.
CHAR_SIZE is 1, 2 or 4 for the 8-, 16- and 32-bit cases respectively. */
895: #define CHAR_SIZE 1
896: #define PCHARS PCHARS8
897: #define PCHARSV PCHARSV8
898: #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
899: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
900: #define STRLEN STRLEN8
901: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
902: #define PCRE_COMPILE PCRE_COMPILE8
903: #define PCRE_CONFIG pcre_config
904: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
906: #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
907: #define PCRE_EXEC PCRE_EXEC8
908: #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
909: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
910: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
911: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
912: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
913: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
914: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
915: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
916: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
917: #define PCRE_MAKETABLES pcre_maketables()
918: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919: #define PCRE_PRINTINT PCRE_PRINTINT8
920: #define PCRE_STUDY PCRE_STUDY8
921:
922: /* ----- Only 16-bit mode is supported ----- */
923:
1.1.1.4 misho 924: #elif defined SUPPORT_PCRE16
1.1.1.2 misho 925: #define CHAR_SIZE 2
926: #define PCHARS PCHARS16
927: #define PCHARSV PCHARSV16
928: #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
929: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
930: #define STRLEN STRLEN16
931: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
932: #define PCRE_COMPILE PCRE_COMPILE16
933: #define PCRE_CONFIG pcre16_config
934: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
936: #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
937: #define PCRE_EXEC PCRE_EXEC16
938: #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
939: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
940: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
941: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
942: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
943: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
944: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
945: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
946: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
947: #define PCRE_MAKETABLES pcre16_maketables()
948: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949: #define PCRE_PRINTINT PCRE_PRINTINT16
950: #define PCRE_STUDY PCRE_STUDY16
1.1.1.4 misho 951:
952: /* ----- Only 32-bit mode is supported ----- */
953:
954: #elif defined SUPPORT_PCRE32
955: #define CHAR_SIZE 4
956: #define PCHARS PCHARS32
957: #define PCHARSV PCHARSV32
958: #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
959: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
960: #define STRLEN STRLEN32
961: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
962: #define PCRE_COMPILE PCRE_COMPILE32
963: #define PCRE_CONFIG pcre32_config
964: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
965: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
966: #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
967: #define PCRE_EXEC PCRE_EXEC32
968: #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
969: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
970: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
971: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
972: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
973: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
974: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
975: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
976: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
977: #define PCRE_MAKETABLES pcre32_maketables()
978: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
979: #define PCRE_PRINTINT PCRE_PRINTINT32
980: #define PCRE_STUDY PCRE_STUDY32
981:
1.1.1.2 misho 982: #endif
983:
984: /* ----- End of mode-specific function call macros ----- */
985:
1.1 misho 986:
987: /* Other parameters */
988:
/* Supply CLOCKS_PER_SEC when the headers do not: use CLK_TCK if that is
defined, and otherwise fall back to 100. */

989: #ifndef CLOCKS_PER_SEC
990: #ifdef CLK_TCK
991: #define CLOCKS_PER_SEC CLK_TCK
992: #else
993: #define CLOCKS_PER_SEC 100
994: #endif
995: #endif
996:
/* DFA_WS_DIMENSION is only defined when NODFA is not set.
NOTE(review): presumably the number of workspace elements handed to the DFA
matcher - confirm at the point of use. */

1.1.1.3 misho 997: #if !defined NODFA
998: #define DFA_WS_DIMENSION 1000
999: #endif
1000:
1.1 misho 1001: /* This is the default loop count for timing. */
1002:
1003: #define LOOPREPEAT 500000
1004:
1005: /* Static variables */
1006:
/* File-scope state. Variables without an initializer start out as zero/NULL
because they have static storage duration. */

1007: static FILE *outfile;
1008: static int log_store = 0;
1009: static int callout_count;
1010: static int callout_extra;
1011: static int callout_fail_count;
1012: static int callout_fail_id;
1013: static int debug_lengths;
1014: static int first_callout;
1.1.1.3 misho 1015: static int jit_was_used; /* set to TRUE by jit_callback() */
1.1 misho 1016: static int locale_set = 0;
1017: static int show_malloc;
1.1.1.2 misho 1018: static int use_utf;
1.1 misho 1019: static const unsigned char *last_callout_mark = NULL;
1020:
1021: /* The buffers grow automatically if very long input lines are encountered. */
1022:
/* NOTE(review): buffer_size is presumably the current size in bytes of both
buffer and pbuffer, which are allocated elsewhere - confirm. */

1023: static int buffer_size = 50000;
1.1.1.2 misho 1024: static pcre_uint8 *buffer = NULL;
1025: static pcre_uint8 *pbuffer = NULL;
1026:
1.1.1.4 misho 1027: /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1.1.1.2 misho 1028:
1029: #ifdef COMPILE_PCRE16
1030: #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1031: #endif
1032:
1.1.1.4 misho 1033: #ifdef COMPILE_PCRE32
1034: #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1035: #endif
1036:
1037: /* We need buffers for building 16/32-bit strings, and the tables of operator
1038: lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1039: pattern for saving/reloading testing. Luckily, the data for these tables is
1040: defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1041: are used in the tables) are adjusted appropriately for the 16/32-bit world.
1042: LINK_SIZE is also used later in this program. */
1043:
/* 16-bit case: IMM2_SIZE becomes 1, and LINK_SIZE is remapped 2 -> 1 and
3 or 4 -> 2 before OP_LENGTHS is expanded into OP_lengths16. Any other
LINK_SIZE is rejected at compile time. */

1044: #ifdef SUPPORT_PCRE16
1045: #undef IMM2_SIZE
1046: #define IMM2_SIZE 1
1047:
1.1.1.2 misho 1048: #if LINK_SIZE == 2
1049: #undef LINK_SIZE
1050: #define LINK_SIZE 1
1051: #elif LINK_SIZE == 3 || LINK_SIZE == 4
1052: #undef LINK_SIZE
1053: #define LINK_SIZE 2
1054: #else
1055: #error LINK_SIZE must be either 2, 3, or 4
1056: #endif
1057:
1.1.1.4 misho 1058: static int buffer16_size = 0;
1059: static pcre_uint16 *buffer16 = NULL;
1.1.1.2 misho 1060: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1061: #endif /* SUPPORT_PCRE16 */
1062:
/* 32-bit case: both IMM2_SIZE and LINK_SIZE are forced to 1 before OP_LENGTHS
is expanded into OP_lengths32.
NOTE(review): when SUPPORT_PCRE16 and SUPPORT_PCRE32 are both defined, this
section runs after the 16-bit one, so every later use of LINK_SIZE in the file
sees the 32-bit value of 1 whatever mode is selected at run time - confirm that
this is what the later code expects. */

1.1.1.4 misho 1063: #ifdef SUPPORT_PCRE32
1064: #undef IMM2_SIZE
1065: #define IMM2_SIZE 1
1066: #undef LINK_SIZE
1067: #define LINK_SIZE 1
1.1.1.2 misho 1068:
1.1.1.4 misho 1069: static int buffer32_size = 0;
1070: static pcre_uint32 *buffer32 = NULL;
1071: static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1072: #endif /* SUPPORT_PCRE32 */
1073:
1074: /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1075: support, it can be changed by an option. If there is no 8-bit support, there
1076: must be 16- or 32-bit support, so default to 16-bit mode when that is
available and to 32-bit mode otherwise. */
1077:
1078: #if defined SUPPORT_PCRE8
1079: static int pcre_mode = PCRE8_MODE;
1080: #elif defined SUPPORT_PCRE16
1081: static int pcre_mode = PCRE16_MODE;
1082: #elif defined SUPPORT_PCRE32
1083: static int pcre_mode = PCRE32_MODE;
1.1.1.2 misho 1084: #endif
1.1 misho 1085:
1.1.1.3 misho 1086: /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1087:
1088: static int jit_study_bits[] =
1089: {
1090: PCRE_STUDY_JIT_COMPILE,
1091: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1092: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1093: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1094: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1095: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1097: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1098: };
1099:
1.1.1.4 misho 1100: #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1101: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1102:
1.1 misho 1103: /* Textual explanations for runtime error codes */
1104:
/* One entry per run-time error code. A NULL entry marks a code that has no
fixed text here because, as its slot comment says, it is either not an error,
never returned, or reported by special-case code.
NOTE(review): the slot comments (0 = no error, 1 = NOMATCH) suggest the index
is the negated PCRE_ERROR_xxx value - confirm at the point of use.
NOTE(review): the "8-bit/16-bit" wording predates 32-bit support; it is text
emitted at run time, so it is deliberately left untouched here. */

1105: static const char *errtexts[] = {
1106: NULL, /* 0 is no error */
1107: NULL, /* NOMATCH is handled specially */
1108: "NULL argument passed",
1109: "bad option value",
1110: "magic number missing",
1111: "unknown opcode - pattern overwritten?",
1112: "no more memory",
1113: NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1114: "match limit exceeded",
1115: "callout error code",
1.1.1.2 misho 1116: NULL, /* BADUTF8/16 is handled specially */
1117: NULL, /* BADUTF8/16 offset is handled specially */
1.1 misho 1118: NULL, /* PARTIAL is handled specially */
1119: "not used - internal error",
1120: "internal error - pattern overwritten?",
1121: "bad count value",
1122: "item unsupported for DFA matching",
1123: "backreference condition or recursion test not supported for DFA matching",
1124: "match limit not supported for DFA matching",
1125: "workspace size exceeded in DFA matching",
1126: "too much recursion for DFA matching",
1127: "recursion limit exceeded",
1128: "not used - internal error",
1129: "invalid combination of newline options",
1130: "bad offset value",
1.1.1.2 misho 1131: NULL, /* SHORTUTF8/16 is handled specially */
1.1 misho 1132: "nested recursion at the same subject position",
1.1.1.2 misho 1133: "JIT stack limit reached",
1.1.1.3 misho 1134: "pattern compiled in wrong mode: 8-bit/16-bit error",
1135: "pattern compiled with other endianness",
1.1.1.4 misho 1136: "invalid data in workspace for DFA restart",
1137: "bad JIT option",
1138: "bad length"
1.1 misho 1139: };
1140:
1141:
1142: /*************************************************
1143: * Alternate character tables *
1144: *************************************************/
1145:
1146: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1147: using the default tables of the library. However, the T option can be used to
1148: select alternate sets of tables, for different kinds of testing. Note also that
1149: the L (locale) option also adjusts the tables. */
1150:
1151: /* This is the set of tables distributed as default with PCRE. It recognizes
1152: only ASCII characters. */
1153:
1.1.1.2 misho 1154: static const pcre_uint8 tables0[] = {
1.1 misho 1155:
1156: /* This table is a lower casing table. */
1157:
1158: 0, 1, 2, 3, 4, 5, 6, 7,
1159: 8, 9, 10, 11, 12, 13, 14, 15,
1160: 16, 17, 18, 19, 20, 21, 22, 23,
1161: 24, 25, 26, 27, 28, 29, 30, 31,
1162: 32, 33, 34, 35, 36, 37, 38, 39,
1163: 40, 41, 42, 43, 44, 45, 46, 47,
1164: 48, 49, 50, 51, 52, 53, 54, 55,
1165: 56, 57, 58, 59, 60, 61, 62, 63,
1166: 64, 97, 98, 99,100,101,102,103,
1167: 104,105,106,107,108,109,110,111,
1168: 112,113,114,115,116,117,118,119,
1169: 120,121,122, 91, 92, 93, 94, 95,
1170: 96, 97, 98, 99,100,101,102,103,
1171: 104,105,106,107,108,109,110,111,
1172: 112,113,114,115,116,117,118,119,
1173: 120,121,122,123,124,125,126,127,
1174: 128,129,130,131,132,133,134,135,
1175: 136,137,138,139,140,141,142,143,
1176: 144,145,146,147,148,149,150,151,
1177: 152,153,154,155,156,157,158,159,
1178: 160,161,162,163,164,165,166,167,
1179: 168,169,170,171,172,173,174,175,
1180: 176,177,178,179,180,181,182,183,
1181: 184,185,186,187,188,189,190,191,
1182: 192,193,194,195,196,197,198,199,
1183: 200,201,202,203,204,205,206,207,
1184: 208,209,210,211,212,213,214,215,
1185: 216,217,218,219,220,221,222,223,
1186: 224,225,226,227,228,229,230,231,
1187: 232,233,234,235,236,237,238,239,
1188: 240,241,242,243,244,245,246,247,
1189: 248,249,250,251,252,253,254,255,
1190:
1191: /* This table is a case flipping table. */
1192:
1193: 0, 1, 2, 3, 4, 5, 6, 7,
1194: 8, 9, 10, 11, 12, 13, 14, 15,
1195: 16, 17, 18, 19, 20, 21, 22, 23,
1196: 24, 25, 26, 27, 28, 29, 30, 31,
1197: 32, 33, 34, 35, 36, 37, 38, 39,
1198: 40, 41, 42, 43, 44, 45, 46, 47,
1199: 48, 49, 50, 51, 52, 53, 54, 55,
1200: 56, 57, 58, 59, 60, 61, 62, 63,
1201: 64, 97, 98, 99,100,101,102,103,
1202: 104,105,106,107,108,109,110,111,
1203: 112,113,114,115,116,117,118,119,
1204: 120,121,122, 91, 92, 93, 94, 95,
1205: 96, 65, 66, 67, 68, 69, 70, 71,
1206: 72, 73, 74, 75, 76, 77, 78, 79,
1207: 80, 81, 82, 83, 84, 85, 86, 87,
1208: 88, 89, 90,123,124,125,126,127,
1209: 128,129,130,131,132,133,134,135,
1210: 136,137,138,139,140,141,142,143,
1211: 144,145,146,147,148,149,150,151,
1212: 152,153,154,155,156,157,158,159,
1213: 160,161,162,163,164,165,166,167,
1214: 168,169,170,171,172,173,174,175,
1215: 176,177,178,179,180,181,182,183,
1216: 184,185,186,187,188,189,190,191,
1217: 192,193,194,195,196,197,198,199,
1218: 200,201,202,203,204,205,206,207,
1219: 208,209,210,211,212,213,214,215,
1220: 216,217,218,219,220,221,222,223,
1221: 224,225,226,227,228,229,230,231,
1222: 232,233,234,235,236,237,238,239,
1223: 240,241,242,243,244,245,246,247,
1224: 248,249,250,251,252,253,254,255,
1225:
1226: /* This table contains bit maps for various character classes. Each map is 32
1227: bytes long and the bits run from the least significant end of each byte. The
1228: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1229: graph, print, punct, and cntrl. Other classes are built from combinations. */
1230:
1231: 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1232: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1233: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1234: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235:
1236: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1237: 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1238: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240:
1241: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1242: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1244: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245:
1246: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247: 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1248: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250:
1251: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1253: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255:
1256: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1257: 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1258: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1259: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260:
1261: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1262: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1263: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1264: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265:
1266: 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1267: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1268: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1269: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270:
1271: 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1272: 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1273: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275:
1276: 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1277: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1278: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1280:
1281: /* This table identifies various classes of character by individual bits:
1282: 0x01 white space character
1283: 0x02 letter
1284: 0x04 decimal digit
1285: 0x08 hexadecimal digit
1286: 0x10 alphanumeric or '_'
1287: 0x80 regular expression metacharacter or binary zero
1288: */
1289:
1290: 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1.1.1.5 ! misho 1291: 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1.1 misho 1292: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1293: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1294: 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1295: 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1296: 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1297: 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1298: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1299: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1300: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1301: 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1302: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1303: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1304: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1305: 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1306: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1307: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1308: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1309: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1310: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1311: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1312: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1313: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1314: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1315: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1316: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1317: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1318: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1319: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1320: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1321: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1322:
1.1.1.5 ! misho 1323: /* This is a set of tables that came originally from a Windows user. It seems
! 1324: to be at least an approximation of ISO 8859. In particular, there are
! 1325: characters greater than 128 that are marked as spaces, letters, etc. */
1.1 misho 1326:
1.1.1.2 misho 1327: static const pcre_uint8 tables1[] = {
1.1 misho 1328: 0,1,2,3,4,5,6,7,
1329: 8,9,10,11,12,13,14,15,
1330: 16,17,18,19,20,21,22,23,
1331: 24,25,26,27,28,29,30,31,
1332: 32,33,34,35,36,37,38,39,
1333: 40,41,42,43,44,45,46,47,
1334: 48,49,50,51,52,53,54,55,
1335: 56,57,58,59,60,61,62,63,
1336: 64,97,98,99,100,101,102,103,
1337: 104,105,106,107,108,109,110,111,
1338: 112,113,114,115,116,117,118,119,
1339: 120,121,122,91,92,93,94,95,
1340: 96,97,98,99,100,101,102,103,
1341: 104,105,106,107,108,109,110,111,
1342: 112,113,114,115,116,117,118,119,
1343: 120,121,122,123,124,125,126,127,
1344: 128,129,130,131,132,133,134,135,
1345: 136,137,138,139,140,141,142,143,
1346: 144,145,146,147,148,149,150,151,
1347: 152,153,154,155,156,157,158,159,
1348: 160,161,162,163,164,165,166,167,
1349: 168,169,170,171,172,173,174,175,
1350: 176,177,178,179,180,181,182,183,
1351: 184,185,186,187,188,189,190,191,
1352: 224,225,226,227,228,229,230,231,
1353: 232,233,234,235,236,237,238,239,
1354: 240,241,242,243,244,245,246,215,
1355: 248,249,250,251,252,253,254,223,
1356: 224,225,226,227,228,229,230,231,
1357: 232,233,234,235,236,237,238,239,
1358: 240,241,242,243,244,245,246,247,
1359: 248,249,250,251,252,253,254,255,
1360: 0,1,2,3,4,5,6,7,
1361: 8,9,10,11,12,13,14,15,
1362: 16,17,18,19,20,21,22,23,
1363: 24,25,26,27,28,29,30,31,
1364: 32,33,34,35,36,37,38,39,
1365: 40,41,42,43,44,45,46,47,
1366: 48,49,50,51,52,53,54,55,
1367: 56,57,58,59,60,61,62,63,
1368: 64,97,98,99,100,101,102,103,
1369: 104,105,106,107,108,109,110,111,
1370: 112,113,114,115,116,117,118,119,
1371: 120,121,122,91,92,93,94,95,
1372: 96,65,66,67,68,69,70,71,
1373: 72,73,74,75,76,77,78,79,
1374: 80,81,82,83,84,85,86,87,
1375: 88,89,90,123,124,125,126,127,
1376: 128,129,130,131,132,133,134,135,
1377: 136,137,138,139,140,141,142,143,
1378: 144,145,146,147,148,149,150,151,
1379: 152,153,154,155,156,157,158,159,
1380: 160,161,162,163,164,165,166,167,
1381: 168,169,170,171,172,173,174,175,
1382: 176,177,178,179,180,181,182,183,
1383: 184,185,186,187,188,189,190,191,
1384: 224,225,226,227,228,229,230,231,
1385: 232,233,234,235,236,237,238,239,
1386: 240,241,242,243,244,245,246,215,
1387: 248,249,250,251,252,253,254,223,
1388: 192,193,194,195,196,197,198,199,
1389: 200,201,202,203,204,205,206,207,
1390: 208,209,210,211,212,213,214,247,
1391: 216,217,218,219,220,221,222,255,
1392: 0,62,0,0,1,0,0,0,
1393: 0,0,0,0,0,0,0,0,
1394: 32,0,0,0,1,0,0,0,
1395: 0,0,0,0,0,0,0,0,
1396: 0,0,0,0,0,0,255,3,
1397: 126,0,0,0,126,0,0,0,
1398: 0,0,0,0,0,0,0,0,
1399: 0,0,0,0,0,0,0,0,
1400: 0,0,0,0,0,0,255,3,
1401: 0,0,0,0,0,0,0,0,
1402: 0,0,0,0,0,0,12,2,
1403: 0,0,0,0,0,0,0,0,
1404: 0,0,0,0,0,0,0,0,
1405: 254,255,255,7,0,0,0,0,
1406: 0,0,0,0,0,0,0,0,
1407: 255,255,127,127,0,0,0,0,
1408: 0,0,0,0,0,0,0,0,
1409: 0,0,0,0,254,255,255,7,
1410: 0,0,0,0,0,4,32,4,
1411: 0,0,0,128,255,255,127,255,
1412: 0,0,0,0,0,0,255,3,
1413: 254,255,255,135,254,255,255,7,
1414: 0,0,0,0,0,4,44,6,
1415: 255,255,127,255,255,255,127,255,
1416: 0,0,0,0,254,255,255,255,
1417: 255,255,255,255,255,255,255,127,
1418: 0,0,0,0,254,255,255,255,
1419: 255,255,255,255,255,255,255,255,
1420: 0,2,0,0,255,255,255,255,
1421: 255,255,255,255,255,255,255,127,
1422: 0,0,0,0,255,255,255,255,
1423: 255,255,255,255,255,255,255,255,
1424: 0,0,0,0,254,255,0,252,
1425: 1,0,0,248,1,0,0,120,
1426: 0,0,0,0,254,255,255,255,
1427: 0,0,128,0,0,0,128,0,
1428: 255,255,255,255,0,0,0,0,
1429: 0,0,0,0,0,0,0,128,
1430: 255,255,255,255,0,0,0,0,
1431: 0,0,0,0,0,0,0,0,
1432: 128,0,0,0,0,0,0,0,
1433: 0,1,1,0,1,1,0,0,
1434: 0,0,0,0,0,0,0,0,
1435: 0,0,0,0,0,0,0,0,
1436: 1,0,0,0,128,0,0,0,
1437: 128,128,128,128,0,0,128,0,
1438: 28,28,28,28,28,28,28,28,
1439: 28,28,0,0,0,0,0,128,
1440: 0,26,26,26,26,26,26,18,
1441: 18,18,18,18,18,18,18,18,
1442: 18,18,18,18,18,18,18,18,
1443: 18,18,18,128,128,0,128,16,
1444: 0,26,26,26,26,26,26,18,
1445: 18,18,18,18,18,18,18,18,
1446: 18,18,18,18,18,18,18,18,
1447: 18,18,18,128,128,0,0,0,
1448: 0,0,0,0,0,1,0,0,
1449: 0,0,0,0,0,0,0,0,
1450: 0,0,0,0,0,0,0,0,
1451: 0,0,0,0,0,0,0,0,
1452: 1,0,0,0,0,0,0,0,
1453: 0,0,18,0,0,0,0,0,
1454: 0,0,20,20,0,18,0,0,
1455: 0,20,18,0,0,0,0,0,
1456: 18,18,18,18,18,18,18,18,
1457: 18,18,18,18,18,18,18,18,
1458: 18,18,18,18,18,18,18,0,
1459: 18,18,18,18,18,18,18,18,
1460: 18,18,18,18,18,18,18,18,
1461: 18,18,18,18,18,18,18,18,
1462: 18,18,18,18,18,18,18,0,
1463: 18,18,18,18,18,18,18,18
1464: };
1465:
1466:
1467:
1468:
1469: #ifndef HAVE_STRERROR
1470: /*************************************************
1471: * Provide strerror() for non-ANSI libraries *
1472: *************************************************/
1473:
1474: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1475: in their libraries, but can provide the same facility by this simple
1476: alternative function. */
1477:
/* The system's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist, giving a
fixed fallback text for a number that lies outside the table. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1487: #endif /* HAVE_STRERROR */
1488:
1489:
1.1.1.4 misho 1490:
1491: /*************************************************
1492: * Print newline configuration *
1493: *************************************************/
1494:
1495: /*
1496: Arguments:
1497: rc the return code from PCRE_CONFIG_NEWLINE
1498: isc TRUE if called from "-C newline"
1499: Returns: nothing
1500: */
1501:
1502: static void
1503: print_newline_config(int rc, BOOL isc)
1504: {
1505: const char *s = NULL;
1506: if (!isc) printf(" Newline sequence is ");
1507: switch(rc)
1508: {
1509: case CHAR_CR: s = "CR"; break;
1510: case CHAR_LF: s = "LF"; break;
1511: case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1512: case -1: s = "ANY"; break;
1513: case -2: s = "ANYCRLF"; break;
1514:
1515: default:
1516: printf("a non-standard value: 0x%04x\n", rc);
1517: return;
1518: }
1519:
1520: printf("%s\n", s);
1521: }
1522:
1523:
1524:
1.1 misho 1525: /*************************************************
1526: * JIT memory callback *
1527: *************************************************/
1528:
1529: static pcre_jit_stack* jit_callback(void *arg)
1530: {
1.1.1.3 misho 1531: jit_was_used = TRUE;
1.1 misho 1532: return (pcre_jit_stack *)arg;
1533: }
1534:
1535:
1.1.1.4 misho 1536: #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1.1.1.2 misho 1537: /*************************************************
1538: * Convert UTF-8 string to value *
1539: *************************************************/
1540:
1541: /* This function takes one or more bytes that represents a UTF-8 character,
1542: and returns the value of the character.
1543:
1544: Argument:
1545: utf8bytes a pointer to the byte vector
1546: vptr a pointer to a pcre_uint32 to receive the value
1547:
1548: Returns: > 0 => the number of bytes consumed
1549: -6 to 0 => malformed UTF-8 character at offset = (-return)
1550: */
1551:
1552: static int
1.1.1.4 misho 1553: utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1.1.1.2 misho 1554: {
1.1.1.4 misho 1555: pcre_uint32 c = *utf8bytes++;
1556: pcre_uint32 d = c;
1.1.1.2 misho 1557: int i, j, s;
1558:
1559: for (i = -1; i < 6; i++) /* i is number of additional bytes */
1560: {
1561: if ((d & 0x80) == 0) break;
1562: d <<= 1;
1563: }
1564:
1565: if (i == -1) { *vptr = c; return 1; } /* ascii character */
1566: if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1567:
1568: /* i now has a value in the range 1-5 */
1569:
1570: s = 6*i;
1571: d = (c & utf8_table3[i]) << s;
1572:
1573: for (j = 0; j < i; j++)
1574: {
1575: c = *utf8bytes++;
1576: if ((c & 0xc0) != 0x80) return -(j+1);
1577: s -= 6;
1578: d |= (c & 0x3f) << s;
1579: }
1580:
1581: /* Check that encoding was the correct unique one */
1582:
1583: for (j = 0; j < utf8_table1_size; j++)
1.1.1.4 misho 1584: if (d <= (pcre_uint32)utf8_table1[j]) break;
1.1.1.2 misho 1585: if (j != i) return -(i+1);
1586:
1587: /* Valid value */
1588:
1589: *vptr = d;
1590: return i+1;
1591: }
1592: #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1593:
1594:
1595:
1.1.1.4 misho 1596: #if defined SUPPORT_PCRE8 && !defined NOUTF
1.1.1.2 misho 1597: /*************************************************
1598: * Convert character value to UTF-8 *
1599: *************************************************/
1600:
1601: /* This function takes an integer value in the range 0 - 0x7fffffff
1602: and encodes it as a UTF-8 character in 1 to 6 bytes.
1603:
1604: Arguments:
1605: cvalue the character value
1606: utf8bytes pointer to buffer for result - at least 6 bytes long
1607:
1608: Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1609: */
1610:
1611: static int
1.1.1.4 misho 1612: ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1.1.1.2 misho 1613: {
1614: register int i, j;
1.1.1.4 misho 1615: if (cvalue > 0x7fffffffu)
1616: return -1;
1.1.1.2 misho 1617: for (i = 0; i < utf8_table1_size; i++)
1.1.1.4 misho 1618: if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1.1.1.2 misho 1619: utf8bytes += i;
1620: for (j = i; j > 0; j--)
1621: {
1622: *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1623: cvalue >>= 6;
1624: }
1625: *utf8bytes = utf8_table2[i] | cvalue;
1626: return i + 1;
1627: }
1628: #endif
1629:
1630:
1631: #ifdef SUPPORT_PCRE16
1632: /*************************************************
1633: * Convert a string to 16-bit *
1634: *************************************************/
1635:
1636: /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1637: 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1638: double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1639: in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1640: result is always left in buffer16.
1641:
1642: Note that this function does not object to surrogate values. This is
1643: deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1644: for the purpose of testing that they are correctly faulted.
1645:
1646: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1647: in UTF-8 so that values greater than 255 can be handled.
1648:
1649: Arguments:
1650: data TRUE if converting a data line; FALSE for a regex
1651: p points to a byte string
1652: utf true if UTF-8 (to be converted to UTF-16)
1653: len number of bytes in the string (excluding trailing zero)
1654:
1655: Returns: number of 16-bit data items used (excluding trailing zero)
1656: OR -1 if a UTF-8 string is malformed
1657: OR -2 if a value > 0x10ffff is encountered
1658: OR -3 if a value > 0xffff is encountered when not in UTF mode
1659: */
1660:
1661: static int
1662: to16(int data, pcre_uint8 *p, int utf, int len)
1663: {
1664: pcre_uint16 *pp;
1665:
1666: if (buffer16_size < 2*len + 2)
1667: {
1668: if (buffer16 != NULL) free(buffer16);
1669: buffer16_size = 2*len + 2;
1670: buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1671: if (buffer16 == NULL)
1672: {
1673: fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1674: exit(1);
1675: }
1676: }
1677:
1678: pp = buffer16;
1679:
1680: if (!utf && !data)
1681: {
1682: while (len-- > 0) *pp++ = *p++;
1683: }
1684:
1685: else
1686: {
1.1.1.4 misho 1687: pcre_uint32 c = 0;
1.1.1.2 misho 1688: while (len > 0)
1689: {
1690: int chlen = utf82ord(p, &c);
1691: if (chlen <= 0) return -1;
1692: if (c > 0x10ffff) return -2;
1693: p += chlen;
1694: len -= chlen;
1695: if (c < 0x10000) *pp++ = c; else
1696: {
1697: if (!utf) return -3;
1698: c -= 0x10000;
1699: *pp++ = 0xD800 | (c >> 10);
1700: *pp++ = 0xDC00 | (c & 0x3ff);
1701: }
1702: }
1703: }
1704:
1705: *pp = 0;
1706: return pp - buffer16;
1707: }
1708: #endif
1709:
1.1.1.4 misho 1710: #ifdef SUPPORT_PCRE32
1711: /*************************************************
1712: * Convert a string to 32-bit *
1713: *************************************************/
1714:
1715: /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1716: 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1717: times, because every character occupies at least one byte in UTF-8 and
1718: exactly one 32-bit unit (4 bytes) in UTF-32, whatever its value. The
1719: result is always left in buffer32.
1720:
1721: Note that this function does not object to surrogate values. This is
1722: deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1723: for the purpose of testing that they are correctly faulted.
1724:
1725: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1726: in UTF-8 so that values greater than 255 can be handled.
1727:
1728: Arguments:
1729: data TRUE if converting a data line; FALSE for a regex
1730: p points to a byte string
1731: utf true if UTF-8 (to be converted to UTF-32)
1732: len number of bytes in the string (excluding trailing zero)
1733:
1734: Returns: number of 32-bit data items used (excluding trailing zero)
1735: OR -1 if a UTF-8 string is malformed
1736: OR -2 if a value > 0x10ffff is encountered in UTF mode
1737: OR -3 if a surrogate is encountered in a UTF-mode pattern (not a data line)
1738: */
1739:
1740: static int
1741: to32(int data, pcre_uint8 *p, int utf, int len)
1742: {
1743: pcre_uint32 *pp;
1744:
1745: if (buffer32_size < 4*len + 4)
1746: {
1747: if (buffer32 != NULL) free(buffer32);
1748: buffer32_size = 4*len + 4;
1749: buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1750: if (buffer32 == NULL)
1751: {
1752: fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1753: exit(1);
1754: }
1755: }
1756:
1757: pp = buffer32;
1758:
1759: if (!utf && !data)
1760: {
1761: while (len-- > 0) *pp++ = *p++;
1762: }
1763:
1764: else
1765: {
1766: pcre_uint32 c = 0;
1767: while (len > 0)
1768: {
1769: int chlen = utf82ord(p, &c);
1770: if (chlen <= 0) return -1;
1771: if (utf)
1772: {
1773: if (c > 0x10ffff) return -2;
1774: if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1775: }
1776:
1777: p += chlen;
1778: len -= chlen;
1779: *pp++ = c;
1780: }
1781: }
1782:
1783: *pp = 0;
1784: return pp - buffer32;
1785: }
1786:
1787: /* Check that a 32-bit character string is valid UTF-32.
1788:
1789: Arguments:
1790: string points to the string
1791: length length of string, or -1 if the string is zero-terminated
1792:
1793: Returns: TRUE if the string is a valid UTF-32 string
1794: FALSE otherwise
1795: */
1796:
1797: #ifdef NEVER /* Not used */
1798: #ifdef SUPPORT_UTF
1799: static BOOL
1800: valid_utf32(pcre_uint32 *string, int length)
1801: {
1802: register pcre_uint32 *p;
1803: register pcre_uint32 c;
1804:
1805: for (p = string; length-- > 0; p++)
1806: {
1807: c = *p;
1808: if (c > 0x10ffffu) return FALSE; /* Too big */
1809: if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1810: }
1811:
1812: return TRUE;
1813: }
1814: #endif /* SUPPORT_UTF */
1815: #endif /* NEVER */
1816: #endif /* SUPPORT_PCRE32 */
1817:
1.1.1.2 misho 1818:
1.1 misho 1819: /*************************************************
1820: * Read or extend an input line *
1821: *************************************************/
1822:
1823: /* Input lines are read into buffer, but both patterns and data lines can be
1824: continued over multiple input lines. In addition, if the buffer fills up, we
1825: want to automatically expand it so as to be able to handle extremely large
1826: lines that are needed for certain stress tests. When the input buffer is
1827: expanded, the other two buffers must also be expanded likewise, and the
1828: contents of pbuffer, which are a copy of the input for callouts, must be
1829: preserved (for when expansion happens for a data line). This is not the most
1830: optimal way of handling this, but hey, this is just a test program!
1831:
1832: Arguments:
1833: f the file to read
1834: start where in buffer to start (this *must* be within buffer)
1835: prompt for stdin or readline()
1836:
1837: Returns: pointer to the start of new data
1838: could be a copy of start, or could be moved
1839: NULL if no data read and EOF reached
1840: */
1841:
1.1.1.2 misho 1842: static pcre_uint8 *
1843: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1.1 misho 1844: {
1.1.1.2 misho 1845: pcre_uint8 *here = start;
1.1 misho 1846:
1847: for (;;)
1848: {
1.1.1.2 misho 1849: size_t rlen = (size_t)(buffer_size - (here - buffer));
1.1 misho 1850:
1851: if (rlen > 1000)
1852: {
1853: int dlen;
1854:
1.1.1.3 misho 1855: /* If libreadline or libedit support is required, use readline() to read a
1856: line if the input is a terminal. Note that readline() removes the trailing
1857: newline, so we must put it back again, to be compatible with fgets(). */
1.1 misho 1858:
1.1.1.3 misho 1859: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 1860: if (isatty(fileno(f)))
1861: {
1862: size_t len;
1863: char *s = readline(prompt);
1864: if (s == NULL) return (here == start)? NULL : start;
1865: len = strlen(s);
1866: if (len > 0) add_history(s);
1867: if (len > rlen - 1) len = rlen - 1;
1868: memcpy(here, s, len);
1869: here[len] = '\n';
1870: here[len+1] = 0;
1871: free(s);
1872: }
1873: else
1874: #endif
1875:
1876: /* Read the next line by normal means, prompting if the file is stdin. */
1877:
1878: {
1879: if (f == stdin) printf("%s", prompt);
1880: if (fgets((char *)here, rlen, f) == NULL)
1881: return (here == start)? NULL : start;
1882: }
1883:
1884: dlen = (int)strlen((char *)here);
1885: if (dlen > 0 && here[dlen - 1] == '\n') return start;
1886: here += dlen;
1887: }
1888:
1889: else
1890: {
1891: int new_buffer_size = 2*buffer_size;
1.1.1.2 misho 1892: pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1893: pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1.1 misho 1894:
1.1.1.4 misho 1895: if (new_buffer == NULL || new_pbuffer == NULL)
1.1 misho 1896: {
1897: fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1898: exit(1);
1899: }
1900:
1901: memcpy(new_buffer, buffer, buffer_size);
1902: memcpy(new_pbuffer, pbuffer, buffer_size);
1903:
1904: buffer_size = new_buffer_size;
1905:
1906: start = new_buffer + (start - buffer);
1907: here = new_buffer + (here - buffer);
1908:
1909: free(buffer);
1910: free(pbuffer);
1911:
1912: buffer = new_buffer;
1913: pbuffer = new_pbuffer;
1914: }
1915: }
1916:
1.1.1.5 ! misho 1917: /* Control never gets here */
1.1 misho 1918: }
1919:
1920:
1921:
1922: /*************************************************
1923: * Read number from string *
1924: *************************************************/
1925:
1926: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1927: around with conditional compilation, just do the job by hand. It is only used
1928: for unpicking arguments, so just keep it simple.
1929:
1930: Arguments:
1931: str string to be converted
1932: endptr where to put the end pointer
1933:
1934: Returns: the value, as an int
1935: */
1936:
1937: static int
1.1.1.2 misho 1938: get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1.1 misho 1939: {
1940: int result = 0;
1941: while(*str != 0 && isspace(*str)) str++;
1942: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1943: *endptr = str;
1944: return(result);
1945: }
1946:
1947:
1948:
1949: /*************************************************
1.1.1.2 misho 1950: * Print one character *
1.1 misho 1951: *************************************************/
1952:
1.1.1.2 misho 1953: /* Print a single character either literally, or as a hex escape. */
1.1 misho 1954:
1.1.1.4 misho 1955: static int pchar(pcre_uint32 c, FILE *f)
1.1 misho 1956: {
1.1.1.4 misho 1957: int n = 0;
1.1.1.2 misho 1958: if (PRINTOK(c))
1959: {
1960: if (f != NULL) fprintf(f, "%c", c);
1961: return 1;
1962: }
1.1 misho 1963:
1.1.1.2 misho 1964: if (c < 0x100)
1.1 misho 1965: {
1.1.1.2 misho 1966: if (use_utf)
1967: {
1968: if (f != NULL) fprintf(f, "\\x{%02x}", c);
1969: return 6;
1970: }
1971: else
1972: {
1973: if (f != NULL) fprintf(f, "\\x%02x", c);
1974: return 4;
1975: }
1.1 misho 1976: }
1977:
1.1.1.4 misho 1978: if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1979: return n >= 0 ? n : 0;
1.1.1.2 misho 1980: }
1.1 misho 1981:
1982:
1983:
1.1.1.2 misho 1984: #ifdef SUPPORT_PCRE8
1985: /*************************************************
1986: * Print 8-bit character string *
1987: *************************************************/
1.1 misho 1988:
1.1.1.2 misho 1989: /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1990: If handed a NULL file, just counts chars without printing. */
1.1 misho 1991:
1.1.1.2 misho 1992: static int pchars(pcre_uint8 *p, int length, FILE *f)
1993: {
1.1.1.4 misho 1994: pcre_uint32 c = 0;
1.1.1.2 misho 1995: int yield = 0;
1.1 misho 1996:
1.1.1.2 misho 1997: if (length < 0)
1998: length = strlen((char *)p);
1.1 misho 1999:
1.1.1.2 misho 2000: while (length-- > 0)
2001: {
2002: #if !defined NOUTF
2003: if (use_utf)
2004: {
2005: int rc = utf82ord(p, &c);
2006: if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2007: {
2008: length -= rc - 1;
2009: p += rc;
2010: yield += pchar(c, f);
2011: continue;
2012: }
2013: }
2014: #endif
2015: c = *p++;
2016: yield += pchar(c, f);
2017: }
1.1 misho 2018:
1.1.1.2 misho 2019: return yield;
2020: }
1.1 misho 2021: #endif
2022:
2023:
2024:
1.1.1.2 misho 2025: #ifdef SUPPORT_PCRE16
1.1 misho 2026: /*************************************************
1.1.1.2 misho 2027: * Find length of 0-terminated 16-bit string *
1.1 misho 2028: *************************************************/
2029:
1.1.1.2 misho 2030: static int strlen16(PCRE_SPTR16 p)
1.1 misho 2031: {
1.1.1.5 ! misho 2032: PCRE_SPTR16 pp = p;
! 2033: while (*pp != 0) pp++;
! 2034: return (int)(pp - p);
1.1 misho 2035: }
1.1.1.2 misho 2036: #endif /* SUPPORT_PCRE16 */
1.1 misho 2037:
2038:
1.1.1.4 misho 2039:
2040: #ifdef SUPPORT_PCRE32
2041: /*************************************************
2042: * Find length of 0-terminated 32-bit string *
2043: *************************************************/
2044:
2045: static int strlen32(PCRE_SPTR32 p)
2046: {
1.1.1.5 ! misho 2047: PCRE_SPTR32 pp = p;
! 2048: while (*pp != 0) pp++;
! 2049: return (int)(pp - p);
1.1.1.4 misho 2050: }
2051: #endif /* SUPPORT_PCRE32 */
2052:
2053:
2054:
1.1.1.2 misho 2055: #ifdef SUPPORT_PCRE16
1.1 misho 2056: /*************************************************
1.1.1.2 misho 2057: * Print 16-bit character string *
1.1 misho 2058: *************************************************/
2059:
1.1.1.2 misho 2060: /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2061: If handed a NULL file, just counts chars without printing. */
1.1 misho 2062:
1.1.1.2 misho 2063: static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1.1 misho 2064: {
2065: int yield = 0;
2066:
1.1.1.2 misho 2067: if (length < 0)
2068: length = strlen16(p);
2069:
1.1 misho 2070: while (length-- > 0)
2071: {
1.1.1.4 misho 2072: pcre_uint32 c = *p++ & 0xffff;
1.1.1.2 misho 2073: #if !defined NOUTF
2074: if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1.1 misho 2075: {
1.1.1.2 misho 2076: int d = *p & 0xffff;
1.1.1.4 misho 2077: if (d >= 0xDC00 && d <= 0xDFFF)
1.1 misho 2078: {
1.1.1.2 misho 2079: c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2080: length--;
2081: p++;
1.1 misho 2082: }
2083: }
2084: #endif
1.1.1.2 misho 2085: yield += pchar(c, f);
2086: }
2087:
2088: return yield;
2089: }
2090: #endif /* SUPPORT_PCRE16 */
1.1 misho 2091:
2092:
1.1.1.2 misho 2093:
1.1.1.4 misho 2094: #ifdef SUPPORT_PCRE32
2095: /*************************************************
2096: * Print 32-bit character string *
2097: *************************************************/
2098:
2099: /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2100: If handed a NULL file, just counts chars without printing. */
2101:
2102: static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2103: {
2104: int yield = 0;
2105:
2106: (void)(utf); /* Avoid compiler warning */
2107:
2108: if (length < 0)
2109: length = strlen32(p);
2110:
2111: while (length-- > 0)
2112: {
2113: pcre_uint32 c = *p++;
2114: yield += pchar(c, f);
2115: }
2116:
2117: return yield;
2118: }
2119: #endif /* SUPPORT_PCRE32 */
2120:
2121:
2122:
1.1.1.2 misho 2123: #ifdef SUPPORT_PCRE8
2124: /*************************************************
2125: * Read a capture name (8-bit) and check it *
2126: *************************************************/
2127:
2128: static pcre_uint8 *
2129: read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2130: {
2131: pcre_uint8 *npp = *pp;
2132: while (isalnum(*p)) *npp++ = *p++;
2133: *npp++ = 0;
2134: *npp = 0;
2135: if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2136: {
2137: fprintf(outfile, "no parentheses with name \"");
2138: PCHARSV(*pp, 0, -1, outfile);
2139: fprintf(outfile, "\"\n");
1.1 misho 2140: }
2141:
1.1.1.2 misho 2142: *pp = npp;
2143: return p;
1.1 misho 2144: }
1.1.1.2 misho 2145: #endif /* SUPPORT_PCRE8 */
2146:
2147:
2148:
2149: #ifdef SUPPORT_PCRE16
2150: /*************************************************
2151: * Read a capture name (16-bit) and check it *
2152: *************************************************/
2153:
2154: /* Note that the text being read is 8-bit. */
2155:
2156: static pcre_uint8 *
2157: read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2158: {
2159: pcre_uint16 *npp = *pp;
2160: while (isalnum(*p)) *npp++ = *p++;
2161: *npp++ = 0;
2162: *npp = 0;
2163: if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2164: {
2165: fprintf(outfile, "no parentheses with name \"");
2166: PCHARSV(*pp, 0, -1, outfile);
2167: fprintf(outfile, "\"\n");
2168: }
2169: *pp = npp;
2170: return p;
2171: }
2172: #endif /* SUPPORT_PCRE16 */
1.1 misho 2173:
2174:
2175:
1.1.1.4 misho 2176: #ifdef SUPPORT_PCRE32
2177: /*************************************************
2178: * Read a capture name (32-bit) and check it *
2179: *************************************************/
2180:
2181: /* Note that the text being read is 8-bit. */
2182:
2183: static pcre_uint8 *
2184: read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2185: {
2186: pcre_uint32 *npp = *pp;
2187: while (isalnum(*p)) *npp++ = *p++;
2188: *npp++ = 0;
2189: *npp = 0;
2190: if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2191: {
2192: fprintf(outfile, "no parentheses with name \"");
2193: PCHARSV(*pp, 0, -1, outfile);
2194: fprintf(outfile, "\"\n");
2195: }
2196: *pp = npp;
2197: return p;
2198: }
2199: #endif /* SUPPORT_PCRE32 */
2200:
2201:
2202:
1.1 misho 2203: /*************************************************
2204: * Callout function *
2205: *************************************************/
2206:
2207: /* Called from PCRE as a result of the (?C) item. We print out where we are in
2208: the match. Yield zero unless more callouts than the fail count, or the callout
2209: data is not zero. */
2210:
2211: static int callout(pcre_callout_block *cb)
2212: {
2213: FILE *f = (first_callout | callout_extra)? outfile : NULL;
2214: int i, pre_start, post_start, subject_length;
2215:
2216: if (callout_extra)
2217: {
2218: fprintf(f, "Callout %d: last capture = %d\n",
2219: cb->callout_number, cb->capture_last);
2220:
2221: for (i = 0; i < cb->capture_top * 2; i += 2)
2222: {
2223: if (cb->offset_vector[i] < 0)
2224: fprintf(f, "%2d: <unset>\n", i/2);
2225: else
2226: {
2227: fprintf(f, "%2d: ", i/2);
1.1.1.2 misho 2228: PCHARSV(cb->subject, cb->offset_vector[i],
1.1 misho 2229: cb->offset_vector[i+1] - cb->offset_vector[i], f);
2230: fprintf(f, "\n");
2231: }
2232: }
2233: }
2234:
2235: /* Re-print the subject in canonical form, the first time or if giving full
2236: datails. On subsequent calls in the same match, we use pchars just to find the
2237: printed lengths of the substrings. */
2238:
2239: if (f != NULL) fprintf(f, "--->");
2240:
1.1.1.2 misho 2241: PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2242: PCHARS(post_start, cb->subject, cb->start_match,
1.1 misho 2243: cb->current_position - cb->start_match, f);
2244:
1.1.1.2 misho 2245: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1.1 misho 2246:
1.1.1.2 misho 2247: PCHARSV(cb->subject, cb->current_position,
1.1 misho 2248: cb->subject_length - cb->current_position, f);
2249:
2250: if (f != NULL) fprintf(f, "\n");
2251:
2252: /* Always print appropriate indicators, with callout number if not already
2253: shown. For automatic callouts, show the pattern offset. */
2254:
2255: if (cb->callout_number == 255)
2256: {
2257: fprintf(outfile, "%+3d ", cb->pattern_position);
2258: if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2259: }
2260: else
2261: {
2262: if (callout_extra) fprintf(outfile, " ");
2263: else fprintf(outfile, "%3d ", cb->callout_number);
2264: }
2265:
2266: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2267: fprintf(outfile, "^");
2268:
2269: if (post_start > 0)
2270: {
2271: for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2272: fprintf(outfile, "^");
2273: }
2274:
2275: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2276: fprintf(outfile, " ");
2277:
2278: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2279: pbuffer + cb->pattern_position);
2280:
2281: fprintf(outfile, "\n");
2282: first_callout = 0;
2283:
2284: if (cb->mark != last_callout_mark)
2285: {
1.1.1.2 misho 2286: if (cb->mark == NULL)
2287: fprintf(outfile, "Latest Mark: <unset>\n");
2288: else
2289: {
2290: fprintf(outfile, "Latest Mark: ");
2291: PCHARSV(cb->mark, 0, -1, outfile);
2292: putc('\n', outfile);
2293: }
1.1 misho 2294: last_callout_mark = cb->mark;
2295: }
2296:
2297: if (cb->callout_data != NULL)
2298: {
2299: int callout_data = *((int *)(cb->callout_data));
2300: if (callout_data != 0)
2301: {
2302: fprintf(outfile, "Callout data = %d\n", callout_data);
2303: return callout_data;
2304: }
2305: }
2306:
2307: return (cb->callout_number != callout_fail_id)? 0 :
2308: (++callout_count >= callout_fail_count)? 1 : 0;
2309: }
2310:
2311:
2312: /*************************************************
2313: * Local malloc functions *
2314: *************************************************/
2315:
2316: /* Alternative malloc function, to test functionality and save the size of a
2317: compiled re, which is the first store request that pcre_compile() makes. The
2318: show_malloc variable is set only during matching. */
2319:
2320: static void *new_malloc(size_t size)
2321: {
2322: void *block = malloc(size);
2323: if (show_malloc)
2324: fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2325: return block;
2326: }
2327:
2328: static void new_free(void *block)
2329: {
2330: if (show_malloc)
2331: fprintf(outfile, "free %p\n", block);
2332: free(block);
2333: }
2334:
2335: /* For recursion malloc/free, to test stacking calls */
2336:
2337: static void *stack_malloc(size_t size)
2338: {
2339: void *block = malloc(size);
2340: if (show_malloc)
2341: fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2342: return block;
2343: }
2344:
2345: static void stack_free(void *block)
2346: {
2347: if (show_malloc)
2348: fprintf(outfile, "stack_free %p\n", block);
2349: free(block);
2350: }
2351:
2352:
1.1.1.2 misho 2353: /*************************************************
2354: * Call pcre_fullinfo() *
2355: *************************************************/
2356:
2357: /* Get one piece of information from the pcre_fullinfo() function. When only
1.1.1.4 misho 2358: one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
1.1.1.2 misho 2359: value, but the code is defensive.
2360:
2361: Arguments:
2362: re compiled regex
2363: study study data
2364: option PCRE_INFO_xxx option
2365: ptr where to put the data
2366:
2367: Returns: 0 when OK, < 0 on error
2368: */
2369:
2370: static int
2371: new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2372: {
2373: int rc;
2374:
1.1.1.4 misho 2375: if (pcre_mode == PCRE32_MODE)
2376: #ifdef SUPPORT_PCRE32
2377: rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2378: #else
2379: rc = PCRE_ERROR_BADMODE;
2380: #endif
2381: else if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 2382: #ifdef SUPPORT_PCRE16
2383: rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2384: #else
2385: rc = PCRE_ERROR_BADMODE;
2386: #endif
2387: else
2388: #ifdef SUPPORT_PCRE8
2389: rc = pcre_fullinfo(re, study, option, ptr);
2390: #else
2391: rc = PCRE_ERROR_BADMODE;
2392: #endif
2393:
1.1.1.4 misho 2394: if (rc < 0 && rc != PCRE_ERROR_UNSET)
1.1.1.2 misho 2395: {
2396: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1.1.1.4 misho 2397: pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
1.1.1.2 misho 2398: if (rc == PCRE_ERROR_BADMODE)
1.1.1.4 misho 2399: fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2400: "%d-bit mode\n", 8 * CHAR_SIZE,
2401: 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
1.1.1.2 misho 2402: }
2403:
2404: return rc;
2405: }
2406:
2407:
2408:
2409: /*************************************************
2410: * Swap byte functions *
2411: *************************************************/
2412:
2413: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2414: value, respectively.
2415:
2416: Arguments:
2417: value any number
2418:
2419: Returns: the byte swapped value
2420: */
2421:
2422: static pcre_uint32
2423: swap_uint32(pcre_uint32 value)
2424: {
2425: return ((value & 0x000000ff) << 24) |
2426: ((value & 0x0000ff00) << 8) |
2427: ((value & 0x00ff0000) >> 8) |
2428: (value >> 24);
2429: }
2430:
2431: static pcre_uint16
2432: swap_uint16(pcre_uint16 value)
2433: {
2434: return (value >> 8) | (value << 8);
2435: }
2436:
2437:
2438:
2439: /*************************************************
2440: * Flip bytes in a compiled pattern *
2441: *************************************************/
2442:
2443: /* This function is called if the 'F' option was present on a pattern that is
2444: to be written to a file. We flip the bytes of all the integer fields in the
2445: regex data block and the study block. In 16-bit mode this also flips relevant
2446: bytes in the pattern itself. This is to make it possible to test PCRE's
2447: ability to reload byte-flipped patterns, e.g. those compiled on a different
2448: architecture. */
2449:
1.1.1.4 misho 2450: #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip a compiled 8-bit or 16-bit pattern in place. The header fields
and, when extra is not NULL, the study data are always flipped. When 16-bit
support is compiled in and pcre_mode is PCRE16_MODE, the name table and the
compiled code that follows it are flipped as well, one 16-bit unit at a time.

Arguments:
  ere     the compiled pattern
  extra   the extra block holding the study data, or NULL
*/
1.1.1.2 misho 2451: static void
1.1.1.4 misho 2452: regexflip8_or_16(pcre *ere, pcre_extra *extra)
1.1.1.2 misho 2453: {
1.1.1.4 misho 2454: real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
1.1.1.2 misho 2455: #ifdef SUPPORT_PCRE16
2456: int op;
/* ptr and length are taken from the header here, before the header fields
themselves are flipped below. */
2457: pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2458: int length = re->name_count * re->name_entry_size;
2459: #ifdef SUPPORT_UTF
2460: BOOL utf = (re->options & PCRE_UTF16) != 0;
2461: BOOL utf16_char = FALSE;
2462: #endif /* SUPPORT_UTF */
2463: #endif /* SUPPORT_PCRE16 */
2464:
2465: /* Always flip the bytes in the main data block and study blocks. */
2466:
2467: re->magic_number = REVERSED_MAGIC_NUMBER;
2468: re->size = swap_uint32(re->size);
2469: re->options = swap_uint32(re->options);
1.1.1.4 misho 2470: re->flags = swap_uint32(re->flags);
2471: re->limit_match = swap_uint32(re->limit_match);
2472: re->limit_recursion = swap_uint32(re->limit_recursion);
1.1.1.2 misho 2473: re->first_char = swap_uint16(re->first_char);
2474: re->req_char = swap_uint16(re->req_char);
1.1.1.4 misho 2475: re->max_lookbehind = swap_uint16(re->max_lookbehind);
2476: re->top_bracket = swap_uint16(re->top_bracket);
2477: re->top_backref = swap_uint16(re->top_backref);
1.1.1.2 misho 2478: re->name_table_offset = swap_uint16(re->name_table_offset);
2479: re->name_entry_size = swap_uint16(re->name_entry_size);
2480: re->name_count = swap_uint16(re->name_count);
1.1.1.4 misho 2481: re->ref_count = swap_uint16(re->ref_count);
1.1.1.2 misho 2482:
2483: if (extra != NULL)
2484: {
/* NOTE(review): study_data is dereferenced without a NULL check; confirm that
callers only pass an extra block that carries study data. */
2485: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2486: rsd->size = swap_uint32(rsd->size);
2487: rsd->flags = swap_uint32(rsd->flags);
2488: rsd->minlength = swap_uint32(rsd->minlength);
2489: }
2490:
2491: /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2492: in the name table, if present, and then in the pattern itself. */
2493:
2494: #ifdef SUPPORT_PCRE16
1.1.1.4 misho 2495: if (pcre_mode != PCRE16_MODE) return;
1.1.1.2 misho 2496:
/* Each pass flips the 'length' units left over from the previous item (the
name table on the first pass), then flips one opcode and works out how many
units belong to it. */
2497: while(TRUE)
2498: {
2499: /* Swap previous characters. */
2500: while (length-- > 0)
2501: {
2502: *ptr = swap_uint16(*ptr);
2503: ptr++;
2504: }
2505: #ifdef SUPPORT_UTF
2506: if (utf16_char)
2507: {
/* ptr[-1] has already been flipped above, so this tests the stored (flipped)
form of the last unit against the high-surrogate pattern. */
2508: if ((ptr[-1] & 0xfc00) == 0xd800)
2509: {
2510: /* We know that there is only one extra character in UTF-16. */
2511: *ptr = swap_uint16(*ptr);
2512: ptr++;
2513: }
2514: }
2515: utf16_char = FALSE;
2516: #endif /* SUPPORT_UTF */
2517:
2518: /* Get next opcode. */
1.1 misho 2519:
/* op keeps the unflipped opcode for the switch; the flipped form is stored. */
1.1.1.2 misho 2520: length = 0;
2521: op = *ptr;
2522: *ptr++ = swap_uint16(op);
1.1 misho 2523:
1.1.1.2 misho 2524: switch (op)
2525: {
2526: case OP_END:
2527: return;
1.1 misho 2528:
/* For the opcodes listed here, utf16_char is set in UTF mode so that the next
pass checks for one extra unit after the item; they then share the default
length calculation. */
1.1.1.2 misho 2529: #ifdef SUPPORT_UTF
2530: case OP_CHAR:
2531: case OP_CHARI:
2532: case OP_NOT:
2533: case OP_NOTI:
2534: case OP_STAR:
2535: case OP_MINSTAR:
2536: case OP_PLUS:
2537: case OP_MINPLUS:
2538: case OP_QUERY:
2539: case OP_MINQUERY:
2540: case OP_UPTO:
2541: case OP_MINUPTO:
2542: case OP_EXACT:
2543: case OP_POSSTAR:
2544: case OP_POSPLUS:
2545: case OP_POSQUERY:
2546: case OP_POSUPTO:
2547: case OP_STARI:
2548: case OP_MINSTARI:
2549: case OP_PLUSI:
2550: case OP_MINPLUSI:
2551: case OP_QUERYI:
2552: case OP_MINQUERYI:
2553: case OP_UPTOI:
2554: case OP_MINUPTOI:
2555: case OP_EXACTI:
2556: case OP_POSSTARI:
2557: case OP_POSPLUSI:
2558: case OP_POSQUERYI:
2559: case OP_POSUPTOI:
2560: case OP_NOTSTAR:
2561: case OP_NOTMINSTAR:
2562: case OP_NOTPLUS:
2563: case OP_NOTMINPLUS:
2564: case OP_NOTQUERY:
2565: case OP_NOTMINQUERY:
2566: case OP_NOTUPTO:
2567: case OP_NOTMINUPTO:
2568: case OP_NOTEXACT:
2569: case OP_NOTPOSSTAR:
2570: case OP_NOTPOSPLUS:
2571: case OP_NOTPOSQUERY:
2572: case OP_NOTPOSUPTO:
2573: case OP_NOTSTARI:
2574: case OP_NOTMINSTARI:
2575: case OP_NOTPLUSI:
2576: case OP_NOTMINPLUSI:
2577: case OP_NOTQUERYI:
2578: case OP_NOTMINQUERYI:
2579: case OP_NOTUPTOI:
2580: case OP_NOTMINUPTOI:
2581: case OP_NOTEXACTI:
2582: case OP_NOTPOSSTARI:
2583: case OP_NOTPOSPLUSI:
2584: case OP_NOTPOSQUERYI:
2585: case OP_NOTPOSUPTOI:
2586: if (utf) utf16_char = TRUE;
2587: #endif
2588: /* Fall through. */
1.1 misho 2589:
/* The opcode itself has been handled, hence the "- 1". */
1.1.1.2 misho 2590: default:
2591: length = OP_lengths16[op] - 1;
2592: break;
2593:
2594: case OP_CLASS:
2595: case OP_NCLASS:
2596: /* Skip the character bit map. */
2597: ptr += 32/sizeof(pcre_uint16);
2598: length = 0;
2599: break;
2600:
2601: case OP_XCLASS:
/* The item size is read from the still-unflipped link field; the opcode, the
link and the flags unit are subtracted because they are flipped explicitly
below. */
2602: /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2603: if (LINK_SIZE > 1)
2604: length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2605: - (1 + LINK_SIZE + 1));
2606: else
2607: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1.1 misho 2608:
1.1.1.2 misho 2609: /* Reverse the size of the XCLASS instance. */
2610: *ptr = swap_uint16(*ptr);
2611: ptr++;
2612: if (LINK_SIZE > 1)
2613: {
2614: *ptr = swap_uint16(*ptr);
2615: ptr++;
2616: }
1.1 misho 2617:
/* Flags unit: flipped in place, with the unflipped value tested for XCL_MAP. */
1.1.1.2 misho 2618: op = *ptr;
2619: *ptr = swap_uint16(op);
2620: ptr++;
2621: if ((op & XCL_MAP) != 0)
2622: {
2623: /* Skip the character bit map. */
2624: ptr += 32/sizeof(pcre_uint16);
2625: length -= 32/sizeof(pcre_uint16);
2626: }
2627: break;
2628: }
2629: }
2630: /* Control should never reach here in 16 bit mode. */
2631: #endif /* SUPPORT_PCRE16 */
1.1 misho 2632: }
1.1.1.4 misho 2633: #endif /* SUPPORT_PCRE[8|16] */
2634:
2635:
2636:
2637: #if defined SUPPORT_PCRE32
/* Byte-flip a compiled 32-bit pattern in place: the header fields, the study
data when extra is not NULL, then the name table and the compiled code that
follows it, one 32-bit unit at a time.

Arguments:
  ere     the compiled pattern
  extra   the extra block holding the study data, or NULL
*/
2638: static void
2639: regexflip_32(pcre *ere, pcre_extra *extra)
2640: {
2641: real_pcre32 *re = (real_pcre32 *)ere;
2642: int op;
/* ptr and length are taken from the header here, before the header fields
themselves are flipped below. */
2643: pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644: int length = re->name_count * re->name_entry_size;
2645:
2646: /* Always flip the bytes in the main data block and study blocks. */
2647:
2648: re->magic_number = REVERSED_MAGIC_NUMBER;
2649: re->size = swap_uint32(re->size);
2650: re->options = swap_uint32(re->options);
2651: re->flags = swap_uint32(re->flags);
2652: re->limit_match = swap_uint32(re->limit_match);
2653: re->limit_recursion = swap_uint32(re->limit_recursion);
2654: re->first_char = swap_uint32(re->first_char);
2655: re->req_char = swap_uint32(re->req_char);
2656: re->max_lookbehind = swap_uint16(re->max_lookbehind);
2657: re->top_bracket = swap_uint16(re->top_bracket);
2658: re->top_backref = swap_uint16(re->top_backref);
2659: re->name_table_offset = swap_uint16(re->name_table_offset);
2660: re->name_entry_size = swap_uint16(re->name_entry_size);
2661: re->name_count = swap_uint16(re->name_count);
2662: re->ref_count = swap_uint16(re->ref_count);
2663:
2664: if (extra != NULL)
2665: {
/* NOTE(review): study_data is dereferenced without a NULL check; confirm that
callers only pass an extra block that carries study data. */
2666: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2667: rsd->size = swap_uint32(rsd->size);
2668: rsd->flags = swap_uint32(rsd->flags);
2669: rsd->minlength = swap_uint32(rsd->minlength);
2670: }
2671:
2672: /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2673: the pattern itself. */
2674:
/* Each pass flips the 'length' units left over from the previous item (the
name table on the first pass), then flips one opcode and works out how many
units belong to it. */
2675: while(TRUE)
2676: {
2677: /* Swap previous characters. */
2678: while (length-- > 0)
2679: {
2680: *ptr = swap_uint32(*ptr);
2681: ptr++;
2682: }
2683:
2684: /* Get next opcode. */
2685:
/* op keeps the unflipped opcode for the switch; the flipped form is stored. */
2686: length = 0;
2687: op = *ptr;
2688: *ptr++ = swap_uint32(op);
2689:
2690: switch (op)
2691: {
2692: case OP_END:
2693: return;
2694:
/* The opcode itself has been handled, hence the "- 1". */
2695: default:
2696: length = OP_lengths32[op] - 1;
2697: break;
2698:
2699: case OP_CLASS:
2700: case OP_NCLASS:
2701: /* Skip the character bit map. */
2702: ptr += 32/sizeof(pcre_uint32);
2703: length = 0;
2704: break;
2705:
2706: case OP_XCLASS:
/* The item size is read from the still-unflipped link field; the opcode, the
link and the flags unit are subtracted because they are flipped explicitly
below. */
2707: /* LINK_SIZE can only be 1 in 32-bit mode. */
2708: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2709:
2710: /* Reverse the size of the XCLASS instance. */
2711: *ptr = swap_uint32(*ptr);
2712: ptr++;
2713:
/* Flags unit: flipped in place, with the unflipped value tested for XCL_MAP. */
2714: op = *ptr;
2715: *ptr = swap_uint32(op);
2716: ptr++;
2717: if ((op & XCL_MAP) != 0)
2718: {
2719: /* Skip the character bit map. */
2720: ptr += 32/sizeof(pcre_uint32);
2721: length -= 32/sizeof(pcre_uint32);
2722: }
2723: break;
2724: }
2725: }
2726: /* Control should never reach here in 32 bit mode. */
2727: }
2728:
2729: #endif /* SUPPORT_PCRE32 */
2730:
2731:
2732:
2733: static void
2734: regexflip(pcre *ere, pcre_extra *extra)
2735: {
2736: #if defined SUPPORT_PCRE32
2737: if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2738: regexflip_32(ere, extra);
2739: #endif
2740: #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2741: if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2742: regexflip8_or_16(ere, extra);
2743: #endif
2744: }
1.1 misho 2745:
2746:
2747:
2748: /*************************************************
2749: * Check match or recursion limit *
2750: *************************************************/
2751:
2752: static int
1.1.1.2 misho 2753: check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1.1 misho 2754: int start_offset, int options, int *use_offsets, int use_size_offsets,
2755: int flag, unsigned long int *limit, int errnumber, const char *msg)
2756: {
2757: int count;
2758: int min = 0;
2759: int mid = 64;
2760: int max = -1;
2761:
2762: extra->flags |= flag;
2763:
2764: for (;;)
2765: {
2766: *limit = mid;
2767:
1.1.1.2 misho 2768: PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1.1 misho 2769: use_offsets, use_size_offsets);
2770:
2771: if (count == errnumber)
2772: {
2773: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2774: min = mid;
2775: mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2776: }
2777:
2778: else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2779: count == PCRE_ERROR_PARTIAL)
2780: {
2781: if (mid == min + 1)
2782: {
2783: fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2784: break;
2785: }
2786: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2787: max = mid;
2788: mid = (min + mid)/2;
2789: }
2790: else break; /* Some other error */
2791: }
2792:
2793: extra->flags &= ~flag;
2794: return count;
2795: }
2796:
2797:
2798:
2799: /*************************************************
2800: * Case-independent strncmp() function *
2801: *************************************************/
2802:
2803: /*
2804: Arguments:
2805: s first string
2806: t second string
2807: n number of characters to compare
2808:
2809: Returns: < 0, = 0, or > 0, according to the comparison
2810: */
2811:
2812: static int
1.1.1.2 misho 2813: strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1.1 misho 2814: {
2815: while (n--)
2816: {
2817: int c = tolower(*s++) - tolower(*t++);
2818: if (c) return c;
2819: }
2820: return 0;
2821: }
2822:
2823:
2824:
2825: /*************************************************
1.1.1.5 ! misho 2826: * Check multicharacter option *
1.1 misho 2827: *************************************************/
2828:
2829: /* This is used both at compile and run-time to check for <xxx> escapes. Print
2830: a message and return 0 if there is no match.
2831:
2832: Arguments:
2833: p points after the leading '<'
2834: f file for error message
1.1.1.5 ! misho 2835: nl TRUE to check only for newline settings
! 2836: stype "modifier" or "escape sequence"
1.1 misho 2837:
2838: Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2839: */
2840:
2841: static int
1.1.1.5 ! misho 2842: check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
1.1 misho 2843: {
1.1.1.2 misho 2844: if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2845: if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2846: if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2847: if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2848: if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2849: if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2850: if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1.1.1.5 ! misho 2851:
! 2852: if (!nl)
! 2853: {
! 2854: if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
! 2855: }
! 2856:
! 2857: fprintf(f, "Unknown %s at: <%s\n", stype, p);
1.1 misho 2858: return 0;
2859: }
2860:
2861:
2862:
/*************************************************
*                Usage function                  *
*************************************************/

/* Write a summary of the command line syntax and the available options to
stdout. Options that exist only in some builds are listed only when the
corresponding support has been compiled in.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");

/* The library selection options: -8 is accepted by the option scanner in the
same way as -16 and -32, so it is listed alongside them. */

#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");

/* These two -C arguments are handled by the option scanner as well. */

printf(" ebcdic compiled for EBCDIC character code [0, 1]\n");
printf(" ebcdic-nl NL code if compiled for EBCDIC, otherwise 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
printf(" -TM same as -tm, but show total time at the end\n");
}
2924:
2925:
2926:
2927: /*************************************************
2928: * Main Program *
2929: *************************************************/
2930:
2931: /* Read lines from named file or stdin and write to named file or stdout; lines
2932: consist of a regular expression, in delimiters and optionally followed by
2933: options, followed by a set of test data, terminated by an empty line. */
2934:
2935: int main(int argc, char **argv)
2936: {
2937: FILE *infile = stdin;
1.1.1.2 misho 2938: const char *version;
1.1 misho 2939: int options = 0;
2940: int study_options = 0;
2941: int default_find_match_limit = FALSE;
1.1.1.5 ! misho 2942: pcre_uint32 default_options = 0;
1.1 misho 2943: int op = 1;
2944: int timeit = 0;
2945: int timeitm = 0;
1.1.1.5 ! misho 2946: int showtotaltimes = 0;
1.1 misho 2947: int showinfo = 0;
2948: int showstore = 0;
2949: int force_study = -1;
2950: int force_study_options = 0;
2951: int quiet = 0;
2952: int size_offsets = 45;
2953: int size_offsets_max;
2954: int *offsets = NULL;
2955: int debug = 0;
2956: int done = 0;
2957: int all_use_dfa = 0;
1.1.1.3 misho 2958: int verify_jit = 0;
1.1 misho 2959: int yield = 0;
2960: int stack_size;
1.1.1.4 misho 2961: pcre_uint8 *dbuffer = NULL;
1.1.1.5 ! misho 2962: pcre_uint8 lockout[24] = { 0 };
1.1.1.4 misho 2963: size_t dbuffer_size = 1u << 14;
1.1.1.5 ! misho 2964: clock_t total_compile_time = 0;
! 2965: clock_t total_study_time = 0;
! 2966: clock_t total_match_time = 0;
1.1 misho 2967:
1.1.1.3 misho 2968: #if !defined NOPOSIX
2969: int posix = 0;
2970: #endif
2971: #if !defined NODFA
2972: int *dfa_workspace = NULL;
2973: #endif
2974:
1.1 misho 2975: pcre_jit_stack *jit_stack = NULL;
2976:
1.1.1.2 misho 2977: /* These vectors store, end-to-end, a list of zero-terminated captured
2978: substring names, each list itself being terminated by an empty name. Assume
2979: that 1024 is plenty long enough for the few names we'll be testing. It is
1.1.1.4 misho 2980: easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
1.1.1.2 misho 2981: for the actual memory, to ensure alignment. */
2982:
1.1.1.4 misho 2983: pcre_uint32 copynames[1024];
2984: pcre_uint32 getnames[1024];
2985:
2986: #ifdef SUPPORT_PCRE32
2987: pcre_uint32 *cn32ptr;
2988: pcre_uint32 *gn32ptr;
2989: #endif
1.1.1.2 misho 2990:
2991: #ifdef SUPPORT_PCRE16
1.1.1.4 misho 2992: pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2993: pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
1.1.1.2 misho 2994: pcre_uint16 *cn16ptr;
2995: pcre_uint16 *gn16ptr;
2996: #endif
1.1 misho 2997:
1.1.1.2 misho 2998: #ifdef SUPPORT_PCRE8
2999: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3000: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3001: pcre_uint8 *cn8ptr;
3002: pcre_uint8 *gn8ptr;
3003: #endif
1.1 misho 3004:
1.1.1.2 misho 3005: /* Get buffers from malloc() so that valgrind will check their misuse when
1.1.1.4 misho 3006: debugging. They grow automatically when very long lines are read. The 16-
3007: and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
1.1.1.2 misho 3008:
3009: buffer = (pcre_uint8 *)malloc(buffer_size);
3010: pbuffer = (pcre_uint8 *)malloc(buffer_size);
1.1 misho 3011:
3012: /* The outfile variable is static so that new_malloc can use it. */
3013:
3014: outfile = stdout;
3015:
3016: /* The following _setmode() stuff is some Windows magic that tells its runtime
3017: library to translate CRLF into a single LF character. At least, that's what
3018: I've been told: never having used Windows I take this all on trust. Originally
3019: it set 0x8000, but then I was advised that _O_BINARY was better. */
3020:
3021: #if defined(_WIN32) || defined(WIN32)
3022: _setmode( _fileno( stdout ), _O_BINARY );
3023: #endif
3024:
1.1.1.2 misho 3025: /* Get the version number: both pcre_version() and pcre16_version() give the
3026: same answer. We just need to ensure that we call one that is available. */
3027:
1.1.1.4 misho 3028: #if defined SUPPORT_PCRE8
1.1.1.2 misho 3029: version = pcre_version();
1.1.1.4 misho 3030: #elif defined SUPPORT_PCRE16
1.1.1.2 misho 3031: version = pcre16_version();
1.1.1.4 misho 3032: #elif defined SUPPORT_PCRE32
3033: version = pcre32_version();
1.1.1.2 misho 3034: #endif
3035:
1.1 misho 3036: /* Scan options */
3037:
3038: while (argc > 1 && argv[op][0] == '-')
3039: {
1.1.1.2 misho 3040: pcre_uint8 *endptr;
1.1.1.3 misho 3041: char *arg = argv[op];
1.1 misho 3042:
1.1.1.3 misho 3043: if (strcmp(arg, "-m") == 0) showstore = 1;
3044: else if (strcmp(arg, "-s") == 0) force_study = 0;
3045:
3046: else if (strncmp(arg, "-s+", 3) == 0)
1.1 misho 3047: {
1.1.1.3 misho 3048: arg += 3;
3049: if (*arg == '+') { arg++; verify_jit = TRUE; }
1.1 misho 3050: force_study = 1;
1.1.1.3 misho 3051: if (*arg == 0)
3052: force_study_options = jit_study_bits[6];
3053: else if (*arg >= '1' && *arg <= '7')
3054: force_study_options = jit_study_bits[*arg - '1'];
3055: else goto BAD_ARG;
1.1 misho 3056: }
1.1.1.4 misho 3057: else if (strcmp(arg, "-8") == 0)
3058: {
3059: #ifdef SUPPORT_PCRE8
3060: pcre_mode = PCRE8_MODE;
3061: #else
3062: printf("** This version of PCRE was built without 8-bit support\n");
3063: exit(1);
3064: #endif
3065: }
1.1.1.3 misho 3066: else if (strcmp(arg, "-16") == 0)
1.1.1.2 misho 3067: {
3068: #ifdef SUPPORT_PCRE16
1.1.1.4 misho 3069: pcre_mode = PCRE16_MODE;
1.1.1.2 misho 3070: #else
3071: printf("** This version of PCRE was built without 16-bit support\n");
3072: exit(1);
3073: #endif
3074: }
1.1.1.4 misho 3075: else if (strcmp(arg, "-32") == 0)
3076: {
3077: #ifdef SUPPORT_PCRE32
3078: pcre_mode = PCRE32_MODE;
3079: #else
3080: printf("** This version of PCRE was built without 32-bit support\n");
3081: exit(1);
3082: #endif
3083: }
1.1.1.3 misho 3084: else if (strcmp(arg, "-q") == 0) quiet = 1;
3085: else if (strcmp(arg, "-b") == 0) debug = 1;
3086: else if (strcmp(arg, "-i") == 0) showinfo = 1;
3087: else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3088: else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
1.1.1.5 ! misho 3089: else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
1.1 misho 3090: #if !defined NODFA
1.1.1.3 misho 3091: else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
1.1 misho 3092: #endif
1.1.1.3 misho 3093: else if (strcmp(arg, "-o") == 0 && argc > 2 &&
1.1.1.2 misho 3094: ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 3095: *endptr == 0))
3096: {
3097: op++;
3098: argc--;
3099: }
1.1.1.5 ! misho 3100: else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
! 3101: strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
1.1 misho 3102: {
3103: int temp;
1.1.1.5 ! misho 3104: int both = arg[2] == 0;
! 3105: showtotaltimes = arg[1] == 'T';
1.1.1.2 misho 3106: if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1.1 misho 3107: *endptr == 0))
3108: {
3109: timeitm = temp;
3110: op++;
3111: argc--;
3112: }
3113: else timeitm = LOOPREPEAT;
3114: if (both) timeit = timeitm;
3115: }
1.1.1.3 misho 3116: else if (strcmp(arg, "-S") == 0 && argc > 2 &&
1.1.1.2 misho 3117: ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 3118: *endptr == 0))
3119: {
1.1.1.4 misho 3120: #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
1.1 misho 3121: printf("PCRE: -S not supported on this OS\n");
3122: exit(1);
3123: #else
3124: int rc;
3125: struct rlimit rlim;
3126: getrlimit(RLIMIT_STACK, &rlim);
3127: rlim.rlim_cur = stack_size * 1024 * 1024;
3128: rc = setrlimit(RLIMIT_STACK, &rlim);
3129: if (rc != 0)
3130: {
3131: printf("PCRE: setrlimit() failed with error %d\n", rc);
3132: exit(1);
3133: }
3134: op++;
3135: argc--;
3136: #endif
3137: }
3138: #if !defined NOPOSIX
1.1.1.3 misho 3139: else if (strcmp(arg, "-p") == 0) posix = 1;
1.1 misho 3140: #endif
1.1.1.3 misho 3141: else if (strcmp(arg, "-C") == 0)
1.1 misho 3142: {
3143: int rc;
3144: unsigned long int lrc;
1.1.1.2 misho 3145:
3146: if (argc > 2)
3147: {
3148: if (strcmp(argv[op + 1], "linksize") == 0)
3149: {
3150: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3151: printf("%d\n", rc);
3152: yield = rc;
1.1.1.4 misho 3153:
3154: #ifdef __VMS
3155: vms_setsymbol("LINKSIZE",0,yield );
3156: #endif
1.1.1.2 misho 3157: }
1.1.1.4 misho 3158: else if (strcmp(argv[op + 1], "pcre8") == 0)
1.1.1.2 misho 3159: {
3160: #ifdef SUPPORT_PCRE8
3161: printf("1\n");
3162: yield = 1;
3163: #else
3164: printf("0\n");
3165: yield = 0;
3166: #endif
1.1.1.4 misho 3167: #ifdef __VMS
3168: vms_setsymbol("PCRE8",0,yield );
3169: #endif
1.1.1.2 misho 3170: }
1.1.1.4 misho 3171: else if (strcmp(argv[op + 1], "pcre16") == 0)
1.1.1.2 misho 3172: {
3173: #ifdef SUPPORT_PCRE16
3174: printf("1\n");
3175: yield = 1;
3176: #else
3177: printf("0\n");
3178: yield = 0;
3179: #endif
1.1.1.4 misho 3180: #ifdef __VMS
3181: vms_setsymbol("PCRE16",0,yield );
3182: #endif
1.1.1.2 misho 3183: }
1.1.1.4 misho 3184: else if (strcmp(argv[op + 1], "pcre32") == 0)
1.1.1.2 misho 3185: {
1.1.1.4 misho 3186: #ifdef SUPPORT_PCRE32
3187: printf("1\n");
3188: yield = 1;
1.1.1.2 misho 3189: #else
1.1.1.4 misho 3190: printf("0\n");
3191: yield = 0;
3192: #endif
3193: #ifdef __VMS
3194: vms_setsymbol("PCRE32",0,yield );
3195: #endif
3196: }
3197: else if (strcmp(argv[op + 1], "utf") == 0)
3198: {
3199: #ifdef SUPPORT_PCRE8
3200: if (pcre_mode == PCRE8_MODE)
3201: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3202: #endif
3203: #ifdef SUPPORT_PCRE16
3204: if (pcre_mode == PCRE16_MODE)
3205: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3206: #endif
3207: #ifdef SUPPORT_PCRE32
3208: if (pcre_mode == PCRE32_MODE)
3209: (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3210: #endif
1.1.1.2 misho 3211: printf("%d\n", rc);
3212: yield = rc;
1.1.1.4 misho 3213: #ifdef __VMS
3214: vms_setsymbol("UTF",0,yield );
1.1.1.2 misho 3215: #endif
3216: }
1.1.1.4 misho 3217: else if (strcmp(argv[op + 1], "ucp") == 0)
1.1.1.2 misho 3218: {
3219: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3220: printf("%d\n", rc);
3221: yield = rc;
3222: }
1.1.1.4 misho 3223: else if (strcmp(argv[op + 1], "jit") == 0)
3224: {
3225: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3226: printf("%d\n", rc);
3227: yield = rc;
3228: }
3229: else if (strcmp(argv[op + 1], "newline") == 0)
3230: {
3231: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3232: print_newline_config(rc, TRUE);
3233: }
3234: else if (strcmp(argv[op + 1], "ebcdic") == 0)
3235: {
3236: #ifdef EBCDIC
3237: printf("1\n");
3238: yield = 1;
3239: #else
3240: printf("0\n");
3241: #endif
3242: }
3243: else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
1.1.1.2 misho 3244: {
1.1.1.4 misho 3245: #ifdef EBCDIC
3246: printf("0x%02x\n", CHAR_LF);
3247: #else
3248: printf("0\n");
3249: #endif
1.1.1.2 misho 3250: }
1.1.1.4 misho 3251: else
1.1.1.2 misho 3252: {
1.1.1.4 misho 3253: printf("Unknown -C option: %s\n", argv[op + 1]);
1.1.1.2 misho 3254: }
3255: goto EXIT;
3256: }
3257:
1.1.1.4 misho 3258: /* No argument for -C: output all configuration information. */
3259:
1.1.1.2 misho 3260: printf("PCRE version %s\n", version);
1.1 misho 3261: printf("Compiled with\n");
1.1.1.2 misho 3262:
1.1.1.4 misho 3263: #ifdef EBCDIC
3264: printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3265: #endif
3266:
1.1.1.2 misho 3267: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3268: are set, either both UTFs are supported or both are not supported. */
3269:
1.1.1.4 misho 3270: #ifdef SUPPORT_PCRE8
3271: printf(" 8-bit support\n");
1.1 misho 3272: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1.1.1.4 misho 3273: printf (" %sUTF-8 support\n", rc ? "" : "No ");
3274: #endif
3275: #ifdef SUPPORT_PCRE16
3276: printf(" 16-bit support\n");
1.1.1.2 misho 3277: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1.1.1.4 misho 3278: printf (" %sUTF-16 support\n", rc ? "" : "No ");
3279: #endif
3280: #ifdef SUPPORT_PCRE32
3281: printf(" 32-bit support\n");
3282: (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3283: printf (" %sUTF-32 support\n", rc ? "" : "No ");
1.1.1.2 misho 3284: #endif
3285:
3286: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1.1 misho 3287: printf(" %sUnicode properties support\n", rc? "" : "No ");
1.1.1.2 misho 3288: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
1.1 misho 3289: if (rc)
1.1.1.2 misho 3290: {
3291: const char *arch;
3292: (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3293: printf(" Just-in-time compiler support: %s\n", arch);
3294: }
1.1 misho 3295: else
3296: printf(" No just-in-time compiler support\n");
1.1.1.2 misho 3297: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
1.1.1.4 misho 3298: print_newline_config(rc, FALSE);
1.1.1.2 misho 3299: (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
1.1 misho 3300: printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3301: "all Unicode newlines");
1.1.1.2 misho 3302: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
1.1 misho 3303: printf(" Internal link size = %d\n", rc);
1.1.1.2 misho 3304: (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1.1 misho 3305: printf(" POSIX malloc threshold = %d\n", rc);
1.1.1.5 ! misho 3306: (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
! 3307: printf(" Parentheses nest limit = %ld\n", lrc);
1.1.1.2 misho 3308: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1.1 misho 3309: printf(" Default match limit = %ld\n", lrc);
1.1.1.2 misho 3310: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1.1 misho 3311: printf(" Default recursion depth limit = %ld\n", lrc);
1.1.1.2 misho 3312: (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3313: printf(" Match recursion uses %s", rc? "stack" : "heap");
3314: if (showstore)
3315: {
3316: PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3317: printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3318: }
3319: printf("\n");
1.1 misho 3320: goto EXIT;
3321: }
1.1.1.3 misho 3322: else if (strcmp(arg, "-help") == 0 ||
3323: strcmp(arg, "--help") == 0)
1.1 misho 3324: {
3325: usage();
3326: goto EXIT;
3327: }
3328: else
3329: {
1.1.1.3 misho 3330: BAD_ARG:
3331: printf("** Unknown or malformed option %s\n", arg);
1.1 misho 3332: usage();
3333: yield = 1;
3334: goto EXIT;
3335: }
3336: op++;
3337: argc--;
3338: }
3339:
3340: /* Get the store for the offsets vector, and remember what it was */
3341:
3342: size_offsets_max = size_offsets;
3343: offsets = (int *)malloc(size_offsets_max * sizeof(int));
3344: if (offsets == NULL)
3345: {
3346: printf("** Failed to get %d bytes of memory for offsets vector\n",
3347: (int)(size_offsets_max * sizeof(int)));
3348: yield = 1;
3349: goto EXIT;
3350: }
3351:
3352: /* Sort out the input and output files */
3353:
3354: if (argc > 1)
3355: {
3356: infile = fopen(argv[op], INPUT_MODE);
3357: if (infile == NULL)
3358: {
3359: printf("** Failed to open %s\n", argv[op]);
3360: yield = 1;
3361: goto EXIT;
3362: }
3363: }
3364:
3365: if (argc > 2)
3366: {
3367: outfile = fopen(argv[op+1], OUTPUT_MODE);
3368: if (outfile == NULL)
3369: {
3370: printf("** Failed to open %s\n", argv[op+1]);
3371: yield = 1;
3372: goto EXIT;
3373: }
3374: }
3375:
3376: /* Set alternative malloc function */
3377:
1.1.1.2 misho 3378: #ifdef SUPPORT_PCRE8
1.1 misho 3379: pcre_malloc = new_malloc;
3380: pcre_free = new_free;
3381: pcre_stack_malloc = stack_malloc;
3382: pcre_stack_free = stack_free;
1.1.1.2 misho 3383: #endif
3384:
3385: #ifdef SUPPORT_PCRE16
3386: pcre16_malloc = new_malloc;
3387: pcre16_free = new_free;
3388: pcre16_stack_malloc = stack_malloc;
3389: pcre16_stack_free = stack_free;
3390: #endif
1.1 misho 3391:
1.1.1.4 misho 3392: #ifdef SUPPORT_PCRE32
3393: pcre32_malloc = new_malloc;
3394: pcre32_free = new_free;
3395: pcre32_stack_malloc = stack_malloc;
3396: pcre32_stack_free = stack_free;
3397: #endif
3398:
1.1.1.5 ! misho 3399: /* Heading line unless quiet */
1.1 misho 3400:
1.1.1.2 misho 3401: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
1.1 misho 3402:
3403: /* Main loop */
3404:
3405: while (!done)
3406: {
3407: pcre *re = NULL;
3408: pcre_extra *extra = NULL;
3409:
3410: #if !defined NOPOSIX /* There are still compilers that require no indent */
3411: regex_t preg;
3412: int do_posix = 0;
3413: #endif
3414:
3415: const char *error;
1.1.1.2 misho 3416: pcre_uint8 *markptr;
3417: pcre_uint8 *p, *pp, *ppp;
3418: pcre_uint8 *to_file = NULL;
3419: const pcre_uint8 *tables = NULL;
3420: unsigned long int get_options;
1.1 misho 3421: unsigned long int true_size, true_study_size = 0;
1.1.1.5 ! misho 3422: size_t size;
1.1 misho 3423: int do_allcaps = 0;
3424: int do_mark = 0;
3425: int do_study = 0;
3426: int no_force_study = 0;
3427: int do_debug = debug;
3428: int do_G = 0;
3429: int do_g = 0;
3430: int do_showinfo = showinfo;
3431: int do_showrest = 0;
3432: int do_showcaprest = 0;
3433: int do_flip = 0;
3434: int erroroffset, len, delimiter, poffset;
3435:
1.1.1.3 misho 3436: #if !defined NODFA
3437: int dfa_matched = 0;
3438: #endif
3439:
1.1.1.2 misho 3440: use_utf = 0;
1.1 misho 3441: debug_lengths = 1;
3442:
3443: if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3444: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3445: fflush(outfile);
3446:
3447: p = buffer;
3448: while (isspace(*p)) p++;
3449: if (*p == 0) continue;
3450:
1.1.1.5 ! misho 3451: /* Handle option lock-out setting */
! 3452:
! 3453: if (*p == '<' && p[1] == ' ')
! 3454: {
! 3455: p += 2;
! 3456: while (isspace(*p)) p++;
! 3457: if (strncmp((char *)p, "forbid ", 7) == 0)
! 3458: {
! 3459: p += 7;
! 3460: while (isspace(*p)) p++;
! 3461: pp = lockout;
! 3462: while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
! 3463: *pp++ = *p++;
! 3464: *pp = 0;
! 3465: }
! 3466: else
! 3467: {
! 3468: printf("** Unrecognized special command '%s'\n", p);
! 3469: yield = 1;
! 3470: goto EXIT;
! 3471: }
! 3472: continue;
! 3473: }
! 3474:
1.1 misho 3475: /* See if the pattern is to be loaded pre-compiled from a file. */
3476:
3477: if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3478: {
1.1.1.2 misho 3479: pcre_uint32 magic;
3480: pcre_uint8 sbuf[8];
1.1 misho 3481: FILE *f;
3482:
3483: p++;
1.1.1.2 misho 3484: if (*p == '!')
3485: {
3486: do_debug = TRUE;
3487: do_showinfo = TRUE;
3488: p++;
3489: }
3490:
1.1 misho 3491: pp = p + (int)strlen((char *)p);
3492: while (isspace(pp[-1])) pp--;
3493: *pp = 0;
3494:
3495: f = fopen((char *)p, "rb");
3496: if (f == NULL)
3497: {
3498: fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3499: continue;
3500: }
3501: if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3502:
3503: true_size =
3504: (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3505: true_study_size =
3506: (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3507:
1.1.1.2 misho 3508: re = (pcre *)new_malloc(true_size);
1.1.1.4 misho 3509: if (re == NULL)
3510: {
3511: printf("** Failed to get %d bytes of memory for pcre object\n",
3512: (int)true_size);
3513: yield = 1;
3514: goto EXIT;
3515: }
1.1 misho 3516: if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3517:
1.1.1.4 misho 3518: magic = REAL_PCRE_MAGIC(re);
1.1 misho 3519: if (magic != MAGIC_NUMBER)
3520: {
1.1.1.2 misho 3521: if (swap_uint32(magic) == MAGIC_NUMBER)
1.1 misho 3522: {
3523: do_flip = 1;
3524: }
3525: else
3526: {
3527: fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1.1.1.4 misho 3528: new_free(re);
1.1 misho 3529: fclose(f);
3530: continue;
3531: }
3532: }
3533:
1.1.1.2 misho 3534: /* We hide the byte-invert info for little and big endian tests. */
1.1 misho 3535: fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1.1.1.2 misho 3536: do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
1.1 misho 3537:
3538: /* Now see if there is any following study data. */
3539:
3540: if (true_study_size != 0)
3541: {
3542: pcre_study_data *psd;
3543:
3544: extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3545: extra->flags = PCRE_EXTRA_STUDY_DATA;
3546:
3547: psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3548: extra->study_data = psd;
3549:
3550: if (fread(psd, 1, true_study_size, f) != true_study_size)
3551: {
3552: FAIL_READ:
3553: fprintf(outfile, "Failed to read data from %s\n", p);
1.1.1.2 misho 3554: if (extra != NULL)
3555: {
3556: PCRE_FREE_STUDY(extra);
3557: }
1.1.1.4 misho 3558: new_free(re);
1.1 misho 3559: fclose(f);
3560: continue;
3561: }
3562: fprintf(outfile, "Study data loaded from %s\n", p);
3563: do_study = 1; /* To get the data output if requested */
3564: }
3565: else fprintf(outfile, "No study data\n");
3566:
1.1.1.2 misho 3567: /* Flip the necessary bytes. */
3568: if (do_flip)
3569: {
3570: int rc;
3571: PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3572: if (rc == PCRE_ERROR_BADMODE)
3573: {
1.1.1.4 misho 3574: pcre_uint32 flags_in_host_byte_order;
3575: if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3576: flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3577: else
3578: flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
1.1.1.2 misho 3579: /* Simulate the result of the function call below. */
3580: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1.1.1.4 misho 3581: pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3582: PCRE_INFO_OPTIONS);
3583: fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3584: "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3585: new_free(re);
3586: fclose(f);
1.1.1.2 misho 3587: continue;
3588: }
3589: }
3590:
3591: /* Need to know if UTF-8 for printing data strings. */
3592:
1.1.1.4 misho 3593: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3594: {
3595: new_free(re);
3596: fclose(f);
3597: continue;
3598: }
1.1.1.2 misho 3599: use_utf = (get_options & PCRE_UTF8) != 0;
3600:
1.1 misho 3601: fclose(f);
3602: goto SHOW_INFO;
3603: }
3604:
3605: /* In-line pattern (the usual case). Get the delimiter and seek the end of
1.1.1.2 misho 3606: the pattern; if it isn't complete, read more. */
1.1 misho 3607:
3608: delimiter = *p++;
3609:
3610: if (isalnum(delimiter) || delimiter == '\\')
3611: {
3612: fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3613: goto SKIP_DATA;
3614: }
3615:
3616: pp = p;
3617: poffset = (int)(p - buffer);
3618:
3619: for(;;)
3620: {
3621: while (*pp != 0)
3622: {
3623: if (*pp == '\\' && pp[1] != 0) pp++;
3624: else if (*pp == delimiter) break;
3625: pp++;
3626: }
3627: if (*pp != 0) break;
3628: if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3629: {
3630: fprintf(outfile, "** Unexpected EOF\n");
3631: done = 1;
3632: goto CONTINUE;
3633: }
3634: if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3635: }
3636:
3637: /* The buffer may have moved while being extended; reset the start of data
3638: pointer to the correct relative point in the buffer. */
3639:
3640: p = buffer + poffset;
3641:
3642: /* If the first character after the delimiter is backslash, make
3643: the pattern end with backslash. This is purely to provide a way
3644: of testing for the error message when a pattern ends with backslash. */
3645:
3646: if (pp[1] == '\\') *pp++ = '\\';
3647:
3648: /* Terminate the pattern at the delimiter, and save a copy of the pattern
3649: for callouts. */
3650:
3651: *pp++ = 0;
3652: strcpy((char *)pbuffer, (char *)p);
3653:
1.1.1.5 ! misho 3654: /* Look for modifiers and options after the final delimiter. */
1.1 misho 3655:
1.1.1.5 ! misho 3656: options = default_options;
1.1.1.4 misho 3657: study_options = force_study_options;
1.1 misho 3658: log_store = showstore; /* default from command line */
3659:
3660: while (*pp != 0)
3661: {
1.1.1.5 ! misho 3662: /* Check to see whether this modifier has been locked out for this file.
! 3663: This is complicated for the multi-character options that begin with '<'.
! 3664: If there is no '>' in the lockout string, all multi-character modifiers are
! 3665: locked out. */
! 3666:
! 3667: if (strchr((char *)lockout, *pp) != NULL)
! 3668: {
! 3669: if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
! 3670: {
! 3671: int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
! 3672: if (x == 0) goto SKIP_DATA;
! 3673:
! 3674: for (ppp = lockout; *ppp != 0; ppp++)
! 3675: {
! 3676: if (*ppp == '<')
! 3677: {
! 3678: int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
! 3679: if (y == 0)
! 3680: {
! 3681: printf("** Error in modifier forbid data - giving up.\n");
! 3682: yield = 1;
! 3683: goto EXIT;
! 3684: }
! 3685: if (x == y)
! 3686: {
! 3687: ppp = pp;
! 3688: while (*ppp != '>') ppp++;
! 3689: printf("** The %.*s modifier is locked out - giving up.\n",
! 3690: (int)(ppp - pp + 1), pp);
! 3691: yield = 1;
! 3692: goto EXIT;
! 3693: }
! 3694: }
! 3695: }
! 3696: }
! 3697:
! 3698: /* The single-character modifiers are straightforward. */
! 3699:
! 3700: else
! 3701: {
! 3702: printf("** The /%c modifier is locked out - giving up.\n", *pp);
! 3703: yield = 1;
! 3704: goto EXIT;
! 3705: }
! 3706: }
! 3707:
! 3708: /* The modifier is not locked out; handle it. */
! 3709:
1.1 misho 3710: switch (*pp++)
3711: {
3712: case 'f': options |= PCRE_FIRSTLINE; break;
3713: case 'g': do_g = 1; break;
3714: case 'i': options |= PCRE_CASELESS; break;
3715: case 'm': options |= PCRE_MULTILINE; break;
3716: case 's': options |= PCRE_DOTALL; break;
3717: case 'x': options |= PCRE_EXTENDED; break;
3718:
3719: case '+':
3720: if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3721: break;
3722:
3723: case '=': do_allcaps = 1; break;
3724: case 'A': options |= PCRE_ANCHORED; break;
3725: case 'B': do_debug = 1; break;
3726: case 'C': options |= PCRE_AUTO_CALLOUT; break;
3727: case 'D': do_debug = do_showinfo = 1; break;
3728: case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3729: case 'F': do_flip = 1; break;
3730: case 'G': do_G = 1; break;
3731: case 'I': do_showinfo = 1; break;
3732: case 'J': options |= PCRE_DUPNAMES; break;
3733: case 'K': do_mark = 1; break;
3734: case 'M': log_store = 1; break;
3735: case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1.1.1.5 ! misho 3736: case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
1.1 misho 3737:
3738: #if !defined NOPOSIX
3739: case 'P': do_posix = 1; break;
3740: #endif
3741:
3742: case 'S':
1.1.1.4 misho 3743: do_study = 1;
3744: for (;;)
1.1 misho 3745: {
1.1.1.4 misho 3746: switch (*pp++)
1.1 misho 3747: {
1.1.1.4 misho 3748: case 'S':
3749: do_study = 0;
3750: no_force_study = 1;
3751: break;
3752:
3753: case '!':
3754: study_options |= PCRE_STUDY_EXTRA_NEEDED;
3755: break;
3756:
3757: case '+':
3758: if (*pp == '+')
1.1.1.3 misho 3759: {
3760: verify_jit = TRUE;
3761: pp++;
3762: }
3763: if (*pp >= '1' && *pp <= '7')
3764: study_options |= jit_study_bits[*pp++ - '1'];
3765: else
3766: study_options |= jit_study_bits[6];
1.1.1.4 misho 3767: break;
3768:
3769: case '-':
3770: study_options &= ~PCRE_STUDY_ALLJIT;
3771: break;
3772:
3773: default:
3774: pp--;
3775: goto ENDLOOP;
1.1 misho 3776: }
3777: }
1.1.1.4 misho 3778: ENDLOOP:
1.1 misho 3779: break;
3780:
3781: case 'U': options |= PCRE_UNGREEDY; break;
3782: case 'W': options |= PCRE_UCP; break;
3783: case 'X': options |= PCRE_EXTRA; break;
3784: case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3785: case 'Z': debug_lengths = 0; break;
1.1.1.2 misho 3786: case '8': options |= PCRE_UTF8; use_utf = 1; break;
1.1.1.4 misho 3787: case '9': options |= PCRE_NEVER_UTF; break;
1.1 misho 3788: case '?': options |= PCRE_NO_UTF8_CHECK; break;
3789:
3790: case 'T':
3791: switch (*pp++)
3792: {
3793: case '0': tables = tables0; break;
3794: case '1': tables = tables1; break;
3795:
3796: case '\r':
3797: case '\n':
3798: case ' ':
3799: case 0:
3800: fprintf(outfile, "** Missing table number after /T\n");
3801: goto SKIP_DATA;
3802:
3803: default:
3804: fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3805: goto SKIP_DATA;
3806: }
3807: break;
3808:
3809: case 'L':
3810: ppp = pp;
3811: /* The '\r' test here is so that it works on Windows. */
3812: /* The 0 (NUL) test is just in case this is an unterminated line. */
3813: while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3814: *ppp = 0;
3815: if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3816: {
3817: fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3818: goto SKIP_DATA;
3819: }
3820: locale_set = 1;
1.1.1.2 misho 3821: tables = PCRE_MAKETABLES;
1.1 misho 3822: pp = ppp;
3823: break;
3824:
3825: case '>':
3826: to_file = pp;
3827: while (*pp != 0) pp++;
3828: while (isspace(pp[-1])) pp--;
3829: *pp = 0;
3830: break;
3831:
3832: case '<':
3833: {
1.1.1.5 ! misho 3834: int x = check_mc_option(pp, outfile, FALSE, "modifier");
! 3835: if (x == 0) goto SKIP_DATA;
! 3836: options |= x;
! 3837: while (*pp++ != '>');
1.1 misho 3838: }
3839: break;
3840:
3841: case '\r': /* So that it works in Windows */
3842: case '\n':
3843: case ' ':
3844: break;
3845:
3846: default:
1.1.1.5 ! misho 3847: fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
1.1 misho 3848: goto SKIP_DATA;
3849: }
3850: }
3851:
3852: /* Handle compiling via the POSIX interface, which doesn't support the
3853: timing, showing, or debugging options, nor the ability to pass over
1.1.1.2 misho 3854: local character tables. Neither does it have 16-bit support. */
1.1 misho 3855:
3856: #if !defined NOPOSIX
3857: if (posix || do_posix)
3858: {
3859: int rc;
3860: int cflags = 0;
3861:
3862: if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3863: if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3864: if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3865: if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3866: if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3867: if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3868: if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3869:
3870: rc = regcomp(&preg, (char *)p, cflags);
3871:
3872: /* Compilation failed; go back for another re, skipping to blank line
3873: if non-interactive. */
3874:
3875: if (rc != 0)
3876: {
3877: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3878: fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3879: goto SKIP_DATA;
3880: }
3881: }
3882:
3883: /* Handle compiling via the native interface */
3884:
3885: else
3886: #endif /* !defined NOPOSIX */
3887:
3888: {
1.1.1.4 misho 3889: /* In 16- or 32-bit mode, convert the input. */
1.1.1.2 misho 3890:
3891: #ifdef SUPPORT_PCRE16
1.1.1.4 misho 3892: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 3893: {
3894: switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3895: {
3896: case -1:
3897: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3898: "converted to UTF-16\n");
3899: goto SKIP_DATA;
3900:
3901: case -2:
3902: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3903: "cannot be converted to UTF-16\n");
3904: goto SKIP_DATA;
3905:
3906: case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3907: fprintf(outfile, "**Failed: character value greater than 0xffff "
3908: "cannot be converted to 16-bit in non-UTF mode\n");
3909: goto SKIP_DATA;
3910:
3911: default:
3912: break;
3913: }
3914: p = (pcre_uint8 *)buffer16;
3915: }
3916: #endif
3917:
1.1.1.4 misho 3918: #ifdef SUPPORT_PCRE32
3919: if (pcre_mode == PCRE32_MODE)
3920: {
3921: switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3922: {
3923: case -1:
3924: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3925: "converted to UTF-32\n");
3926: goto SKIP_DATA;
3927:
3928: case -2:
3929: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3930: "cannot be converted to UTF-32\n");
3931: goto SKIP_DATA;
3932:
3933: case -3:
3934: fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3935: goto SKIP_DATA;
3936:
3937: default:
3938: break;
3939: }
3940: p = (pcre_uint8 *)buffer32;
3941: }
3942: #endif
3943:
1.1.1.2 misho 3944: /* Compile many times when timing */
1.1 misho 3945:
3946: if (timeit > 0)
3947: {
3948: register int i;
3949: clock_t time_taken;
3950: clock_t start_time = clock();
3951: for (i = 0; i < timeit; i++)
3952: {
1.1.1.2 misho 3953: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 3954: if (re != NULL) free(re);
3955: }
1.1.1.5 ! misho 3956: total_compile_time += (time_taken = clock() - start_time);
1.1 misho 3957: fprintf(outfile, "Compile time %.4f milliseconds\n",
3958: (((double)time_taken * 1000.0) / (double)timeit) /
3959: (double)CLOCKS_PER_SEC);
3960: }
3961:
1.1.1.2 misho 3962: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 3963:
3964: /* Compilation failed; go back for another re, skipping to blank line
3965: if non-interactive. */
3966:
3967: if (re == NULL)
3968: {
3969: fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3970: SKIP_DATA:
3971: if (infile != stdin)
3972: {
3973: for (;;)
3974: {
3975: if (extend_inputline(infile, buffer, NULL) == NULL)
3976: {
3977: done = 1;
3978: goto CONTINUE;
3979: }
3980: len = (int)strlen((char *)buffer);
3981: while (len > 0 && isspace(buffer[len-1])) len--;
3982: if (len == 0) break;
3983: }
3984: fprintf(outfile, "\n");
3985: }
3986: goto CONTINUE;
3987: }
3988:
3989: /* Compilation succeeded. It is now possible to set the UTF-8 option from
3990: within the regex; check for this so that we know how to process the data
3991: lines. */
3992:
1.1.1.2 misho 3993: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3994: goto SKIP_DATA;
3995: if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
1.1 misho 3996:
3997: /* Extract the size for possible writing before possibly flipping it,
3998: and remember the store that was got. */
3999:
1.1.1.4 misho 4000: true_size = REAL_PCRE_SIZE(re);
1.1 misho 4001:
4002: /* Output code size information if requested */
4003:
4004: if (log_store)
1.1.1.4 misho 4005: {
4006: int name_count, name_entry_size, real_pcre_size;
4007:
4008: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4009: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4010: real_pcre_size = 0;
4011: #ifdef SUPPORT_PCRE8
4012: if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4013: real_pcre_size = sizeof(real_pcre);
4014: #endif
4015: #ifdef SUPPORT_PCRE16
4016: if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4017: real_pcre_size = sizeof(real_pcre16);
4018: #endif
4019: #ifdef SUPPORT_PCRE32
4020: if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4021: real_pcre_size = sizeof(real_pcre32);
4022: #endif
1.1.1.5 ! misho 4023: new_info(re, NULL, PCRE_INFO_SIZE, &size);
1.1 misho 4024: fprintf(outfile, "Memory allocation (code space): %d\n",
1.1.1.5 ! misho 4025: (int)(size - real_pcre_size - name_count * name_entry_size));
1.1.1.4 misho 4026: }
1.1 misho 4027:
4028: /* If -s or /S was present, study the regex to generate additional info to
4029: help with the matching, unless the pattern has the SS option, which
4030: suppresses the effect of /S (used for a few test patterns where studying is
4031: never sensible). */
4032:
4033: if (do_study || (force_study >= 0 && !no_force_study))
4034: {
4035: if (timeit > 0)
4036: {
4037: register int i;
4038: clock_t time_taken;
4039: clock_t start_time = clock();
4040: for (i = 0; i < timeit; i++)
1.1.1.2 misho 4041: {
1.1.1.4 misho 4042: PCRE_STUDY(extra, re, study_options, &error);
1.1.1.2 misho 4043: }
1.1.1.5 ! misho 4044: total_study_time = (time_taken = clock() - start_time);
1.1.1.2 misho 4045: if (extra != NULL)
4046: {
4047: PCRE_FREE_STUDY(extra);
4048: }
1.1 misho 4049: fprintf(outfile, " Study time %.4f milliseconds\n",
4050: (((double)time_taken * 1000.0) / (double)timeit) /
4051: (double)CLOCKS_PER_SEC);
4052: }
1.1.1.4 misho 4053: PCRE_STUDY(extra, re, study_options, &error);
1.1 misho 4054: if (error != NULL)
4055: fprintf(outfile, "Failed to study: %s\n", error);
4056: else if (extra != NULL)
4057: {
4058: true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4059: if (log_store)
4060: {
4061: size_t jitsize;
1.1.1.2 misho 4062: if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4063: jitsize != 0)
4064: fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
1.1 misho 4065: }
4066: }
4067: }
4068:
4069: /* If /K was present, we set up for handling MARK data. */
4070:
4071: if (do_mark)
4072: {
4073: if (extra == NULL)
4074: {
4075: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4076: extra->flags = 0;
4077: }
4078: extra->mark = &markptr;
4079: extra->flags |= PCRE_EXTRA_MARK;
4080: }
4081:
1.1.1.2 misho 4082: /* Extract and display information from the compiled data if required. */
1.1 misho 4083:
4084: SHOW_INFO:
4085:
4086: if (do_debug)
4087: {
4088: fprintf(outfile, "------------------------------------------------------------------\n");
1.1.1.2 misho 4089: PCRE_PRINTINT(re, outfile, debug_lengths);
1.1 misho 4090: }
4091:
4092: /* We already have the options in get_options (see above) */
4093:
4094: if (do_showinfo)
4095: {
4096: unsigned long int all_options;
1.1.1.4 misho 4097: pcre_uint32 first_char, need_char;
4098: pcre_uint32 match_limit, recursion_limit;
4099: int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
1.1.1.5 ! misho 4100: hascrorlf, maxlookbehind, match_empty;
1.1 misho 4101: int nameentrysize, namecount;
1.1.1.2 misho 4102: const pcre_uint8 *nametable;
1.1 misho 4103:
1.1.1.5 ! misho 4104: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
1.1.1.2 misho 4105: new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
1.1.1.4 misho 4106: new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4107: new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4108: new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4109: new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
1.1.1.2 misho 4110: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4111: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4112: new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4113: new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4114: new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
1.1.1.3 misho 4115: new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
1.1.1.5 ! misho 4116: new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
1.1.1.3 misho 4117: new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
1.1.1.2 misho 4118: != 0)
4119: goto SKIP_DATA;
1.1 misho 4120:
4121: fprintf(outfile, "Capturing subpattern count = %d\n", count);
1.1.1.4 misho 4122:
1.1 misho 4123: if (backrefmax > 0)
4124: fprintf(outfile, "Max back reference = %d\n", backrefmax);
4125:
1.1.1.4 misho 4126: if (maxlookbehind > 0)
4127: fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4128:
4129: if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4130: fprintf(outfile, "Match limit = %u\n", match_limit);
4131:
4132: if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4133: fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4134:
1.1 misho 4135: if (namecount > 0)
4136: {
4137: fprintf(outfile, "Named capturing subpatterns:\n");
4138: while (namecount-- > 0)
4139: {
1.1.1.4 misho 4140: int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
1.1.1.2 misho 4141: int length = (int)STRLEN(nametable + imm2_size);
4142: fprintf(outfile, " ");
4143: PCHARSV(nametable, imm2_size, length, outfile);
4144: while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
1.1.1.4 misho 4145: #ifdef SUPPORT_PCRE32
4146: if (pcre_mode == PCRE32_MODE)
4147: fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
1.1.1.2 misho 4148: #endif
1.1.1.4 misho 4149: #ifdef SUPPORT_PCRE16
4150: if (pcre_mode == PCRE16_MODE)
4151: fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4152: #endif
4153: #ifdef SUPPORT_PCRE8
4154: if (pcre_mode == PCRE8_MODE)
4155: fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
1.1.1.2 misho 4156: #endif
1.1.1.4 misho 4157: nametable += nameentrysize * CHAR_SIZE;
1.1 misho 4158: }
4159: }
4160:
1.1.1.5 ! misho 4161: if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
! 4162: if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
! 4163: if (match_empty) fprintf(outfile, "May match empty string\n");
1.1 misho 4164:
1.1.1.4 misho 4165: all_options = REAL_PCRE_OPTIONS(re);
1.1.1.2 misho 4166: if (do_flip) all_options = swap_uint32(all_options);
1.1 misho 4167:
4168: if (get_options == 0) fprintf(outfile, "No options\n");
1.1.1.5 ! misho 4169: else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1.1 misho 4170: ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4171: ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4172: ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4173: ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4174: ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4175: ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4176: ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4177: ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4178: ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4179: ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4180: ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4181: ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1.1.1.5 ! misho 4182: ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
1.1.1.2 misho 4183: ((get_options & PCRE_UTF8) != 0)? " utf" : "",
1.1 misho 4184: ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1.1.1.2 misho 4185: ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
1.1 misho 4186: ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1.1.1.4 misho 4187: ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4188: ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
1.1 misho 4189:
4190: if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4191:
4192: switch (get_options & PCRE_NEWLINE_BITS)
4193: {
4194: case PCRE_NEWLINE_CR:
4195: fprintf(outfile, "Forced newline sequence: CR\n");
4196: break;
4197:
4198: case PCRE_NEWLINE_LF:
4199: fprintf(outfile, "Forced newline sequence: LF\n");
4200: break;
4201:
4202: case PCRE_NEWLINE_CRLF:
4203: fprintf(outfile, "Forced newline sequence: CRLF\n");
4204: break;
4205:
4206: case PCRE_NEWLINE_ANYCRLF:
4207: fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4208: break;
4209:
4210: case PCRE_NEWLINE_ANY:
4211: fprintf(outfile, "Forced newline sequence: ANY\n");
4212: break;
4213:
4214: default:
4215: break;
4216: }
4217:
1.1.1.4 misho 4218: if (first_char_set == 2)
1.1 misho 4219: {
4220: fprintf(outfile, "First char at start or follows newline\n");
4221: }
1.1.1.4 misho 4222: else if (first_char_set == 1)
1.1 misho 4223: {
1.1.1.2 misho 4224: const char *caseless =
1.1.1.4 misho 4225: ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
1.1 misho 4226: "" : " (caseless)";
1.1.1.2 misho 4227:
4228: if (PRINTOK(first_char))
4229: fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
1.1 misho 4230: else
1.1.1.2 misho 4231: {
4232: fprintf(outfile, "First char = ");
4233: pchar(first_char, outfile);
4234: fprintf(outfile, "%s\n", caseless);
4235: }
1.1 misho 4236: }
1.1.1.4 misho 4237: else
4238: {
4239: fprintf(outfile, "No first char\n");
4240: }
1.1 misho 4241:
1.1.1.4 misho 4242: if (need_char_set == 0)
1.1 misho 4243: {
4244: fprintf(outfile, "No need char\n");
4245: }
4246: else
4247: {
1.1.1.2 misho 4248: const char *caseless =
1.1.1.4 misho 4249: ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
1.1 misho 4250: "" : " (caseless)";
1.1.1.2 misho 4251:
4252: if (PRINTOK(need_char))
4253: fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
1.1 misho 4254: else
1.1.1.2 misho 4255: {
4256: fprintf(outfile, "Need char = ");
4257: pchar(need_char, outfile);
4258: fprintf(outfile, "%s\n", caseless);
4259: }
1.1 misho 4260: }
4261:
4262: /* Don't output study size; at present it is in any case a fixed
4263: value, but it varies, depending on the computer architecture, and
4264: so messes up the test suite. (And with the /F option, it might be
4265: flipped.) If study was forced by an external -s, don't show this
4266: information unless -i or -d was also present. This means that, except
4267: when auto-callouts are involved, the output from runs with and without
4268: -s should be identical. */
4269:
4270: if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4271: {
4272: if (extra == NULL)
4273: fprintf(outfile, "Study returned NULL\n");
4274: else
4275: {
1.1.1.2 misho 4276: pcre_uint8 *start_bits = NULL;
1.1 misho 4277: int minlength;
4278:
1.1.1.2 misho 4279: if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4280: fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1.1 misho 4281:
1.1.1.2 misho 4282: if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
1.1 misho 4283: {
1.1.1.2 misho 4284: if (start_bits == NULL)
4285: fprintf(outfile, "No set of starting bytes\n");
4286: else
1.1 misho 4287: {
1.1.1.2 misho 4288: int i;
4289: int c = 24;
4290: fprintf(outfile, "Starting byte set: ");
4291: for (i = 0; i < 256; i++)
1.1 misho 4292: {
1.1.1.2 misho 4293: if ((start_bits[i/8] & (1<<(i&7))) != 0)
1.1 misho 4294: {
1.1.1.2 misho 4295: if (c > 75)
4296: {
4297: fprintf(outfile, "\n ");
4298: c = 2;
4299: }
4300: if (PRINTOK(i) && i != ' ')
4301: {
4302: fprintf(outfile, "%c ", i);
4303: c += 2;
4304: }
4305: else
4306: {
4307: fprintf(outfile, "\\x%02x ", i);
4308: c += 5;
4309: }
1.1 misho 4310: }
4311: }
1.1.1.2 misho 4312: fprintf(outfile, "\n");
1.1 misho 4313: }
4314: }
4315: }
4316:
4317: /* Show this only if the JIT was set by /S, not by -s. */
4318:
1.1.1.4 misho 4319: if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4320: (force_study_options & PCRE_STUDY_ALLJIT) == 0)
1.1 misho 4321: {
4322: int jit;
1.1.1.2 misho 4323: if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4324: {
4325: if (jit)
4326: fprintf(outfile, "JIT study was successful\n");
4327: else
1.1 misho 4328: #ifdef SUPPORT_JIT
1.1.1.2 misho 4329: fprintf(outfile, "JIT study was not successful\n");
1.1 misho 4330: #else
1.1.1.2 misho 4331: fprintf(outfile, "JIT support is not available in this version of PCRE\n");
1.1 misho 4332: #endif
1.1.1.2 misho 4333: }
1.1 misho 4334: }
4335: }
4336: }
4337:
4338: /* If the '>' option was present, we write out the regex to a file, and
4339: that is all. The first 8 bytes of the file are the regex length and then
4340: the study length, in big-endian order. */
4341:
4342: if (to_file != NULL)
4343: {
4344: FILE *f = fopen((char *)to_file, "wb");
4345: if (f == NULL)
4346: {
4347: fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4348: }
4349: else
4350: {
1.1.1.2 misho 4351: pcre_uint8 sbuf[8];
4352:
4353: if (do_flip) regexflip(re, extra);
4354: sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4355: sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4356: sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4357: sbuf[3] = (pcre_uint8)((true_size) & 255);
4358: sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4359: sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4360: sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4361: sbuf[7] = (pcre_uint8)((true_study_size) & 255);
1.1 misho 4362:
4363: if (fwrite(sbuf, 1, 8, f) < 8 ||
4364: fwrite(re, 1, true_size, f) < true_size)
4365: {
4366: fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4367: }
4368: else
4369: {
4370: fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4371:
4372: /* If there is study data, write it. */
4373:
4374: if (extra != NULL)
4375: {
4376: if (fwrite(extra->study_data, 1, true_study_size, f) <
4377: true_study_size)
4378: {
4379: fprintf(outfile, "Write error on %s: %s\n", to_file,
4380: strerror(errno));
4381: }
4382: else fprintf(outfile, "Study data written to %s\n", to_file);
4383: }
4384: }
4385: fclose(f);
4386: }
4387:
4388: new_free(re);
1.1.1.2 misho 4389: if (extra != NULL)
4390: {
4391: PCRE_FREE_STUDY(extra);
4392: }
1.1 misho 4393: if (locale_set)
4394: {
4395: new_free((void *)tables);
4396: setlocale(LC_CTYPE, "C");
4397: locale_set = 0;
4398: }
4399: continue; /* With next regex */
4400: }
4401: } /* End of non-POSIX compile */
4402:
4403: /* Read data lines and test them */
4404:
4405: for (;;)
4406: {
1.1.1.4 misho 4407: #ifdef SUPPORT_PCRE8
4408: pcre_uint8 *q8;
4409: #endif
4410: #ifdef SUPPORT_PCRE16
4411: pcre_uint16 *q16;
4412: #endif
4413: #ifdef SUPPORT_PCRE32
4414: pcre_uint32 *q32;
4415: #endif
1.1.1.2 misho 4416: pcre_uint8 *bptr;
1.1 misho 4417: int *use_offsets = offsets;
4418: int use_size_offsets = size_offsets;
4419: int callout_data = 0;
4420: int callout_data_set = 0;
1.1.1.4 misho 4421: int count;
4422: pcre_uint32 c;
1.1 misho 4423: int copystrings = 0;
4424: int find_match_limit = default_find_match_limit;
4425: int getstrings = 0;
4426: int getlist = 0;
4427: int gmatched = 0;
4428: int start_offset = 0;
4429: int start_offset_sign = 1;
4430: int g_notempty = 0;
4431: int use_dfa = 0;
4432:
4433: *copynames = 0;
4434: *getnames = 0;
4435:
1.1.1.4 misho 4436: #ifdef SUPPORT_PCRE32
4437: cn32ptr = copynames;
4438: gn32ptr = getnames;
4439: #endif
1.1.1.2 misho 4440: #ifdef SUPPORT_PCRE16
1.1.1.4 misho 4441: cn16ptr = copynames16;
4442: gn16ptr = getnames16;
1.1.1.2 misho 4443: #endif
4444: #ifdef SUPPORT_PCRE8
4445: cn8ptr = copynames8;
4446: gn8ptr = getnames8;
4447: #endif
1.1 misho 4448:
1.1.1.2 misho 4449: SET_PCRE_CALLOUT(callout);
1.1 misho 4450: first_callout = 1;
4451: last_callout_mark = NULL;
4452: callout_extra = 0;
4453: callout_count = 0;
4454: callout_fail_count = 999999;
4455: callout_fail_id = -1;
4456: show_malloc = 0;
1.1.1.2 misho 4457: options = 0;
1.1 misho 4458:
4459: if (extra != NULL) extra->flags &=
4460: ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4461:
4462: len = 0;
4463: for (;;)
4464: {
4465: if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4466: {
4467: if (len > 0) /* Reached EOF without hitting a newline */
4468: {
4469: fprintf(outfile, "\n");
4470: break;
4471: }
4472: done = 1;
4473: goto CONTINUE;
4474: }
4475: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4476: len = (int)strlen((char *)buffer);
4477: if (buffer[len-1] == '\n') break;
4478: }
4479:
4480: while (len > 0 && isspace(buffer[len-1])) len--;
4481: buffer[len] = 0;
4482: if (len == 0) break;
4483:
4484: p = buffer;
4485: while (isspace(*p)) p++;
4486:
1.1.1.4 misho 4487: #ifndef NOUTF
4488: /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4489: invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4490:
4491: if (use_utf)
4492: {
4493: pcre_uint8 *q;
4494: pcre_uint32 cc;
4495: int n = 1;
4496:
4497: for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4498: if (n <= 0)
4499: {
4500: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4501: goto NEXT_DATA;
4502: }
4503: }
4504: #endif
4505:
4506: #ifdef SUPPORT_VALGRIND
4507: /* Mark the dbuffer as addressable but undefined again. */
4508:
4509: if (dbuffer != NULL)
4510: {
4511: VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4512: }
4513: #endif
4514:
4515: /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4516: the number of pcre_uchar units that will be needed. */
4517:
4518: while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4519: {
4520: dbuffer_size *= 2;
4521: dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4522: if (dbuffer == NULL)
4523: {
4524: fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4525: exit(1);
4526: }
4527: }
4528:
4529: #ifdef SUPPORT_PCRE8
4530: q8 = (pcre_uint8 *) dbuffer;
4531: #endif
4532: #ifdef SUPPORT_PCRE16
4533: q16 = (pcre_uint16 *) dbuffer;
4534: #endif
4535: #ifdef SUPPORT_PCRE32
4536: q32 = (pcre_uint32 *) dbuffer;
4537: #endif
4538:
1.1 misho 4539: while ((c = *p++) != 0)
4540: {
4541: int i = 0;
4542: int n = 0;
4543:
1.1.1.2 misho 4544: /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4545: In non-UTF mode, allow the value of the byte to fall through to later,
4546: where values greater than 127 are turned into UTF-8 when running in
1.1.1.4 misho 4547: 16-bit or 32-bit mode. */
1.1.1.2 misho 4548:
4549: if (c != '\\')
4550: {
1.1.1.4 misho 4551: #ifndef NOUTF
4552: if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4553: #endif
1.1.1.2 misho 4554: }
4555:
4556: /* Handle backslash escapes */
4557:
4558: else switch ((c = *p++))
1.1 misho 4559: {
4560: case 'a': c = 7; break;
4561: case 'b': c = '\b'; break;
4562: case 'e': c = 27; break;
4563: case 'f': c = '\f'; break;
4564: case 'n': c = '\n'; break;
4565: case 'r': c = '\r'; break;
4566: case 't': c = '\t'; break;
4567: case 'v': c = '\v'; break;
4568:
4569: case '0': case '1': case '2': case '3':
4570: case '4': case '5': case '6': case '7':
4571: c -= '0';
4572: while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4573: c = c * 8 + *p++ - '0';
4574: break;
4575:
1.1.1.5 ! misho 4576: case 'o':
! 4577: if (*p == '{')
! 4578: {
! 4579: pcre_uint8 *pt = p;
! 4580: c = 0;
! 4581: for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
! 4582: {
! 4583: if (++i == 12)
! 4584: fprintf(outfile, "** Too many octal digits in \\o{...} item; "
! 4585: "using only the first twelve.\n");
! 4586: else c = c * 8 + *pt - '0';
! 4587: }
! 4588: if (*pt == '}') p = pt + 1;
! 4589: else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
! 4590: }
! 4591: break;
! 4592:
1.1 misho 4593: case 'x':
4594: if (*p == '{')
4595: {
1.1.1.2 misho 4596: pcre_uint8 *pt = p;
1.1 misho 4597: c = 0;
4598:
4599: /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4600: when isxdigit() is a macro that refers to its argument more than
4601: once. This is banned by the C Standard, but apparently happens in at
4602: least one MacOS environment. */
4603:
4604: for (pt++; isxdigit(*pt); pt++)
1.1.1.2 misho 4605: {
4606: if (++i == 9)
4607: fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4608: "using only the first eight.\n");
4609: else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4610: }
1.1 misho 4611: if (*pt == '}')
4612: {
4613: p = pt + 1;
4614: break;
4615: }
1.1.1.2 misho 4616: /* Not correct form for \x{...}; fall through */
1.1 misho 4617: }
4618:
1.1.1.2 misho 4619: /* \x without {} always defines just one byte in 8-bit mode. This
4620: allows UTF-8 characters to be constructed byte by byte, and also allows
4621: invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4622: Otherwise, pass it down to later code so that it can be turned into
1.1.1.4 misho 4623: UTF-8 when running in 16/32-bit mode. */
1.1 misho 4624:
4625: c = 0;
4626: while (i++ < 2 && isxdigit(*p))
4627: {
4628: c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4629: p++;
4630: }
1.1.1.4 misho 4631: #if !defined NOUTF && defined SUPPORT_PCRE8
4632: if (use_utf && (pcre_mode == PCRE8_MODE))
1.1.1.2 misho 4633: {
1.1.1.4 misho 4634: *q8++ = c;
1.1.1.2 misho 4635: continue;
4636: }
1.1.1.4 misho 4637: #endif
1.1 misho 4638: break;
4639:
4640: case 0: /* \ followed by EOF allows for an empty line */
4641: p--;
4642: continue;
4643:
4644: case '>':
4645: if (*p == '-')
4646: {
4647: start_offset_sign = -1;
4648: p++;
4649: }
4650: while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4651: start_offset *= start_offset_sign;
4652: continue;
4653:
4654: case 'A': /* Option setting */
4655: options |= PCRE_ANCHORED;
4656: continue;
4657:
4658: case 'B':
4659: options |= PCRE_NOTBOL;
4660: continue;
4661:
4662: case 'C':
4663: if (isdigit(*p)) /* Set copy string */
4664: {
4665: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4666: copystrings |= 1 << n;
4667: }
4668: else if (isalnum(*p))
4669: {
1.1.1.4 misho 4670: READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
1.1 misho 4671: }
4672: else if (*p == '+')
4673: {
4674: callout_extra = 1;
4675: p++;
4676: }
4677: else if (*p == '-')
4678: {
1.1.1.2 misho 4679: SET_PCRE_CALLOUT(NULL);
1.1 misho 4680: p++;
4681: }
4682: else if (*p == '!')
4683: {
4684: callout_fail_id = 0;
4685: p++;
4686: while(isdigit(*p))
4687: callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4688: callout_fail_count = 0;
4689: if (*p == '!')
4690: {
4691: p++;
4692: while(isdigit(*p))
4693: callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4694: }
4695: }
4696: else if (*p == '*')
4697: {
4698: int sign = 1;
4699: callout_data = 0;
4700: if (*(++p) == '-') { sign = -1; p++; }
4701: while(isdigit(*p))
4702: callout_data = callout_data * 10 + *p++ - '0';
4703: callout_data *= sign;
4704: callout_data_set = 1;
4705: }
4706: continue;
4707:
4708: #if !defined NODFA
4709: case 'D':
4710: #if !defined NOPOSIX
4711: if (posix || do_posix)
4712: printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4713: else
4714: #endif
4715: use_dfa = 1;
4716: continue;
4717: #endif
4718:
4719: #if !defined NODFA
4720: case 'F':
4721: options |= PCRE_DFA_SHORTEST;
4722: continue;
4723: #endif
4724:
4725: case 'G':
4726: if (isdigit(*p))
4727: {
4728: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4729: getstrings |= 1 << n;
4730: }
4731: else if (isalnum(*p))
4732: {
1.1.1.4 misho 4733: READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
1.1 misho 4734: }
4735: continue;
4736:
4737: case 'J':
4738: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4739: if (extra != NULL
4740: && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4741: && extra->executable_jit != NULL)
4742: {
1.1.1.2 misho 4743: if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4744: jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4745: PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
1.1 misho 4746: }
4747: continue;
4748:
4749: case 'L':
4750: getlist = 1;
4751: continue;
4752:
4753: case 'M':
4754: find_match_limit = 1;
4755: continue;
4756:
4757: case 'N':
4758: if ((options & PCRE_NOTEMPTY) != 0)
4759: options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4760: else
4761: options |= PCRE_NOTEMPTY;
4762: continue;
4763:
4764: case 'O':
4765: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4766: if (n > size_offsets_max)
4767: {
4768: size_offsets_max = n;
4769: free(offsets);
4770: use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4771: if (offsets == NULL)
4772: {
4773: printf("** Failed to get %d bytes of memory for offsets vector\n",
4774: (int)(size_offsets_max * sizeof(int)));
4775: yield = 1;
4776: goto EXIT;
4777: }
4778: }
4779: use_size_offsets = n;
4780: if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1.1.1.3 misho 4781: else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
1.1 misho 4782: continue;
4783:
4784: case 'P':
4785: options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4786: PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4787: continue;
4788:
4789: case 'Q':
4790: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4791: if (extra == NULL)
4792: {
4793: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4794: extra->flags = 0;
4795: }
4796: extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4797: extra->match_limit_recursion = n;
4798: continue;
4799:
4800: case 'q':
4801: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4802: if (extra == NULL)
4803: {
4804: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4805: extra->flags = 0;
4806: }
4807: extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4808: extra->match_limit = n;
4809: continue;
4810:
4811: #if !defined NODFA
4812: case 'R':
4813: options |= PCRE_DFA_RESTART;
4814: continue;
4815: #endif
4816:
4817: case 'S':
4818: show_malloc = 1;
4819: continue;
4820:
4821: case 'Y':
4822: options |= PCRE_NO_START_OPTIMIZE;
4823: continue;
4824:
4825: case 'Z':
4826: options |= PCRE_NOTEOL;
4827: continue;
4828:
4829: case '?':
4830: options |= PCRE_NO_UTF8_CHECK;
4831: continue;
4832:
4833: case '<':
4834: {
1.1.1.5 ! misho 4835: int x = check_mc_option(p, outfile, TRUE, "escape sequence");
1.1 misho 4836: if (x == 0) goto NEXT_DATA;
4837: options |= x;
4838: while (*p++ != '>');
4839: }
4840: continue;
4841: }
1.1.1.2 misho 4842:
1.1.1.4 misho 4843: /* We now have a character value in c that may be greater than 255.
4844: In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4845: than 127 in UTF mode must have come from \x{...} or octal constructs
4846: because values from \x.. get this far only in non-UTF mode. */
4847:
4848: #ifdef SUPPORT_PCRE8
4849: if (pcre_mode == PCRE8_MODE)
4850: {
4851: #ifndef NOUTF
4852: if (use_utf)
4853: {
4854: if (c > 0x7fffffff)
4855: {
4856: fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4857: "and so cannot be converted to UTF-8\n", c);
4858: goto NEXT_DATA;
4859: }
4860: q8 += ord2utf8(c, q8);
4861: }
4862: else
4863: #endif
4864: {
4865: if (c > 0xffu)
4866: {
4867: fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4868: "and UTF-8 mode is not enabled.\n", c);
4869: fprintf(outfile, "** Truncation will probably give the wrong "
4870: "result.\n");
4871: }
4872: *q8++ = c;
4873: }
1.1.1.2 misho 4874: }
4875: #endif
1.1.1.4 misho 4876: #ifdef SUPPORT_PCRE16
4877: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 4878: {
1.1.1.4 misho 4879: #ifndef NOUTF
4880: if (use_utf)
4881: {
4882: if (c > 0x10ffffu)
4883: {
4884: fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4885: "0x10ffff and so cannot be converted to UTF-16\n", c);
4886: goto NEXT_DATA;
4887: }
4888: else if (c >= 0x10000u)
4889: {
4890: c-= 0x10000u;
4891: *q16++ = 0xD800 | (c >> 10);
4892: *q16++ = 0xDC00 | (c & 0x3ff);
4893: }
4894: else
4895: *q16++ = c;
4896: }
4897: else
4898: #endif
1.1.1.2 misho 4899: {
1.1.1.4 misho 4900: if (c > 0xffffu)
4901: {
4902: fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4903: "and UTF-16 mode is not enabled.\n", c);
4904: fprintf(outfile, "** Truncation will probably give the wrong "
4905: "result.\n");
4906: }
4907:
4908: *q16++ = c;
1.1.1.2 misho 4909: }
4910: }
1.1.1.4 misho 4911: #endif
4912: #ifdef SUPPORT_PCRE32
4913: if (pcre_mode == PCRE32_MODE)
4914: {
4915: *q32++ = c;
4916: }
4917: #endif
4918:
1.1 misho 4919: }
1.1.1.2 misho 4920:
4921: /* Reached end of subject string */
4922:
1.1.1.4 misho 4923: #ifdef SUPPORT_PCRE8
4924: if (pcre_mode == PCRE8_MODE)
4925: {
4926: *q8 = 0;
4927: len = (int)(q8 - (pcre_uint8 *)dbuffer);
4928: }
4929: #endif
4930: #ifdef SUPPORT_PCRE16
4931: if (pcre_mode == PCRE16_MODE)
4932: {
4933: *q16 = 0;
4934: len = (int)(q16 - (pcre_uint16 *)dbuffer);
4935: }
4936: #endif
4937: #ifdef SUPPORT_PCRE32
4938: if (pcre_mode == PCRE32_MODE)
4939: {
4940: *q32 = 0;
4941: len = (int)(q32 - (pcre_uint32 *)dbuffer);
4942: }
4943: #endif
4944:
4945: /* If we're compiling with explicit valgrind support, mark the data from after
4946: its end to the end of the buffer as unaddressable, so that a read over the end
4947: of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4948: If we're not building with valgrind support, move the data to the end of the
4949: buffer so that a read over the end might at least cause a crash.
4950: If we are using the POSIX interface, we must include the terminating zero. */
1.1 misho 4951:
1.1.1.4 misho 4952: bptr = dbuffer;
1.1 misho 4953:
4954: #if !defined NOPOSIX
4955: if (posix || do_posix)
4956: {
1.1.1.4 misho 4957: #ifdef SUPPORT_VALGRIND
4958: VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4959: #else
4960: memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4961: bptr += dbuffer_size - len - 1;
4962: #endif
1.1 misho 4963: }
4964: else
4965: #endif
4966: {
1.1.1.4 misho 4967: #ifdef SUPPORT_VALGRIND
4968: VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4969: #else
4970: bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4971: #endif
1.1 misho 4972: }
4973:
4974: if ((all_use_dfa || use_dfa) && find_match_limit)
4975: {
4976: printf("**Match limit not relevant for DFA matching: ignored\n");
4977: find_match_limit = 0;
4978: }
4979:
4980: /* Handle matching via the POSIX interface, which does not
4981: support timing or playing with the match limit or callout data. */
4982:
4983: #if !defined NOPOSIX
4984: if (posix || do_posix)
4985: {
4986: int rc;
4987: int eflags = 0;
4988: regmatch_t *pmatch = NULL;
4989: if (use_size_offsets > 0)
4990: pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4991: if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4992: if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4993: if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4994:
4995: rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4996:
4997: if (rc != 0)
4998: {
4999: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5000: fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5001: }
1.1.1.4 misho 5002: else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
1.1 misho 5003: {
5004: fprintf(outfile, "Matched with REG_NOSUB\n");
5005: }
5006: else
5007: {
5008: size_t i;
5009: for (i = 0; i < (size_t)use_size_offsets; i++)
5010: {
5011: if (pmatch[i].rm_so >= 0)
5012: {
5013: fprintf(outfile, "%2d: ", (int)i);
1.1.1.2 misho 5014: PCHARSV(dbuffer, pmatch[i].rm_so,
1.1 misho 5015: pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5016: fprintf(outfile, "\n");
5017: if (do_showcaprest || (i == 0 && do_showrest))
5018: {
5019: fprintf(outfile, "%2d+ ", (int)i);
1.1.1.2 misho 5020: PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1.1 misho 5021: outfile);
5022: fprintf(outfile, "\n");
5023: }
5024: }
5025: }
5026: }
5027: free(pmatch);
1.1.1.2 misho 5028: goto NEXT_DATA;
1.1 misho 5029: }
5030:
1.1.1.2 misho 5031: #endif /* !defined NOPOSIX */
5032:
1.1 misho 5033: /* Handle matching via the native interface - repeats for /g and /G */
5034:
1.1.1.3 misho 5035: /* Ensure that there is a JIT callback if we want to verify that JIT was
5036: actually used. If jit_stack == NULL, no stack has yet been assigned. */
5037:
5038: if (verify_jit && jit_stack == NULL && extra != NULL)
5039: { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5040:
1.1 misho 5041: for (;; gmatched++) /* Loop for /g or /G */
5042: {
5043: markptr = NULL;
1.1.1.3 misho 5044: jit_was_used = FALSE;
1.1 misho 5045:
5046: if (timeitm > 0)
5047: {
5048: register int i;
5049: clock_t time_taken;
5050: clock_t start_time = clock();
5051:
5052: #if !defined NODFA
5053: if (all_use_dfa || use_dfa)
5054: {
1.1.1.3 misho 5055: if ((options & PCRE_DFA_RESTART) != 0)
5056: {
5057: fprintf(outfile, "Timing DFA restarts is not supported\n");
5058: break;
5059: }
5060: if (dfa_workspace == NULL)
5061: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
1.1 misho 5062: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 5063: {
5064: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 misho 5065: (options | g_notempty), use_offsets, use_size_offsets,
5066: dfa_workspace, DFA_WS_DIMENSION);
1.1.1.2 misho 5067: }
1.1 misho 5068: }
5069: else
5070: #endif
5071:
5072: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 5073: {
5074: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5075: (options | g_notempty), use_offsets, use_size_offsets);
5076: }
1.1.1.5 ! misho 5077: total_match_time += (time_taken = clock() - start_time);
1.1 misho 5078: fprintf(outfile, "Execute time %.4f milliseconds\n",
5079: (((double)time_taken * 1000.0) / (double)timeitm) /
5080: (double)CLOCKS_PER_SEC);
5081: }
5082:
5083: /* If find_match_limit is set, we want to do repeated matches with
5084: varying limits in order to find the minimum value for the match limit and
5085: for the recursion limit. The match limits are relevant only to the normal
5086: running of pcre_exec(), so disable the JIT optimization. This makes it
5087: possible to run the same set of tests with and without JIT externally
5088: requested. */
5089:
5090: if (find_match_limit)
5091: {
1.1.1.4 misho 5092: if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5093: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5094: extra->flags = 0;
1.1 misho 5095:
5096: (void)check_match_limit(re, extra, bptr, len, start_offset,
5097: options|g_notempty, use_offsets, use_size_offsets,
5098: PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5099: PCRE_ERROR_MATCHLIMIT, "match()");
5100:
5101: count = check_match_limit(re, extra, bptr, len, start_offset,
5102: options|g_notempty, use_offsets, use_size_offsets,
5103: PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5104: PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5105: }
5106:
5107: /* If callout_data is set, use the interface with additional data */
5108:
5109: else if (callout_data_set)
5110: {
5111: if (extra == NULL)
5112: {
5113: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5114: extra->flags = 0;
5115: }
5116: extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5117: extra->callout_data = &callout_data;
1.1.1.2 misho 5118: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
1.1 misho 5119: options | g_notempty, use_offsets, use_size_offsets);
5120: extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5121: }
5122:
5123: /* The normal case is just to do the match once, with the default
5124: value of match_limit. */
5125:
5126: #if !defined NODFA
5127: else if (all_use_dfa || use_dfa)
5128: {
1.1.1.3 misho 5129: if (dfa_workspace == NULL)
5130: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5131: if (dfa_matched++ == 0)
5132: dfa_workspace[0] = -1; /* To catch bad restart */
1.1.1.2 misho 5133: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 misho 5134: (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5135: DFA_WS_DIMENSION);
1.1 misho 5136: if (count == 0)
5137: {
1.1.1.4 misho 5138: fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
1.1 misho 5139: count = use_size_offsets/2;
5140: }
5141: }
5142: #endif
5143:
5144: else
5145: {
1.1.1.2 misho 5146: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5147: options | g_notempty, use_offsets, use_size_offsets);
1.1 misho 5148: if (count == 0)
5149: {
5150: fprintf(outfile, "Matched, but too many substrings\n");
1.1.1.4 misho 5151: /* 2 is a special case; match can be returned */
5152: count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
1.1 misho 5153: }
5154: }
5155:
5156: /* Matched */
5157:
5158: if (count >= 0)
5159: {
5160: int i, maxcount;
1.1.1.2 misho 5161: void *cnptr, *gnptr;
1.1 misho 5162:
5163: #if !defined NODFA
5164: if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5165: #endif
1.1.1.4 misho 5166: /* 2 is a special case; match can be returned */
5167: maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
1.1 misho 5168:
5169: /* This is a check against a lunatic return value. */
5170:
5171: if (count > maxcount)
5172: {
5173: fprintf(outfile,
5174: "** PCRE error: returned count %d is too big for offset size %d\n",
5175: count, use_size_offsets);
5176: count = use_size_offsets/3;
5177: if (do_g || do_G)
5178: {
5179: fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5180: do_g = do_G = FALSE; /* Break g/G loop */
5181: }
5182: }
5183:
5184: /* do_allcaps requests showing of all captures in the pattern, to check
5185: unset ones at the end. */
5186:
5187: if (do_allcaps)
5188: {
1.1.1.2 misho 5189: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5190: goto SKIP_DATA;
1.1 misho 5191: count++; /* Allow for full match */
5192: if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5193: }
5194:
5195: /* Output the captured substrings */
5196:
5197: for (i = 0; i < count * 2; i += 2)
5198: {
5199: if (use_offsets[i] < 0)
5200: {
5201: if (use_offsets[i] != -1)
5202: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5203: use_offsets[i], i);
5204: if (use_offsets[i+1] != -1)
5205: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5206: use_offsets[i+1], i+1);
5207: fprintf(outfile, "%2d: <unset>\n", i/2);
5208: }
5209: else
5210: {
5211: fprintf(outfile, "%2d: ", i/2);
1.1.1.2 misho 5212: PCHARSV(bptr, use_offsets[i],
1.1 misho 5213: use_offsets[i+1] - use_offsets[i], outfile);
1.1.1.3 misho 5214: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 5215: fprintf(outfile, "\n");
5216: if (do_showcaprest || (i == 0 && do_showrest))
5217: {
5218: fprintf(outfile, "%2d+ ", i/2);
1.1.1.2 misho 5219: PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
1.1 misho 5220: outfile);
5221: fprintf(outfile, "\n");
5222: }
5223: }
5224: }
5225:
1.1.1.2 misho 5226: if (markptr != NULL)
5227: {
5228: fprintf(outfile, "MK: ");
5229: PCHARSV(markptr, 0, -1, outfile);
5230: fprintf(outfile, "\n");
5231: }
1.1 misho 5232:
5233: for (i = 0; i < 32; i++)
5234: {
5235: if ((copystrings & (1 << i)) != 0)
5236: {
1.1.1.2 misho 5237: int rc;
1.1 misho 5238: char copybuffer[256];
1.1.1.2 misho 5239: PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5240: copybuffer, sizeof(copybuffer));
1.1 misho 5241: if (rc < 0)
5242: fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5243: else
1.1.1.2 misho 5244: {
5245: fprintf(outfile, "%2dC ", i);
5246: PCHARSV(copybuffer, 0, rc, outfile);
5247: fprintf(outfile, " (%d)\n", rc);
5248: }
1.1 misho 5249: }
5250: }
5251:
1.1.1.2 misho 5252: cnptr = copynames;
5253: for (;;)
1.1 misho 5254: {
1.1.1.2 misho 5255: int rc;
1.1 misho 5256: char copybuffer[256];
1.1.1.2 misho 5257:
1.1.1.4 misho 5258: #ifdef SUPPORT_PCRE32
5259: if (pcre_mode == PCRE32_MODE)
5260: {
5261: if (*(pcre_uint32 *)cnptr == 0) break;
5262: }
5263: #endif
5264: #ifdef SUPPORT_PCRE16
5265: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 5266: {
5267: if (*(pcre_uint16 *)cnptr == 0) break;
5268: }
1.1.1.4 misho 5269: #endif
5270: #ifdef SUPPORT_PCRE8
5271: if (pcre_mode == PCRE8_MODE)
1.1.1.2 misho 5272: {
5273: if (*(pcre_uint8 *)cnptr == 0) break;
5274: }
1.1.1.4 misho 5275: #endif
1.1.1.2 misho 5276:
5277: PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5278: cnptr, copybuffer, sizeof(copybuffer));
5279:
1.1 misho 5280: if (rc < 0)
1.1.1.2 misho 5281: {
5282: fprintf(outfile, "copy substring ");
5283: PCHARSV(cnptr, 0, -1, outfile);
5284: fprintf(outfile, " failed %d\n", rc);
5285: }
1.1 misho 5286: else
1.1.1.2 misho 5287: {
5288: fprintf(outfile, " C ");
5289: PCHARSV(copybuffer, 0, rc, outfile);
5290: fprintf(outfile, " (%d) ", rc);
5291: PCHARSV(cnptr, 0, -1, outfile);
5292: putc('\n', outfile);
5293: }
5294:
5295: cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
1.1 misho 5296: }
5297:
5298: for (i = 0; i < 32; i++)
5299: {
5300: if ((getstrings & (1 << i)) != 0)
5301: {
1.1.1.2 misho 5302: int rc;
1.1 misho 5303: const char *substring;
1.1.1.2 misho 5304: PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
1.1 misho 5305: if (rc < 0)
5306: fprintf(outfile, "get substring %d failed %d\n", i, rc);
5307: else
5308: {
1.1.1.2 misho 5309: fprintf(outfile, "%2dG ", i);
5310: PCHARSV(substring, 0, rc, outfile);
5311: fprintf(outfile, " (%d)\n", rc);
5312: PCRE_FREE_SUBSTRING(substring);
1.1 misho 5313: }
5314: }
5315: }
5316:
1.1.1.2 misho 5317: gnptr = getnames;
5318: for (;;)
1.1 misho 5319: {
1.1.1.2 misho 5320: int rc;
1.1 misho 5321: const char *substring;
1.1.1.2 misho 5322:
1.1.1.4 misho 5323: #ifdef SUPPORT_PCRE32
5324: if (pcre_mode == PCRE32_MODE)
5325: {
5326: if (*(pcre_uint32 *)gnptr == 0) break;
5327: }
5328: #endif
5329: #ifdef SUPPORT_PCRE16
5330: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 5331: {
5332: if (*(pcre_uint16 *)gnptr == 0) break;
5333: }
1.1.1.4 misho 5334: #endif
5335: #ifdef SUPPORT_PCRE8
5336: if (pcre_mode == PCRE8_MODE)
1.1.1.2 misho 5337: {
5338: if (*(pcre_uint8 *)gnptr == 0) break;
5339: }
1.1.1.4 misho 5340: #endif
1.1.1.2 misho 5341:
5342: PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5343: gnptr, &substring);
1.1 misho 5344: if (rc < 0)
1.1.1.2 misho 5345: {
5346: fprintf(outfile, "get substring ");
5347: PCHARSV(gnptr, 0, -1, outfile);
5348: fprintf(outfile, " failed %d\n", rc);
5349: }
1.1 misho 5350: else
5351: {
1.1.1.2 misho 5352: fprintf(outfile, " G ");
5353: PCHARSV(substring, 0, rc, outfile);
5354: fprintf(outfile, " (%d) ", rc);
5355: PCHARSV(gnptr, 0, -1, outfile);
5356: PCRE_FREE_SUBSTRING(substring);
5357: putc('\n', outfile);
1.1 misho 5358: }
1.1.1.2 misho 5359:
5360: gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
1.1 misho 5361: }
5362:
5363: if (getlist)
5364: {
1.1.1.2 misho 5365: int rc;
1.1 misho 5366: const char **stringlist;
1.1.1.2 misho 5367: PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
1.1 misho 5368: if (rc < 0)
5369: fprintf(outfile, "get substring list failed %d\n", rc);
5370: else
5371: {
5372: for (i = 0; i < count; i++)
1.1.1.2 misho 5373: {
5374: fprintf(outfile, "%2dL ", i);
5375: PCHARSV(stringlist[i], 0, -1, outfile);
5376: putc('\n', outfile);
5377: }
1.1 misho 5378: if (stringlist[i] != NULL)
5379: fprintf(outfile, "string list not terminated by NULL\n");
1.1.1.2 misho 5380: PCRE_FREE_SUBSTRING_LIST(stringlist);
1.1 misho 5381: }
5382: }
5383: }
5384:
1.1.1.4 misho 5385: /* There was a partial match. If the bumpalong point is not the same as
5386: the first inspected character, show the offset explicitly. */
1.1 misho 5387:
5388: else if (count == PCRE_ERROR_PARTIAL)
5389: {
1.1.1.4 misho 5390: fprintf(outfile, "Partial match");
5391: if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5392: fprintf(outfile, " at offset %d", use_offsets[2]);
5393: if (markptr != NULL)
1.1.1.2 misho 5394: {
1.1.1.4 misho 5395: fprintf(outfile, ", mark=");
1.1.1.2 misho 5396: PCHARSV(markptr, 0, -1, outfile);
5397: }
1.1 misho 5398: if (use_size_offsets > 1)
5399: {
5400: fprintf(outfile, ": ");
1.1.1.2 misho 5401: PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
1.1 misho 5402: outfile);
5403: }
1.1.1.3 misho 5404: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 5405: fprintf(outfile, "\n");
5406: break; /* Out of the /g loop */
5407: }
5408:
5409: /* Failed to match. If this is a /g or /G loop and we previously set
5410: g_notempty after a null match, this is not necessarily the end. We want
5411: to advance the start offset, and continue. We won't be at the end of the
5412: string - that was checked before setting g_notempty.
5413:
5414: Complication arises in the case when the newline convention is "any",
5415: "crlf", or "anycrlf". If the previous match was at the end of a line
5416: terminated by CRLF, an advance of one character just passes the \r,
5417: whereas we should prefer the longer newline sequence, as does the code in
5418: pcre_exec(). Fudge the offset value to achieve this. We check for a
1.1.1.2 misho 5419: newline setting in the pattern; if none was set, use PCRE_CONFIG() to
1.1 misho 5420: find the default.
5421:
5422: Otherwise, in the case of UTF-8 matching, the advance must be one
5423: character, not one byte. */
5424:
5425: else
5426: {
5427: if (g_notempty != 0)
5428: {
5429: int onechar = 1;
1.1.1.4 misho 5430: unsigned int obits = REAL_PCRE_OPTIONS(re);
1.1 misho 5431: use_offsets[0] = start_offset;
5432: if ((obits & PCRE_NEWLINE_BITS) == 0)
5433: {
5434: int d;
1.1.1.2 misho 5435: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
1.1 misho 5436: /* Note that these values are always the ASCII ones, even in
5437: EBCDIC environments. CR = 13, NL = 10. */
5438: obits = (d == 13)? PCRE_NEWLINE_CR :
5439: (d == 10)? PCRE_NEWLINE_LF :
5440: (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5441: (d == -2)? PCRE_NEWLINE_ANYCRLF :
5442: (d == -1)? PCRE_NEWLINE_ANY : 0;
5443: }
5444: if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5445: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5446: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5447: &&
1.1.1.4 misho 5448: start_offset < len - 1 && (
5449: #ifdef SUPPORT_PCRE8
5450: (pcre_mode == PCRE8_MODE &&
5451: bptr[start_offset] == '\r' &&
5452: bptr[start_offset + 1] == '\n') ||
1.1.1.2 misho 5453: #endif
1.1.1.4 misho 5454: #ifdef SUPPORT_PCRE16
5455: (pcre_mode == PCRE16_MODE &&
5456: ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5457: ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5458: #endif
5459: #ifdef SUPPORT_PCRE32
5460: (pcre_mode == PCRE32_MODE &&
5461: ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5462: ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5463: #endif
5464: 0))
1.1 misho 5465: onechar++;
1.1.1.2 misho 5466: else if (use_utf)
1.1 misho 5467: {
5468: while (start_offset + onechar < len)
5469: {
5470: if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5471: onechar++;
5472: }
5473: }
5474: use_offsets[1] = start_offset + onechar;
5475: }
5476: else
5477: {
5478: switch(count)
5479: {
5480: case PCRE_ERROR_NOMATCH:
5481: if (gmatched == 0)
5482: {
1.1.1.2 misho 5483: if (markptr == NULL)
5484: {
1.1.1.3 misho 5485: fprintf(outfile, "No match");
1.1.1.2 misho 5486: }
5487: else
5488: {
5489: fprintf(outfile, "No match, mark = ");
5490: PCHARSV(markptr, 0, -1, outfile);
5491: }
1.1.1.3 misho 5492: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5493: putc('\n', outfile);
1.1 misho 5494: }
5495: break;
5496:
5497: case PCRE_ERROR_BADUTF8:
5498: case PCRE_ERROR_SHORTUTF8:
1.1.1.4 misho 5499: fprintf(outfile, "Error %d (%s UTF-%d string)", count,
1.1.1.2 misho 5500: (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
1.1.1.4 misho 5501: 8 * CHAR_SIZE);
1.1 misho 5502: if (use_size_offsets >= 2)
5503: fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5504: use_offsets[1]);
5505: fprintf(outfile, "\n");
5506: break;
5507:
1.1.1.2 misho 5508: case PCRE_ERROR_BADUTF8_OFFSET:
1.1.1.4 misho 5509: fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5510: 8 * CHAR_SIZE);
1.1.1.2 misho 5511: break;
5512:
1.1 misho 5513: default:
1.1.1.2 misho 5514: if (count < 0 &&
5515: (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
1.1 misho 5516: fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5517: else
5518: fprintf(outfile, "Error %d (Unexpected value)\n", count);
5519: break;
5520: }
5521:
5522: break; /* Out of the /g loop */
5523: }
5524: }
5525:
5526: /* If not /g or /G we are done */
5527:
5528: if (!do_g && !do_G) break;
5529:
5530: /* If we have matched an empty string, first check to see if we are at
5531: the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5532: Perl's /g option does. This turns out to be rather cunning. First we set
5533: PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5534: same point. If this fails (picked up above) we advance to the next
5535: character. */
5536:
5537: g_notempty = 0;
5538:
5539: if (use_offsets[0] == use_offsets[1])
5540: {
5541: if (use_offsets[0] == len) break;
5542: g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5543: }
5544:
5545: /* For /g, update the start offset, leaving the rest alone */
5546:
5547: if (do_g) start_offset = use_offsets[1];
5548:
5549: /* For /G, update the pointer and length */
5550:
5551: else
5552: {
1.1.1.2 misho 5553: bptr += use_offsets[1] * CHAR_SIZE;
1.1 misho 5554: len -= use_offsets[1];
5555: }
5556: } /* End of loop for /g and /G */
5557:
5558: NEXT_DATA: continue;
5559: } /* End of loop for data lines */
5560:
5561: CONTINUE:
5562:
5563: #if !defined NOPOSIX
5564: if (posix || do_posix) regfree(&preg);
5565: #endif
5566:
5567: if (re != NULL) new_free(re);
1.1.1.2 misho 5568: if (extra != NULL)
5569: {
5570: PCRE_FREE_STUDY(extra);
5571: }
1.1 misho 5572: if (locale_set)
5573: {
5574: new_free((void *)tables);
5575: setlocale(LC_CTYPE, "C");
5576: locale_set = 0;
5577: }
5578: if (jit_stack != NULL)
5579: {
1.1.1.2 misho 5580: PCRE_JIT_STACK_FREE(jit_stack);
1.1 misho 5581: jit_stack = NULL;
5582: }
5583: }
5584:
5585: if (infile == stdin) fprintf(outfile, "\n");
5586:
1.1.1.5 ! misho 5587: if (showtotaltimes)
! 5588: {
! 5589: fprintf(outfile, "--------------------------------------\n");
! 5590: if (timeit > 0)
! 5591: {
! 5592: fprintf(outfile, "Total compile time %.4f milliseconds\n",
! 5593: (((double)total_compile_time * 1000.0) / (double)timeit) /
! 5594: (double)CLOCKS_PER_SEC);
! 5595: fprintf(outfile, "Total study time %.4f milliseconds\n",
! 5596: (((double)total_study_time * 1000.0) / (double)timeit) /
! 5597: (double)CLOCKS_PER_SEC);
! 5598: }
! 5599: fprintf(outfile, "Total execute time %.4f milliseconds\n",
! 5600: (((double)total_match_time * 1000.0) / (double)timeitm) /
! 5601: (double)CLOCKS_PER_SEC);
! 5602: }
! 5603:
1.1 misho 5604: EXIT:
5605:
5606: if (infile != NULL && infile != stdin) fclose(infile);
5607: if (outfile != NULL && outfile != stdout) fclose(outfile);
5608:
5609: free(buffer);
5610: free(dbuffer);
5611: free(pbuffer);
5612: free(offsets);
5613:
1.1.1.2 misho 5614: #ifdef SUPPORT_PCRE16
5615: if (buffer16 != NULL) free(buffer16);
5616: #endif
1.1.1.4 misho 5617: #ifdef SUPPORT_PCRE32
5618: if (buffer32 != NULL) free(buffer32);
5619: #endif
5620:
5621: #if !defined NODFA
5622: if (dfa_workspace != NULL)
5623: free(dfa_workspace);
5624: #endif
5625:
5626: #if defined(__VMS)
5627: yield = SS$_NORMAL; /* Return values via DCL symbols */
5628: #endif
1.1.1.2 misho 5629:
1.1 misho 5630: return yield;
5631: }
5632:
5633: /* End of pcretest.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>