Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.4
1.1 misho 1: /*************************************************
2: * PCRE testing program *
3: *************************************************/
4:
5: /* This program was hacked up as a tester for PCRE. I really should have
6: written it more tidily in the first place. Will I ever learn? It has grown and
1.1.1.2 misho 7: been extended and consequently is now rather, er, *very* untidy in places. The
8: addition of 16-bit support has made it even worse. :-(
1.1 misho 9:
10: -----------------------------------------------------------------------------
11: Redistribution and use in source and binary forms, with or without
12: modification, are permitted provided that the following conditions are met:
13:
14: * Redistributions of source code must retain the above copyright notice,
15: this list of conditions and the following disclaimer.
16:
17: * Redistributions in binary form must reproduce the above copyright
18: notice, this list of conditions and the following disclaimer in the
19: documentation and/or other materials provided with the distribution.
20:
21: * Neither the name of the University of Cambridge nor the names of its
22: contributors may be used to endorse or promote products derived from
23: this software without specific prior written permission.
24:
25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: POSSIBILITY OF SUCH DAMAGE.
36: -----------------------------------------------------------------------------
37: */
38:
1.1.1.4 ! misho 39: /* This program now supports the testing of all of the 8-bit, 16-bit, and
! 40: 32-bit PCRE libraries in a single program. This is different from the modules
! 41: such as pcre_compile.c in the library itself, which are compiled separately for
! 42: each mode. If two modes are enabled, for example, pcre_compile.c is compiled
! 43: twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
! 44: make use of any of the macros from pcre_internal.h that depend on
! 45: COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
! 46: SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
! 47: supported library functions. */
1.1.1.2 misho 48:
1.1 misho 49: #ifdef HAVE_CONFIG_H
50: #include "config.h"
51: #endif
52:
53: #include <ctype.h>
54: #include <stdio.h>
55: #include <string.h>
56: #include <stdlib.h>
57: #include <time.h>
58: #include <locale.h>
59: #include <errno.h>
60:
1.1.1.3 misho 61: /* Both libreadline and libedit are optionally supported. The user-supplied
62: original patch uses readline/readline.h for libedit, but in at least one system
63: it is installed as editline/readline.h, so the configuration code now looks for
64: that first, falling back to readline/readline.h. */
65:
66: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 67: #ifdef HAVE_UNISTD_H
68: #include <unistd.h>
69: #endif
1.1.1.3 misho 70: #if defined(SUPPORT_LIBREADLINE)
1.1 misho 71: #include <readline/readline.h>
72: #include <readline/history.h>
1.1.1.3 misho 73: #else
74: #if defined(HAVE_EDITLINE_READLINE_H)
75: #include <editline/readline.h>
76: #else
77: #include <readline/readline.h>
78: #endif
79: #endif
1.1 misho 80: #endif
81:
82: /* A number of things vary for Windows builds. Originally, pcretest opened its
83: input and output without "b"; then I was told that "b" was needed in some
84: environments, so it was added for release 5.0 to both the input and output. (It
85: makes no difference on Unix-like systems.) Later I was told that it is wrong
86: for the input on Windows. I've now abstracted the modes into two macros that
87: are set here, to make it easier to fiddle with them, and removed "b" from the
88: input mode under Windows. */
89:
90: #if defined(_WIN32) || defined(WIN32)
91: #include <io.h> /* For _setmode() */
92: #include <fcntl.h> /* For _O_BINARY */
93: #define INPUT_MODE "r"
94: #define OUTPUT_MODE "wb"
95:
96: #ifndef isatty
97: #define isatty _isatty /* This is what Windows calls them, I'm told, */
98: #endif /* though in some environments they seem to */
99: /* be already defined, hence the #ifndefs. */
100: #ifndef fileno
101: #define fileno _fileno
102: #endif
103:
104: /* A user sent this fix for Borland Builder 5 under Windows. */
105:
106: #ifdef __BORLANDC__
107: #define _setmode(handle, mode) setmode(handle, mode)
108: #endif
109:
110: /* Not Windows */
111:
112: #else
113: #include <sys/time.h> /* These two includes are needed */
114: #include <sys/resource.h> /* for setrlimit(). */
1.1.1.4 ! misho 115: #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
! 116: #define INPUT_MODE "r"
! 117: #define OUTPUT_MODE "w"
! 118: #else
1.1 misho 119: #define INPUT_MODE "rb"
120: #define OUTPUT_MODE "wb"
121: #endif
1.1.1.4 ! misho 122: #endif
! 123:
! 124: #ifdef __VMS
! 125: #include <ssdef.h>
! 126: void vms_setsymbol( char *, char *, int );
! 127: #endif
! 128:
1.1 misho 129:
1.1.1.2 misho 130: #define PRIV(name) name
1.1 misho 131:
132: /* We have to include pcre_internal.h because we need the internal info for
133: displaying the results of pcre_study() and we also need to know about the
134: internal macros, structures, and other internal data values; pcretest has
135: "inside information" compared to a program that strictly follows the PCRE API.
136:
137: Although pcre_internal.h does itself include pcre.h, we explicitly include it
138: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139: appropriately for an application, not for building PCRE. */
140:
141: #include "pcre.h"
142: #include "pcre_internal.h"
143:
1.1.1.2 misho 144: /* The pcre_printint() function, which prints the internal form of a compiled
145: regex, is held in a separate file so that (a) it can be compiled in either
1.1.1.4 ! misho 146: 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
1.1.1.2 misho 147: when that is compiled in debug mode. */
148:
149: #ifdef SUPPORT_PCRE8
150: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151: #endif
152: #ifdef SUPPORT_PCRE16
153: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
154: #endif
1.1.1.4 ! misho 155: #ifdef SUPPORT_PCRE32
! 156: void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
! 157: #endif
1.1.1.2 misho 158:
1.1 misho 159: /* We need access to some of the data tables that PCRE uses. So as not to have
1.1.1.4 ! misho 160: to keep two copies, we include the source files here, changing the names of the
1.1 misho 161: external symbols to prevent clashes. */
162:
1.1.1.2 misho 163: #define PCRE_INCLUDED
1.1 misho 164:
165: #include "pcre_tables.c"
1.1.1.4 ! misho 166: #include "pcre_ucd.c"
1.1 misho 167:
168: /* The definition of the macro PRINTABLE, which determines whether to print an
169: output character as-is or as a hex value when showing compiled patterns, is
1.1.1.2 misho 170: the same as in the printint.src file. We use it here in cases when the locale
171: has not been explicitly changed, so as to get consistent output from systems
172: that differ in their output from isprint() even in the "C" locale. */
1.1 misho 173:
1.1.1.2 misho 174: #ifdef EBCDIC
175: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
176: #else
177: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
178: #endif
179:
180: #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
181:
1.1.1.4 ! misho 182: /* Posix support is disabled in 16 or 32 bit only mode. */
! 183: #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
1.1.1.2 misho 184: #define NOPOSIX
185: #endif
1.1 misho 186:
187: /* It is possible to compile this test program without including support for
188: testing the POSIX interface, though this is not available via the standard
189: Makefile. */
190:
191: #if !defined NOPOSIX
192: #include "pcreposix.h"
193: #endif
194:
1.1.1.2 misho 195: /* It is also possible, originally for the benefit of a version that was
196: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197: NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
198: automatically cut out the UTF support if PCRE is built without it. */
199:
200: #ifndef SUPPORT_UTF
201: #ifndef NOUTF
202: #define NOUTF
1.1 misho 203: #endif
204: #endif
205:
1.1.1.4 ! misho 206: /* To make the code a bit tidier for 8/16/32-bit support, we define macros
1.1.1.2 misho 207: for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208: only from one place and is handled differently). I couldn't dream up any way of
209: using a single macro to do this in a generic way, because of the many different
210: argument requirements. We know that at least one of SUPPORT_PCRE8,
211: SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212: use these in the definitions of generic macros.
213:
214: **** Special note about the PCHARSxxx macros: the address of the string to be
215: printed is always given as two arguments: a base address followed by an offset.
216: The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217: offset is in units of this size. If the string were given as base+offset in one
218: argument, the casting might be incorrectly applied. */
219:
220: #ifdef SUPPORT_PCRE8
221:
222: #define PCHARS8(lv, p, offset, len, f) \
223: lv = pchars((pcre_uint8 *)(p) + offset, len, f)
224:
225: #define PCHARSV8(p, offset, len, f) \
226: (void)pchars((pcre_uint8 *)(p) + offset, len, f)
227:
1.1.1.4 ! misho 228: #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
1.1.1.2 misho 229: p = read_capture_name8(p, cn8, re)
230:
231: #define STRLEN8(p) ((int)strlen((char *)p))
232:
233: #define SET_PCRE_CALLOUT8(callout) \
234: pcre_callout = callout
235:
236: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
237: pcre_assign_jit_stack(extra, callback, userdata)
238:
239: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
240: re = pcre_compile((char *)pat, options, error, erroffset, tables)
241:
242: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
243: namesptr, cbuffer, size) \
244: rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
245: (char *)namesptr, cbuffer, size)
246:
247: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
248: rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
249:
250: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
251: offsets, size_offsets, workspace, size_workspace) \
252: count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
253: offsets, size_offsets, workspace, size_workspace)
254:
255: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
256: offsets, size_offsets) \
257: count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
258: offsets, size_offsets)
259:
260: #define PCRE_FREE_STUDY8(extra) \
261: pcre_free_study(extra)
262:
263: #define PCRE_FREE_SUBSTRING8(substring) \
264: pcre_free_substring(substring)
265:
266: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
267: pcre_free_substring_list(listptr)
268:
269: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
270: getnamesptr, subsptr) \
271: rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
272: (char *)getnamesptr, subsptr)
273:
274: #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
275: n = pcre_get_stringnumber(re, (char *)ptr)
276:
277: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
278: rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
279:
280: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
281: rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
282:
283: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
284: rc = pcre_pattern_to_host_byte_order(re, extra, tables)
285:
286: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
287: pcre_printint(re, outfile, debug_lengths)
288:
289: #define PCRE_STUDY8(extra, re, options, error) \
290: extra = pcre_study(re, options, error)
291:
292: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
293: pcre_jit_stack_alloc(startsize, maxsize)
294:
295: #define PCRE_JIT_STACK_FREE8(stack) \
296: pcre_jit_stack_free(stack)
297:
1.1.1.4 ! misho 298: #define pcre8_maketables pcre_maketables
! 299:
1.1.1.2 misho 300: #endif /* SUPPORT_PCRE8 */
301:
302: /* -----------------------------------------------------------*/
303:
304: #ifdef SUPPORT_PCRE16
305:
306: #define PCHARS16(lv, p, offset, len, f) \
307: lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
308:
309: #define PCHARSV16(p, offset, len, f) \
310: (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
311:
1.1.1.4 ! misho 312: #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
1.1.1.2 misho 313: p = read_capture_name16(p, cn16, re)
314:
315: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
316:
317: #define SET_PCRE_CALLOUT16(callout) \
318: pcre16_callout = (int (*)(pcre16_callout_block *))callout
319:
320: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
321: pcre16_assign_jit_stack((pcre16_extra *)extra, \
322: (pcre16_jit_callback)callback, userdata)
323:
324: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
325: re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
326: tables)
327:
328: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
329: namesptr, cbuffer, size) \
330: rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
331: count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
332:
333: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
334: rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
335: (PCRE_UCHAR16 *)cbuffer, size/2)
336:
337: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338: offsets, size_offsets, workspace, size_workspace) \
339: count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
340: (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
341: workspace, size_workspace)
342:
343: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344: offsets, size_offsets) \
345: count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
346: len, start_offset, options, offsets, size_offsets)
347:
348: #define PCRE_FREE_STUDY16(extra) \
349: pcre16_free_study((pcre16_extra *)extra)
350:
351: #define PCRE_FREE_SUBSTRING16(substring) \
352: pcre16_free_substring((PCRE_SPTR16)substring)
353:
354: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
355: pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
356:
357: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
358: getnamesptr, subsptr) \
359: rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
360: count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
361:
/* Look up the number of a named capturing group using the 16-bit library.

  n     lvalue that receives the result of pcre16_get_stringnumber()
  rc    not referenced by the expansion
  ptr   the group name; cast to PCRE_SPTR16

The compiled pattern is NOT a macro parameter: the expansion uses whatever
variable named "re" is in scope where the macro is invoked. It is cast to
(pcre16 *) in the same way as in the other 16-bit wrappers in this file.
NOTE(review): the second parameter is named rc but looks as if it was meant to
be re - confirm against the callers before renaming it. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
364:
365: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
366: rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
367: (PCRE_SPTR16 *)(void*)subsptr)
368:
369: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
370: rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
371: (PCRE_SPTR16 **)(void*)listptr)
372:
373: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
374: rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
375: tables)
376:
377: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
378: pcre16_printint(re, outfile, debug_lengths)
379:
380: #define PCRE_STUDY16(extra, re, options, error) \
381: extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
382:
383: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
384: (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
385:
386: #define PCRE_JIT_STACK_FREE16(stack) \
387: pcre16_jit_stack_free((pcre16_jit_stack *)stack)
388:
389: #endif /* SUPPORT_PCRE16 */
390:
1.1.1.4 ! misho 391: /* -----------------------------------------------------------*/
! 392:
! 393: #ifdef SUPPORT_PCRE32
! 394:
! 395: #define PCHARS32(lv, p, offset, len, f) \
! 396: lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
! 397:
! 398: #define PCHARSV32(p, offset, len, f) \
! 399: (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
! 400:
! 401: #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
! 402: p = read_capture_name32(p, cn32, re)
! 403:
! 404: #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
! 405:
! 406: #define SET_PCRE_CALLOUT32(callout) \
! 407: pcre32_callout = (int (*)(pcre32_callout_block *))callout
! 408:
! 409: #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
! 410: pcre32_assign_jit_stack((pcre32_extra *)extra, \
! 411: (pcre32_jit_callback)callback, userdata)
! 412:
! 413: #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
! 414: re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
! 415: tables)
! 416:
/* Copy a named captured substring into a caller-supplied buffer using the
32-bit library.

  rc        lvalue that receives the result of pcre32_copy_named_substring()
  re        compiled pattern; cast to (pcre32 *)
  bptr      subject string; cast to PCRE_SPTR32
  offsets   passed through unchanged
  count     passed through unchanged
  namesptr  name of the group; cast to PCRE_SPTR32
  cbuffer   destination buffer; cast to (PCRE_UCHAR32 *)
  size      size of cbuffer, taken to be a byte count

The 8-bit wrapper passes size unscaled and the 16-bit wrapper passes size/2,
so size is a byte count and has to be divided by 4 to express the capacity in
32-bit units. Dividing by 2 here would tell the library that the buffer is
twice as large as it really is.
NOTE(review): assumes PCRE_UCHAR32 is 4 bytes wide - confirm in pcre.h. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
! 421:
/* Copy a numbered captured substring into a caller-supplied buffer using the
32-bit library.

  rc        lvalue that receives the result of pcre32_copy_substring()
  bptr      subject string; cast to PCRE_SPTR32
  offsets   passed through unchanged
  count     passed through unchanged
  i         passed through unchanged
  cbuffer   destination buffer; cast to (PCRE_UCHAR32 *)
  size      size of cbuffer, taken to be a byte count

The 8-bit wrapper passes size unscaled and the 16-bit wrapper passes size/2,
so size is a byte count and has to be divided by 4 to express the capacity in
32-bit units. Dividing by 2 here would tell the library that the buffer is
twice as large as it really is.
NOTE(review): assumes PCRE_UCHAR32 is 4 bytes wide - confirm in pcre.h. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
! 425:
! 426: #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
! 427: offsets, size_offsets, workspace, size_workspace) \
! 428: count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
! 429: (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
! 430: workspace, size_workspace)
! 431:
! 432: #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
! 433: offsets, size_offsets) \
! 434: count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
! 435: len, start_offset, options, offsets, size_offsets)
! 436:
! 437: #define PCRE_FREE_STUDY32(extra) \
! 438: pcre32_free_study((pcre32_extra *)extra)
! 439:
! 440: #define PCRE_FREE_SUBSTRING32(substring) \
! 441: pcre32_free_substring((PCRE_SPTR32)substring)
! 442:
! 443: #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
! 444: pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
! 445:
! 446: #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
! 447: getnamesptr, subsptr) \
! 448: rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
! 449: count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
! 450:
/* Look up the number of a named capturing group using the 32-bit library.

  n     lvalue that receives the result of pcre32_get_stringnumber()
  rc    not referenced by the expansion
  ptr   the group name; cast to PCRE_SPTR32

The compiled pattern is NOT a macro parameter: the expansion uses whatever
variable named "re" is in scope where the macro is invoked. It is cast to
(pcre32 *) in the same way as in the other 32-bit wrappers in this file, so
that a pattern held in a (pcre *) variable is passed with the pointer type the
32-bit function is given everywhere else.
NOTE(review): the second parameter is named rc but looks as if it was meant to
be re - confirm against the callers before renaming it. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)ptr)
! 453:
! 454: #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
! 455: rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
! 456: (PCRE_SPTR32 *)(void*)subsptr)
! 457:
! 458: #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
! 459: rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
! 460: (PCRE_SPTR32 **)(void*)listptr)
! 461:
! 462: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
! 463: rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
! 464: tables)
! 465:
! 466: #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
! 467: pcre32_printint(re, outfile, debug_lengths)
! 468:
! 469: #define PCRE_STUDY32(extra, re, options, error) \
! 470: extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
! 471:
! 472: #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
! 473: (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
! 474:
! 475: #define PCRE_JIT_STACK_FREE32(stack) \
! 476: pcre32_jit_stack_free((pcre32_jit_stack *)stack)
! 477:
! 478: #endif /* SUPPORT_PCRE32 */
1.1.1.2 misho 479:
1.1.1.4 ! misho 480:
! 481: /* ----- More than one mode is supported; a runtime test is needed, except for
1.1.1.2 misho 482: pcre_config(), and the JIT stack functions, when it doesn't matter which
1.1.1.4 ! misho 483: available version is called. ----- */
! 484:
/* Selects which PCRE library (8-, 16- or 32-bit) is being exercised. The
values are spelled out because CHAR_SIZE is defined as (1 << pcre_mode), which
gives 1, 2 and 4 only when the modes are exactly 0, 1 and 2; reordering or
inserting an enumerator would silently break that. */

enum {
  PCRE8_MODE = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
! 490:
! 491: #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
! 492: defined (SUPPORT_PCRE32)) >= 2
1.1.1.2 misho 493:
1.1.1.4 ! misho 494: #define CHAR_SIZE (1 << pcre_mode)
1.1.1.2 misho 495:
1.1.1.4 ! misho 496: /* There doesn't seem to be an easy way of writing these macros that can cope
! 497: with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
! 498: cases separately. */
! 499:
! 500: /* ----- All three modes supported ----- */
! 501:
! 502: #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
1.1.1.2 misho 503:
504: #define PCHARS(lv, p, offset, len, f) \
1.1.1.4 ! misho 505: if (pcre_mode == PCRE32_MODE) \
! 506: PCHARS32(lv, p, offset, len, f); \
! 507: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 508: PCHARS16(lv, p, offset, len, f); \
509: else \
510: PCHARS8(lv, p, offset, len, f)
511:
512: #define PCHARSV(p, offset, len, f) \
1.1.1.4 ! misho 513: if (pcre_mode == PCRE32_MODE) \
! 514: PCHARSV32(p, offset, len, f); \
! 515: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 516: PCHARSV16(p, offset, len, f); \
517: else \
518: PCHARSV8(p, offset, len, f)
519:
1.1.1.4 ! misho 520: #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
! 521: if (pcre_mode == PCRE32_MODE) \
! 522: READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
! 523: else if (pcre_mode == PCRE16_MODE) \
! 524: READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
1.1.1.2 misho 525: else \
1.1.1.4 ! misho 526: READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
1.1.1.2 misho 527:
528: #define SET_PCRE_CALLOUT(callout) \
1.1.1.4 ! misho 529: if (pcre_mode == PCRE32_MODE) \
! 530: SET_PCRE_CALLOUT32(callout); \
! 531: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 532: SET_PCRE_CALLOUT16(callout); \
533: else \
534: SET_PCRE_CALLOUT8(callout)
535:
1.1.1.4 ! misho 536: #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
1.1.1.2 misho 537:
538: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
1.1.1.4 ! misho 539: if (pcre_mode == PCRE32_MODE) \
! 540: PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
! 541: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 542: PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
543: else \
544: PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
545:
546: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
1.1.1.4 ! misho 547: if (pcre_mode == PCRE32_MODE) \
! 548: PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
! 549: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 550: PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
551: else \
552: PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
553:
554: #define PCRE_CONFIG pcre_config
555:
556: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
557: namesptr, cbuffer, size) \
1.1.1.4 ! misho 558: if (pcre_mode == PCRE32_MODE) \
! 559: PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
! 560: namesptr, cbuffer, size); \
! 561: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 562: PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
563: namesptr, cbuffer, size); \
564: else \
565: PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
566: namesptr, cbuffer, size)
567:
568: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
1.1.1.4 ! misho 569: if (pcre_mode == PCRE32_MODE) \
! 570: PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
! 571: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 572: PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
573: else \
574: PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
575:
576: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
577: offsets, size_offsets, workspace, size_workspace) \
1.1.1.4 ! misho 578: if (pcre_mode == PCRE32_MODE) \
! 579: PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
! 580: offsets, size_offsets, workspace, size_workspace); \
! 581: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 582: PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
583: offsets, size_offsets, workspace, size_workspace); \
584: else \
585: PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
586: offsets, size_offsets, workspace, size_workspace)
587:
588: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
589: offsets, size_offsets) \
1.1.1.4 ! misho 590: if (pcre_mode == PCRE32_MODE) \
! 591: PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
! 592: offsets, size_offsets); \
! 593: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 594: PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
595: offsets, size_offsets); \
596: else \
597: PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
598: offsets, size_offsets)
599:
600: #define PCRE_FREE_STUDY(extra) \
1.1.1.4 ! misho 601: if (pcre_mode == PCRE32_MODE) \
! 602: PCRE_FREE_STUDY32(extra); \
! 603: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 604: PCRE_FREE_STUDY16(extra); \
605: else \
606: PCRE_FREE_STUDY8(extra)
607:
608: #define PCRE_FREE_SUBSTRING(substring) \
1.1.1.4 ! misho 609: if (pcre_mode == PCRE32_MODE) \
! 610: PCRE_FREE_SUBSTRING32(substring); \
! 611: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 612: PCRE_FREE_SUBSTRING16(substring); \
613: else \
614: PCRE_FREE_SUBSTRING8(substring)
615:
616: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
1.1.1.4 ! misho 617: if (pcre_mode == PCRE32_MODE) \
! 618: PCRE_FREE_SUBSTRING_LIST32(listptr); \
! 619: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 620: PCRE_FREE_SUBSTRING_LIST16(listptr); \
621: else \
622: PCRE_FREE_SUBSTRING_LIST8(listptr)
623:
624: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
625: getnamesptr, subsptr) \
1.1.1.4 ! misho 626: if (pcre_mode == PCRE32_MODE) \
! 627: PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
! 628: getnamesptr, subsptr); \
! 629: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 630: PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
631: getnamesptr, subsptr); \
632: else \
633: PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
634: getnamesptr, subsptr)
635:
636: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
1.1.1.4 ! misho 637: if (pcre_mode == PCRE32_MODE) \
! 638: PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
! 639: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 640: PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
641: else \
642: PCRE_GET_STRINGNUMBER8(n, rc, ptr)
643:
644: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
1.1.1.4 ! misho 645: if (pcre_mode == PCRE32_MODE) \
! 646: PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
! 647: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 648: PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
649: else \
650: PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
651:
652: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
1.1.1.4 ! misho 653: if (pcre_mode == PCRE32_MODE) \
! 654: PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
! 655: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 656: PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
657: else \
658: PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
659:
660: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
1.1.1.4 ! misho 661: (pcre_mode == PCRE32_MODE ? \
! 662: PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
! 663: : pcre_mode == PCRE16_MODE ? \
! 664: PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
! 665: : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
1.1.1.2 misho 666:
667: #define PCRE_JIT_STACK_FREE(stack) \
1.1.1.4 ! misho 668: if (pcre_mode == PCRE32_MODE) \
! 669: PCRE_JIT_STACK_FREE32(stack); \
! 670: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 671: PCRE_JIT_STACK_FREE16(stack); \
672: else \
673: PCRE_JIT_STACK_FREE8(stack)
674:
675: #define PCRE_MAKETABLES \
1.1.1.4 ! misho 676: (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
1.1.1.2 misho 677:
678: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
1.1.1.4 ! misho 679: if (pcre_mode == PCRE32_MODE) \
! 680: PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
! 681: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 682: PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
683: else \
684: PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
685:
686: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
1.1.1.4 ! misho 687: if (pcre_mode == PCRE32_MODE) \
! 688: PCRE_PRINTINT32(re, outfile, debug_lengths); \
! 689: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 690: PCRE_PRINTINT16(re, outfile, debug_lengths); \
691: else \
692: PCRE_PRINTINT8(re, outfile, debug_lengths)
693:
694: #define PCRE_STUDY(extra, re, options, error) \
1.1.1.4 ! misho 695: if (pcre_mode == PCRE32_MODE) \
! 696: PCRE_STUDY32(extra, re, options, error); \
! 697: else if (pcre_mode == PCRE16_MODE) \
1.1.1.2 misho 698: PCRE_STUDY16(extra, re, options, error); \
699: else \
700: PCRE_STUDY8(extra, re, options, error)
701:
1.1.1.4 ! misho 702:
! 703: /* ----- Two out of three modes are supported ----- */
! 704:
! 705: #else
! 706:
! 707: /* We can use some macro trickery to make a single set of definitions work in
! 708: the three different cases. */
! 709:
! 710: /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
! 711:
! 712: #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
! 713: #define BITONE 32
! 714: #define BITTWO 16
! 715:
! 716: /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
! 717:
! 718: #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
! 719: #define BITONE 32
! 720: #define BITTWO 8
! 721:
! 722: /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
! 723:
! 724: #else
! 725: #define BITONE 16
! 726: #define BITTWO 8
! 727: #endif
! 728:
! 729: #define glue(a,b) a##b
! 730: #define G(a,b) glue(a,b)
! 731:
! 732:
! 733: /* ----- Common macros for two-mode cases ----- */
! 734:
! 735: #define PCHARS(lv, p, offset, len, f) \
! 736: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 737: G(PCHARS,BITONE)(lv, p, offset, len, f); \
! 738: else \
! 739: G(PCHARS,BITTWO)(lv, p, offset, len, f)
! 740:
! 741: #define PCHARSV(p, offset, len, f) \
! 742: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 743: G(PCHARSV,BITONE)(p, offset, len, f); \
! 744: else \
! 745: G(PCHARSV,BITTWO)(p, offset, len, f)
! 746:
! 747: #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
! 748: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 749: G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
! 750: else \
! 751: G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
! 752:
! 753: #define SET_PCRE_CALLOUT(callout) \
! 754: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 755: G(SET_PCRE_CALLOUT,BITONE)(callout); \
! 756: else \
! 757: G(SET_PCRE_CALLOUT,BITTWO)(callout)
! 758:
! 759: #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
! 760: G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
! 761:
! 762: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
! 763: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 764: G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
! 765: else \
! 766: G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
! 767:
! 768: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
! 769: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 770: G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
! 771: else \
! 772: G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
! 773:
! 774: #define PCRE_CONFIG G(G(pcre,BITONE),_config)
! 775:
! 776: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
! 777: namesptr, cbuffer, size) \
! 778: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 779: G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
! 780: namesptr, cbuffer, size); \
! 781: else \
! 782: G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
! 783: namesptr, cbuffer, size)
! 784:
! 785: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
! 786: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 787: G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
! 788: else \
! 789: G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
! 790:
! 791: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
! 792: offsets, size_offsets, workspace, size_workspace) \
! 793: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 794: G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
! 795: offsets, size_offsets, workspace, size_workspace); \
! 796: else \
! 797: G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
! 798: offsets, size_offsets, workspace, size_workspace)
! 799:
! 800: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
! 801: offsets, size_offsets) \
! 802: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 803: G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
! 804: offsets, size_offsets); \
! 805: else \
! 806: G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
! 807: offsets, size_offsets)
! 808:
! 809: #define PCRE_FREE_STUDY(extra) \
! 810: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 811: G(PCRE_FREE_STUDY,BITONE)(extra); \
! 812: else \
! 813: G(PCRE_FREE_STUDY,BITTWO)(extra)
! 814:
! 815: #define PCRE_FREE_SUBSTRING(substring) \
! 816: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 817: G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
! 818: else \
! 819: G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
! 820:
! 821: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
! 822: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 823: G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
! 824: else \
! 825: G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
! 826:
! 827: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
! 828: getnamesptr, subsptr) \
! 829: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 830: G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
! 831: getnamesptr, subsptr); \
! 832: else \
! 833: G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
! 834: getnamesptr, subsptr)
! 835:
! 836: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
! 837: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 838: G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
! 839: else \
! 840: G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
! 841:
! 842: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
! 843: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 844: G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
! 845: else \
! 846: G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
! 847:
! 848: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
! 849: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 850: G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
! 851: else \
! 852: G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
! 853: /* Dispatches to the PCRE_JIT_STACK_ALLOC variant for the current pcre_mode. The whole conditional is parenthesized so the macro is safe as an operand of a larger expression. */
! 854: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
! 855: ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
! 856: G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
! 857: : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
! 858:
! 859: #define PCRE_JIT_STACK_FREE(stack) \
! 860: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 861: G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
! 862: else \
! 863: G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
! 864: /* Calls the maketables function of the library selected by pcre_mode. The whole conditional is parenthesized so the macro is safe as an operand of a larger expression. */
! 865: #define PCRE_MAKETABLES \
! 866: ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
! 867: G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
! 868:
! 869: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
! 870: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 871: G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
! 872: else \
! 873: G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
! 874:
! 875: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
! 876: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 877: G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
! 878: else \
! 879: G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
! 880:
! 881: #define PCRE_STUDY(extra, re, options, error) \
! 882: if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
! 883: G(PCRE_STUDY,BITONE)(extra, re, options, error); \
! 884: else \
! 885: G(PCRE_STUDY,BITTWO)(extra, re, options, error)
! 886:
! 887: #endif /* Two out of three modes */
! 888:
! 889: /* ----- End of cases where more than one mode is supported ----- */
! 890:
! 891:
1.1.1.2 misho 892: /* ----- Only 8-bit mode is supported ----- */
893:
894: #elif defined SUPPORT_PCRE8
895: #define CHAR_SIZE 1
896: #define PCHARS PCHARS8
897: #define PCHARSV PCHARSV8
898: #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
899: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
900: #define STRLEN STRLEN8
901: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
902: #define PCRE_COMPILE PCRE_COMPILE8
903: #define PCRE_CONFIG pcre_config
904: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
905: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
906: #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
907: #define PCRE_EXEC PCRE_EXEC8
908: #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
909: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
910: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
911: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
912: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
913: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
914: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
915: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
916: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
917: #define PCRE_MAKETABLES pcre_maketables()
918: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
919: #define PCRE_PRINTINT PCRE_PRINTINT8
920: #define PCRE_STUDY PCRE_STUDY8
921:
922: /* ----- Only 16-bit mode is supported ----- */
923:
1.1.1.4 ! misho 924: #elif defined SUPPORT_PCRE16
1.1.1.2 misho 925: #define CHAR_SIZE 2
926: #define PCHARS PCHARS16
927: #define PCHARSV PCHARSV16
928: #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
929: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
930: #define STRLEN STRLEN16
931: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
932: #define PCRE_COMPILE PCRE_COMPILE16
933: #define PCRE_CONFIG pcre16_config
934: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
935: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
936: #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
937: #define PCRE_EXEC PCRE_EXEC16
938: #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
939: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
940: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
941: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
942: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
943: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
944: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
945: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
946: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
947: #define PCRE_MAKETABLES pcre16_maketables()
948: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
949: #define PCRE_PRINTINT PCRE_PRINTINT16
950: #define PCRE_STUDY PCRE_STUDY16
1.1.1.4 ! misho 951:
! 952: /* ----- Only 32-bit mode is supported ----- */
! 953:
! 954: #elif defined SUPPORT_PCRE32
! 955: #define CHAR_SIZE 4
! 956: #define PCHARS PCHARS32
! 957: #define PCHARSV PCHARSV32
! 958: #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
! 959: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
! 960: #define STRLEN STRLEN32
! 961: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
! 962: #define PCRE_COMPILE PCRE_COMPILE32
! 963: #define PCRE_CONFIG pcre32_config
! 964: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
! 965: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
! 966: #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
! 967: #define PCRE_EXEC PCRE_EXEC32
! 968: #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
! 969: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
! 970: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
! 971: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
! 972: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
! 973: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
! 974: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
! 975: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
! 976: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
! 977: #define PCRE_MAKETABLES pcre32_maketables()
! 978: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
! 979: #define PCRE_PRINTINT PCRE_PRINTINT32
! 980: #define PCRE_STUDY PCRE_STUDY32
! 981:
1.1.1.2 misho 982: #endif
983:
984: /* ----- End of mode-specific function call macros ----- */
985:
1.1 misho 986:
987: /* Other parameters */
988:
989: #ifndef CLOCKS_PER_SEC
990: #ifdef CLK_TCK
991: #define CLOCKS_PER_SEC CLK_TCK
992: #else
993: #define CLOCKS_PER_SEC 100
994: #endif
995: #endif
996:
1.1.1.3 misho 997: #if !defined NODFA
998: #define DFA_WS_DIMENSION 1000
999: #endif
1000:
1.1 misho 1001: /* This is the default loop count for timing. */
1002:
1003: #define LOOPREPEAT 500000
1004:
1005: /* Static variables */
1006:
1007: static FILE *outfile;
1008: static int log_store = 0;
1009: static int callout_count;
1010: static int callout_extra;
1011: static int callout_fail_count;
1012: static int callout_fail_id;
1013: static int debug_lengths;
1014: static int first_callout;
1.1.1.3 misho 1015: static int jit_was_used;
1.1 misho 1016: static int locale_set = 0;
1017: static int show_malloc;
1.1.1.2 misho 1018: static int use_utf;
1.1 misho 1019: static size_t gotten_store;
1020: static size_t first_gotten_store = 0;
1021: static const unsigned char *last_callout_mark = NULL;
1022:
1023: /* The buffers grow automatically if very long input lines are encountered. */
1024:
1025: static int buffer_size = 50000;
1.1.1.2 misho 1026: static pcre_uint8 *buffer = NULL;
1027: static pcre_uint8 *pbuffer = NULL;
1028:
1.1.1.4 ! misho 1029: /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1.1.1.2 misho 1030:
1031: #ifdef COMPILE_PCRE16
1032: #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1033: #endif
1034:
1.1.1.4 ! misho 1035: #ifdef COMPILE_PCRE32
! 1036: #error COMPILE_PCRE32 must not be set when compiling pcretest.c
! 1037: #endif
! 1038:
! 1039: /* We need buffers for building 16/32-bit strings, and the tables of operator
! 1040: lengths that are used for 16/32-bit compiling, in order to swap bytes in a
! 1041: pattern for saving/reloading testing. Luckily, the data for these tables is
! 1042: defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
! 1043: are used in the tables) are adjusted appropriately for the 16/32-bit world.
! 1044: LINK_SIZE is also used later in this program. */
! 1045:
! 1046: #ifdef SUPPORT_PCRE16
! 1047: #undef IMM2_SIZE
! 1048: #define IMM2_SIZE 1
! 1049:
1.1.1.2 misho 1050: #if LINK_SIZE == 2
1051: #undef LINK_SIZE
1052: #define LINK_SIZE 1
1053: #elif LINK_SIZE == 3 || LINK_SIZE == 4
1054: #undef LINK_SIZE
1055: #define LINK_SIZE 2
1056: #else
1057: #error LINK_SIZE must be either 2, 3, or 4
1058: #endif
1059:
1.1.1.4 ! misho 1060: static int buffer16_size = 0;
! 1061: static pcre_uint16 *buffer16 = NULL;
1.1.1.2 misho 1062: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1063: #endif /* SUPPORT_PCRE16 */
1064:
1.1.1.4 ! misho 1065: #ifdef SUPPORT_PCRE32
! 1066: #undef IMM2_SIZE
! 1067: #define IMM2_SIZE 1
! 1068: #undef LINK_SIZE
! 1069: #define LINK_SIZE 1
1.1.1.2 misho 1070:
1.1.1.4 ! misho 1071: static int buffer32_size = 0;
! 1072: static pcre_uint32 *buffer32 = NULL;
! 1073: static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
! 1074: #endif /* SUPPORT_PCRE32 */
! 1075:
! 1076: /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
! 1077: support, it can be changed by an option. If there is no 8-bit support, there
! 1078: must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
! 1079:
! 1080: #if defined SUPPORT_PCRE8
! 1081: static int pcre_mode = PCRE8_MODE;
! 1082: #elif defined SUPPORT_PCRE16
! 1083: static int pcre_mode = PCRE16_MODE;
! 1084: #elif defined SUPPORT_PCRE32
! 1085: static int pcre_mode = PCRE32_MODE;
1.1.1.2 misho 1086: #endif
1.1 misho 1087:
1.1.1.3 misho 1088: /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1089: /* Entry k (counting from 1) combines JIT_COMPILE, PARTIAL_SOFT and PARTIAL_HARD according to bits 0, 1 and 2 of k; presumably selected by the digit n - confirm at the use site. */
1090: static int jit_study_bits[] =
1091: {
1092: PCRE_STUDY_JIT_COMPILE,
1093: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1094: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1095: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1096: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1097: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1098: PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1099: PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1100: };
1101:
1.1.1.4 ! misho 1102: #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
! 1103: PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
! 1104:
1.1 misho 1105: /* Textual explanations for runtime error codes */
1106: /* Presumably indexed by the negated error code (index 0 = no error) - confirm at the use site. A NULL entry means no text is kept here for that code; the comment beside it says why. */
1107: static const char *errtexts[] = {
1108: NULL, /* 0 is no error */
1109: NULL, /* NOMATCH is handled specially */
1110: "NULL argument passed",
1111: "bad option value",
1112: "magic number missing",
1113: "unknown opcode - pattern overwritten?",
1114: "no more memory",
1115: NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1116: "match limit exceeded",
1117: "callout error code",
1.1.1.2 misho 1118: NULL, /* BADUTF8/16 is handled specially */
1119: NULL, /* BADUTF8/16 offset is handled specially */
1.1 misho 1120: NULL, /* PARTIAL is handled specially */
1121: "not used - internal error",
1122: "internal error - pattern overwritten?",
1123: "bad count value",
1124: "item unsupported for DFA matching",
1125: "backreference condition or recursion test not supported for DFA matching",
1126: "match limit not supported for DFA matching",
1127: "workspace size exceeded in DFA matching",
1128: "too much recursion for DFA matching",
1129: "recursion limit exceeded",
1130: "not used - internal error",
1131: "invalid combination of newline options",
1132: "bad offset value",
1.1.1.2 misho 1133: NULL, /* SHORTUTF8/16 is handled specially */
1.1 misho 1134: "nested recursion at the same subject position",
1.1.1.2 misho 1135: "JIT stack limit reached",
1.1.1.3 misho 1136: "pattern compiled in wrong mode: 8-bit/16-bit error",
1137: "pattern compiled with other endianness",
1.1.1.4 ! misho 1138: "invalid data in workspace for DFA restart",
! 1139: "bad JIT option",
! 1140: "bad length"
1.1 misho 1141: };
1143:
1144: /*************************************************
1145: * Alternate character tables *
1146: *************************************************/
1147:
1148: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1149: using the default tables of the library. However, the T option can be used to
1150: select alternate sets of tables, for different kinds of testing. Note also that
1151: the L (locale) option also adjusts the tables. */
1152:
1153: /* This is the set of tables distributed as default with PCRE. It recognizes
1154: only ASCII characters. */
1155:
1.1.1.2 misho 1156: static const pcre_uint8 tables0[] = {
1.1 misho 1157:
1158: /* This table is a lower casing table. */
1159:
1160: 0, 1, 2, 3, 4, 5, 6, 7,
1161: 8, 9, 10, 11, 12, 13, 14, 15,
1162: 16, 17, 18, 19, 20, 21, 22, 23,
1163: 24, 25, 26, 27, 28, 29, 30, 31,
1164: 32, 33, 34, 35, 36, 37, 38, 39,
1165: 40, 41, 42, 43, 44, 45, 46, 47,
1166: 48, 49, 50, 51, 52, 53, 54, 55,
1167: 56, 57, 58, 59, 60, 61, 62, 63,
1168: 64, 97, 98, 99,100,101,102,103,
1169: 104,105,106,107,108,109,110,111,
1170: 112,113,114,115,116,117,118,119,
1171: 120,121,122, 91, 92, 93, 94, 95,
1172: 96, 97, 98, 99,100,101,102,103,
1173: 104,105,106,107,108,109,110,111,
1174: 112,113,114,115,116,117,118,119,
1175: 120,121,122,123,124,125,126,127,
1176: 128,129,130,131,132,133,134,135,
1177: 136,137,138,139,140,141,142,143,
1178: 144,145,146,147,148,149,150,151,
1179: 152,153,154,155,156,157,158,159,
1180: 160,161,162,163,164,165,166,167,
1181: 168,169,170,171,172,173,174,175,
1182: 176,177,178,179,180,181,182,183,
1183: 184,185,186,187,188,189,190,191,
1184: 192,193,194,195,196,197,198,199,
1185: 200,201,202,203,204,205,206,207,
1186: 208,209,210,211,212,213,214,215,
1187: 216,217,218,219,220,221,222,223,
1188: 224,225,226,227,228,229,230,231,
1189: 232,233,234,235,236,237,238,239,
1190: 240,241,242,243,244,245,246,247,
1191: 248,249,250,251,252,253,254,255,
1192:
1193: /* This table is a case flipping table. */
1194:
1195: 0, 1, 2, 3, 4, 5, 6, 7,
1196: 8, 9, 10, 11, 12, 13, 14, 15,
1197: 16, 17, 18, 19, 20, 21, 22, 23,
1198: 24, 25, 26, 27, 28, 29, 30, 31,
1199: 32, 33, 34, 35, 36, 37, 38, 39,
1200: 40, 41, 42, 43, 44, 45, 46, 47,
1201: 48, 49, 50, 51, 52, 53, 54, 55,
1202: 56, 57, 58, 59, 60, 61, 62, 63,
1203: 64, 97, 98, 99,100,101,102,103,
1204: 104,105,106,107,108,109,110,111,
1205: 112,113,114,115,116,117,118,119,
1206: 120,121,122, 91, 92, 93, 94, 95,
1207: 96, 65, 66, 67, 68, 69, 70, 71,
1208: 72, 73, 74, 75, 76, 77, 78, 79,
1209: 80, 81, 82, 83, 84, 85, 86, 87,
1210: 88, 89, 90,123,124,125,126,127,
1211: 128,129,130,131,132,133,134,135,
1212: 136,137,138,139,140,141,142,143,
1213: 144,145,146,147,148,149,150,151,
1214: 152,153,154,155,156,157,158,159,
1215: 160,161,162,163,164,165,166,167,
1216: 168,169,170,171,172,173,174,175,
1217: 176,177,178,179,180,181,182,183,
1218: 184,185,186,187,188,189,190,191,
1219: 192,193,194,195,196,197,198,199,
1220: 200,201,202,203,204,205,206,207,
1221: 208,209,210,211,212,213,214,215,
1222: 216,217,218,219,220,221,222,223,
1223: 224,225,226,227,228,229,230,231,
1224: 232,233,234,235,236,237,238,239,
1225: 240,241,242,243,244,245,246,247,
1226: 248,249,250,251,252,253,254,255,
1227:
1228: /* This table contains bit maps for various character classes. Each map is 32
1229: bytes long and the bits run from the least significant end of each byte. The
1230: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1231: graph, print, punct, and cntrl. Other classes are built from combinations. */
1232:
1233: 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1234: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1236: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1237:
1238: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1239: 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1240: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1241: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1242:
1243: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1244: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1246: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247:
1248: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1249: 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1250: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1251: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252:
1253: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1254: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1255: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1256: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1257:
1258: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1259: 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1260: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262:
1263: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1264: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1265: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267:
1268: 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1269: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1270: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272:
1273: 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1274: 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1275: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277:
1278: 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1279: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1280: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282:
1283: /* This table identifies various classes of character by individual bits:
1284: 0x01 white space character
1285: 0x02 letter
1286: 0x04 decimal digit
1287: 0x08 hexadecimal digit
1288: 0x10 alphanumeric or '_'
1289: 0x80 regular expression metacharacter or binary zero
1290: */
1291:
1292: 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1293: 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1294: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1295: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1296: 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1297: 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1298: 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1299: 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1300: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1301: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1302: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1303: 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1304: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1305: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1306: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1307: 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1308: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1309: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1310: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1311: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1312: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1313: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1314: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1315: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1316: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1317: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1318: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1319: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1320: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1321: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1322: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1323: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1324:
1.1.1.4 ! misho 1325: /* This is a set of tables that came originally from a Windows user. It seems to
1.1 misho 1326: be at least an approximation of ISO 8859. In particular, there are characters
1327: greater than 128 that are marked as spaces, letters, etc. */
1328:
1.1.1.2 misho 1329: static const pcre_uint8 tables1[] = {
1.1 misho 1330: 0,1,2,3,4,5,6,7,
1331: 8,9,10,11,12,13,14,15,
1332: 16,17,18,19,20,21,22,23,
1333: 24,25,26,27,28,29,30,31,
1334: 32,33,34,35,36,37,38,39,
1335: 40,41,42,43,44,45,46,47,
1336: 48,49,50,51,52,53,54,55,
1337: 56,57,58,59,60,61,62,63,
1338: 64,97,98,99,100,101,102,103,
1339: 104,105,106,107,108,109,110,111,
1340: 112,113,114,115,116,117,118,119,
1341: 120,121,122,91,92,93,94,95,
1342: 96,97,98,99,100,101,102,103,
1343: 104,105,106,107,108,109,110,111,
1344: 112,113,114,115,116,117,118,119,
1345: 120,121,122,123,124,125,126,127,
1346: 128,129,130,131,132,133,134,135,
1347: 136,137,138,139,140,141,142,143,
1348: 144,145,146,147,148,149,150,151,
1349: 152,153,154,155,156,157,158,159,
1350: 160,161,162,163,164,165,166,167,
1351: 168,169,170,171,172,173,174,175,
1352: 176,177,178,179,180,181,182,183,
1353: 184,185,186,187,188,189,190,191,
1354: 224,225,226,227,228,229,230,231,
1355: 232,233,234,235,236,237,238,239,
1356: 240,241,242,243,244,245,246,215,
1357: 248,249,250,251,252,253,254,223,
1358: 224,225,226,227,228,229,230,231,
1359: 232,233,234,235,236,237,238,239,
1360: 240,241,242,243,244,245,246,247,
1361: 248,249,250,251,252,253,254,255,
1362: 0,1,2,3,4,5,6,7,
1363: 8,9,10,11,12,13,14,15,
1364: 16,17,18,19,20,21,22,23,
1365: 24,25,26,27,28,29,30,31,
1366: 32,33,34,35,36,37,38,39,
1367: 40,41,42,43,44,45,46,47,
1368: 48,49,50,51,52,53,54,55,
1369: 56,57,58,59,60,61,62,63,
1370: 64,97,98,99,100,101,102,103,
1371: 104,105,106,107,108,109,110,111,
1372: 112,113,114,115,116,117,118,119,
1373: 120,121,122,91,92,93,94,95,
1374: 96,65,66,67,68,69,70,71,
1375: 72,73,74,75,76,77,78,79,
1376: 80,81,82,83,84,85,86,87,
1377: 88,89,90,123,124,125,126,127,
1378: 128,129,130,131,132,133,134,135,
1379: 136,137,138,139,140,141,142,143,
1380: 144,145,146,147,148,149,150,151,
1381: 152,153,154,155,156,157,158,159,
1382: 160,161,162,163,164,165,166,167,
1383: 168,169,170,171,172,173,174,175,
1384: 176,177,178,179,180,181,182,183,
1385: 184,185,186,187,188,189,190,191,
1386: 224,225,226,227,228,229,230,231,
1387: 232,233,234,235,236,237,238,239,
1388: 240,241,242,243,244,245,246,215,
1389: 248,249,250,251,252,253,254,223,
1390: 192,193,194,195,196,197,198,199,
1391: 200,201,202,203,204,205,206,207,
1392: 208,209,210,211,212,213,214,247,
1393: 216,217,218,219,220,221,222,255,
1394: 0,62,0,0,1,0,0,0,
1395: 0,0,0,0,0,0,0,0,
1396: 32,0,0,0,1,0,0,0,
1397: 0,0,0,0,0,0,0,0,
1398: 0,0,0,0,0,0,255,3,
1399: 126,0,0,0,126,0,0,0,
1400: 0,0,0,0,0,0,0,0,
1401: 0,0,0,0,0,0,0,0,
1402: 0,0,0,0,0,0,255,3,
1403: 0,0,0,0,0,0,0,0,
1404: 0,0,0,0,0,0,12,2,
1405: 0,0,0,0,0,0,0,0,
1406: 0,0,0,0,0,0,0,0,
1407: 254,255,255,7,0,0,0,0,
1408: 0,0,0,0,0,0,0,0,
1409: 255,255,127,127,0,0,0,0,
1410: 0,0,0,0,0,0,0,0,
1411: 0,0,0,0,254,255,255,7,
1412: 0,0,0,0,0,4,32,4,
1413: 0,0,0,128,255,255,127,255,
1414: 0,0,0,0,0,0,255,3,
1415: 254,255,255,135,254,255,255,7,
1416: 0,0,0,0,0,4,44,6,
1417: 255,255,127,255,255,255,127,255,
1418: 0,0,0,0,254,255,255,255,
1419: 255,255,255,255,255,255,255,127,
1420: 0,0,0,0,254,255,255,255,
1421: 255,255,255,255,255,255,255,255,
1422: 0,2,0,0,255,255,255,255,
1423: 255,255,255,255,255,255,255,127,
1424: 0,0,0,0,255,255,255,255,
1425: 255,255,255,255,255,255,255,255,
1426: 0,0,0,0,254,255,0,252,
1427: 1,0,0,248,1,0,0,120,
1428: 0,0,0,0,254,255,255,255,
1429: 0,0,128,0,0,0,128,0,
1430: 255,255,255,255,0,0,0,0,
1431: 0,0,0,0,0,0,0,128,
1432: 255,255,255,255,0,0,0,0,
1433: 0,0,0,0,0,0,0,0,
1434: 128,0,0,0,0,0,0,0,
1435: 0,1,1,0,1,1,0,0,
1436: 0,0,0,0,0,0,0,0,
1437: 0,0,0,0,0,0,0,0,
1438: 1,0,0,0,128,0,0,0,
1439: 128,128,128,128,0,0,128,0,
1440: 28,28,28,28,28,28,28,28,
1441: 28,28,0,0,0,0,0,128,
1442: 0,26,26,26,26,26,26,18,
1443: 18,18,18,18,18,18,18,18,
1444: 18,18,18,18,18,18,18,18,
1445: 18,18,18,128,128,0,128,16,
1446: 0,26,26,26,26,26,26,18,
1447: 18,18,18,18,18,18,18,18,
1448: 18,18,18,18,18,18,18,18,
1449: 18,18,18,128,128,0,0,0,
1450: 0,0,0,0,0,1,0,0,
1451: 0,0,0,0,0,0,0,0,
1452: 0,0,0,0,0,0,0,0,
1453: 0,0,0,0,0,0,0,0,
1454: 1,0,0,0,0,0,0,0,
1455: 0,0,18,0,0,0,0,0,
1456: 0,0,20,20,0,18,0,0,
1457: 0,20,18,0,0,0,0,0,
1458: 18,18,18,18,18,18,18,18,
1459: 18,18,18,18,18,18,18,18,
1460: 18,18,18,18,18,18,18,0,
1461: 18,18,18,18,18,18,18,18,
1462: 18,18,18,18,18,18,18,18,
1463: 18,18,18,18,18,18,18,18,
1464: 18,18,18,18,18,18,18,0,
1465: 18,18,18,18,18,18,18,18
1466: };
1467:
1468:
1469:
1470:
1471: #ifndef HAVE_STRERROR
1472: /*************************************************
1473: * Provide strerror() for non-ANSI libraries *
1474: *************************************************/
1475:
1476: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1477: in their libraries, but can provide the same facility by this simple
1478: alternative function. */
1479:
1480: extern int sys_nerr;
1481: extern char *sys_errlist[];
1482: /* Argument: n, an error number. Returns sys_errlist[n], or a fixed fallback string when n is outside 0 .. sys_nerr-1. */
1483: char *
1484: strerror(int n)
1485: {
1486: if (n < 0 || n >= sys_nerr) return "unknown error number"; /* out of table range */
1487: return sys_errlist[n];
1488: }
1489: #endif /* HAVE_STRERROR */
1490:
1491:
1.1.1.4 ! misho 1492:
! 1493: /*************************************************
! 1494: * Print newline configuration *
! 1495: *************************************************/
! 1496:
! 1497: /* Prints on stdout the name of the newline setting identified by rc, followed by a newline.
! 1498: Arguments:
! 1499: rc the return code from PCRE_CONFIG_NEWLINE
! 1500: isc TRUE if called from "-C newline" (the descriptive prefix is then omitted)
! 1501: Returns: nothing
! 1502: */
! 1503:
! 1504: static void
! 1505: print_newline_config(int rc, BOOL isc)
! 1506: {
! 1507: const char *s = NULL;
! 1508: if (!isc) printf(" Newline sequence is "); /* prefix only when isc is FALSE */
! 1509: switch(rc)
! 1510: {
! 1511: case CHAR_CR: s = "CR"; break;
! 1512: case CHAR_LF: s = "LF"; break;
! 1513: case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break; /* two characters packed as (first << 8) | second */
! 1514: case -1: s = "ANY"; break;
! 1515: case -2: s = "ANYCRLF"; break;
! 1516:
! 1517: default: /* unrecognized value: report it in hex and return without printing a name */
! 1518: printf("a non-standard value: 0x%04x\n", rc);
! 1519: return;
! 1520: }
! 1521:
! 1522: printf("%s\n", s);
! 1523: }
! 1524:
! 1525:
! 1526:
1.1 misho 1527: /*************************************************
1528: * JIT memory callback *
1529: *************************************************/
1530: /* Sets the jit_was_used flag and returns arg unchanged, cast to a JIT stack pointer. */
1531: static pcre_jit_stack* jit_callback(void *arg)
1532: {
1.1.1.3 misho 1533: jit_was_used = TRUE;
1.1 misho 1534: return (pcre_jit_stack *)arg;
1535: }
1536:
1537:
1.1.1.4 ! misho 1538: #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1.1.1.2 misho 1539: /*************************************************
1540: * Convert UTF-8 string to value *
1541: *************************************************/
1542:
1543: /* This function takes one or more bytes that represents a UTF-8 character,
1544: and returns the value of the character.
1545:
1546: Arguments:
1547: utf8bytes a pointer to the byte vector
1548: vptr a pointer to a pcre_uint32 to receive the value
1549:
1550: Returns: > 0 => the number of bytes consumed
1551: -6 to 0 => malformed UTF-8 character at offset = (-return); *vptr is not written
1552: */
1553:
1554: static int
1.1.1.4 ! misho 1555: utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1.1.1.2 misho 1556: {
1.1.1.4 ! misho 1557: pcre_uint32 c = *utf8bytes++;
! 1558: pcre_uint32 d = c;
1.1.1.2 misho 1559: int i, j, s;
1560:
1561: for (i = -1; i < 6; i++) /* i is number of additional bytes */
1562: {
1563: if ((d & 0x80) == 0) break; /* stop at the first 0 bit, scanning from bit 7 down */
1564: d <<= 1;
1565: }
1566:
1567: if (i == -1) { *vptr = c; return 1; } /* ascii character */
1568: if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1569:
1570: /* i now has a value in the range 1-5 */
1571:
1572: s = 6*i; /* each additional byte contributes 6 bits */
1573: d = (c & utf8_table3[i]) << s; /* utf8_table3[i] presumably masks off the length-marker bits - confirm */
1574:
1575: for (j = 0; j < i; j++)
1576: {
1577: c = *utf8bytes++;
1578: if ((c & 0xc0) != 0x80) return -(j+1); /* not a 10xxxxxx continuation byte */
1579: s -= 6;
1580: d |= (c & 0x3f) << s;
1581: }
1582:
1583: /* Check that encoding was the correct unique one */
1584:
1585: for (j = 0; j < utf8_table1_size; j++)
1.1.1.4 ! misho 1586: if (d <= (pcre_uint32)utf8_table1[j]) break;
1.1.1.2 misho 1587: if (j != i) return -(i+1);
1588:
1589: /* Valid value */
1590:
1591: *vptr = d;
1592: return i+1;
1593: }
1594: #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1595:
1596:
1597:
1.1.1.4 ! misho 1598: #if defined SUPPORT_PCRE8 && !defined NOUTF
1.1.1.2 misho 1599: /*************************************************
1600: * Convert character value to UTF-8 *
1601: *************************************************/
1602:
1603: /* This function takes an integer value in the range 0 - 0x7fffffff
1604: and encodes it as a UTF-8 character in 1 to 6 bytes.
1605:
1606: Arguments:
1607: cvalue the character value
1608: utf8bytes pointer to buffer for result - at least 6 bytes long
1609:
1610: Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1611: */
1612:
1613: static int
1.1.1.4 ! misho 1614: ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1.1.1.2 misho 1615: {
1616: register int i, j;
1.1.1.4 ! misho 1617: if (cvalue > 0x7fffffffu)
! 1618: return -1;
1.1.1.2 misho 1619: for (i = 0; i < utf8_table1_size; i++)
1.1.1.4 ! misho 1620: if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1.1.1.2 misho 1621: utf8bytes += i;
1622: for (j = i; j > 0; j--)
1623: {
1624: *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1625: cvalue >>= 6;
1626: }
1627: *utf8bytes = utf8_table2[i] | cvalue;
1628: return i + 1;
1629: }
1630: #endif
1631:
1632:
1633: #ifdef SUPPORT_PCRE16
1634: /*************************************************
1635: * Convert a string to 16-bit *
1636: *************************************************/
1637:
1638: /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1639: 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1640: double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1641: in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1642: result is always left in buffer16.
1643:
1644: Note that this function does not object to surrogate values. This is
1645: deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1646: for the purpose of testing that they are correctly faulted.
1647:
1648: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1649: in UTF-8 so that values greater than 255 can be handled.
1650:
1651: Arguments:
1652: data TRUE if converting a data line; FALSE for a regex
1653: p points to a byte string
1654: utf true if UTF-8 (to be converted to UTF-16)
1655: len number of bytes in the string (excluding trailing zero)
1656:
1657: Returns: number of 16-bit data items used (excluding trailing zero)
1658: OR -1 if a UTF-8 string is malformed
1659: OR -2 if a value > 0x10ffff is encountered
1660: OR -3 if a value > 0xffff is encountered when not in UTF mode
1661: */
1662:
1663: static int
1664: to16(int data, pcre_uint8 *p, int utf, int len)
1665: {
1666: pcre_uint16 *pp;
1667:
1668: if (buffer16_size < 2*len + 2)
1669: {
1670: if (buffer16 != NULL) free(buffer16);
1671: buffer16_size = 2*len + 2;
1672: buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1673: if (buffer16 == NULL)
1674: {
1675: fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1676: exit(1);
1677: }
1678: }
1679:
1680: pp = buffer16;
1681:
1682: if (!utf && !data)
1683: {
1684: while (len-- > 0) *pp++ = *p++;
1685: }
1686:
1687: else
1688: {
1.1.1.4 ! misho 1689: pcre_uint32 c = 0;
1.1.1.2 misho 1690: while (len > 0)
1691: {
1692: int chlen = utf82ord(p, &c);
1693: if (chlen <= 0) return -1;
1694: if (c > 0x10ffff) return -2;
1695: p += chlen;
1696: len -= chlen;
1697: if (c < 0x10000) *pp++ = c; else
1698: {
1699: if (!utf) return -3;
1700: c -= 0x10000;
1701: *pp++ = 0xD800 | (c >> 10);
1702: *pp++ = 0xDC00 | (c & 0x3ff);
1703: }
1704: }
1705: }
1706:
1707: *pp = 0;
1708: return pp - buffer16;
1709: }
1710: #endif
1711:
1.1.1.4 ! misho 1712: #ifdef SUPPORT_PCRE32
! 1713: /*************************************************
! 1714: * Convert a string to 32-bit *
! 1715: *************************************************/
! 1716:
! 1717: /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
! 1718: 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
! 1719: times, because every character occupies at least one byte in UTF-8 and exactly
! 1720: four bytes in UTF-32. The
! 1721: result is always left in buffer32.
! 1722:
! 1723: Note that this function does not object to surrogate values. This is
! 1724: deliberate; it makes it possible to construct UTF-32 strings that are invalid,
! 1725: for the purpose of testing that they are correctly faulted.
! 1726:
! 1727: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
! 1728: in UTF-8 so that values greater than 255 can be handled.
! 1729:
! 1730: Arguments:
! 1731: data TRUE if converting a data line; FALSE for a regex
! 1732: p points to a byte string
! 1733: utf true if UTF-8 (to be converted to UTF-32)
! 1734: len number of bytes in the string (excluding trailing zero)
! 1735:
! 1736: Returns: number of 32-bit data items used (excluding trailing zero)
! 1737: OR -1 if a UTF-8 string is malformed
! 1738: OR -2 if a value > 0x10ffff is encountered
! 1739: OR -3 if an ill-formed value is encountered (i.e. a surrogate)
! 1740: */
! 1741:
! 1742: static int
! 1743: to32(int data, pcre_uint8 *p, int utf, int len)
! 1744: {
! 1745: pcre_uint32 *pp;
! 1746:
! 1747: if (buffer32_size < 4*len + 4)
! 1748: {
! 1749: if (buffer32 != NULL) free(buffer32);
! 1750: buffer32_size = 4*len + 4;
! 1751: buffer32 = (pcre_uint32 *)malloc(buffer32_size);
! 1752: if (buffer32 == NULL)
! 1753: {
! 1754: fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
! 1755: exit(1);
! 1756: }
! 1757: }
! 1758:
! 1759: pp = buffer32;
! 1760:
! 1761: if (!utf && !data)
! 1762: {
! 1763: while (len-- > 0) *pp++ = *p++;
! 1764: }
! 1765:
! 1766: else
! 1767: {
! 1768: pcre_uint32 c = 0;
! 1769: while (len > 0)
! 1770: {
! 1771: int chlen = utf82ord(p, &c);
! 1772: if (chlen <= 0) return -1;
! 1773: if (utf)
! 1774: {
! 1775: if (c > 0x10ffff) return -2;
! 1776: if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
! 1777: }
! 1778:
! 1779: p += chlen;
! 1780: len -= chlen;
! 1781: *pp++ = c;
! 1782: }
! 1783: }
! 1784:
! 1785: *pp = 0;
! 1786: return pp - buffer32;
! 1787: }
! 1788:
! 1789: /* Check that a 32-bit character string is valid UTF-32.
! 1790:
! 1791: Arguments:
! 1792: string points to the string
! 1793: length length of string, or -1 if the string is zero-terminated
! 1794:
! 1795: Returns: TRUE if the string is a valid UTF-32 string
! 1796: FALSE otherwise
! 1797: */
! 1798:
! 1799: #ifdef NEVER /* Not used */
! 1800: #ifdef SUPPORT_UTF
! 1801: static BOOL
! 1802: valid_utf32(pcre_uint32 *string, int length)
! 1803: {
! 1804: register pcre_uint32 *p;
! 1805: register pcre_uint32 c;
! 1806:
! 1807: for (p = string; length-- > 0; p++)
! 1808: {
! 1809: c = *p;
! 1810: if (c > 0x10ffffu) return FALSE; /* Too big */
! 1811: if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
! 1812: }
! 1813:
! 1814: return TRUE;
! 1815: }
! 1816: #endif /* SUPPORT_UTF */
! 1817: #endif /* NEVER */
! 1818: #endif /* SUPPORT_PCRE32 */
! 1819:
1.1.1.2 misho 1820:
1.1 misho 1821: /*************************************************
1822: * Read or extend an input line *
1823: *************************************************/
1824:
1825: /* Input lines are read into buffer, but both patterns and data lines can be
1826: continued over multiple input lines. In addition, if the buffer fills up, we
1827: want to automatically expand it so as to be able to handle extremely large
1828: lines that are needed for certain stress tests. When the input buffer is
1829: expanded, the other two buffers must also be expanded likewise, and the
1830: contents of pbuffer, which are a copy of the input for callouts, must be
1831: preserved (for when expansion happens for a data line). This is not the most
1832: optimal way of handling this, but hey, this is just a test program!
1833:
1834: Arguments:
1835: f the file to read
1836: start where in buffer to start (this *must* be within buffer)
1837: prompt for stdin or readline()
1838:
1839: Returns: pointer to the start of new data
1840: could be a copy of start, or could be moved
1841: NULL if no data read and EOF reached
1842: */
1843:
1.1.1.2 misho 1844: static pcre_uint8 *
1845: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1.1 misho 1846: {
1.1.1.2 misho 1847: pcre_uint8 *here = start;
1.1 misho 1848:
1849: for (;;)
1850: {
1.1.1.2 misho 1851: size_t rlen = (size_t)(buffer_size - (here - buffer));
1.1 misho 1852:
1853: if (rlen > 1000)
1854: {
1855: int dlen;
1856:
1.1.1.3 misho 1857: /* If libreadline or libedit support is required, use readline() to read a
1858: line if the input is a terminal. Note that readline() removes the trailing
1859: newline, so we must put it back again, to be compatible with fgets(). */
1.1 misho 1860:
1.1.1.3 misho 1861: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1 misho 1862: if (isatty(fileno(f)))
1863: {
1864: size_t len;
1865: char *s = readline(prompt);
1866: if (s == NULL) return (here == start)? NULL : start;
1867: len = strlen(s);
1868: if (len > 0) add_history(s);
1869: if (len > rlen - 1) len = rlen - 1;
1870: memcpy(here, s, len);
1871: here[len] = '\n';
1872: here[len+1] = 0;
1873: free(s);
1874: }
1875: else
1876: #endif
1877:
1878: /* Read the next line by normal means, prompting if the file is stdin. */
1879:
1880: {
1881: if (f == stdin) printf("%s", prompt);
1882: if (fgets((char *)here, rlen, f) == NULL)
1883: return (here == start)? NULL : start;
1884: }
1885:
1886: dlen = (int)strlen((char *)here);
1887: if (dlen > 0 && here[dlen - 1] == '\n') return start;
1888: here += dlen;
1889: }
1890:
1891: else
1892: {
1893: int new_buffer_size = 2*buffer_size;
1.1.1.2 misho 1894: pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1895: pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1.1 misho 1896:
1.1.1.4 ! misho 1897: if (new_buffer == NULL || new_pbuffer == NULL)
1.1 misho 1898: {
1899: fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900: exit(1);
1901: }
1902:
1903: memcpy(new_buffer, buffer, buffer_size);
1904: memcpy(new_pbuffer, pbuffer, buffer_size);
1905:
1906: buffer_size = new_buffer_size;
1907:
1908: start = new_buffer + (start - buffer);
1909: here = new_buffer + (here - buffer);
1910:
1911: free(buffer);
1912: free(pbuffer);
1913:
1914: buffer = new_buffer;
1915: pbuffer = new_pbuffer;
1916: }
1917: }
1918:
1919: return NULL; /* Control never gets here */
1920: }
1921:
1922:
1923:
1924: /*************************************************
1925: * Read number from string *
1926: *************************************************/
1927:
1928: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1929: around with conditional compilation, just do the job by hand. It is only used
1930: for unpicking arguments, so just keep it simple.
1931:
1932: Arguments:
1933: str string to be converted
1934: endptr where to put the end pointer
1935:
1936: Returns: the converted value, as an int
1937: */
1938:
1939: static int
1.1.1.2 misho 1940: get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1.1 misho 1941: {
1942: int result = 0;
1943: while(*str != 0 && isspace(*str)) str++;
1944: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1945: *endptr = str;
1946: return(result);
1947: }
1948:
1949:
1950:
1951: /*************************************************
1.1.1.2 misho 1952: * Print one character *
1.1 misho 1953: *************************************************/
1954:
1.1.1.2 misho 1955: /* Print a single character either literally, or as a hex escape. */
1.1 misho 1956:
1.1.1.4 ! misho 1957: static int pchar(pcre_uint32 c, FILE *f)
1.1 misho 1958: {
1.1.1.4 ! misho 1959: int n = 0;
1.1.1.2 misho 1960: if (PRINTOK(c))
1961: {
1962: if (f != NULL) fprintf(f, "%c", c);
1963: return 1;
1964: }
1.1 misho 1965:
1.1.1.2 misho 1966: if (c < 0x100)
1.1 misho 1967: {
1.1.1.2 misho 1968: if (use_utf)
1969: {
1970: if (f != NULL) fprintf(f, "\\x{%02x}", c);
1971: return 6;
1972: }
1973: else
1974: {
1975: if (f != NULL) fprintf(f, "\\x%02x", c);
1976: return 4;
1977: }
1.1 misho 1978: }
1979:
1.1.1.4 ! misho 1980: if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
! 1981: return n >= 0 ? n : 0;
1.1.1.2 misho 1982: }
1.1 misho 1983:
1984:
1985:
1.1.1.2 misho 1986: #ifdef SUPPORT_PCRE8
1987: /*************************************************
1988: * Print 8-bit character string *
1989: *************************************************/
1.1 misho 1990:
1.1.1.2 misho 1991: /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1992: If handed a NULL file, just counts chars without printing. */
1.1 misho 1993:
1.1.1.2 misho 1994: static int pchars(pcre_uint8 *p, int length, FILE *f)
1995: {
1.1.1.4 ! misho 1996: pcre_uint32 c = 0;
1.1.1.2 misho 1997: int yield = 0;
1.1 misho 1998:
1.1.1.2 misho 1999: if (length < 0)
2000: length = strlen((char *)p);
1.1 misho 2001:
1.1.1.2 misho 2002: while (length-- > 0)
2003: {
2004: #if !defined NOUTF
2005: if (use_utf)
2006: {
2007: int rc = utf82ord(p, &c);
2008: if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2009: {
2010: length -= rc - 1;
2011: p += rc;
2012: yield += pchar(c, f);
2013: continue;
2014: }
2015: }
2016: #endif
2017: c = *p++;
2018: yield += pchar(c, f);
2019: }
1.1 misho 2020:
1.1.1.2 misho 2021: return yield;
2022: }
1.1 misho 2023: #endif
2024:
2025:
2026:
1.1.1.2 misho 2027: #ifdef SUPPORT_PCRE16
1.1 misho 2028: /*************************************************
1.1.1.2 misho 2029: * Find length of 0-terminated 16-bit string *
1.1 misho 2030: *************************************************/
2031:
1.1.1.2 misho 2032: static int strlen16(PCRE_SPTR16 p)
1.1 misho 2033: {
1.1.1.2 misho 2034: int len = 0;
2035: while (*p++ != 0) len++;
2036: return len;
1.1 misho 2037: }
1.1.1.2 misho 2038: #endif /* SUPPORT_PCRE16 */
1.1 misho 2039:
2040:
1.1.1.4 ! misho 2041:
! 2042: #ifdef SUPPORT_PCRE32
! 2043: /*************************************************
! 2044: * Find length of 0-terminated 32-bit string *
! 2045: *************************************************/
! 2046:
! 2047: static int strlen32(PCRE_SPTR32 p)
! 2048: {
! 2049: int len = 0;
! 2050: while (*p++ != 0) len++;
! 2051: return len;
! 2052: }
! 2053: #endif /* SUPPORT_PCRE32 */
! 2054:
! 2055:
! 2056:
1.1.1.2 misho 2057: #ifdef SUPPORT_PCRE16
1.1 misho 2058: /*************************************************
1.1.1.2 misho 2059: * Print 16-bit character string *
1.1 misho 2060: *************************************************/
2061:
1.1.1.2 misho 2062: /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2063: If handed a NULL file, just counts chars without printing. */
1.1 misho 2064:
1.1.1.2 misho 2065: static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1.1 misho 2066: {
2067: int yield = 0;
2068:
1.1.1.2 misho 2069: if (length < 0)
2070: length = strlen16(p);
2071:
1.1 misho 2072: while (length-- > 0)
2073: {
1.1.1.4 ! misho 2074: pcre_uint32 c = *p++ & 0xffff;
1.1.1.2 misho 2075: #if !defined NOUTF
2076: if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1.1 misho 2077: {
1.1.1.2 misho 2078: int d = *p & 0xffff;
1.1.1.4 ! misho 2079: if (d >= 0xDC00 && d <= 0xDFFF)
1.1 misho 2080: {
1.1.1.2 misho 2081: c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082: length--;
2083: p++;
1.1 misho 2084: }
2085: }
2086: #endif
1.1.1.2 misho 2087: yield += pchar(c, f);
2088: }
2089:
2090: return yield;
2091: }
2092: #endif /* SUPPORT_PCRE16 */
1.1 misho 2093:
2094:
1.1.1.2 misho 2095:
1.1.1.4 ! misho 2096: #ifdef SUPPORT_PCRE32
! 2097: /*************************************************
! 2098: * Print 32-bit character string *
! 2099: *************************************************/
! 2100:
! 2101: /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
! 2102: If handed a NULL file, just counts chars without printing. */
! 2103:
! 2104: static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
! 2105: {
! 2106: int yield = 0;
! 2107:
! 2108: (void)(utf); /* Avoid compiler warning */
! 2109:
! 2110: if (length < 0)
! 2111: length = strlen32(p);
! 2112:
! 2113: while (length-- > 0)
! 2114: {
! 2115: pcre_uint32 c = *p++;
! 2116: yield += pchar(c, f);
! 2117: }
! 2118:
! 2119: return yield;
! 2120: }
! 2121: #endif /* SUPPORT_PCRE32 */
! 2122:
! 2123:
! 2124:
1.1.1.2 misho 2125: #ifdef SUPPORT_PCRE8
2126: /*************************************************
2127: * Read a capture name (8-bit) and check it *
2128: *************************************************/
2129:
2130: static pcre_uint8 *
2131: read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2132: {
2133: pcre_uint8 *npp = *pp;
2134: while (isalnum(*p)) *npp++ = *p++;
2135: *npp++ = 0;
2136: *npp = 0;
2137: if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2138: {
2139: fprintf(outfile, "no parentheses with name \"");
2140: PCHARSV(*pp, 0, -1, outfile);
2141: fprintf(outfile, "\"\n");
1.1 misho 2142: }
2143:
1.1.1.2 misho 2144: *pp = npp;
2145: return p;
1.1 misho 2146: }
1.1.1.2 misho 2147: #endif /* SUPPORT_PCRE8 */
2148:
2149:
2150:
2151: #ifdef SUPPORT_PCRE16
2152: /*************************************************
2153: * Read a capture name (16-bit) and check it *
2154: *************************************************/
2155:
2156: /* Note that the text being read is 8-bit. */
2157:
2158: static pcre_uint8 *
2159: read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2160: {
2161: pcre_uint16 *npp = *pp;
2162: while (isalnum(*p)) *npp++ = *p++;
2163: *npp++ = 0;
2164: *npp = 0;
2165: if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2166: {
2167: fprintf(outfile, "no parentheses with name \"");
2168: PCHARSV(*pp, 0, -1, outfile);
2169: fprintf(outfile, "\"\n");
2170: }
2171: *pp = npp;
2172: return p;
2173: }
2174: #endif /* SUPPORT_PCRE16 */
1.1 misho 2175:
2176:
2177:
1.1.1.4 ! misho 2178: #ifdef SUPPORT_PCRE32
! 2179: /*************************************************
! 2180: * Read a capture name (32-bit) and check it *
! 2181: *************************************************/
! 2182:
! 2183: /* Note that the text being read is 8-bit. */
! 2184:
! 2185: static pcre_uint8 *
! 2186: read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
! 2187: {
! 2188: pcre_uint32 *npp = *pp;
! 2189: while (isalnum(*p)) *npp++ = *p++;
! 2190: *npp++ = 0;
! 2191: *npp = 0;
! 2192: if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
! 2193: {
! 2194: fprintf(outfile, "no parentheses with name \"");
! 2195: PCHARSV(*pp, 0, -1, outfile);
! 2196: fprintf(outfile, "\"\n");
! 2197: }
! 2198: *pp = npp;
! 2199: return p;
! 2200: }
! 2201: #endif /* SUPPORT_PCRE32 */
! 2202:
! 2203:
! 2204:
1.1 misho 2205: /*************************************************
2206: * Callout function *
2207: *************************************************/
2208:
2209: /* Called from PCRE as a result of the (?C) item. We print out where we are in
2210: the match. Yield zero unless more callouts than the fail count, or the callout
2211: data is not zero. */
2212:
2213: static int callout(pcre_callout_block *cb)
2214: {
2215: FILE *f = (first_callout | callout_extra)? outfile : NULL;
2216: int i, pre_start, post_start, subject_length;
2217:
2218: if (callout_extra)
2219: {
2220: fprintf(f, "Callout %d: last capture = %d\n",
2221: cb->callout_number, cb->capture_last);
2222:
2223: for (i = 0; i < cb->capture_top * 2; i += 2)
2224: {
2225: if (cb->offset_vector[i] < 0)
2226: fprintf(f, "%2d: <unset>\n", i/2);
2227: else
2228: {
2229: fprintf(f, "%2d: ", i/2);
1.1.1.2 misho 2230: PCHARSV(cb->subject, cb->offset_vector[i],
1.1 misho 2231: cb->offset_vector[i+1] - cb->offset_vector[i], f);
2232: fprintf(f, "\n");
2233: }
2234: }
2235: }
2236:
2237: /* Re-print the subject in canonical form, the first time or if giving full
2238: datails. On subsequent calls in the same match, we use pchars just to find the
2239: printed lengths of the substrings. */
2240:
2241: if (f != NULL) fprintf(f, "--->");
2242:
1.1.1.2 misho 2243: PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2244: PCHARS(post_start, cb->subject, cb->start_match,
1.1 misho 2245: cb->current_position - cb->start_match, f);
2246:
1.1.1.2 misho 2247: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1.1 misho 2248:
1.1.1.2 misho 2249: PCHARSV(cb->subject, cb->current_position,
1.1 misho 2250: cb->subject_length - cb->current_position, f);
2251:
2252: if (f != NULL) fprintf(f, "\n");
2253:
2254: /* Always print appropriate indicators, with callout number if not already
2255: shown. For automatic callouts, show the pattern offset. */
2256:
2257: if (cb->callout_number == 255)
2258: {
2259: fprintf(outfile, "%+3d ", cb->pattern_position);
2260: if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2261: }
2262: else
2263: {
2264: if (callout_extra) fprintf(outfile, " ");
2265: else fprintf(outfile, "%3d ", cb->callout_number);
2266: }
2267:
2268: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2269: fprintf(outfile, "^");
2270:
2271: if (post_start > 0)
2272: {
2273: for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2274: fprintf(outfile, "^");
2275: }
2276:
2277: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2278: fprintf(outfile, " ");
2279:
2280: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2281: pbuffer + cb->pattern_position);
2282:
2283: fprintf(outfile, "\n");
2284: first_callout = 0;
2285:
2286: if (cb->mark != last_callout_mark)
2287: {
1.1.1.2 misho 2288: if (cb->mark == NULL)
2289: fprintf(outfile, "Latest Mark: <unset>\n");
2290: else
2291: {
2292: fprintf(outfile, "Latest Mark: ");
2293: PCHARSV(cb->mark, 0, -1, outfile);
2294: putc('\n', outfile);
2295: }
1.1 misho 2296: last_callout_mark = cb->mark;
2297: }
2298:
2299: if (cb->callout_data != NULL)
2300: {
2301: int callout_data = *((int *)(cb->callout_data));
2302: if (callout_data != 0)
2303: {
2304: fprintf(outfile, "Callout data = %d\n", callout_data);
2305: return callout_data;
2306: }
2307: }
2308:
2309: return (cb->callout_number != callout_fail_id)? 0 :
2310: (++callout_count >= callout_fail_count)? 1 : 0;
2311: }
2312:
2313:
2314: /*************************************************
2315: * Local malloc functions *
2316: *************************************************/
2317:
2318: /* Alternative malloc function, to test functionality and save the size of a
2319: compiled re, which is the first store request that pcre_compile() makes. The
2320: show_malloc variable is set only during matching. */
2321:
2322: static void *new_malloc(size_t size)
2323: {
2324: void *block = malloc(size);
2325: gotten_store = size;
2326: if (first_gotten_store == 0) first_gotten_store = size;
2327: if (show_malloc)
2328: fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2329: return block;
2330: }
2331:
2332: static void new_free(void *block)
2333: {
2334: if (show_malloc)
2335: fprintf(outfile, "free %p\n", block);
2336: free(block);
2337: }
2338:
2339: /* For recursion malloc/free, to test stacking calls */
2340:
2341: static void *stack_malloc(size_t size)
2342: {
2343: void *block = malloc(size);
2344: if (show_malloc)
2345: fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2346: return block;
2347: }
2348:
2349: static void stack_free(void *block)
2350: {
2351: if (show_malloc)
2352: fprintf(outfile, "stack_free %p\n", block);
2353: free(block);
2354: }
2355:
2356:
1.1.1.2 misho 2357: /*************************************************
2358: * Call pcre_fullinfo() *
2359: *************************************************/
2360:
2361: /* Get one piece of information from the pcre_fullinfo() function. When only
1.1.1.4 ! misho 2362: one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
1.1.1.2 misho 2363: value, but the code is defensive.
2364:
2365: Arguments:
2366: re compiled regex
2367: study study data
2368: option PCRE_INFO_xxx option
2369: ptr where to put the data
2370:
2371: Returns: 0 when OK, < 0 on error
2372: */
2373:
2374: static int
2375: new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2376: {
2377: int rc;
2378:
1.1.1.4 ! misho 2379: if (pcre_mode == PCRE32_MODE)
! 2380: #ifdef SUPPORT_PCRE32
! 2381: rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
! 2382: #else
! 2383: rc = PCRE_ERROR_BADMODE;
! 2384: #endif
! 2385: else if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 2386: #ifdef SUPPORT_PCRE16
2387: rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2388: #else
2389: rc = PCRE_ERROR_BADMODE;
2390: #endif
2391: else
2392: #ifdef SUPPORT_PCRE8
2393: rc = pcre_fullinfo(re, study, option, ptr);
2394: #else
2395: rc = PCRE_ERROR_BADMODE;
2396: #endif
2397:
1.1.1.4 ! misho 2398: if (rc < 0 && rc != PCRE_ERROR_UNSET)
1.1.1.2 misho 2399: {
2400: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1.1.1.4 ! misho 2401: pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
1.1.1.2 misho 2402: if (rc == PCRE_ERROR_BADMODE)
1.1.1.4 ! misho 2403: fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
! 2404: "%d-bit mode\n", 8 * CHAR_SIZE,
! 2405: 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
1.1.1.2 misho 2406: }
2407:
2408: return rc;
2409: }
2410:
2411:
2412:
2413: /*************************************************
2414: * Swap byte functions *
2415: *************************************************/
2416:
2417: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2418: value, respectively.
2419:
2420: Arguments:
2421: value any number
2422:
2423: Returns: the byte swapped value
2424: */
2425:
2426: static pcre_uint32
2427: swap_uint32(pcre_uint32 value)
2428: {
2429: return ((value & 0x000000ff) << 24) |
2430: ((value & 0x0000ff00) << 8) |
2431: ((value & 0x00ff0000) >> 8) |
2432: (value >> 24);
2433: }
2434:
2435: static pcre_uint16
2436: swap_uint16(pcre_uint16 value)
2437: {
2438: return (value >> 8) | (value << 8);
2439: }
2440:
2441:
2442:
2443: /*************************************************
2444: * Flip bytes in a compiled pattern *
2445: *************************************************/
2446:
2447: /* This function is called if the 'F' option was present on a pattern that is
2448: to be written to a file. We flip the bytes of all the integer fields in the
2449: regex data block and the study block. In 16-bit mode this also flips relevant
2450: bytes in the pattern itself. This is to make it possible to test PCRE's
2451: ability to reload byte-flipped patterns, e.g. those compiled on a different
2452: architecture. */
2453:
1.1.1.4 ! misho 2454: #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flips a compiled pattern in place. The header fields of the regex
block are swapped and magic_number is set to REVERSED_MAGIC_NUMBER; if extra
is not NULL, the size, flags and minlength fields of its study data are
swapped too. When 16-bit support is compiled in and pcre_mode is PCRE16_MODE,
the 16-bit units from the name table onwards are then swapped as well, walking
the compiled code opcode by opcode until OP_END.

Arguments:
  ere     the compiled pattern
  extra   the extra block whose study_data is flipped, or NULL
*/
1.1.1.2 misho 2455: static void
1.1.1.4 ! misho 2456: regexflip8_or_16(pcre *ere, pcre_extra *extra)
1.1.1.2 misho 2457: {
1.1.1.4 ! misho 2458: real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
1.1.1.2 misho 2459: #ifdef SUPPORT_PCRE16
/* ptr and length are taken from the header before its fields are swapped
below: they describe the name table, which is the first run of 16-bit units
that the main loop flips. */
2460: int op;
2461: pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2462: int length = re->name_count * re->name_entry_size;
2463: #ifdef SUPPORT_UTF
2464: BOOL utf = (re->options & PCRE_UTF16) != 0;
2465: BOOL utf16_char = FALSE;
2466: #endif /* SUPPORT_UTF */
2467: #endif /* SUPPORT_PCRE16 */
2468:
2469: /* Always flip the bytes in the main data block and study blocks. */
2470:
2471: re->magic_number = REVERSED_MAGIC_NUMBER;
2472: re->size = swap_uint32(re->size);
2473: re->options = swap_uint32(re->options);
1.1.1.4 ! misho 2474: re->flags = swap_uint32(re->flags);
! 2475: re->limit_match = swap_uint32(re->limit_match);
! 2476: re->limit_recursion = swap_uint32(re->limit_recursion);
1.1.1.2 misho 2477: re->first_char = swap_uint16(re->first_char);
2478: re->req_char = swap_uint16(re->req_char);
1.1.1.4 ! misho 2479: re->max_lookbehind = swap_uint16(re->max_lookbehind);
! 2480: re->top_bracket = swap_uint16(re->top_bracket);
! 2481: re->top_backref = swap_uint16(re->top_backref);
1.1.1.2 misho 2482: re->name_table_offset = swap_uint16(re->name_table_offset);
2483: re->name_entry_size = swap_uint16(re->name_entry_size);
2484: re->name_count = swap_uint16(re->name_count);
1.1.1.4 ! misho 2485: re->ref_count = swap_uint16(re->ref_count);
1.1.1.2 misho 2486:
2487: if (extra != NULL)
2488: {
2489: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2490: rsd->size = swap_uint32(rsd->size);
2491: rsd->flags = swap_uint32(rsd->flags);
2492: rsd->minlength = swap_uint32(rsd->minlength);
2493: }
2494:
2495: /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2496: in the name table, if present, and then in the pattern itself. */
2497:
2498: #ifdef SUPPORT_PCRE16
1.1.1.4 ! misho 2499: if (pcre_mode != PCRE16_MODE) return;
1.1.1.2 misho 2500:
/* Each pass first flips the "length" units left pending by the previous pass
(initially the name table), then reads and flips the next opcode and works out
how many operand units follow it. The only exit is the return at OP_END. */
2501: while(TRUE)
2502: {
2503: /* Swap previous characters. */
2504: while (length-- > 0)
2505: {
2506: *ptr = swap_uint16(*ptr);
2507: ptr++;
2508: }
2509: #ifdef SUPPORT_UTF
/* If the previous opcode took a character operand in UTF-16 mode and the last
unit flipped was a high surrogate (tested after flipping, as written), the
following unit is flipped too. */
2510: if (utf16_char)
2511: {
2512: if ((ptr[-1] & 0xfc00) == 0xd800)
2513: {
2514: /* We know that there is only one extra character in UTF-16. */
2515: *ptr = swap_uint16(*ptr);
2516: ptr++;
2517: }
2518: }
2519: utf16_char = FALSE;
2520: #endif /* SUPPORT_UTF */
2521:
2522: /* Get next opcode. */
1.1 misho 2523:
1.1.1.2 misho 2524: length = 0;
2525: op = *ptr;
2526: *ptr++ = swap_uint16(op);
1.1 misho 2527:
1.1.1.2 misho 2528: switch (op)
2529: {
2530: case OP_END:
2531: return;
1.1 misho 2532:
1.1.1.2 misho 2533: #ifdef SUPPORT_UTF
/* All of the opcodes listed here set utf16_char in UTF-16 mode and then fall
through to the default length computation. */
2534: case OP_CHAR:
2535: case OP_CHARI:
2536: case OP_NOT:
2537: case OP_NOTI:
2538: case OP_STAR:
2539: case OP_MINSTAR:
2540: case OP_PLUS:
2541: case OP_MINPLUS:
2542: case OP_QUERY:
2543: case OP_MINQUERY:
2544: case OP_UPTO:
2545: case OP_MINUPTO:
2546: case OP_EXACT:
2547: case OP_POSSTAR:
2548: case OP_POSPLUS:
2549: case OP_POSQUERY:
2550: case OP_POSUPTO:
2551: case OP_STARI:
2552: case OP_MINSTARI:
2553: case OP_PLUSI:
2554: case OP_MINPLUSI:
2555: case OP_QUERYI:
2556: case OP_MINQUERYI:
2557: case OP_UPTOI:
2558: case OP_MINUPTOI:
2559: case OP_EXACTI:
2560: case OP_POSSTARI:
2561: case OP_POSPLUSI:
2562: case OP_POSQUERYI:
2563: case OP_POSUPTOI:
2564: case OP_NOTSTAR:
2565: case OP_NOTMINSTAR:
2566: case OP_NOTPLUS:
2567: case OP_NOTMINPLUS:
2568: case OP_NOTQUERY:
2569: case OP_NOTMINQUERY:
2570: case OP_NOTUPTO:
2571: case OP_NOTMINUPTO:
2572: case OP_NOTEXACT:
2573: case OP_NOTPOSSTAR:
2574: case OP_NOTPOSPLUS:
2575: case OP_NOTPOSQUERY:
2576: case OP_NOTPOSUPTO:
2577: case OP_NOTSTARI:
2578: case OP_NOTMINSTARI:
2579: case OP_NOTPLUSI:
2580: case OP_NOTMINPLUSI:
2581: case OP_NOTQUERYI:
2582: case OP_NOTMINQUERYI:
2583: case OP_NOTUPTOI:
2584: case OP_NOTMINUPTOI:
2585: case OP_NOTEXACTI:
2586: case OP_NOTPOSSTARI:
2587: case OP_NOTPOSPLUSI:
2588: case OP_NOTPOSQUERYI:
2589: case OP_NOTPOSUPTOI:
2590: if (utf) utf16_char = TRUE;
2591: #endif
2592: /* Fall through. */
1.1 misho 2593:
/* The operand count comes from the OP_lengths16 table; the opcode unit itself
has already been flipped, hence the - 1. */
1.1.1.2 misho 2594: default:
2595: length = OP_lengths16[op] - 1;
2596: break;
2597:
2598: case OP_CLASS:
2599: case OP_NCLASS:
2600: /* Skip the character bit map. */
2601: ptr += 32/sizeof(pcre_uint16);
2602: length = 0;
2603: break;
2604:
2605: case OP_XCLASS:
/* The instance size is read from the still-unflipped link unit(s) before they
are swapped below. */
2606: /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2607: if (LINK_SIZE > 1)
2608: length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2609: - (1 + LINK_SIZE + 1));
2610: else
2611: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1.1 misho 2612:
1.1.1.2 misho 2613: /* Reverse the size of the XCLASS instance. */
2614: *ptr = swap_uint16(*ptr);
2615: ptr++;
2616: if (LINK_SIZE > 1)
2617: {
2618: *ptr = swap_uint16(*ptr);
2619: ptr++;
2620: }
1.1 misho 2621:
/* The next unit is flipped and its XCL_MAP bit tested to see whether a bit
map follows. */
1.1.1.2 misho 2622: op = *ptr;
2623: *ptr = swap_uint16(op);
2624: ptr++;
2625: if ((op & XCL_MAP) != 0)
2626: {
2627: /* Skip the character bit map. */
2628: ptr += 32/sizeof(pcre_uint16);
2629: length -= 32/sizeof(pcre_uint16);
2630: }
2631: break;
2632: }
2633: }
2634: /* Control should never reach here in 16 bit mode. */
2635: #endif /* SUPPORT_PCRE16 */
1.1 misho 2636: }
1.1.1.4 ! misho 2637: #endif /* SUPPORT_PCRE[8|16] */
! 2638:
! 2639:
! 2640:
! 2641: #if defined SUPPORT_PCRE32
! 2642: static void
! 2643: regexflip_32(pcre *ere, pcre_extra *extra)
! 2644: {
! 2645: real_pcre32 *re = (real_pcre32 *)ere;
! 2646: int op;
! 2647: pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
! 2648: int length = re->name_count * re->name_entry_size;
! 2649:
! 2650: /* Always flip the bytes in the main data block and study blocks. */
! 2651:
! 2652: re->magic_number = REVERSED_MAGIC_NUMBER;
! 2653: re->size = swap_uint32(re->size);
! 2654: re->options = swap_uint32(re->options);
! 2655: re->flags = swap_uint32(re->flags);
! 2656: re->limit_match = swap_uint32(re->limit_match);
! 2657: re->limit_recursion = swap_uint32(re->limit_recursion);
! 2658: re->first_char = swap_uint32(re->first_char);
! 2659: re->req_char = swap_uint32(re->req_char);
! 2660: re->max_lookbehind = swap_uint16(re->max_lookbehind);
! 2661: re->top_bracket = swap_uint16(re->top_bracket);
! 2662: re->top_backref = swap_uint16(re->top_backref);
! 2663: re->name_table_offset = swap_uint16(re->name_table_offset);
! 2664: re->name_entry_size = swap_uint16(re->name_entry_size);
! 2665: re->name_count = swap_uint16(re->name_count);
! 2666: re->ref_count = swap_uint16(re->ref_count);
! 2667:
! 2668: if (extra != NULL)
! 2669: {
! 2670: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
! 2671: rsd->size = swap_uint32(rsd->size);
! 2672: rsd->flags = swap_uint32(rsd->flags);
! 2673: rsd->minlength = swap_uint32(rsd->minlength);
! 2674: }
! 2675:
! 2676: /* In 32-bit mode we must swap bytes in the name table, if present, and then in
! 2677: the pattern itself. */
! 2678:
! 2679: while(TRUE)
! 2680: {
! 2681: /* Swap previous characters. */
! 2682: while (length-- > 0)
! 2683: {
! 2684: *ptr = swap_uint32(*ptr);
! 2685: ptr++;
! 2686: }
! 2687:
! 2688: /* Get next opcode. */
! 2689:
! 2690: length = 0;
! 2691: op = *ptr;
! 2692: *ptr++ = swap_uint32(op);
! 2693:
! 2694: switch (op)
! 2695: {
! 2696: case OP_END:
! 2697: return;
! 2698:
! 2699: default:
! 2700: length = OP_lengths32[op] - 1;
! 2701: break;
! 2702:
! 2703: case OP_CLASS:
! 2704: case OP_NCLASS:
! 2705: /* Skip the character bit map. */
! 2706: ptr += 32/sizeof(pcre_uint32);
! 2707: length = 0;
! 2708: break;
! 2709:
! 2710: case OP_XCLASS:
! 2711: /* LINK_SIZE can only be 1 in 32-bit mode. */
! 2712: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
! 2713:
! 2714: /* Reverse the size of the XCLASS instance. */
! 2715: *ptr = swap_uint32(*ptr);
! 2716: ptr++;
! 2717:
! 2718: op = *ptr;
! 2719: *ptr = swap_uint32(op);
! 2720: ptr++;
! 2721: if ((op & XCL_MAP) != 0)
! 2722: {
! 2723: /* Skip the character bit map. */
! 2724: ptr += 32/sizeof(pcre_uint32);
! 2725: length -= 32/sizeof(pcre_uint32);
! 2726: }
! 2727: break;
! 2728: }
! 2729: }
! 2730: /* Control should never reach here in 32 bit mode. */
! 2731: }
! 2732:
! 2733: #endif /* SUPPORT_PCRE32 */
! 2734:
! 2735:
! 2736:
! 2737: static void
! 2738: regexflip(pcre *ere, pcre_extra *extra)
! 2739: {
! 2740: #if defined SUPPORT_PCRE32
! 2741: if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
! 2742: regexflip_32(ere, extra);
! 2743: #endif
! 2744: #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
! 2745: if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
! 2746: regexflip8_or_16(ere, extra);
! 2747: #endif
! 2748: }
1.1 misho 2749:
2750:
2751:
2752: /*************************************************
2753: * Check match or recursion limit *
2754: *************************************************/
2755:
2756: static int
1.1.1.2 misho 2757: check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1.1 misho 2758: int start_offset, int options, int *use_offsets, int use_size_offsets,
2759: int flag, unsigned long int *limit, int errnumber, const char *msg)
2760: {
2761: int count;
2762: int min = 0;
2763: int mid = 64;
2764: int max = -1;
2765:
2766: extra->flags |= flag;
2767:
2768: for (;;)
2769: {
2770: *limit = mid;
2771:
1.1.1.2 misho 2772: PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1.1 misho 2773: use_offsets, use_size_offsets);
2774:
2775: if (count == errnumber)
2776: {
2777: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2778: min = mid;
2779: mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2780: }
2781:
2782: else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2783: count == PCRE_ERROR_PARTIAL)
2784: {
2785: if (mid == min + 1)
2786: {
2787: fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2788: break;
2789: }
2790: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2791: max = mid;
2792: mid = (min + mid)/2;
2793: }
2794: else break; /* Some other error */
2795: }
2796:
2797: extra->flags &= ~flag;
2798: return count;
2799: }
2800:
2801:
2802:
2803: /*************************************************
2804: * Case-independent strncmp() function *
2805: *************************************************/
2806:
2807: /*
2808: Arguments:
2809: s first string
2810: t second string
2811: n number of characters to compare
2812:
2813: Returns: < 0, = 0, or > 0, according to the comparison
2814: */
2815:
2816: static int
1.1.1.2 misho 2817: strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1.1 misho 2818: {
2819: while (n--)
2820: {
2821: int c = tolower(*s++) - tolower(*t++);
2822: if (c) return c;
2823: }
2824: return 0;
2825: }
2826:
2827:
2828:
2829: /*************************************************
2830: * Check newline indicator *
2831: *************************************************/
2832:
2833: /* This is used both at compile and run-time to check for <xxx> escapes. Print
2834: a message and return 0 if there is no match.
2835:
2836: Arguments:
2837: p points after the leading '<'
2838: f file for error message
2839:
2840: Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2841: */
2842:
2843: static int
1.1.1.2 misho 2844: check_newline(pcre_uint8 *p, FILE *f)
1.1 misho 2845: {
1.1.1.2 misho 2846: if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2847: if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2848: if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2849: if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2850: if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2851: if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2852: if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1.1 misho 2853: fprintf(f, "Unknown newline type at: <%s\n", p);
2854: return 0;
2855: }
2856:
2857:
2858:
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command line help text to stdout. Options that exist only when a
particular library or feature is compiled in are listed only in builds that
have it. The list covers every option and every -C argument that the option
scanner in main() accepts, including -8 and the EBCDIC queries.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcretest is not linked with readline().\n");
#endif
printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
printf(" -32 use the 32-bit library\n");
#endif
printf(" -b show compiled code\n");
printf(" -C show PCRE compile-time options and exit\n");
printf(" -C arg show a specific compile-time option\n");
printf(" and exit with its value. The arg can be:\n");
printf(" linksize internal link size [2, 3, 4]\n");
printf(" pcre8 8 bit library support enabled [0, 1]\n");
printf(" pcre16 16 bit library support enabled [0, 1]\n");
printf(" pcre32 32 bit library support enabled [0, 1]\n");
printf(" utf Unicode Transformation Format supported [0, 1]\n");
printf(" ucp Unicode Properties supported [0, 1]\n");
printf(" jit Just-in-time compiler supported [0, 1]\n");
printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
printf(" ebcdic compiled for EBCDIC [0, 1]\n");
printf(" ebcdic-nl code for LF if compiled for EBCDIC, otherwise 0\n");
printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf(" -dfa force DFA matching for all subjects\n");
#endif
printf(" -help show usage information\n");
printf(" -i show information about compiled patterns\n"
       " -M find MATCH_LIMIT minimum for each subject\n"
       " -m output memory used information\n"
       " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
       " -s+ force each pattern to be studied, using JIT if available\n"
       " -s++ ditto, verifying when JIT was actually used\n"
       " -s+n force each pattern to be studied, using JIT if available,\n"
       " where 1 <= n <= 7 selects JIT options\n"
       " -s++n ditto, verifying when JIT was actually used\n"
       " -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
}
2917:
2918:
2919:
2920: /*************************************************
2921: * Main Program *
2922: *************************************************/
2923:
2924: /* Read lines from named file or stdin and write to named file or stdout; lines
2925: consist of a regular expression, in delimiters and optionally followed by
2926: options, followed by a set of test data, terminated by an empty line. */
2927:
2928: int main(int argc, char **argv)
2929: {
2930: FILE *infile = stdin;
1.1.1.2 misho 2931: const char *version;
1.1 misho 2932: int options = 0;
2933: int study_options = 0;
2934: int default_find_match_limit = FALSE;
2935: int op = 1;
2936: int timeit = 0;
2937: int timeitm = 0;
2938: int showinfo = 0;
2939: int showstore = 0;
2940: int force_study = -1;
2941: int force_study_options = 0;
2942: int quiet = 0;
2943: int size_offsets = 45;
2944: int size_offsets_max;
2945: int *offsets = NULL;
2946: int debug = 0;
2947: int done = 0;
2948: int all_use_dfa = 0;
1.1.1.3 misho 2949: int verify_jit = 0;
1.1 misho 2950: int yield = 0;
2951: int stack_size;
1.1.1.4 ! misho 2952: pcre_uint8 *dbuffer = NULL;
! 2953: size_t dbuffer_size = 1u << 14;
1.1 misho 2954:
1.1.1.3 misho 2955: #if !defined NOPOSIX
2956: int posix = 0;
2957: #endif
2958: #if !defined NODFA
2959: int *dfa_workspace = NULL;
2960: #endif
2961:
1.1 misho 2962: pcre_jit_stack *jit_stack = NULL;
2963:
1.1.1.2 misho 2964: /* These vectors store, end-to-end, a list of zero-terminated captured
2965: substring names, each list itself being terminated by an empty name. Assume
2966: that 1024 is plenty long enough for the few names we'll be testing. It is
1.1.1.4 ! misho 2967: easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
1.1.1.2 misho 2968: for the actual memory, to ensure alignment. */
2969:
1.1.1.4 ! misho 2970: pcre_uint32 copynames[1024];
! 2971: pcre_uint32 getnames[1024];
! 2972:
! 2973: #ifdef SUPPORT_PCRE32
! 2974: pcre_uint32 *cn32ptr;
! 2975: pcre_uint32 *gn32ptr;
! 2976: #endif
1.1.1.2 misho 2977:
2978: #ifdef SUPPORT_PCRE16
1.1.1.4 ! misho 2979: pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
! 2980: pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
1.1.1.2 misho 2981: pcre_uint16 *cn16ptr;
2982: pcre_uint16 *gn16ptr;
2983: #endif
1.1 misho 2984:
1.1.1.2 misho 2985: #ifdef SUPPORT_PCRE8
2986: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2987: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2988: pcre_uint8 *cn8ptr;
2989: pcre_uint8 *gn8ptr;
2990: #endif
1.1 misho 2991:
1.1.1.2 misho 2992: /* Get buffers from malloc() so that valgrind will check their misuse when
1.1.1.4 ! misho 2993: debugging. They grow automatically when very long lines are read. The 16-
! 2994: and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
1.1.1.2 misho 2995:
2996: buffer = (pcre_uint8 *)malloc(buffer_size);
2997: pbuffer = (pcre_uint8 *)malloc(buffer_size);
1.1 misho 2998:
2999: /* The outfile variable is static so that new_malloc can use it. */
3000:
3001: outfile = stdout;
3002:
3003: /* The following _setmode() stuff is some Windows magic that tells its runtime
3004: library to translate CRLF into a single LF character. At least, that's what
3005: I've been told: never having used Windows I take this all on trust. Originally
3006: it set 0x8000, but then I was advised that _O_BINARY was better. */
3007:
3008: #if defined(_WIN32) || defined(WIN32)
3009: _setmode( _fileno( stdout ), _O_BINARY );
3010: #endif
3011:
1.1.1.2 misho 3012: /* Get the version number: both pcre_version() and pcre16_version() give the
3013: same answer. We just need to ensure that we call one that is available. */
3014:
1.1.1.4 ! misho 3015: #if defined SUPPORT_PCRE8
1.1.1.2 misho 3016: version = pcre_version();
1.1.1.4 ! misho 3017: #elif defined SUPPORT_PCRE16
1.1.1.2 misho 3018: version = pcre16_version();
1.1.1.4 ! misho 3019: #elif defined SUPPORT_PCRE32
! 3020: version = pcre32_version();
1.1.1.2 misho 3021: #endif
3022:
1.1 misho 3023: /* Scan options */
3024:
3025: while (argc > 1 && argv[op][0] == '-')
3026: {
1.1.1.2 misho 3027: pcre_uint8 *endptr;
1.1.1.3 misho 3028: char *arg = argv[op];
1.1 misho 3029:
1.1.1.3 misho 3030: if (strcmp(arg, "-m") == 0) showstore = 1;
3031: else if (strcmp(arg, "-s") == 0) force_study = 0;
3032:
3033: else if (strncmp(arg, "-s+", 3) == 0)
1.1 misho 3034: {
1.1.1.3 misho 3035: arg += 3;
3036: if (*arg == '+') { arg++; verify_jit = TRUE; }
1.1 misho 3037: force_study = 1;
1.1.1.3 misho 3038: if (*arg == 0)
3039: force_study_options = jit_study_bits[6];
3040: else if (*arg >= '1' && *arg <= '7')
3041: force_study_options = jit_study_bits[*arg - '1'];
3042: else goto BAD_ARG;
1.1 misho 3043: }
1.1.1.4 ! misho 3044: else if (strcmp(arg, "-8") == 0)
! 3045: {
! 3046: #ifdef SUPPORT_PCRE8
! 3047: pcre_mode = PCRE8_MODE;
! 3048: #else
! 3049: printf("** This version of PCRE was built without 8-bit support\n");
! 3050: exit(1);
! 3051: #endif
! 3052: }
1.1.1.3 misho 3053: else if (strcmp(arg, "-16") == 0)
1.1.1.2 misho 3054: {
3055: #ifdef SUPPORT_PCRE16
1.1.1.4 ! misho 3056: pcre_mode = PCRE16_MODE;
1.1.1.2 misho 3057: #else
3058: printf("** This version of PCRE was built without 16-bit support\n");
3059: exit(1);
3060: #endif
3061: }
1.1.1.4 ! misho 3062: else if (strcmp(arg, "-32") == 0)
! 3063: {
! 3064: #ifdef SUPPORT_PCRE32
! 3065: pcre_mode = PCRE32_MODE;
! 3066: #else
! 3067: printf("** This version of PCRE was built without 32-bit support\n");
! 3068: exit(1);
! 3069: #endif
! 3070: }
1.1.1.3 misho 3071: else if (strcmp(arg, "-q") == 0) quiet = 1;
3072: else if (strcmp(arg, "-b") == 0) debug = 1;
3073: else if (strcmp(arg, "-i") == 0) showinfo = 1;
3074: else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3075: else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
1.1 misho 3076: #if !defined NODFA
1.1.1.3 misho 3077: else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
1.1 misho 3078: #endif
1.1.1.3 misho 3079: else if (strcmp(arg, "-o") == 0 && argc > 2 &&
1.1.1.2 misho 3080: ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 3081: *endptr == 0))
3082: {
3083: op++;
3084: argc--;
3085: }
1.1.1.3 misho 3086: else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
1.1 misho 3087: {
1.1.1.3 misho 3088: int both = arg[2] == 0;
1.1 misho 3089: int temp;
1.1.1.2 misho 3090: if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1.1 misho 3091: *endptr == 0))
3092: {
3093: timeitm = temp;
3094: op++;
3095: argc--;
3096: }
3097: else timeitm = LOOPREPEAT;
3098: if (both) timeit = timeitm;
3099: }
1.1.1.3 misho 3100: else if (strcmp(arg, "-S") == 0 && argc > 2 &&
1.1.1.2 misho 3101: ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 3102: *endptr == 0))
3103: {
1.1.1.4 ! misho 3104: #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
1.1 misho 3105: printf("PCRE: -S not supported on this OS\n");
3106: exit(1);
3107: #else
3108: int rc;
3109: struct rlimit rlim;
3110: getrlimit(RLIMIT_STACK, &rlim);
3111: rlim.rlim_cur = stack_size * 1024 * 1024;
3112: rc = setrlimit(RLIMIT_STACK, &rlim);
3113: if (rc != 0)
3114: {
3115: printf("PCRE: setrlimit() failed with error %d\n", rc);
3116: exit(1);
3117: }
3118: op++;
3119: argc--;
3120: #endif
3121: }
3122: #if !defined NOPOSIX
1.1.1.3 misho 3123: else if (strcmp(arg, "-p") == 0) posix = 1;
1.1 misho 3124: #endif
1.1.1.3 misho 3125: else if (strcmp(arg, "-C") == 0)
1.1 misho 3126: {
3127: int rc;
3128: unsigned long int lrc;
1.1.1.2 misho 3129:
3130: if (argc > 2)
3131: {
3132: if (strcmp(argv[op + 1], "linksize") == 0)
3133: {
3134: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3135: printf("%d\n", rc);
3136: yield = rc;
1.1.1.4 ! misho 3137:
! 3138: #ifdef __VMS
! 3139: vms_setsymbol("LINKSIZE",0,yield );
! 3140: #endif
1.1.1.2 misho 3141: }
1.1.1.4 ! misho 3142: else if (strcmp(argv[op + 1], "pcre8") == 0)
1.1.1.2 misho 3143: {
3144: #ifdef SUPPORT_PCRE8
3145: printf("1\n");
3146: yield = 1;
3147: #else
3148: printf("0\n");
3149: yield = 0;
3150: #endif
1.1.1.4 ! misho 3151: #ifdef __VMS
! 3152: vms_setsymbol("PCRE8",0,yield );
! 3153: #endif
1.1.1.2 misho 3154: }
1.1.1.4 ! misho 3155: else if (strcmp(argv[op + 1], "pcre16") == 0)
1.1.1.2 misho 3156: {
3157: #ifdef SUPPORT_PCRE16
3158: printf("1\n");
3159: yield = 1;
3160: #else
3161: printf("0\n");
3162: yield = 0;
3163: #endif
1.1.1.4 ! misho 3164: #ifdef __VMS
! 3165: vms_setsymbol("PCRE16",0,yield );
! 3166: #endif
1.1.1.2 misho 3167: }
1.1.1.4 ! misho 3168: else if (strcmp(argv[op + 1], "pcre32") == 0)
1.1.1.2 misho 3169: {
1.1.1.4 ! misho 3170: #ifdef SUPPORT_PCRE32
! 3171: printf("1\n");
! 3172: yield = 1;
1.1.1.2 misho 3173: #else
1.1.1.4 ! misho 3174: printf("0\n");
! 3175: yield = 0;
! 3176: #endif
! 3177: #ifdef __VMS
! 3178: vms_setsymbol("PCRE32",0,yield );
! 3179: #endif
! 3180: }
! 3181: else if (strcmp(argv[op + 1], "utf") == 0)
! 3182: {
! 3183: #ifdef SUPPORT_PCRE8
! 3184: if (pcre_mode == PCRE8_MODE)
! 3185: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
! 3186: #endif
! 3187: #ifdef SUPPORT_PCRE16
! 3188: if (pcre_mode == PCRE16_MODE)
! 3189: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
! 3190: #endif
! 3191: #ifdef SUPPORT_PCRE32
! 3192: if (pcre_mode == PCRE32_MODE)
! 3193: (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
! 3194: #endif
1.1.1.2 misho 3195: printf("%d\n", rc);
3196: yield = rc;
1.1.1.4 ! misho 3197: #ifdef __VMS
! 3198: vms_setsymbol("UTF",0,yield );
1.1.1.2 misho 3199: #endif
3200: }
1.1.1.4 ! misho 3201: else if (strcmp(argv[op + 1], "ucp") == 0)
1.1.1.2 misho 3202: {
3203: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3204: printf("%d\n", rc);
3205: yield = rc;
3206: }
1.1.1.4 ! misho 3207: else if (strcmp(argv[op + 1], "jit") == 0)
! 3208: {
! 3209: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
! 3210: printf("%d\n", rc);
! 3211: yield = rc;
! 3212: }
! 3213: else if (strcmp(argv[op + 1], "newline") == 0)
! 3214: {
! 3215: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
! 3216: print_newline_config(rc, TRUE);
! 3217: }
! 3218: else if (strcmp(argv[op + 1], "ebcdic") == 0)
! 3219: {
! 3220: #ifdef EBCDIC
! 3221: printf("1\n");
! 3222: yield = 1;
! 3223: #else
! 3224: printf("0\n");
! 3225: #endif
! 3226: }
! 3227: else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
1.1.1.2 misho 3228: {
1.1.1.4 ! misho 3229: #ifdef EBCDIC
! 3230: printf("0x%02x\n", CHAR_LF);
! 3231: #else
! 3232: printf("0\n");
! 3233: #endif
1.1.1.2 misho 3234: }
1.1.1.4 ! misho 3235: else
1.1.1.2 misho 3236: {
1.1.1.4 ! misho 3237: printf("Unknown -C option: %s\n", argv[op + 1]);
1.1.1.2 misho 3238: }
3239: goto EXIT;
3240: }
3241:
1.1.1.4 ! misho 3242: /* No argument for -C: output all configuration information. */
! 3243:
1.1.1.2 misho 3244: printf("PCRE version %s\n", version);
1.1 misho 3245: printf("Compiled with\n");
1.1.1.2 misho 3246:
1.1.1.4 ! misho 3247: #ifdef EBCDIC
! 3248: printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
! 3249: #endif
! 3250:
1.1.1.2 misho 3251: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3252: are set, either both UTFs are supported or both are not supported. */
3253:
1.1.1.4 ! misho 3254: #ifdef SUPPORT_PCRE8
! 3255: printf(" 8-bit support\n");
1.1 misho 3256: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1.1.1.4 ! misho 3257: printf (" %sUTF-8 support\n", rc ? "" : "No ");
! 3258: #endif
! 3259: #ifdef SUPPORT_PCRE16
! 3260: printf(" 16-bit support\n");
1.1.1.2 misho 3261: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1.1.1.4 ! misho 3262: printf (" %sUTF-16 support\n", rc ? "" : "No ");
! 3263: #endif
! 3264: #ifdef SUPPORT_PCRE32
! 3265: printf(" 32-bit support\n");
! 3266: (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
! 3267: printf (" %sUTF-32 support\n", rc ? "" : "No ");
1.1.1.2 misho 3268: #endif
3269:
3270: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1.1 misho 3271: printf(" %sUnicode properties support\n", rc? "" : "No ");
1.1.1.2 misho 3272: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
1.1 misho 3273: if (rc)
1.1.1.2 misho 3274: {
3275: const char *arch;
3276: (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3277: printf(" Just-in-time compiler support: %s\n", arch);
3278: }
1.1 misho 3279: else
3280: printf(" No just-in-time compiler support\n");
1.1.1.2 misho 3281: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
1.1.1.4 ! misho 3282: print_newline_config(rc, FALSE);
1.1.1.2 misho 3283: (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
1.1 misho 3284: printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3285: "all Unicode newlines");
1.1.1.2 misho 3286: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
1.1 misho 3287: printf(" Internal link size = %d\n", rc);
1.1.1.2 misho 3288: (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1.1 misho 3289: printf(" POSIX malloc threshold = %d\n", rc);
1.1.1.2 misho 3290: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1.1 misho 3291: printf(" Default match limit = %ld\n", lrc);
1.1.1.2 misho 3292: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1.1 misho 3293: printf(" Default recursion depth limit = %ld\n", lrc);
1.1.1.2 misho 3294: (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3295: printf(" Match recursion uses %s", rc? "stack" : "heap");
3296: if (showstore)
3297: {
3298: PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3299: printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3300: }
3301: printf("\n");
1.1 misho 3302: goto EXIT;
3303: }
1.1.1.3 misho 3304: else if (strcmp(arg, "-help") == 0 ||
3305: strcmp(arg, "--help") == 0)
1.1 misho 3306: {
3307: usage();
3308: goto EXIT;
3309: }
3310: else
3311: {
1.1.1.3 misho 3312: BAD_ARG:
3313: printf("** Unknown or malformed option %s\n", arg);
1.1 misho 3314: usage();
3315: yield = 1;
3316: goto EXIT;
3317: }
3318: op++;
3319: argc--;
3320: }
3321:
3322: /* Get the store for the offsets vector, and remember what it was */
3323:
3324: size_offsets_max = size_offsets;
3325: offsets = (int *)malloc(size_offsets_max * sizeof(int));
3326: if (offsets == NULL)
3327: {
3328: printf("** Failed to get %d bytes of memory for offsets vector\n",
3329: (int)(size_offsets_max * sizeof(int)));
3330: yield = 1;
3331: goto EXIT;
3332: }
3333:
3334: /* Sort out the input and output files */
3335:
3336: if (argc > 1)
3337: {
3338: infile = fopen(argv[op], INPUT_MODE);
3339: if (infile == NULL)
3340: {
3341: printf("** Failed to open %s\n", argv[op]);
3342: yield = 1;
3343: goto EXIT;
3344: }
3345: }
3346:
3347: if (argc > 2)
3348: {
3349: outfile = fopen(argv[op+1], OUTPUT_MODE);
3350: if (outfile == NULL)
3351: {
3352: printf("** Failed to open %s\n", argv[op+1]);
3353: yield = 1;
3354: goto EXIT;
3355: }
3356: }
3357:
3358: /* Set alternative malloc function */
3359:
1.1.1.2 misho 3360: #ifdef SUPPORT_PCRE8
1.1 misho 3361: pcre_malloc = new_malloc;
3362: pcre_free = new_free;
3363: pcre_stack_malloc = stack_malloc;
3364: pcre_stack_free = stack_free;
1.1.1.2 misho 3365: #endif
3366:
3367: #ifdef SUPPORT_PCRE16
3368: pcre16_malloc = new_malloc;
3369: pcre16_free = new_free;
3370: pcre16_stack_malloc = stack_malloc;
3371: pcre16_stack_free = stack_free;
3372: #endif
1.1 misho 3373:
1.1.1.4 ! misho 3374: #ifdef SUPPORT_PCRE32
! 3375: pcre32_malloc = new_malloc;
! 3376: pcre32_free = new_free;
! 3377: pcre32_stack_malloc = stack_malloc;
! 3378: pcre32_stack_free = stack_free;
! 3379: #endif
! 3380:
1.1 misho 3381: /* Heading line unless quiet, then prompt for first regex if stdin */
3382:
1.1.1.2 misho 3383: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
1.1 misho 3384:
3385: /* Main loop */
3386:
3387: while (!done)
3388: {
3389: pcre *re = NULL;
3390: pcre_extra *extra = NULL;
3391:
3392: #if !defined NOPOSIX /* There are still compilers that require no indent */
3393: regex_t preg;
3394: int do_posix = 0;
3395: #endif
3396:
3397: const char *error;
1.1.1.2 misho 3398: pcre_uint8 *markptr;
3399: pcre_uint8 *p, *pp, *ppp;
3400: pcre_uint8 *to_file = NULL;
3401: const pcre_uint8 *tables = NULL;
3402: unsigned long int get_options;
1.1 misho 3403: unsigned long int true_size, true_study_size = 0;
3404: size_t size, regex_gotten_store;
3405: int do_allcaps = 0;
3406: int do_mark = 0;
3407: int do_study = 0;
3408: int no_force_study = 0;
3409: int do_debug = debug;
3410: int do_G = 0;
3411: int do_g = 0;
3412: int do_showinfo = showinfo;
3413: int do_showrest = 0;
3414: int do_showcaprest = 0;
3415: int do_flip = 0;
3416: int erroroffset, len, delimiter, poffset;
3417:
1.1.1.3 misho 3418: #if !defined NODFA
3419: int dfa_matched = 0;
3420: #endif
3421:
1.1.1.2 misho 3422: use_utf = 0;
1.1 misho 3423: debug_lengths = 1;
3424:
3425: if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3426: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3427: fflush(outfile);
3428:
3429: p = buffer;
3430: while (isspace(*p)) p++;
3431: if (*p == 0) continue;
3432:
3433: /* See if the pattern is to be loaded pre-compiled from a file. */
3434:
3435: if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3436: {
1.1.1.2 misho 3437: pcre_uint32 magic;
3438: pcre_uint8 sbuf[8];
1.1 misho 3439: FILE *f;
3440:
3441: p++;
1.1.1.2 misho 3442: if (*p == '!')
3443: {
3444: do_debug = TRUE;
3445: do_showinfo = TRUE;
3446: p++;
3447: }
3448:
1.1 misho 3449: pp = p + (int)strlen((char *)p);
3450: while (isspace(pp[-1])) pp--;
3451: *pp = 0;
3452:
3453: f = fopen((char *)p, "rb");
3454: if (f == NULL)
3455: {
3456: fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3457: continue;
3458: }
3459:
1.1.1.2 misho 3460: first_gotten_store = 0;
1.1 misho 3461: if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3462:
3463: true_size =
3464: (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3465: true_study_size =
3466: (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3467:
1.1.1.2 misho 3468: re = (pcre *)new_malloc(true_size);
1.1.1.4 ! misho 3469: if (re == NULL)
! 3470: {
! 3471: printf("** Failed to get %d bytes of memory for pcre object\n",
! 3472: (int)true_size);
! 3473: yield = 1;
! 3474: goto EXIT;
! 3475: }
1.1 misho 3476: regex_gotten_store = first_gotten_store;
3477:
3478: if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3479:
1.1.1.4 ! misho 3480: magic = REAL_PCRE_MAGIC(re);
1.1 misho 3481: if (magic != MAGIC_NUMBER)
3482: {
1.1.1.2 misho 3483: if (swap_uint32(magic) == MAGIC_NUMBER)
1.1 misho 3484: {
3485: do_flip = 1;
3486: }
3487: else
3488: {
3489: fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1.1.1.4 ! misho 3490: new_free(re);
1.1 misho 3491: fclose(f);
3492: continue;
3493: }
3494: }
3495:
1.1.1.2 misho 3496: /* We hide the byte-invert info for little and big endian tests. */
1.1 misho 3497: fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1.1.1.2 misho 3498: do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
1.1 misho 3499:
3500: /* Now see if there is any following study data. */
3501:
3502: if (true_study_size != 0)
3503: {
3504: pcre_study_data *psd;
3505:
3506: extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3507: extra->flags = PCRE_EXTRA_STUDY_DATA;
3508:
3509: psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3510: extra->study_data = psd;
3511:
3512: if (fread(psd, 1, true_study_size, f) != true_study_size)
3513: {
3514: FAIL_READ:
3515: fprintf(outfile, "Failed to read data from %s\n", p);
1.1.1.2 misho 3516: if (extra != NULL)
3517: {
3518: PCRE_FREE_STUDY(extra);
3519: }
1.1.1.4 ! misho 3520: new_free(re);
1.1 misho 3521: fclose(f);
3522: continue;
3523: }
3524: fprintf(outfile, "Study data loaded from %s\n", p);
3525: do_study = 1; /* To get the data output if requested */
3526: }
3527: else fprintf(outfile, "No study data\n");
3528:
1.1.1.2 misho 3529: /* Flip the necessary bytes. */
3530: if (do_flip)
3531: {
3532: int rc;
3533: PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3534: if (rc == PCRE_ERROR_BADMODE)
3535: {
1.1.1.4 ! misho 3536: pcre_uint32 flags_in_host_byte_order;
! 3537: if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
! 3538: flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
! 3539: else
! 3540: flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
1.1.1.2 misho 3541: /* Simulate the result of the function call below. */
3542: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1.1.1.4 ! misho 3543: pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
! 3544: PCRE_INFO_OPTIONS);
! 3545: fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
! 3546: "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
! 3547: new_free(re);
! 3548: fclose(f);
1.1.1.2 misho 3549: continue;
3550: }
3551: }
3552:
3553: /* Need to know if UTF-8 for printing data strings. */
3554:
1.1.1.4 ! misho 3555: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
! 3556: {
! 3557: new_free(re);
! 3558: fclose(f);
! 3559: continue;
! 3560: }
1.1.1.2 misho 3561: use_utf = (get_options & PCRE_UTF8) != 0;
3562:
1.1 misho 3563: fclose(f);
3564: goto SHOW_INFO;
3565: }
3566:
3567: /* In-line pattern (the usual case). Get the delimiter and seek the end of
1.1.1.2 misho 3568: the pattern; if it isn't complete, read more. */
1.1 misho 3569:
3570: delimiter = *p++;
3571:
3572: if (isalnum(delimiter) || delimiter == '\\')
3573: {
3574: fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3575: goto SKIP_DATA;
3576: }
3577:
3578: pp = p;
3579: poffset = (int)(p - buffer);
3580:
3581: for(;;)
3582: {
3583: while (*pp != 0)
3584: {
3585: if (*pp == '\\' && pp[1] != 0) pp++;
3586: else if (*pp == delimiter) break;
3587: pp++;
3588: }
3589: if (*pp != 0) break;
3590: if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3591: {
3592: fprintf(outfile, "** Unexpected EOF\n");
3593: done = 1;
3594: goto CONTINUE;
3595: }
3596: if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3597: }
3598:
3599: /* The buffer may have moved while being extended; reset the start of data
3600: pointer to the correct relative point in the buffer. */
3601:
3602: p = buffer + poffset;
3603:
3604: /* If the first character after the delimiter is backslash, make
3605: the pattern end with backslash. This is purely to provide a way
3606: of testing for the error message when a pattern ends with backslash. */
3607:
3608: if (pp[1] == '\\') *pp++ = '\\';
3609:
3610: /* Terminate the pattern at the delimiter, and save a copy of the pattern
3611: for callouts. */
3612:
3613: *pp++ = 0;
3614: strcpy((char *)pbuffer, (char *)p);
3615:
3616: /* Look for options after final delimiter */
3617:
3618: options = 0;
1.1.1.4 ! misho 3619: study_options = force_study_options;
1.1 misho 3620: log_store = showstore; /* default from command line */
3621:
3622: while (*pp != 0)
3623: {
3624: switch (*pp++)
3625: {
3626: case 'f': options |= PCRE_FIRSTLINE; break;
3627: case 'g': do_g = 1; break;
3628: case 'i': options |= PCRE_CASELESS; break;
3629: case 'm': options |= PCRE_MULTILINE; break;
3630: case 's': options |= PCRE_DOTALL; break;
3631: case 'x': options |= PCRE_EXTENDED; break;
3632:
3633: case '+':
3634: if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3635: break;
3636:
3637: case '=': do_allcaps = 1; break;
3638: case 'A': options |= PCRE_ANCHORED; break;
3639: case 'B': do_debug = 1; break;
3640: case 'C': options |= PCRE_AUTO_CALLOUT; break;
3641: case 'D': do_debug = do_showinfo = 1; break;
3642: case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3643: case 'F': do_flip = 1; break;
3644: case 'G': do_G = 1; break;
3645: case 'I': do_showinfo = 1; break;
3646: case 'J': options |= PCRE_DUPNAMES; break;
3647: case 'K': do_mark = 1; break;
3648: case 'M': log_store = 1; break;
3649: case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3650:
3651: #if !defined NOPOSIX
3652: case 'P': do_posix = 1; break;
3653: #endif
3654:
3655: case 'S':
1.1.1.4 ! misho 3656: do_study = 1;
! 3657: for (;;)
1.1 misho 3658: {
1.1.1.4 ! misho 3659: switch (*pp++)
1.1 misho 3660: {
1.1.1.4 ! misho 3661: case 'S':
! 3662: do_study = 0;
! 3663: no_force_study = 1;
! 3664: break;
! 3665:
! 3666: case '!':
! 3667: study_options |= PCRE_STUDY_EXTRA_NEEDED;
! 3668: break;
! 3669:
! 3670: case '+':
! 3671: if (*pp == '+')
1.1.1.3 misho 3672: {
3673: verify_jit = TRUE;
3674: pp++;
3675: }
3676: if (*pp >= '1' && *pp <= '7')
3677: study_options |= jit_study_bits[*pp++ - '1'];
3678: else
3679: study_options |= jit_study_bits[6];
1.1.1.4 ! misho 3680: break;
! 3681:
! 3682: case '-':
! 3683: study_options &= ~PCRE_STUDY_ALLJIT;
! 3684: break;
! 3685:
! 3686: default:
! 3687: pp--;
! 3688: goto ENDLOOP;
1.1 misho 3689: }
3690: }
1.1.1.4 ! misho 3691: ENDLOOP:
1.1 misho 3692: break;
3693:
3694: case 'U': options |= PCRE_UNGREEDY; break;
3695: case 'W': options |= PCRE_UCP; break;
3696: case 'X': options |= PCRE_EXTRA; break;
3697: case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3698: case 'Z': debug_lengths = 0; break;
1.1.1.2 misho 3699: case '8': options |= PCRE_UTF8; use_utf = 1; break;
1.1.1.4 ! misho 3700: case '9': options |= PCRE_NEVER_UTF; break;
1.1 misho 3701: case '?': options |= PCRE_NO_UTF8_CHECK; break;
3702:
3703: case 'T':
3704: switch (*pp++)
3705: {
3706: case '0': tables = tables0; break;
3707: case '1': tables = tables1; break;
3708:
3709: case '\r':
3710: case '\n':
3711: case ' ':
3712: case 0:
3713: fprintf(outfile, "** Missing table number after /T\n");
3714: goto SKIP_DATA;
3715:
3716: default:
3717: fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3718: goto SKIP_DATA;
3719: }
3720: break;
3721:
3722: case 'L':
3723: ppp = pp;
3724: /* The '\r' test here is so that it works on Windows. */
3725: /* The 0 (NUL) test is just in case this is an unterminated line. */
3726: while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3727: *ppp = 0;
3728: if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3729: {
3730: fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3731: goto SKIP_DATA;
3732: }
3733: locale_set = 1;
1.1.1.2 misho 3734: tables = PCRE_MAKETABLES;
1.1 misho 3735: pp = ppp;
3736: break;
3737:
3738: case '>':
3739: to_file = pp;
3740: while (*pp != 0) pp++;
3741: while (isspace(pp[-1])) pp--;
3742: *pp = 0;
3743: break;
3744:
3745: case '<':
3746: {
1.1.1.2 misho 3747: if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1.1 misho 3748: {
3749: options |= PCRE_JAVASCRIPT_COMPAT;
3750: pp += 3;
3751: }
3752: else
3753: {
3754: int x = check_newline(pp, outfile);
3755: if (x == 0) goto SKIP_DATA;
3756: options |= x;
3757: while (*pp++ != '>');
3758: }
3759: }
3760: break;
3761:
3762: case '\r': /* So that it works in Windows */
3763: case '\n':
3764: case ' ':
3765: break;
3766:
3767: default:
3768: fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3769: goto SKIP_DATA;
3770: }
3771: }
3772:
3773: /* Handle compiling via the POSIX interface, which doesn't support the
3774: timing, showing, or debugging options, nor the ability to pass over
1.1.1.2 misho 3775: local character tables. Neither does it have 16-bit support. */
1.1 misho 3776:
3777: #if !defined NOPOSIX
3778: if (posix || do_posix)
3779: {
3780: int rc;
3781: int cflags = 0;
3782:
3783: if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3784: if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3785: if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3786: if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3787: if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3788: if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3789: if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3790:
3791: first_gotten_store = 0;
3792: rc = regcomp(&preg, (char *)p, cflags);
3793:
3794: /* Compilation failed; go back for another re, skipping to blank line
3795: if non-interactive. */
3796:
3797: if (rc != 0)
3798: {
3799: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3800: fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3801: goto SKIP_DATA;
3802: }
3803: }
3804:
3805: /* Handle compiling via the native interface */
3806:
3807: else
3808: #endif /* !defined NOPOSIX */
3809:
3810: {
1.1.1.4 ! misho 3811: /* In 16- or 32-bit mode, convert the input. */
1.1.1.2 misho 3812:
3813: #ifdef SUPPORT_PCRE16
1.1.1.4 ! misho 3814: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 3815: {
3816: switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3817: {
3818: case -1:
3819: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3820: "converted to UTF-16\n");
3821: goto SKIP_DATA;
3822:
3823: case -2:
3824: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3825: "cannot be converted to UTF-16\n");
3826: goto SKIP_DATA;
3827:
3828: case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3829: fprintf(outfile, "**Failed: character value greater than 0xffff "
3830: "cannot be converted to 16-bit in non-UTF mode\n");
3831: goto SKIP_DATA;
3832:
3833: default:
3834: break;
3835: }
3836: p = (pcre_uint8 *)buffer16;
3837: }
3838: #endif
3839:
1.1.1.4 ! misho 3840: #ifdef SUPPORT_PCRE32
! 3841: if (pcre_mode == PCRE32_MODE)
! 3842: {
! 3843: switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
! 3844: {
! 3845: case -1:
! 3846: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
! 3847: "converted to UTF-32\n");
! 3848: goto SKIP_DATA;
! 3849:
! 3850: case -2:
! 3851: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
! 3852: "cannot be converted to UTF-32\n");
! 3853: goto SKIP_DATA;
! 3854:
! 3855: case -3:
! 3856: fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
! 3857: goto SKIP_DATA;
! 3858:
! 3859: default:
! 3860: break;
! 3861: }
! 3862: p = (pcre_uint8 *)buffer32;
! 3863: }
! 3864: #endif
! 3865:
1.1.1.2 misho 3866: /* Compile many times when timing */
1.1 misho 3867:
3868: if (timeit > 0)
3869: {
3870: register int i;
3871: clock_t time_taken;
3872: clock_t start_time = clock();
3873: for (i = 0; i < timeit; i++)
3874: {
1.1.1.2 misho 3875: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 3876: if (re != NULL) free(re);
3877: }
3878: time_taken = clock() - start_time;
3879: fprintf(outfile, "Compile time %.4f milliseconds\n",
3880: (((double)time_taken * 1000.0) / (double)timeit) /
3881: (double)CLOCKS_PER_SEC);
3882: }
3883:
3884: first_gotten_store = 0;
1.1.1.2 misho 3885: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 3886:
3887: /* Compilation failed; go back for another re, skipping to blank line
3888: if non-interactive. */
3889:
3890: if (re == NULL)
3891: {
3892: fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3893: SKIP_DATA:
3894: if (infile != stdin)
3895: {
3896: for (;;)
3897: {
3898: if (extend_inputline(infile, buffer, NULL) == NULL)
3899: {
3900: done = 1;
3901: goto CONTINUE;
3902: }
3903: len = (int)strlen((char *)buffer);
3904: while (len > 0 && isspace(buffer[len-1])) len--;
3905: if (len == 0) break;
3906: }
3907: fprintf(outfile, "\n");
3908: }
3909: goto CONTINUE;
3910: }
3911:
3912: /* Compilation succeeded. It is now possible to set the UTF-8 option from
3913: within the regex; check for this so that we know how to process the data
3914: lines. */
3915:
1.1.1.2 misho 3916: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3917: goto SKIP_DATA;
3918: if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
1.1 misho 3919:
3920: /* Extract the size for possible writing before possibly flipping it,
3921: and remember the store that was got. */
3922:
1.1.1.4 ! misho 3923: true_size = REAL_PCRE_SIZE(re);
1.1 misho 3924: regex_gotten_store = first_gotten_store;
3925:
3926: /* Output code size information if requested */
3927:
3928: if (log_store)
1.1.1.4 ! misho 3929: {
! 3930: int name_count, name_entry_size, real_pcre_size;
! 3931:
! 3932: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
! 3933: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
! 3934: real_pcre_size = 0;
! 3935: #ifdef SUPPORT_PCRE8
! 3936: if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
! 3937: real_pcre_size = sizeof(real_pcre);
! 3938: #endif
! 3939: #ifdef SUPPORT_PCRE16
! 3940: if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
! 3941: real_pcre_size = sizeof(real_pcre16);
! 3942: #endif
! 3943: #ifdef SUPPORT_PCRE32
! 3944: if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
! 3945: real_pcre_size = sizeof(real_pcre32);
! 3946: #endif
1.1 misho 3947: fprintf(outfile, "Memory allocation (code space): %d\n",
1.1.1.4 ! misho 3948: (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
! 3949: }
1.1 misho 3950:
3951: /* If -s or /S was present, study the regex to generate additional info to
3952: help with the matching, unless the pattern has the SS option, which
3953: suppresses the effect of /S (used for a few test patterns where studying is
3954: never sensible). */
3955:
3956: if (do_study || (force_study >= 0 && !no_force_study))
3957: {
3958: if (timeit > 0)
3959: {
3960: register int i;
3961: clock_t time_taken;
3962: clock_t start_time = clock();
3963: for (i = 0; i < timeit; i++)
1.1.1.2 misho 3964: {
1.1.1.4 ! misho 3965: PCRE_STUDY(extra, re, study_options, &error);
1.1.1.2 misho 3966: }
1.1 misho 3967: time_taken = clock() - start_time;
1.1.1.2 misho 3968: if (extra != NULL)
3969: {
3970: PCRE_FREE_STUDY(extra);
3971: }
1.1 misho 3972: fprintf(outfile, " Study time %.4f milliseconds\n",
3973: (((double)time_taken * 1000.0) / (double)timeit) /
3974: (double)CLOCKS_PER_SEC);
3975: }
1.1.1.4 ! misho 3976: PCRE_STUDY(extra, re, study_options, &error);
1.1 misho 3977: if (error != NULL)
3978: fprintf(outfile, "Failed to study: %s\n", error);
3979: else if (extra != NULL)
3980: {
3981: true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3982: if (log_store)
3983: {
3984: size_t jitsize;
1.1.1.2 misho 3985: if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3986: jitsize != 0)
3987: fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
1.1 misho 3988: }
3989: }
3990: }
3991:
3992: /* If /K was present, we set up for handling MARK data. */
3993:
3994: if (do_mark)
3995: {
3996: if (extra == NULL)
3997: {
3998: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3999: extra->flags = 0;
4000: }
4001: extra->mark = &markptr;
4002: extra->flags |= PCRE_EXTRA_MARK;
4003: }
4004:
1.1.1.2 misho 4005: /* Extract and display information from the compiled data if required. */
1.1 misho 4006:
4007: SHOW_INFO:
4008:
4009: if (do_debug)
4010: {
4011: fprintf(outfile, "------------------------------------------------------------------\n");
1.1.1.2 misho 4012: PCRE_PRINTINT(re, outfile, debug_lengths);
1.1 misho 4013: }
4014:
4015: /* We already have the options in get_options (see above) */
4016:
4017: if (do_showinfo)
4018: {
4019: unsigned long int all_options;
1.1.1.4 ! misho 4020: pcre_uint32 first_char, need_char;
! 4021: pcre_uint32 match_limit, recursion_limit;
! 4022: int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
1.1.1.3 misho 4023: hascrorlf, maxlookbehind;
1.1 misho 4024: int nameentrysize, namecount;
1.1.1.2 misho 4025: const pcre_uint8 *nametable;
1.1 misho 4026:
1.1.1.2 misho 4027: if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4028: new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4029: new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
1.1.1.4 ! misho 4030: new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
! 4031: new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
! 4032: new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
! 4033: new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
1.1.1.2 misho 4034: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4035: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4036: new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4037: new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4038: new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
1.1.1.3 misho 4039: new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4040: new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
1.1.1.2 misho 4041: != 0)
4042: goto SKIP_DATA;
1.1 misho 4043:
4044: if (size != regex_gotten_store) fprintf(outfile,
4045: "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4046: (int)size, (int)regex_gotten_store);
4047:
4048: fprintf(outfile, "Capturing subpattern count = %d\n", count);
1.1.1.4 ! misho 4049:
1.1 misho 4050: if (backrefmax > 0)
4051: fprintf(outfile, "Max back reference = %d\n", backrefmax);
4052:
1.1.1.4 ! misho 4053: if (maxlookbehind > 0)
! 4054: fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
! 4055:
! 4056: if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
! 4057: fprintf(outfile, "Match limit = %u\n", match_limit);
! 4058:
! 4059: if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
! 4060: fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
! 4061:
1.1 misho 4062: if (namecount > 0)
4063: {
4064: fprintf(outfile, "Named capturing subpatterns:\n");
4065: while (namecount-- > 0)
4066: {
1.1.1.4 ! misho 4067: int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
1.1.1.2 misho 4068: int length = (int)STRLEN(nametable + imm2_size);
4069: fprintf(outfile, " ");
4070: PCHARSV(nametable, imm2_size, length, outfile);
4071: while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
1.1.1.4 ! misho 4072: #ifdef SUPPORT_PCRE32
! 4073: if (pcre_mode == PCRE32_MODE)
! 4074: fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
1.1.1.2 misho 4075: #endif
1.1.1.4 ! misho 4076: #ifdef SUPPORT_PCRE16
! 4077: if (pcre_mode == PCRE16_MODE)
! 4078: fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
! 4079: #endif
! 4080: #ifdef SUPPORT_PCRE8
! 4081: if (pcre_mode == PCRE8_MODE)
! 4082: fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
1.1.1.2 misho 4083: #endif
1.1.1.4 ! misho 4084: nametable += nameentrysize * CHAR_SIZE;
1.1 misho 4085: }
4086: }
4087:
4088: if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4089: if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4090:
1.1.1.4 ! misho 4091: all_options = REAL_PCRE_OPTIONS(re);
1.1.1.2 misho 4092: if (do_flip) all_options = swap_uint32(all_options);
1.1 misho 4093:
4094: if (get_options == 0) fprintf(outfile, "No options\n");
1.1.1.4 ! misho 4095: else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1.1 misho 4096: ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4097: ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4098: ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4099: ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4100: ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4101: ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4102: ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4103: ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4104: ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4105: ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4106: ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4107: ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1.1.1.2 misho 4108: ((get_options & PCRE_UTF8) != 0)? " utf" : "",
1.1 misho 4109: ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1.1.1.2 misho 4110: ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
1.1 misho 4111: ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1.1.1.4 ! misho 4112: ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
! 4113: ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
1.1 misho 4114:
4115: if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4116:
4117: switch (get_options & PCRE_NEWLINE_BITS)
4118: {
4119: case PCRE_NEWLINE_CR:
4120: fprintf(outfile, "Forced newline sequence: CR\n");
4121: break;
4122:
4123: case PCRE_NEWLINE_LF:
4124: fprintf(outfile, "Forced newline sequence: LF\n");
4125: break;
4126:
4127: case PCRE_NEWLINE_CRLF:
4128: fprintf(outfile, "Forced newline sequence: CRLF\n");
4129: break;
4130:
4131: case PCRE_NEWLINE_ANYCRLF:
4132: fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4133: break;
4134:
4135: case PCRE_NEWLINE_ANY:
4136: fprintf(outfile, "Forced newline sequence: ANY\n");
4137: break;
4138:
4139: default:
4140: break;
4141: }
4142:
1.1.1.4 ! misho 4143: if (first_char_set == 2)
1.1 misho 4144: {
4145: fprintf(outfile, "First char at start or follows newline\n");
4146: }
1.1.1.4 ! misho 4147: else if (first_char_set == 1)
1.1 misho 4148: {
1.1.1.2 misho 4149: const char *caseless =
1.1.1.4 ! misho 4150: ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
1.1 misho 4151: "" : " (caseless)";
1.1.1.2 misho 4152:
4153: if (PRINTOK(first_char))
4154: fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
1.1 misho 4155: else
1.1.1.2 misho 4156: {
4157: fprintf(outfile, "First char = ");
4158: pchar(first_char, outfile);
4159: fprintf(outfile, "%s\n", caseless);
4160: }
1.1 misho 4161: }
1.1.1.4 ! misho 4162: else
! 4163: {
! 4164: fprintf(outfile, "No first char\n");
! 4165: }
1.1 misho 4166:
1.1.1.4 ! misho 4167: if (need_char_set == 0)
1.1 misho 4168: {
4169: fprintf(outfile, "No need char\n");
4170: }
4171: else
4172: {
1.1.1.2 misho 4173: const char *caseless =
1.1.1.4 ! misho 4174: ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
1.1 misho 4175: "" : " (caseless)";
1.1.1.2 misho 4176:
4177: if (PRINTOK(need_char))
4178: fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
1.1 misho 4179: else
1.1.1.2 misho 4180: {
4181: fprintf(outfile, "Need char = ");
4182: pchar(need_char, outfile);
4183: fprintf(outfile, "%s\n", caseless);
4184: }
1.1 misho 4185: }
4186:
4187: /* Don't output study size; at present it is in any case a fixed
4188: value, but it varies, depending on the computer architecture, and
4189: so messes up the test suite. (And with the /F option, it might be
4190: flipped.) If study was forced by an external -s, don't show this
4191: information unless -i or -d was also present. This means that, except
4192: when auto-callouts are involved, the output from runs with and without
4193: -s should be identical. */
4194:
4195: if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4196: {
4197: if (extra == NULL)
4198: fprintf(outfile, "Study returned NULL\n");
4199: else
4200: {
1.1.1.2 misho 4201: pcre_uint8 *start_bits = NULL;
1.1 misho 4202: int minlength;
4203:
1.1.1.2 misho 4204: if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4205: fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1.1 misho 4206:
1.1.1.2 misho 4207: if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
1.1 misho 4208: {
1.1.1.2 misho 4209: if (start_bits == NULL)
4210: fprintf(outfile, "No set of starting bytes\n");
4211: else
1.1 misho 4212: {
1.1.1.2 misho 4213: int i;
4214: int c = 24;
4215: fprintf(outfile, "Starting byte set: ");
4216: for (i = 0; i < 256; i++)
1.1 misho 4217: {
1.1.1.2 misho 4218: if ((start_bits[i/8] & (1<<(i&7))) != 0)
1.1 misho 4219: {
1.1.1.2 misho 4220: if (c > 75)
4221: {
4222: fprintf(outfile, "\n ");
4223: c = 2;
4224: }
4225: if (PRINTOK(i) && i != ' ')
4226: {
4227: fprintf(outfile, "%c ", i);
4228: c += 2;
4229: }
4230: else
4231: {
4232: fprintf(outfile, "\\x%02x ", i);
4233: c += 5;
4234: }
1.1 misho 4235: }
4236: }
1.1.1.2 misho 4237: fprintf(outfile, "\n");
1.1 misho 4238: }
4239: }
4240: }
4241:
4242: /* Show this only if the JIT was set by /S, not by -s. */
4243:
1.1.1.4 ! misho 4244: if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
! 4245: (force_study_options & PCRE_STUDY_ALLJIT) == 0)
1.1 misho 4246: {
4247: int jit;
1.1.1.2 misho 4248: if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4249: {
4250: if (jit)
4251: fprintf(outfile, "JIT study was successful\n");
4252: else
1.1 misho 4253: #ifdef SUPPORT_JIT
1.1.1.2 misho 4254: fprintf(outfile, "JIT study was not successful\n");
1.1 misho 4255: #else
1.1.1.2 misho 4256: fprintf(outfile, "JIT support is not available in this version of PCRE\n");
1.1 misho 4257: #endif
1.1.1.2 misho 4258: }
1.1 misho 4259: }
4260: }
4261: }
4262:
4263: /* If the '>' option was present, we write out the regex to a file, and
4264: that is all. The first 8 bytes of the file are the regex length and then
4265: the study length, in big-endian order. */
4266:
4267: if (to_file != NULL)
4268: {
4269: FILE *f = fopen((char *)to_file, "wb");
4270: if (f == NULL)
4271: {
4272: fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4273: }
4274: else
4275: {
1.1.1.2 misho 4276: pcre_uint8 sbuf[8];
4277:
4278: if (do_flip) regexflip(re, extra);
4279: sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4280: sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4281: sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4282: sbuf[3] = (pcre_uint8)((true_size) & 255);
4283: sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4284: sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4285: sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4286: sbuf[7] = (pcre_uint8)((true_study_size) & 255);
1.1 misho 4287:
4288: if (fwrite(sbuf, 1, 8, f) < 8 ||
4289: fwrite(re, 1, true_size, f) < true_size)
4290: {
4291: fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4292: }
4293: else
4294: {
4295: fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4296:
4297: /* If there is study data, write it. */
4298:
4299: if (extra != NULL)
4300: {
4301: if (fwrite(extra->study_data, 1, true_study_size, f) <
4302: true_study_size)
4303: {
4304: fprintf(outfile, "Write error on %s: %s\n", to_file,
4305: strerror(errno));
4306: }
4307: else fprintf(outfile, "Study data written to %s\n", to_file);
4308: }
4309: }
4310: fclose(f);
4311: }
4312:
4313: new_free(re);
1.1.1.2 misho 4314: if (extra != NULL)
4315: {
4316: PCRE_FREE_STUDY(extra);
4317: }
1.1 misho 4318: if (locale_set)
4319: {
4320: new_free((void *)tables);
4321: setlocale(LC_CTYPE, "C");
4322: locale_set = 0;
4323: }
4324: continue; /* With next regex */
4325: }
4326: } /* End of non-POSIX compile */
4327:
4328: /* Read data lines and test them */
4329:
4330: for (;;)
4331: {
1.1.1.4 ! misho 4332: #ifdef SUPPORT_PCRE8
! 4333: pcre_uint8 *q8;
! 4334: #endif
! 4335: #ifdef SUPPORT_PCRE16
! 4336: pcre_uint16 *q16;
! 4337: #endif
! 4338: #ifdef SUPPORT_PCRE32
! 4339: pcre_uint32 *q32;
! 4340: #endif
1.1.1.2 misho 4341: pcre_uint8 *bptr;
1.1 misho 4342: int *use_offsets = offsets;
4343: int use_size_offsets = size_offsets;
4344: int callout_data = 0;
4345: int callout_data_set = 0;
1.1.1.4 ! misho 4346: int count;
! 4347: pcre_uint32 c;
1.1 misho 4348: int copystrings = 0;
4349: int find_match_limit = default_find_match_limit;
4350: int getstrings = 0;
4351: int getlist = 0;
4352: int gmatched = 0;
4353: int start_offset = 0;
4354: int start_offset_sign = 1;
4355: int g_notempty = 0;
4356: int use_dfa = 0;
4357:
4358: *copynames = 0;
4359: *getnames = 0;
4360:
1.1.1.4 ! misho 4361: #ifdef SUPPORT_PCRE32
! 4362: cn32ptr = copynames;
! 4363: gn32ptr = getnames;
! 4364: #endif
1.1.1.2 misho 4365: #ifdef SUPPORT_PCRE16
1.1.1.4 ! misho 4366: cn16ptr = copynames16;
! 4367: gn16ptr = getnames16;
1.1.1.2 misho 4368: #endif
4369: #ifdef SUPPORT_PCRE8
4370: cn8ptr = copynames8;
4371: gn8ptr = getnames8;
4372: #endif
1.1 misho 4373:
1.1.1.2 misho 4374: SET_PCRE_CALLOUT(callout);
1.1 misho 4375: first_callout = 1;
4376: last_callout_mark = NULL;
4377: callout_extra = 0;
4378: callout_count = 0;
4379: callout_fail_count = 999999;
4380: callout_fail_id = -1;
4381: show_malloc = 0;
1.1.1.2 misho 4382: options = 0;
1.1 misho 4383:
4384: if (extra != NULL) extra->flags &=
4385: ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4386:
4387: len = 0;
4388: for (;;)
4389: {
4390: if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4391: {
4392: if (len > 0) /* Reached EOF without hitting a newline */
4393: {
4394: fprintf(outfile, "\n");
4395: break;
4396: }
4397: done = 1;
4398: goto CONTINUE;
4399: }
4400: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4401: len = (int)strlen((char *)buffer);
4402: if (buffer[len-1] == '\n') break;
4403: }
4404:
4405: while (len > 0 && isspace(buffer[len-1])) len--;
4406: buffer[len] = 0;
4407: if (len == 0) break;
4408:
4409: p = buffer;
4410: while (isspace(*p)) p++;
4411:
1.1.1.4 ! misho 4412: #ifndef NOUTF
! 4413: /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
! 4414: invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
! 4415:
! 4416: if (use_utf)
! 4417: {
! 4418: pcre_uint8 *q;
! 4419: pcre_uint32 cc;
! 4420: int n = 1;
! 4421:
! 4422: for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
! 4423: if (n <= 0)
! 4424: {
! 4425: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
! 4426: goto NEXT_DATA;
! 4427: }
! 4428: }
! 4429: #endif
! 4430:
! 4431: #ifdef SUPPORT_VALGRIND
! 4432: /* Mark the dbuffer as addressable but undefined again. */
! 4433:
! 4434: if (dbuffer != NULL)
! 4435: {
! 4436: VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
! 4437: }
! 4438: #endif
! 4439:
! 4440: /* Allocate a buffer to hold the data line; len+1 is an upper bound on
! 4441: the number of pcre_uchar units that will be needed. */
! 4442:
! 4443: while (dbuffer == NULL || (size_t)len >= dbuffer_size)
! 4444: {
! 4445: dbuffer_size *= 2;
! 4446: dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
! 4447: if (dbuffer == NULL)
! 4448: {
! 4449: fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
! 4450: exit(1);
! 4451: }
! 4452: }
! 4453:
! 4454: #ifdef SUPPORT_PCRE8
! 4455: q8 = (pcre_uint8 *) dbuffer;
! 4456: #endif
! 4457: #ifdef SUPPORT_PCRE16
! 4458: q16 = (pcre_uint16 *) dbuffer;
! 4459: #endif
! 4460: #ifdef SUPPORT_PCRE32
! 4461: q32 = (pcre_uint32 *) dbuffer;
! 4462: #endif
! 4463:
1.1 misho 4464: while ((c = *p++) != 0)
4465: {
4466: int i = 0;
4467: int n = 0;
4468:
1.1.1.2 misho 4469: /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4470: In non-UTF mode, allow the value of the byte to fall through to later,
4471: where values greater than 127 are turned into UTF-8 when running in
1.1.1.4 ! misho 4472: 16-bit or 32-bit mode. */
1.1.1.2 misho 4473:
4474: if (c != '\\')
4475: {
1.1.1.4 ! misho 4476: #ifndef NOUTF
! 4477: if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
! 4478: #endif
1.1.1.2 misho 4479: }
4480:
4481: /* Handle backslash escapes */
4482:
4483: else switch ((c = *p++))
1.1 misho 4484: {
4485: case 'a': c = 7; break;
4486: case 'b': c = '\b'; break;
4487: case 'e': c = 27; break;
4488: case 'f': c = '\f'; break;
4489: case 'n': c = '\n'; break;
4490: case 'r': c = '\r'; break;
4491: case 't': c = '\t'; break;
4492: case 'v': c = '\v'; break;
4493:
4494: case '0': case '1': case '2': case '3':
4495: case '4': case '5': case '6': case '7':
4496: c -= '0';
4497: while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4498: c = c * 8 + *p++ - '0';
4499: break;
4500:
4501: case 'x':
4502: if (*p == '{')
4503: {
1.1.1.2 misho 4504: pcre_uint8 *pt = p;
1.1 misho 4505: c = 0;
4506:
4507: /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4508: when isxdigit() is a macro that refers to its argument more than
4509: once. This is banned by the C Standard, but apparently happens in at
4510: least one MacOS environment. */
4511:
4512: for (pt++; isxdigit(*pt); pt++)
1.1.1.2 misho 4513: {
4514: if (++i == 9)
4515: fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4516: "using only the first eight.\n");
4517: else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4518: }
1.1 misho 4519: if (*pt == '}')
4520: {
4521: p = pt + 1;
4522: break;
4523: }
1.1.1.2 misho 4524: /* Not correct form for \x{...}; fall through */
1.1 misho 4525: }
4526:
1.1.1.2 misho 4527: /* \x without {} always defines just one byte in 8-bit mode. This
4528: allows UTF-8 characters to be constructed byte by byte, and also allows
4529: invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4530: Otherwise, pass it down to later code so that it can be turned into
1.1.1.4 ! misho 4531: UTF-8 when running in 16/32-bit mode. */
1.1 misho 4532:
4533: c = 0;
4534: while (i++ < 2 && isxdigit(*p))
4535: {
4536: c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4537: p++;
4538: }
1.1.1.4 ! misho 4539: #if !defined NOUTF && defined SUPPORT_PCRE8
! 4540: if (use_utf && (pcre_mode == PCRE8_MODE))
1.1.1.2 misho 4541: {
1.1.1.4 ! misho 4542: *q8++ = c;
1.1.1.2 misho 4543: continue;
4544: }
1.1.1.4 ! misho 4545: #endif
1.1 misho 4546: break;
4547:
4548: case 0: /* \ followed by EOF allows for an empty line */
4549: p--;
4550: continue;
4551:
4552: case '>':
4553: if (*p == '-')
4554: {
4555: start_offset_sign = -1;
4556: p++;
4557: }
4558: while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4559: start_offset *= start_offset_sign;
4560: continue;
4561:
4562: case 'A': /* Option setting */
4563: options |= PCRE_ANCHORED;
4564: continue;
4565:
4566: case 'B':
4567: options |= PCRE_NOTBOL;
4568: continue;
4569:
4570: case 'C':
4571: if (isdigit(*p)) /* Set copy string */
4572: {
4573: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4574: copystrings |= 1 << n;
4575: }
4576: else if (isalnum(*p))
4577: {
1.1.1.4 ! misho 4578: READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
1.1 misho 4579: }
4580: else if (*p == '+')
4581: {
4582: callout_extra = 1;
4583: p++;
4584: }
4585: else if (*p == '-')
4586: {
1.1.1.2 misho 4587: SET_PCRE_CALLOUT(NULL);
1.1 misho 4588: p++;
4589: }
4590: else if (*p == '!')
4591: {
4592: callout_fail_id = 0;
4593: p++;
4594: while(isdigit(*p))
4595: callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4596: callout_fail_count = 0;
4597: if (*p == '!')
4598: {
4599: p++;
4600: while(isdigit(*p))
4601: callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4602: }
4603: }
4604: else if (*p == '*')
4605: {
4606: int sign = 1;
4607: callout_data = 0;
4608: if (*(++p) == '-') { sign = -1; p++; }
4609: while(isdigit(*p))
4610: callout_data = callout_data * 10 + *p++ - '0';
4611: callout_data *= sign;
4612: callout_data_set = 1;
4613: }
4614: continue;
4615:
4616: #if !defined NODFA
4617: case 'D':
4618: #if !defined NOPOSIX
4619: if (posix || do_posix)
4620: printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4621: else
4622: #endif
4623: use_dfa = 1;
4624: continue;
4625: #endif
4626:
4627: #if !defined NODFA
4628: case 'F':
4629: options |= PCRE_DFA_SHORTEST;
4630: continue;
4631: #endif
4632:
4633: case 'G':
4634: if (isdigit(*p))
4635: {
4636: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4637: getstrings |= 1 << n;
4638: }
4639: else if (isalnum(*p))
4640: {
1.1.1.4 ! misho 4641: READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
1.1 misho 4642: }
4643: continue;
4644:
4645: case 'J':
4646: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4647: if (extra != NULL
4648: && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4649: && extra->executable_jit != NULL)
4650: {
1.1.1.2 misho 4651: if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4652: jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4653: PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
1.1 misho 4654: }
4655: continue;
4656:
4657: case 'L':
4658: getlist = 1;
4659: continue;
4660:
4661: case 'M':
4662: find_match_limit = 1;
4663: continue;
4664:
4665: case 'N':
4666: if ((options & PCRE_NOTEMPTY) != 0)
4667: options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4668: else
4669: options |= PCRE_NOTEMPTY;
4670: continue;
4671:
4672: case 'O':
4673: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4674: if (n > size_offsets_max)
4675: {
4676: size_offsets_max = n;
4677: free(offsets);
4678: use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4679: if (offsets == NULL)
4680: {
4681: printf("** Failed to get %d bytes of memory for offsets vector\n",
4682: (int)(size_offsets_max * sizeof(int)));
4683: yield = 1;
4684: goto EXIT;
4685: }
4686: }
4687: use_size_offsets = n;
4688: if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1.1.1.3 misho 4689: else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
1.1 misho 4690: continue;
4691:
4692: case 'P':
4693: options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4694: PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4695: continue;
4696:
4697: case 'Q':
4698: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4699: if (extra == NULL)
4700: {
4701: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4702: extra->flags = 0;
4703: }
4704: extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4705: extra->match_limit_recursion = n;
4706: continue;
4707:
4708: case 'q':
4709: while(isdigit(*p)) n = n * 10 + *p++ - '0';
4710: if (extra == NULL)
4711: {
4712: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4713: extra->flags = 0;
4714: }
4715: extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4716: extra->match_limit = n;
4717: continue;
4718:
4719: #if !defined NODFA
4720: case 'R':
4721: options |= PCRE_DFA_RESTART;
4722: continue;
4723: #endif
4724:
4725: case 'S':
4726: show_malloc = 1;
4727: continue;
4728:
4729: case 'Y':
4730: options |= PCRE_NO_START_OPTIMIZE;
4731: continue;
4732:
4733: case 'Z':
4734: options |= PCRE_NOTEOL;
4735: continue;
4736:
4737: case '?':
4738: options |= PCRE_NO_UTF8_CHECK;
4739: continue;
4740:
4741: case '<':
4742: {
4743: int x = check_newline(p, outfile);
4744: if (x == 0) goto NEXT_DATA;
4745: options |= x;
4746: while (*p++ != '>');
4747: }
4748: continue;
4749: }
1.1.1.2 misho 4750:
1.1.1.4 ! misho 4751: /* We now have a character value in c that may be greater than 255.
! 4752: In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
! 4753: than 127 in UTF mode must have come from \x{...} or octal constructs
! 4754: because values from \x.. get this far only in non-UTF mode. */
! 4755:
! 4756: #ifdef SUPPORT_PCRE8
! 4757: if (pcre_mode == PCRE8_MODE)
! 4758: {
! 4759: #ifndef NOUTF
! 4760: if (use_utf)
! 4761: {
! 4762: if (c > 0x7fffffff)
! 4763: {
! 4764: fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
! 4765: "and so cannot be converted to UTF-8\n", c);
! 4766: goto NEXT_DATA;
! 4767: }
! 4768: q8 += ord2utf8(c, q8);
! 4769: }
! 4770: else
! 4771: #endif
! 4772: {
! 4773: if (c > 0xffu)
! 4774: {
! 4775: fprintf(outfile, "** Character \\x{%x} is greater than 255 "
! 4776: "and UTF-8 mode is not enabled.\n", c);
! 4777: fprintf(outfile, "** Truncation will probably give the wrong "
! 4778: "result.\n");
! 4779: }
! 4780: *q8++ = c;
! 4781: }
1.1.1.2 misho 4782: }
4783: #endif
1.1.1.4 ! misho 4784: #ifdef SUPPORT_PCRE16
! 4785: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 4786: {
1.1.1.4 ! misho 4787: #ifndef NOUTF
! 4788: if (use_utf)
! 4789: {
! 4790: if (c > 0x10ffffu)
! 4791: {
! 4792: fprintf(outfile, "** Failed: character \\x{%x} is greater than "
! 4793: "0x10ffff and so cannot be converted to UTF-16\n", c);
! 4794: goto NEXT_DATA;
! 4795: }
! 4796: else if (c >= 0x10000u)
! 4797: {
! 4798: c-= 0x10000u;
! 4799: *q16++ = 0xD800 | (c >> 10);
! 4800: *q16++ = 0xDC00 | (c & 0x3ff);
! 4801: }
! 4802: else
! 4803: *q16++ = c;
! 4804: }
! 4805: else
! 4806: #endif
1.1.1.2 misho 4807: {
1.1.1.4 ! misho 4808: if (c > 0xffffu)
! 4809: {
! 4810: fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
! 4811: "and UTF-16 mode is not enabled.\n", c);
! 4812: fprintf(outfile, "** Truncation will probably give the wrong "
! 4813: "result.\n");
! 4814: }
! 4815:
! 4816: *q16++ = c;
1.1.1.2 misho 4817: }
4818: }
1.1.1.4 ! misho 4819: #endif
! 4820: #ifdef SUPPORT_PCRE32
! 4821: if (pcre_mode == PCRE32_MODE)
! 4822: {
! 4823: *q32++ = c;
! 4824: }
! 4825: #endif
! 4826:
1.1 misho 4827: }
1.1.1.2 misho 4828:
4829: /* Reached end of subject string */
4830:
1.1.1.4 ! misho 4831: #ifdef SUPPORT_PCRE8
! 4832: if (pcre_mode == PCRE8_MODE)
! 4833: {
! 4834: *q8 = 0;
! 4835: len = (int)(q8 - (pcre_uint8 *)dbuffer);
! 4836: }
! 4837: #endif
! 4838: #ifdef SUPPORT_PCRE16
! 4839: if (pcre_mode == PCRE16_MODE)
! 4840: {
! 4841: *q16 = 0;
! 4842: len = (int)(q16 - (pcre_uint16 *)dbuffer);
! 4843: }
! 4844: #endif
! 4845: #ifdef SUPPORT_PCRE32
! 4846: if (pcre_mode == PCRE32_MODE)
! 4847: {
! 4848: *q32 = 0;
! 4849: len = (int)(q32 - (pcre_uint32 *)dbuffer);
! 4850: }
! 4851: #endif
! 4852:
! 4853: /* If we're compiling with explicit valgrind support, mark the data from after
! 4854: its end to the end of the buffer as unaddressable, so that a read over the end
! 4855: of the buffer will be seen by valgrind, even if it doesn't cause a crash.
! 4856: If we're not building with valgrind support, at least move the data to the end
! 4857: of the buffer so that an overrun might cause a crash.
! 4858: If we are using the POSIX interface, we must include the terminating zero. */
1.1 misho 4859:
1.1.1.4 ! misho 4860: bptr = dbuffer;
1.1 misho 4861:
4862: #if !defined NOPOSIX
4863: if (posix || do_posix)
4864: {
1.1.1.4 ! misho 4865: #ifdef SUPPORT_VALGRIND
! 4866: VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
! 4867: #else
! 4868: memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
! 4869: bptr += dbuffer_size - len - 1;
! 4870: #endif
1.1 misho 4871: }
4872: else
4873: #endif
4874: {
1.1.1.4 ! misho 4875: #ifdef SUPPORT_VALGRIND
! 4876: VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
! 4877: #else
! 4878: bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
! 4879: #endif
1.1 misho 4880: }
4881:
4882: if ((all_use_dfa || use_dfa) && find_match_limit)
4883: {
4884: printf("**Match limit not relevant for DFA matching: ignored\n");
4885: find_match_limit = 0;
4886: }
4887:
4888: /* Handle matching via the POSIX interface, which does not
4889: support timing or playing with the match limit or callout data. */
4890:
4891: #if !defined NOPOSIX
4892: if (posix || do_posix)
4893: {
4894: int rc;
4895: int eflags = 0;
4896: regmatch_t *pmatch = NULL;
4897: if (use_size_offsets > 0)
4898: pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4899: if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4900: if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4901: if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4902:
4903: rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4904:
4905: if (rc != 0)
4906: {
4907: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4908: fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4909: }
1.1.1.4 ! misho 4910: else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
1.1 misho 4911: {
4912: fprintf(outfile, "Matched with REG_NOSUB\n");
4913: }
4914: else
4915: {
4916: size_t i;
4917: for (i = 0; i < (size_t)use_size_offsets; i++)
4918: {
4919: if (pmatch[i].rm_so >= 0)
4920: {
4921: fprintf(outfile, "%2d: ", (int)i);
1.1.1.2 misho 4922: PCHARSV(dbuffer, pmatch[i].rm_so,
1.1 misho 4923: pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4924: fprintf(outfile, "\n");
4925: if (do_showcaprest || (i == 0 && do_showrest))
4926: {
4927: fprintf(outfile, "%2d+ ", (int)i);
1.1.1.2 misho 4928: PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1.1 misho 4929: outfile);
4930: fprintf(outfile, "\n");
4931: }
4932: }
4933: }
4934: }
4935: free(pmatch);
1.1.1.2 misho 4936: goto NEXT_DATA;
1.1 misho 4937: }
4938:
1.1.1.2 misho 4939: #endif /* !defined NOPOSIX */
4940:
1.1 misho 4941: /* Handle matching via the native interface - repeats for /g and /G */
4942:
1.1.1.3 misho 4943: /* Ensure that there is a JIT callback if we want to verify that JIT was
4944: actually used. If jit_stack == NULL, no stack has yet been assigned. */
4945:
4946: if (verify_jit && jit_stack == NULL && extra != NULL)
4947: { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4948:
1.1 misho 4949: for (;; gmatched++) /* Loop for /g or /G */
4950: {
4951: markptr = NULL;
1.1.1.3 misho 4952: jit_was_used = FALSE;
1.1 misho 4953:
4954: if (timeitm > 0)
4955: {
4956: register int i;
4957: clock_t time_taken;
4958: clock_t start_time = clock();
4959:
4960: #if !defined NODFA
4961: if (all_use_dfa || use_dfa)
4962: {
1.1.1.3 misho 4963: if ((options & PCRE_DFA_RESTART) != 0)
4964: {
4965: fprintf(outfile, "Timing DFA restarts is not supported\n");
4966: break;
4967: }
4968: if (dfa_workspace == NULL)
4969: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
1.1 misho 4970: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 4971: {
4972: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 misho 4973: (options | g_notempty), use_offsets, use_size_offsets,
4974: dfa_workspace, DFA_WS_DIMENSION);
1.1.1.2 misho 4975: }
1.1 misho 4976: }
4977: else
4978: #endif
4979:
4980: for (i = 0; i < timeitm; i++)
1.1.1.2 misho 4981: {
4982: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4983: (options | g_notempty), use_offsets, use_size_offsets);
4984: }
1.1 misho 4985: time_taken = clock() - start_time;
4986: fprintf(outfile, "Execute time %.4f milliseconds\n",
4987: (((double)time_taken * 1000.0) / (double)timeitm) /
4988: (double)CLOCKS_PER_SEC);
4989: }
4990:
4991: /* If find_match_limit is set, we want to do repeated matches with
4992: varying limits in order to find the minimum value for the match limit and
4993: for the recursion limit. The match limits are relevant only to the normal
4994: running of pcre_exec(), so disable the JIT optimization. This makes it
4995: possible to run the same set of tests with and without JIT externally
4996: requested. */
4997:
4998: if (find_match_limit)
4999: {
1.1.1.4 ! misho 5000: if (extra != NULL) { PCRE_FREE_STUDY(extra); }
! 5001: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
! 5002: extra->flags = 0;
1.1 misho 5003:
5004: (void)check_match_limit(re, extra, bptr, len, start_offset,
5005: options|g_notempty, use_offsets, use_size_offsets,
5006: PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5007: PCRE_ERROR_MATCHLIMIT, "match()");
5008:
5009: count = check_match_limit(re, extra, bptr, len, start_offset,
5010: options|g_notempty, use_offsets, use_size_offsets,
5011: PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5012: PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5013: }
5014:
5015: /* If callout_data is set, use the interface with additional data */
5016:
5017: else if (callout_data_set)
5018: {
5019: if (extra == NULL)
5020: {
5021: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5022: extra->flags = 0;
5023: }
5024: extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5025: extra->callout_data = &callout_data;
1.1.1.2 misho 5026: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
1.1 misho 5027: options | g_notempty, use_offsets, use_size_offsets);
5028: extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5029: }
5030:
5031: /* The normal case is just to do the match once, with the default
5032: value of match_limit. */
5033:
5034: #if !defined NODFA
5035: else if (all_use_dfa || use_dfa)
5036: {
1.1.1.3 misho 5037: if (dfa_workspace == NULL)
5038: dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5039: if (dfa_matched++ == 0)
5040: dfa_workspace[0] = -1; /* To catch bad restart */
1.1.1.2 misho 5041: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 misho 5042: (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5043: DFA_WS_DIMENSION);
1.1 misho 5044: if (count == 0)
5045: {
1.1.1.4 ! misho 5046: fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
1.1 misho 5047: count = use_size_offsets/2;
5048: }
5049: }
5050: #endif
5051:
5052: else
5053: {
1.1.1.2 misho 5054: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5055: options | g_notempty, use_offsets, use_size_offsets);
1.1 misho 5056: if (count == 0)
5057: {
5058: fprintf(outfile, "Matched, but too many substrings\n");
1.1.1.4 ! misho 5059: /* 2 is a special case; match can be returned */
! 5060: count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
1.1 misho 5061: }
5062: }
5063:
5064: /* Matched */
5065:
5066: if (count >= 0)
5067: {
5068: int i, maxcount;
1.1.1.2 misho 5069: void *cnptr, *gnptr;
1.1 misho 5070:
5071: #if !defined NODFA
5072: if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5073: #endif
1.1.1.4 ! misho 5074: /* 2 is a special case; match can be returned */
! 5075: maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
1.1 misho 5076:
5077: /* This is a check against a lunatic return value. */
5078:
5079: if (count > maxcount)
5080: {
5081: fprintf(outfile,
5082: "** PCRE error: returned count %d is too big for offset size %d\n",
5083: count, use_size_offsets);
5084: count = use_size_offsets/3;
5085: if (do_g || do_G)
5086: {
5087: fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5088: do_g = do_G = FALSE; /* Break g/G loop */
5089: }
5090: }
5091:
5092: /* do_allcaps requests showing of all captures in the pattern, to check
5093: unset ones at the end. */
5094:
5095: if (do_allcaps)
5096: {
1.1.1.2 misho 5097: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5098: goto SKIP_DATA;
1.1 misho 5099: count++; /* Allow for full match */
5100: if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5101: }
5102:
5103: /* Output the captured substrings */
5104:
5105: for (i = 0; i < count * 2; i += 2)
5106: {
5107: if (use_offsets[i] < 0)
5108: {
5109: if (use_offsets[i] != -1)
5110: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5111: use_offsets[i], i);
5112: if (use_offsets[i+1] != -1)
5113: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5114: use_offsets[i+1], i+1);
5115: fprintf(outfile, "%2d: <unset>\n", i/2);
5116: }
5117: else
5118: {
5119: fprintf(outfile, "%2d: ", i/2);
1.1.1.2 misho 5120: PCHARSV(bptr, use_offsets[i],
1.1 misho 5121: use_offsets[i+1] - use_offsets[i], outfile);
1.1.1.3 misho 5122: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 5123: fprintf(outfile, "\n");
5124: if (do_showcaprest || (i == 0 && do_showrest))
5125: {
5126: fprintf(outfile, "%2d+ ", i/2);
1.1.1.2 misho 5127: PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
1.1 misho 5128: outfile);
5129: fprintf(outfile, "\n");
5130: }
5131: }
5132: }
5133:
1.1.1.2 misho 5134: if (markptr != NULL)
5135: {
5136: fprintf(outfile, "MK: ");
5137: PCHARSV(markptr, 0, -1, outfile);
5138: fprintf(outfile, "\n");
5139: }
1.1 misho 5140:
5141: for (i = 0; i < 32; i++)
5142: {
5143: if ((copystrings & (1 << i)) != 0)
5144: {
1.1.1.2 misho 5145: int rc;
1.1 misho 5146: char copybuffer[256];
1.1.1.2 misho 5147: PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5148: copybuffer, sizeof(copybuffer));
1.1 misho 5149: if (rc < 0)
5150: fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5151: else
1.1.1.2 misho 5152: {
5153: fprintf(outfile, "%2dC ", i);
5154: PCHARSV(copybuffer, 0, rc, outfile);
5155: fprintf(outfile, " (%d)\n", rc);
5156: }
1.1 misho 5157: }
5158: }
5159:
1.1.1.2 misho 5160: cnptr = copynames;
5161: for (;;)
1.1 misho 5162: {
1.1.1.2 misho 5163: int rc;
1.1 misho 5164: char copybuffer[256];
1.1.1.2 misho 5165:
1.1.1.4 ! misho 5166: #ifdef SUPPORT_PCRE32
! 5167: if (pcre_mode == PCRE32_MODE)
! 5168: {
! 5169: if (*(pcre_uint32 *)cnptr == 0) break;
! 5170: }
! 5171: #endif
! 5172: #ifdef SUPPORT_PCRE16
! 5173: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 5174: {
5175: if (*(pcre_uint16 *)cnptr == 0) break;
5176: }
1.1.1.4 ! misho 5177: #endif
! 5178: #ifdef SUPPORT_PCRE8
! 5179: if (pcre_mode == PCRE8_MODE)
1.1.1.2 misho 5180: {
5181: if (*(pcre_uint8 *)cnptr == 0) break;
5182: }
1.1.1.4 ! misho 5183: #endif
1.1.1.2 misho 5184:
5185: PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5186: cnptr, copybuffer, sizeof(copybuffer));
5187:
1.1 misho 5188: if (rc < 0)
1.1.1.2 misho 5189: {
5190: fprintf(outfile, "copy substring ");
5191: PCHARSV(cnptr, 0, -1, outfile);
5192: fprintf(outfile, " failed %d\n", rc);
5193: }
1.1 misho 5194: else
1.1.1.2 misho 5195: {
5196: fprintf(outfile, " C ");
5197: PCHARSV(copybuffer, 0, rc, outfile);
5198: fprintf(outfile, " (%d) ", rc);
5199: PCHARSV(cnptr, 0, -1, outfile);
5200: putc('\n', outfile);
5201: }
5202:
5203: cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
1.1 misho 5204: }
5205:
5206: for (i = 0; i < 32; i++)
5207: {
5208: if ((getstrings & (1 << i)) != 0)
5209: {
1.1.1.2 misho 5210: int rc;
1.1 misho 5211: const char *substring;
1.1.1.2 misho 5212: PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
1.1 misho 5213: if (rc < 0)
5214: fprintf(outfile, "get substring %d failed %d\n", i, rc);
5215: else
5216: {
1.1.1.2 misho 5217: fprintf(outfile, "%2dG ", i);
5218: PCHARSV(substring, 0, rc, outfile);
5219: fprintf(outfile, " (%d)\n", rc);
5220: PCRE_FREE_SUBSTRING(substring);
1.1 misho 5221: }
5222: }
5223: }
5224:
1.1.1.2 misho 5225: gnptr = getnames;
5226: for (;;)
1.1 misho 5227: {
1.1.1.2 misho 5228: int rc;
1.1 misho 5229: const char *substring;
1.1.1.2 misho 5230:
1.1.1.4 ! misho 5231: #ifdef SUPPORT_PCRE32
! 5232: if (pcre_mode == PCRE32_MODE)
! 5233: {
! 5234: if (*(pcre_uint32 *)gnptr == 0) break;
! 5235: }
! 5236: #endif
! 5237: #ifdef SUPPORT_PCRE16
! 5238: if (pcre_mode == PCRE16_MODE)
1.1.1.2 misho 5239: {
5240: if (*(pcre_uint16 *)gnptr == 0) break;
5241: }
1.1.1.4 ! misho 5242: #endif
! 5243: #ifdef SUPPORT_PCRE8
! 5244: if (pcre_mode == PCRE8_MODE)
1.1.1.2 misho 5245: {
5246: if (*(pcre_uint8 *)gnptr == 0) break;
5247: }
1.1.1.4 ! misho 5248: #endif
1.1.1.2 misho 5249:
5250: PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5251: gnptr, &substring);
1.1 misho 5252: if (rc < 0)
1.1.1.2 misho 5253: {
5254: fprintf(outfile, "get substring ");
5255: PCHARSV(gnptr, 0, -1, outfile);
5256: fprintf(outfile, " failed %d\n", rc);
5257: }
1.1 misho 5258: else
5259: {
1.1.1.2 misho 5260: fprintf(outfile, " G ");
5261: PCHARSV(substring, 0, rc, outfile);
5262: fprintf(outfile, " (%d) ", rc);
5263: PCHARSV(gnptr, 0, -1, outfile);
5264: PCRE_FREE_SUBSTRING(substring);
5265: putc('\n', outfile);
1.1 misho 5266: }
1.1.1.2 misho 5267:
5268: gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
1.1 misho 5269: }
5270:
5271: if (getlist)
5272: {
1.1.1.2 misho 5273: int rc;
1.1 misho 5274: const char **stringlist;
1.1.1.2 misho 5275: PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
1.1 misho 5276: if (rc < 0)
5277: fprintf(outfile, "get substring list failed %d\n", rc);
5278: else
5279: {
5280: for (i = 0; i < count; i++)
1.1.1.2 misho 5281: {
5282: fprintf(outfile, "%2dL ", i);
5283: PCHARSV(stringlist[i], 0, -1, outfile);
5284: putc('\n', outfile);
5285: }
1.1 misho 5286: if (stringlist[i] != NULL)
5287: fprintf(outfile, "string list not terminated by NULL\n");
1.1.1.2 misho 5288: PCRE_FREE_SUBSTRING_LIST(stringlist);
1.1 misho 5289: }
5290: }
5291: }
5292:
1.1.1.4 ! misho 5293: /* There was a partial match. If the bumpalong point is not the same as
! 5294: the first inspected character, show the offset explicitly. */
1.1 misho 5295:
5296: else if (count == PCRE_ERROR_PARTIAL)
5297: {
1.1.1.4 ! misho 5298: fprintf(outfile, "Partial match");
! 5299: if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
! 5300: fprintf(outfile, " at offset %d", use_offsets[2]);
! 5301: if (markptr != NULL)
1.1.1.2 misho 5302: {
1.1.1.4 ! misho 5303: fprintf(outfile, ", mark=");
1.1.1.2 misho 5304: PCHARSV(markptr, 0, -1, outfile);
5305: }
1.1 misho 5306: if (use_size_offsets > 1)
5307: {
5308: fprintf(outfile, ": ");
1.1.1.2 misho 5309: PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
1.1 misho 5310: outfile);
5311: }
1.1.1.3 misho 5312: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1 misho 5313: fprintf(outfile, "\n");
5314: break; /* Out of the /g loop */
5315: }
5316:
5317: /* Failed to match. If this is a /g or /G loop and we previously set
5318: g_notempty after a null match, this is not necessarily the end. We want
5319: to advance the start offset, and continue. We won't be at the end of the
5320: string - that was checked before setting g_notempty.
5321:
5322: Complication arises in the case when the newline convention is "any",
5323: "crlf", or "anycrlf". If the previous match was at the end of a line
5324: terminated by CRLF, an advance of one character just passes the \r,
5325: whereas we should prefer the longer newline sequence, as does the code in
5326: pcre_exec(). Fudge the offset value to achieve this. We check for a
1.1.1.2 misho 5327: newline setting in the pattern; if none was set, use PCRE_CONFIG() to
1.1 misho 5328: find the default.
5329:
5330: Otherwise, in the case of UTF-8 matching, the advance must be one
5331: character, not one byte. */
5332:
5333: else
5334: {
5335: if (g_notempty != 0)
5336: {
5337: int onechar = 1;
1.1.1.4 ! misho 5338: unsigned int obits = REAL_PCRE_OPTIONS(re);
1.1 misho 5339: use_offsets[0] = start_offset;
5340: if ((obits & PCRE_NEWLINE_BITS) == 0)
5341: {
5342: int d;
1.1.1.2 misho 5343: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
1.1 misho 5344: /* Note that these values are always the ASCII ones, even in
5345: EBCDIC environments. CR = 13, NL = 10. */
5346: obits = (d == 13)? PCRE_NEWLINE_CR :
5347: (d == 10)? PCRE_NEWLINE_LF :
5348: (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5349: (d == -2)? PCRE_NEWLINE_ANYCRLF :
5350: (d == -1)? PCRE_NEWLINE_ANY : 0;
5351: }
5352: if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5353: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5354: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5355: &&
1.1.1.4 ! misho 5356: start_offset < len - 1 && (
! 5357: #ifdef SUPPORT_PCRE8
! 5358: (pcre_mode == PCRE8_MODE &&
! 5359: bptr[start_offset] == '\r' &&
! 5360: bptr[start_offset + 1] == '\n') ||
1.1.1.2 misho 5361: #endif
1.1.1.4 ! misho 5362: #ifdef SUPPORT_PCRE16
! 5363: (pcre_mode == PCRE16_MODE &&
! 5364: ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
! 5365: ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
! 5366: #endif
! 5367: #ifdef SUPPORT_PCRE32
! 5368: (pcre_mode == PCRE32_MODE &&
! 5369: ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
! 5370: ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
! 5371: #endif
! 5372: 0))
1.1 misho 5373: onechar++;
1.1.1.2 misho 5374: else if (use_utf)
1.1 misho 5375: {
5376: while (start_offset + onechar < len)
5377: {
5378: if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5379: onechar++;
5380: }
5381: }
5382: use_offsets[1] = start_offset + onechar;
5383: }
5384: else
5385: {
5386: switch(count)
5387: {
5388: case PCRE_ERROR_NOMATCH:
5389: if (gmatched == 0)
5390: {
1.1.1.2 misho 5391: if (markptr == NULL)
5392: {
1.1.1.3 misho 5393: fprintf(outfile, "No match");
1.1.1.2 misho 5394: }
5395: else
5396: {
5397: fprintf(outfile, "No match, mark = ");
5398: PCHARSV(markptr, 0, -1, outfile);
5399: }
1.1.1.3 misho 5400: if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5401: putc('\n', outfile);
1.1 misho 5402: }
5403: break;
5404:
5405: case PCRE_ERROR_BADUTF8:
5406: case PCRE_ERROR_SHORTUTF8:
1.1.1.4 ! misho 5407: fprintf(outfile, "Error %d (%s UTF-%d string)", count,
1.1.1.2 misho 5408: (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
1.1.1.4 ! misho 5409: 8 * CHAR_SIZE);
1.1 misho 5410: if (use_size_offsets >= 2)
5411: fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5412: use_offsets[1]);
5413: fprintf(outfile, "\n");
5414: break;
5415:
1.1.1.2 misho 5416: case PCRE_ERROR_BADUTF8_OFFSET:
1.1.1.4 ! misho 5417: fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
! 5418: 8 * CHAR_SIZE);
1.1.1.2 misho 5419: break;
5420:
1.1 misho 5421: default:
1.1.1.2 misho 5422: if (count < 0 &&
5423: (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
1.1 misho 5424: fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5425: else
5426: fprintf(outfile, "Error %d (Unexpected value)\n", count);
5427: break;
5428: }
5429:
5430: break; /* Out of the /g loop */
5431: }
5432: }
5433:
5434: /* If not /g or /G we are done */
5435:
5436: if (!do_g && !do_G) break;
5437:
5438: /* If we have matched an empty string, first check to see if we are at
5439: the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5440: Perl's /g options does. This turns out to be rather cunning. First we set
5441: PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5442: same point. If this fails (picked up above) we advance to the next
5443: character. */
5444:
5445: g_notempty = 0;
5446:
5447: if (use_offsets[0] == use_offsets[1])
5448: {
5449: if (use_offsets[0] == len) break;
5450: g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5451: }
5452:
5453: /* For /g, update the start offset, leaving the rest alone */
5454:
5455: if (do_g) start_offset = use_offsets[1];
5456:
5457: /* For /G, update the pointer and length */
5458:
5459: else
5460: {
1.1.1.2 misho 5461: bptr += use_offsets[1] * CHAR_SIZE;
1.1 misho 5462: len -= use_offsets[1];
5463: }
5464: } /* End of loop for /g and /G */
5465:
5466: NEXT_DATA: continue;
5467: } /* End of loop for data lines */
5468:
5469: CONTINUE:
5470:
5471: #if !defined NOPOSIX
5472: if (posix || do_posix) regfree(&preg);
5473: #endif
5474:
5475: if (re != NULL) new_free(re);
1.1.1.2 misho 5476: if (extra != NULL)
5477: {
5478: PCRE_FREE_STUDY(extra);
5479: }
1.1 misho 5480: if (locale_set)
5481: {
5482: new_free((void *)tables);
5483: setlocale(LC_CTYPE, "C");
5484: locale_set = 0;
5485: }
5486: if (jit_stack != NULL)
5487: {
1.1.1.2 misho 5488: PCRE_JIT_STACK_FREE(jit_stack);
1.1 misho 5489: jit_stack = NULL;
5490: }
5491: }
5492:
5493: if (infile == stdin) fprintf(outfile, "\n");
5494:
5495: EXIT:
5496:
5497: if (infile != NULL && infile != stdin) fclose(infile);
5498: if (outfile != NULL && outfile != stdout) fclose(outfile);
5499:
5500: free(buffer);
5501: free(dbuffer);
5502: free(pbuffer);
5503: free(offsets);
5504:
1.1.1.2 misho 5505: #ifdef SUPPORT_PCRE16
5506: if (buffer16 != NULL) free(buffer16);
5507: #endif
1.1.1.4 ! misho 5508: #ifdef SUPPORT_PCRE32
! 5509: if (buffer32 != NULL) free(buffer32);
! 5510: #endif
! 5511:
! 5512: #if !defined NODFA
! 5513: if (dfa_workspace != NULL)
! 5514: free(dfa_workspace);
! 5515: #endif
! 5516:
! 5517: #if defined(__VMS)
! 5518: yield = SS$_NORMAL; /* Return values via DCL symbols */
! 5519: #endif
1.1.1.2 misho 5520:
1.1 misho 5521: return yield;
5522: }
5523:
5524: /* End of pcretest.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>