Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.2
1.1 misho 1: /*************************************************
2: * PCRE testing program *
3: *************************************************/
4:
5: /* This program was hacked up as a tester for PCRE. I really should have
6: written it more tidily in the first place. Will I ever learn? It has grown and
1.1.1.2 ! misho 7: been extended and consequently is now rather, er, *very* untidy in places. The
! 8: addition of 16-bit support has made it even worse. :-(
1.1 misho 9:
10: -----------------------------------------------------------------------------
11: Redistribution and use in source and binary forms, with or without
12: modification, are permitted provided that the following conditions are met:
13:
14: * Redistributions of source code must retain the above copyright notice,
15: this list of conditions and the following disclaimer.
16:
17: * Redistributions in binary form must reproduce the above copyright
18: notice, this list of conditions and the following disclaimer in the
19: documentation and/or other materials provided with the distribution.
20:
21: * Neither the name of the University of Cambridge nor the names of its
22: contributors may be used to endorse or promote products derived from
23: this software without specific prior written permission.
24:
25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: POSSIBILITY OF SUCH DAMAGE.
36: -----------------------------------------------------------------------------
37: */
38:
1.1.1.2 ! misho 39: /* This program now supports the testing of both the 8-bit and 16-bit PCRE
! 40: libraries in a single program. This is different from the modules such as
! 41: pcre_compile.c in the library itself, which are compiled separately for each
! 42: mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
! 43: (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
! 44: compiled only once. Therefore, it must not make use of any of the macros from
! 45: pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
! 46: however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
! 47: only supported library functions. */
! 48:
1.1 misho 49:
50: #ifdef HAVE_CONFIG_H
51: #include "config.h"
52: #endif
53:
54: #include <ctype.h>
55: #include <stdio.h>
56: #include <string.h>
57: #include <stdlib.h>
58: #include <time.h>
59: #include <locale.h>
60: #include <errno.h>
61:
62: #ifdef SUPPORT_LIBREADLINE
63: #ifdef HAVE_UNISTD_H
64: #include <unistd.h>
65: #endif
66: #include <readline/readline.h>
67: #include <readline/history.h>
68: #endif
69:
70:
71: /* A number of things vary for Windows builds. Originally, pcretest opened its
72: input and output without "b"; then I was told that "b" was needed in some
73: environments, so it was added for release 5.0 to both the input and output. (It
74: makes no difference on Unix-like systems.) Later I was told that it is wrong
75: for the input on Windows. I've now abstracted the modes into two macros that
76: are set here, to make it easier to fiddle with them, and removed "b" from the
77: input mode under Windows. */
78:
79: #if defined(_WIN32) || defined(WIN32)
80: #include <io.h> /* For _setmode() */
81: #include <fcntl.h> /* For _O_BINARY */
82: #define INPUT_MODE "r"
83: #define OUTPUT_MODE "wb"
84:
85: #ifndef isatty
86: #define isatty _isatty /* This is what Windows calls them, I'm told, */
87: #endif /* though in some environments they seem to */
88: /* be already defined, hence the #ifndefs. */
89: #ifndef fileno
90: #define fileno _fileno
91: #endif
92:
93: /* A user sent this fix for Borland Builder 5 under Windows. */
94:
95: #ifdef __BORLANDC__
96: #define _setmode(handle, mode) setmode(handle, mode)
97: #endif
98:
99: /* Not Windows */
100:
101: #else
102: #include <sys/time.h> /* These two includes are needed */
103: #include <sys/resource.h> /* for setrlimit(). */
104: #define INPUT_MODE "rb"
105: #define OUTPUT_MODE "wb"
106: #endif
107:
1.1.1.2 ! misho 108: #define PRIV(name) name
1.1 misho 109:
110: /* We have to include pcre_internal.h because we need the internal info for
111: displaying the results of pcre_study() and we also need to know about the
112: internal macros, structures, and other internal data values; pcretest has
113: "inside information" compared to a program that strictly follows the PCRE API.
114:
115: Although pcre_internal.h does itself include pcre.h, we explicitly include it
116: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117: appropriately for an application, not for building PCRE. */
118:
119: #include "pcre.h"
1.1.1.2 ! misho 120:
! 121: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
! 122: /* Configure internal macros to 16 bit mode. */
! 123: #define COMPILE_PCRE16
! 124: #endif
! 125:
1.1 misho 126: #include "pcre_internal.h"
127:
1.1.1.2 ! misho 128: /* The pcre_printint() function, which prints the internal form of a compiled
! 129: regex, is held in a separate file so that (a) it can be compiled in either
! 130: 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
! 131: when that is compiled in debug mode. */
! 132:
! 133: #ifdef SUPPORT_PCRE8
! 134: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
! 135: #endif
! 136: #ifdef SUPPORT_PCRE16
! 137: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
! 138: #endif
! 139:
1.1 misho 140: /* We need access to some of the data tables that PCRE uses. So as not to have
141: to keep two copies, we include the source file here, changing the names of the
142: external symbols to prevent clashes. */
143:
1.1.1.2 ! misho 144: #define PCRE_INCLUDED
1.1 misho 145:
146: #include "pcre_tables.c"
147:
148: /* The definition of the macro PRINTABLE, which determines whether to print an
149: output character as-is or as a hex value when showing compiled patterns, is
1.1.1.2 ! misho 150: the same as in the printint.src file. We use it here in cases when the locale
! 151: has not been explicitly changed, so as to get consistent output from systems
! 152: that differ in their output from isprint() even in the "C" locale. */
1.1 misho 153:
1.1.1.2 ! misho 154: #ifdef EBCDIC
! 155: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
! 156: #else
! 157: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
! 158: #endif
! 159:
! 160: #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
! 161:
! 162: /* Posix support is disabled in 16 bit only mode. */
! 163: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
! 164: #define NOPOSIX
! 165: #endif
1.1 misho 166:
167: /* It is possible to compile this test program without including support for
168: testing the POSIX interface, though this is not available via the standard
169: Makefile. */
170:
171: #if !defined NOPOSIX
172: #include "pcreposix.h"
173: #endif
174:
1.1.1.2 ! misho 175: /* It is also possible, originally for the benefit of a version that was
! 176: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
! 177: NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
! 178: automatically cut out the UTF support if PCRE is built without it. */
! 179:
! 180: #ifndef SUPPORT_UTF
! 181: #ifndef NOUTF
! 182: #define NOUTF
1.1 misho 183: #endif
184: #endif
185:
1.1.1.2 ! misho 186: /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
! 187: for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
! 188: only from one place and is handled differently). I couldn't dream up any way of
! 189: using a single macro to do this in a generic way, because of the many different
! 190: argument requirements. We know that at least one of SUPPORT_PCRE8 and
! 191: SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
! 192: use these in the definitions of generic macros.
! 193:
! 194: **** Special note about the PCHARSxxx macros: the address of the string to be
! 195: printed is always given as two arguments: a base address followed by an offset.
! 196: The base address is cast to the correct data size for 8 or 16 bit data; the
! 197: offset is in units of this size. If the string were given as base+offset in one
! 198: argument, the casting might be incorrectly applied. */
! 199:
! 200: #ifdef SUPPORT_PCRE8
! 201:
! 202: #define PCHARS8(lv, p, offset, len, f) \
! 203: lv = pchars((pcre_uint8 *)(p) + offset, len, f)
! 204:
! 205: #define PCHARSV8(p, offset, len, f) \
! 206: (void)pchars((pcre_uint8 *)(p) + offset, len, f)
! 207:
! 208: #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
! 209: p = read_capture_name8(p, cn8, re)
! 210:
! 211: #define STRLEN8(p) ((int)strlen((char *)p))
! 212:
! 213: #define SET_PCRE_CALLOUT8(callout) \
! 214: pcre_callout = callout
! 215:
! 216: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
! 217: pcre_assign_jit_stack(extra, callback, userdata)
! 218:
! 219: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
! 220: re = pcre_compile((char *)pat, options, error, erroffset, tables)
! 221:
! 222: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
! 223: namesptr, cbuffer, size) \
! 224: rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
! 225: (char *)namesptr, cbuffer, size)
! 226:
! 227: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
! 228: rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
! 229:
! 230: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
! 231: offsets, size_offsets, workspace, size_workspace) \
! 232: count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
! 233: offsets, size_offsets, workspace, size_workspace)
! 234:
! 235: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
! 236: offsets, size_offsets) \
! 237: count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
! 238: offsets, size_offsets)
! 239:
! 240: #define PCRE_FREE_STUDY8(extra) \
! 241: pcre_free_study(extra)
! 242:
! 243: #define PCRE_FREE_SUBSTRING8(substring) \
! 244: pcre_free_substring(substring)
! 245:
! 246: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
! 247: pcre_free_substring_list(listptr)
! 248:
! 249: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
! 250: getnamesptr, subsptr) \
! 251: rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
! 252: (char *)getnamesptr, subsptr)
! 253: /* Set n to the value pcre_get_stringnumber() returns for the name at ptr in compiled pattern re. The pattern is the second macro argument; the expansion used to ignore that argument and pick up whichever "re" was in scope at the call site. Call sites that pass re behave exactly as before. */
! 254: #define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
! 255: n = pcre_get_stringnumber(re, (char *)ptr)
! 256:
! 257: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
! 258: rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
! 259:
! 260: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
! 261: rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
! 262:
! 263: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
! 264: rc = pcre_pattern_to_host_byte_order(re, extra, tables)
! 265:
! 266: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
! 267: pcre_printint(re, outfile, debug_lengths)
! 268:
! 269: #define PCRE_STUDY8(extra, re, options, error) \
! 270: extra = pcre_study(re, options, error)
! 271:
! 272: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
! 273: pcre_jit_stack_alloc(startsize, maxsize)
! 274:
! 275: #define PCRE_JIT_STACK_FREE8(stack) \
! 276: pcre_jit_stack_free(stack)
! 277:
! 278: #endif /* SUPPORT_PCRE8 */
! 279:
! 280: /* -----------------------------------------------------------*/
! 281:
! 282: #ifdef SUPPORT_PCRE16
! 283:
! 284: #define PCHARS16(lv, p, offset, len, f) \
! 285: lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
! 286:
! 287: #define PCHARSV16(p, offset, len, f) \
! 288: (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
! 289:
! 290: #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
! 291: p = read_capture_name16(p, cn16, re)
! 292:
! 293: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
! 294:
! 295: #define SET_PCRE_CALLOUT16(callout) \
! 296: pcre16_callout = (int (*)(pcre16_callout_block *))callout
! 297:
! 298: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
! 299: pcre16_assign_jit_stack((pcre16_extra *)extra, \
! 300: (pcre16_jit_callback)callback, userdata)
! 301:
! 302: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
! 303: re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
! 304: tables)
! 305: /* 16-bit named-substring copy; rc receives the library call's result. cbuffer is reinterpreted as PCRE_UCHAR16 and size is halved - presumably callers give size in bytes while the library counts 16-bit units; TODO confirm. */
! 306: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
! 307: namesptr, cbuffer, size) \
! 308: rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
! 309: count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
! 310: /* 16-bit numbered-substring copy; rc receives the library call's result. As with the named variant, cbuffer is viewed as PCRE_UCHAR16 and size is halved - presumably a byte count converted to 16-bit units; TODO confirm. */
! 311: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
! 312: rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
! 313: (PCRE_UCHAR16 *)cbuffer, size/2)
! 314:
! 315: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
! 316: offsets, size_offsets, workspace, size_workspace) \
! 317: count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
! 318: (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
! 319: workspace, size_workspace)
! 320:
! 321: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
! 322: offsets, size_offsets) \
! 323: count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
! 324: len, start_offset, options, offsets, size_offsets)
! 325:
! 326: #define PCRE_FREE_STUDY16(extra) \
! 327: pcre16_free_study((pcre16_extra *)extra)
! 328:
! 329: #define PCRE_FREE_SUBSTRING16(substring) \
! 330: pcre16_free_substring((PCRE_SPTR16)substring)
! 331:
! 332: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
! 333: pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
! 334: /* 16-bit fetch of a named substring; rc receives the library call's result. subsptr reaches PCRE_SPTR16 * by way of void * - presumably to avoid a direct cast between unrelated pointer types and the warning that brings; TODO confirm against the caller's declaration. */
! 335: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
! 336: getnamesptr, subsptr) \
! 337: rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
! 338: count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
! 339: /* Set n to the value pcre16_get_stringnumber() returns for the name at ptr in compiled pattern re. The pattern is the second macro argument (the expansion used to ignore it and capture the call site's "re") and is cast to pcre16 * like every other 16-bit wrapper here. Call sites that pass re behave as before. */
! 340: #define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
! 341: n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
! 342: /* 16-bit fetch of substring number i; rc receives the library call's result. subsptr is converted to PCRE_SPTR16 * through void * - presumably to sidestep a warning about casting between unrelated pointer types; TODO confirm. */
! 343: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
! 344: rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
! 345: (PCRE_SPTR16 *)(void*)subsptr)
! 346: /* 16-bit fetch of the whole substring list; rc receives the library call's result. listptr is converted to PCRE_SPTR16 ** through void * - presumably for the same warning-avoidance reason as the other get-substring wrappers; TODO confirm. */
! 347: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
! 348: rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
! 349: (PCRE_SPTR16 **)(void*)listptr)
! 350:
! 351: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
! 352: rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
! 353: tables)
! 354:
! 355: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
! 356: pcre16_printint(re, outfile, debug_lengths)
! 357:
! 358: #define PCRE_STUDY16(extra, re, options, error) \
! 359: extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
! 360:
! 361: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
! 362: (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
! 363:
! 364: #define PCRE_JIT_STACK_FREE16(stack) \
! 365: pcre16_jit_stack_free((pcre16_jit_stack *)stack)
! 366:
! 367: #endif /* SUPPORT_PCRE16 */
! 368:
! 369:
! 370: /* ----- Both modes are supported; a runtime test is needed, except for
! 371: pcre_config(), and the JIT stack functions, when it doesn't matter which
! 372: version is called. ----- */
! 373:
! 374: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
! 375:
! 376: #define CHAR_SIZE (use_pcre16? 2:1)
! 377:
! 378: #define PCHARS(lv, p, offset, len, f) \
! 379: if (use_pcre16) \
! 380: PCHARS16(lv, p, offset, len, f); \
! 381: else \
! 382: PCHARS8(lv, p, offset, len, f)
! 383:
! 384: #define PCHARSV(p, offset, len, f) \
! 385: if (use_pcre16) \
! 386: PCHARSV16(p, offset, len, f); \
! 387: else \
! 388: PCHARSV8(p, offset, len, f)
! 389:
! 390: #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
! 391: if (use_pcre16) \
! 392: READ_CAPTURE_NAME16(p, cn8, cn16, re); \
! 393: else \
! 394: READ_CAPTURE_NAME8(p, cn8, cn16, re)
! 395:
! 396: #define SET_PCRE_CALLOUT(callout) \
! 397: if (use_pcre16) \
! 398: SET_PCRE_CALLOUT16(callout); \
! 399: else \
! 400: SET_PCRE_CALLOUT8(callout)
! 401:
! 402: #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
! 403:
! 404: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
! 405: if (use_pcre16) \
! 406: PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
! 407: else \
! 408: PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
! 409:
! 410: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
! 411: if (use_pcre16) \
! 412: PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
! 413: else \
! 414: PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
! 415:
! 416: #define PCRE_CONFIG pcre_config
! 417:
! 418: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
! 419: namesptr, cbuffer, size) \
! 420: if (use_pcre16) \
! 421: PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
! 422: namesptr, cbuffer, size); \
! 423: else \
! 424: PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
! 425: namesptr, cbuffer, size)
! 426:
! 427: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
! 428: if (use_pcre16) \
! 429: PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
! 430: else \
! 431: PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
! 432:
! 433: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
! 434: offsets, size_offsets, workspace, size_workspace) \
! 435: if (use_pcre16) \
! 436: PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
! 437: offsets, size_offsets, workspace, size_workspace); \
! 438: else \
! 439: PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
! 440: offsets, size_offsets, workspace, size_workspace)
! 441:
! 442: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
! 443: offsets, size_offsets) \
! 444: if (use_pcre16) \
! 445: PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
! 446: offsets, size_offsets); \
! 447: else \
! 448: PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
! 449: offsets, size_offsets)
! 450:
! 451: #define PCRE_FREE_STUDY(extra) \
! 452: if (use_pcre16) \
! 453: PCRE_FREE_STUDY16(extra); \
! 454: else \
! 455: PCRE_FREE_STUDY8(extra)
! 456:
! 457: #define PCRE_FREE_SUBSTRING(substring) \
! 458: if (use_pcre16) \
! 459: PCRE_FREE_SUBSTRING16(substring); \
! 460: else \
! 461: PCRE_FREE_SUBSTRING8(substring)
! 462:
! 463: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
! 464: if (use_pcre16) \
! 465: PCRE_FREE_SUBSTRING_LIST16(listptr); \
! 466: else \
! 467: PCRE_FREE_SUBSTRING_LIST8(listptr)
! 468:
! 469: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
! 470: getnamesptr, subsptr) \
! 471: if (use_pcre16) \
! 472: PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
! 473: getnamesptr, subsptr); \
! 474: else \
! 475: PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
! 476: getnamesptr, subsptr)
! 477:
! 478: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
! 479: if (use_pcre16) \
! 480: PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
! 481: else \
! 482: PCRE_GET_STRINGNUMBER8(n, rc, ptr)
! 483:
! 484: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
! 485: if (use_pcre16) \
! 486: PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
! 487: else \
! 488: PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
! 489:
! 490: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
! 491: if (use_pcre16) \
! 492: PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
! 493: else \
! 494: PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
! 495: /* Unlike the if/else wrappers around it, this one expands to a parenthesized conditional expression, so it yields a value: the 16-bit or 8-bit allocation result, chosen at run time by use_pcre16. */
! 496: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
! 497: (use_pcre16 ? \
! 498: PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
! 499: :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
! 500:
! 501: #define PCRE_JIT_STACK_FREE(stack) \
! 502: if (use_pcre16) \
! 503: PCRE_JIT_STACK_FREE16(stack); \
! 504: else \
! 505: PCRE_JIT_STACK_FREE8(stack)
! 506:
! 507: #define PCRE_MAKETABLES \
! 508: (use_pcre16? pcre16_maketables() : pcre_maketables())
! 509:
! 510: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
! 511: if (use_pcre16) \
! 512: PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
! 513: else \
! 514: PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
! 515:
! 516: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
! 517: if (use_pcre16) \
! 518: PCRE_PRINTINT16(re, outfile, debug_lengths); \
! 519: else \
! 520: PCRE_PRINTINT8(re, outfile, debug_lengths)
! 521:
! 522: #define PCRE_STUDY(extra, re, options, error) \
! 523: if (use_pcre16) \
! 524: PCRE_STUDY16(extra, re, options, error); \
! 525: else \
! 526: PCRE_STUDY8(extra, re, options, error)
! 527:
! 528: /* ----- Only 8-bit mode is supported ----- */
! 529:
! 530: #elif defined SUPPORT_PCRE8
! 531: #define CHAR_SIZE 1
! 532: #define PCHARS PCHARS8
! 533: #define PCHARSV PCHARSV8
! 534: #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
! 535: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
! 536: #define STRLEN STRLEN8
! 537: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
! 538: #define PCRE_COMPILE PCRE_COMPILE8
! 539: #define PCRE_CONFIG pcre_config
! 540: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
! 541: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
! 542: #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
! 543: #define PCRE_EXEC PCRE_EXEC8
! 544: #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
! 545: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
! 546: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
! 547: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
! 548: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
! 549: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
! 550: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
! 551: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
! 552: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
! 553: #define PCRE_MAKETABLES pcre_maketables()
! 554: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
! 555: #define PCRE_PRINTINT PCRE_PRINTINT8
! 556: #define PCRE_STUDY PCRE_STUDY8
! 557:
! 558: /* ----- Only 16-bit mode is supported ----- */
! 559:
! 560: #else
! 561: #define CHAR_SIZE 2
! 562: #define PCHARS PCHARS16
! 563: #define PCHARSV PCHARSV16
! 564: #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
! 565: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
! 566: #define STRLEN STRLEN16
! 567: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
! 568: #define PCRE_COMPILE PCRE_COMPILE16
! 569: #define PCRE_CONFIG pcre16_config
! 570: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
! 571: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
! 572: #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
! 573: #define PCRE_EXEC PCRE_EXEC16
! 574: #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
! 575: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
! 576: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
! 577: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
! 578: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
! 579: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
! 580: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
! 581: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
! 582: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
! 583: #define PCRE_MAKETABLES pcre16_maketables()
! 584: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
! 585: #define PCRE_PRINTINT PCRE_PRINTINT16
! 586: #define PCRE_STUDY PCRE_STUDY16
! 587: #endif
! 588:
! 589: /* ----- End of mode-specific function call macros ----- */
! 590:
1.1 misho 591:
592: /* Other parameters */
593:
594: #ifndef CLOCKS_PER_SEC
595: #ifdef CLK_TCK
596: #define CLOCKS_PER_SEC CLK_TCK
597: #else
598: #define CLOCKS_PER_SEC 100
599: #endif
600: #endif
601:
602: /* This is the default loop count for timing. */
603:
604: #define LOOPREPEAT 500000
605:
606: /* Static variables */
607:
608: static FILE *outfile;
609: static int log_store = 0;
610: static int callout_count;
611: static int callout_extra;
612: static int callout_fail_count;
613: static int callout_fail_id;
614: static int debug_lengths;
615: static int first_callout;
616: static int locale_set = 0;
617: static int show_malloc;
1.1.1.2 ! misho 618: static int use_utf;
1.1 misho 619: static size_t gotten_store;
620: static size_t first_gotten_store = 0;
621: static const unsigned char *last_callout_mark = NULL;
622:
623: /* The buffers grow automatically if very long input lines are encountered. */
624:
625: static int buffer_size = 50000;
1.1.1.2 ! misho 626: static pcre_uint8 *buffer = NULL;
! 627: static pcre_uint8 *dbuffer = NULL;
! 628: static pcre_uint8 *pbuffer = NULL;
! 629:
! 630: /* Another buffer is needed for translation to 16-bit character strings. It will
! 631: be obtained and extended as required. */
! 632:
! 633: #ifdef SUPPORT_PCRE16
! 634: static int buffer16_size = 0;
! 635: static pcre_uint16 *buffer16 = NULL;
! 636:
! 637: #ifdef SUPPORT_PCRE8
! 638:
! 639: /* We need the table of operator lengths that is used for 16-bit compiling, in
! 640: order to swap bytes in a pattern for saving/reloading testing. Luckily, the
! 641: data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
! 642: appropriately for the 16-bit world. Just as a safety check, make sure that
! 643: COMPILE_PCRE16 is *not* set. */
! 644:
! 645: #ifdef COMPILE_PCRE16
! 646: #error COMPILE_PCRE16 must not be set when compiling pcretest.c
! 647: #endif
! 648: /* Re-express LINK_SIZE and IMM2_SIZE before OP_LENGTHS is expanded into OP_lengths16 below: 2 becomes 1, and 3 or 4 becomes 2 - presumably a byte count turned into a count of 16-bit units; TODO confirm against pcre_internal.h. */
! 649: #if LINK_SIZE == 2
! 650: #undef LINK_SIZE
! 651: #define LINK_SIZE 1
! 652: #elif LINK_SIZE == 3 || LINK_SIZE == 4
! 653: #undef LINK_SIZE
! 654: #define LINK_SIZE 2
! 655: #else
! 656: #error LINK_SIZE must be either 2, 3, or 4
! 657: #endif
! 658:
! 659: #undef IMM2_SIZE
! 660: #define IMM2_SIZE 1 /* forced to 1 regardless of the value pcre_internal.h supplied */
! 661:
! 662: #endif /* SUPPORT_PCRE8 */
! 663:
! 664: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
! 665: #endif /* SUPPORT_PCRE16 */
! 666:
! 667: /* If we have 8-bit support, default use_pcre16 to false; if there is also
! 668: 16-bit support, it can be changed by an option. If there is no 8-bit support,
! 669: there must be 16-bit support, so default it to 1. */
! 670:
! 671: #ifdef SUPPORT_PCRE8
! 672: static int use_pcre16 = 0;
! 673: #else
! 674: static int use_pcre16 = 1;
! 675: #endif
1.1 misho 676:
677: /* Textual explanations for runtime error codes */
678:
679: static const char *errtexts[] = {
680: NULL, /* 0 is no error */
681: NULL, /* NOMATCH is handled specially */
682: "NULL argument passed",
683: "bad option value",
684: "magic number missing",
685: "unknown opcode - pattern overwritten?",
686: "no more memory",
687: NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
688: "match limit exceeded",
689: "callout error code",
1.1.1.2 ! misho 690: NULL, /* BADUTF8/16 is handled specially */
! 691: NULL, /* BADUTF8/16 offset is handled specially */
1.1 misho 692: NULL, /* PARTIAL is handled specially */
693: "not used - internal error",
694: "internal error - pattern overwritten?",
695: "bad count value",
696: "item unsupported for DFA matching",
697: "backreference condition or recursion test not supported for DFA matching",
698: "match limit not supported for DFA matching",
699: "workspace size exceeded in DFA matching",
700: "too much recursion for DFA matching",
701: "recursion limit exceeded",
702: "not used - internal error",
703: "invalid combination of newline options",
704: "bad offset value",
1.1.1.2 ! misho 705: NULL, /* SHORTUTF8/16 is handled specially */
1.1 misho 706: "nested recursion at the same subject position",
1.1.1.2 ! misho 707: "JIT stack limit reached",
! 708: "pattern compiled in wrong mode: 8-bit/16-bit error"
1.1 misho 709: };
710:
711:
712: /*************************************************
713: * Alternate character tables *
714: *************************************************/
715:
716: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717: using the default tables of the library. However, the T option can be used to
718: select alternate sets of tables, for different kinds of testing. Note also that
719: the L (locale) option also adjusts the tables. */
720:
721: /* This is the set of tables distributed as default with PCRE. It recognizes
722: only ASCII characters. */
723:
1.1.1.2 ! misho 724: static const pcre_uint8 tables0[] = {
1.1 misho 725:
726: /* This table is a lower casing table. */
727:
728: 0, 1, 2, 3, 4, 5, 6, 7,
729: 8, 9, 10, 11, 12, 13, 14, 15,
730: 16, 17, 18, 19, 20, 21, 22, 23,
731: 24, 25, 26, 27, 28, 29, 30, 31,
732: 32, 33, 34, 35, 36, 37, 38, 39,
733: 40, 41, 42, 43, 44, 45, 46, 47,
734: 48, 49, 50, 51, 52, 53, 54, 55,
735: 56, 57, 58, 59, 60, 61, 62, 63,
736: 64, 97, 98, 99,100,101,102,103,
737: 104,105,106,107,108,109,110,111,
738: 112,113,114,115,116,117,118,119,
739: 120,121,122, 91, 92, 93, 94, 95,
740: 96, 97, 98, 99,100,101,102,103,
741: 104,105,106,107,108,109,110,111,
742: 112,113,114,115,116,117,118,119,
743: 120,121,122,123,124,125,126,127,
744: 128,129,130,131,132,133,134,135,
745: 136,137,138,139,140,141,142,143,
746: 144,145,146,147,148,149,150,151,
747: 152,153,154,155,156,157,158,159,
748: 160,161,162,163,164,165,166,167,
749: 168,169,170,171,172,173,174,175,
750: 176,177,178,179,180,181,182,183,
751: 184,185,186,187,188,189,190,191,
752: 192,193,194,195,196,197,198,199,
753: 200,201,202,203,204,205,206,207,
754: 208,209,210,211,212,213,214,215,
755: 216,217,218,219,220,221,222,223,
756: 224,225,226,227,228,229,230,231,
757: 232,233,234,235,236,237,238,239,
758: 240,241,242,243,244,245,246,247,
759: 248,249,250,251,252,253,254,255,
760:
761: /* This table is a case flipping table. */
762:
763: 0, 1, 2, 3, 4, 5, 6, 7,
764: 8, 9, 10, 11, 12, 13, 14, 15,
765: 16, 17, 18, 19, 20, 21, 22, 23,
766: 24, 25, 26, 27, 28, 29, 30, 31,
767: 32, 33, 34, 35, 36, 37, 38, 39,
768: 40, 41, 42, 43, 44, 45, 46, 47,
769: 48, 49, 50, 51, 52, 53, 54, 55,
770: 56, 57, 58, 59, 60, 61, 62, 63,
771: 64, 97, 98, 99,100,101,102,103,
772: 104,105,106,107,108,109,110,111,
773: 112,113,114,115,116,117,118,119,
774: 120,121,122, 91, 92, 93, 94, 95,
775: 96, 65, 66, 67, 68, 69, 70, 71,
776: 72, 73, 74, 75, 76, 77, 78, 79,
777: 80, 81, 82, 83, 84, 85, 86, 87,
778: 88, 89, 90,123,124,125,126,127,
779: 128,129,130,131,132,133,134,135,
780: 136,137,138,139,140,141,142,143,
781: 144,145,146,147,148,149,150,151,
782: 152,153,154,155,156,157,158,159,
783: 160,161,162,163,164,165,166,167,
784: 168,169,170,171,172,173,174,175,
785: 176,177,178,179,180,181,182,183,
786: 184,185,186,187,188,189,190,191,
787: 192,193,194,195,196,197,198,199,
788: 200,201,202,203,204,205,206,207,
789: 208,209,210,211,212,213,214,215,
790: 216,217,218,219,220,221,222,223,
791: 224,225,226,227,228,229,230,231,
792: 232,233,234,235,236,237,238,239,
793: 240,241,242,243,244,245,246,247,
794: 248,249,250,251,252,253,254,255,
795:
796: /* This table contains bit maps for various character classes. Each map is 32
797: bytes long and the bits run from the least significant end of each byte. The
798: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
799: graph, print, punct, and cntrl. Other classes are built from combinations. */
800:
801: 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
802: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805:
806: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
807: 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
808: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810:
811: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
812: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815:
816: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817: 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
818: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820:
821: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
823: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825:
826: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827: 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
828: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830:
831: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
832: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
833: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835:
836: 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
837: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
838: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840:
841: 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
842: 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
843: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845:
846: 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
847: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
848: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850:
851: /* This table identifies various classes of character by individual bits:
852: 0x01 white space character
853: 0x02 letter
854: 0x04 decimal digit
855: 0x08 hexadecimal digit
856: 0x10 alphanumeric or '_'
857: 0x80 regular expression metacharacter or binary zero
858: */
859:
860: 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
861: 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
862: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
863: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
864: 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
865: 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
866: 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
867: 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
868: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
869: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
870: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
871: 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
872: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
873: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
874: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
875: 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
876: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
877: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
878: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
879: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
880: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
881: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
882: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
883: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
884: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
885: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
886: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
887: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
888: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
889: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
890: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
891: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
892:
893: /* This is a set of tables that came originally from a Windows user. It seems to
894: be at least an approximation of ISO 8859. In particular, there are characters
895: greater than 128 that are marked as spaces, letters, etc. */
896:
1.1.1.2 ! misho 897: static const pcre_uint8 tables1[] = {
/* First 256 entries: lower-casing table. 'A'-'Z' (65-90) map to 'a'-'z'
(97-122) and 192-222 map to 224-254, except 215 which maps to itself. */
1.1 misho 898: 0,1,2,3,4,5,6,7,
899: 8,9,10,11,12,13,14,15,
900: 16,17,18,19,20,21,22,23,
901: 24,25,26,27,28,29,30,31,
902: 32,33,34,35,36,37,38,39,
903: 40,41,42,43,44,45,46,47,
904: 48,49,50,51,52,53,54,55,
905: 56,57,58,59,60,61,62,63,
906: 64,97,98,99,100,101,102,103,
907: 104,105,106,107,108,109,110,111,
908: 112,113,114,115,116,117,118,119,
909: 120,121,122,91,92,93,94,95,
910: 96,97,98,99,100,101,102,103,
911: 104,105,106,107,108,109,110,111,
912: 112,113,114,115,116,117,118,119,
913: 120,121,122,123,124,125,126,127,
914: 128,129,130,131,132,133,134,135,
915: 136,137,138,139,140,141,142,143,
916: 144,145,146,147,148,149,150,151,
917: 152,153,154,155,156,157,158,159,
918: 160,161,162,163,164,165,166,167,
919: 168,169,170,171,172,173,174,175,
920: 176,177,178,179,180,181,182,183,
921: 184,185,186,187,188,189,190,191,
922: 224,225,226,227,228,229,230,231,
923: 232,233,234,235,236,237,238,239,
924: 240,241,242,243,244,245,246,215,
925: 248,249,250,251,252,253,254,223,
926: 224,225,226,227,228,229,230,231,
927: 232,233,234,235,236,237,238,239,
928: 240,241,242,243,244,245,246,247,
929: 248,249,250,251,252,253,254,255,
/* Next 256 entries: case-flipping table. Upper and lower case letters swap
(65-90 <-> 97-122 and 192-222 <-> 224-254); 215, 223, 247 and 255 map to
themselves. */
930: 0,1,2,3,4,5,6,7,
931: 8,9,10,11,12,13,14,15,
932: 16,17,18,19,20,21,22,23,
933: 24,25,26,27,28,29,30,31,
934: 32,33,34,35,36,37,38,39,
935: 40,41,42,43,44,45,46,47,
936: 48,49,50,51,52,53,54,55,
937: 56,57,58,59,60,61,62,63,
938: 64,97,98,99,100,101,102,103,
939: 104,105,106,107,108,109,110,111,
940: 112,113,114,115,116,117,118,119,
941: 120,121,122,91,92,93,94,95,
942: 96,65,66,67,68,69,70,71,
943: 72,73,74,75,76,77,78,79,
944: 80,81,82,83,84,85,86,87,
945: 88,89,90,123,124,125,126,127,
946: 128,129,130,131,132,133,134,135,
947: 136,137,138,139,140,141,142,143,
948: 144,145,146,147,148,149,150,151,
949: 152,153,154,155,156,157,158,159,
950: 160,161,162,163,164,165,166,167,
951: 168,169,170,171,172,173,174,175,
952: 176,177,178,179,180,181,182,183,
953: 184,185,186,187,188,189,190,191,
954: 224,225,226,227,228,229,230,231,
955: 232,233,234,235,236,237,238,239,
956: 240,241,242,243,244,245,246,215,
957: 248,249,250,251,252,253,254,223,
958: 192,193,194,195,196,197,198,199,
959: 200,201,202,203,204,205,206,207,
960: 208,209,210,211,212,213,214,247,
961: 216,217,218,219,220,221,222,255,
/* Next 320 entries: presumably ten 32-byte character-class bit maps laid out
like those of the default table that precedes this one (space, xdigit, digit,
upper, lower, word, graph, print, punct, cntrl) - TODO confirm the order
against the code that consumes these tables. */
962: 0,62,0,0,1,0,0,0,
963: 0,0,0,0,0,0,0,0,
964: 32,0,0,0,1,0,0,0,
965: 0,0,0,0,0,0,0,0,
966: 0,0,0,0,0,0,255,3,
967: 126,0,0,0,126,0,0,0,
968: 0,0,0,0,0,0,0,0,
969: 0,0,0,0,0,0,0,0,
970: 0,0,0,0,0,0,255,3,
971: 0,0,0,0,0,0,0,0,
972: 0,0,0,0,0,0,12,2,
973: 0,0,0,0,0,0,0,0,
974: 0,0,0,0,0,0,0,0,
975: 254,255,255,7,0,0,0,0,
976: 0,0,0,0,0,0,0,0,
977: 255,255,127,127,0,0,0,0,
978: 0,0,0,0,0,0,0,0,
979: 0,0,0,0,254,255,255,7,
980: 0,0,0,0,0,4,32,4,
981: 0,0,0,128,255,255,127,255,
982: 0,0,0,0,0,0,255,3,
983: 254,255,255,135,254,255,255,7,
984: 0,0,0,0,0,4,44,6,
985: 255,255,127,255,255,255,127,255,
986: 0,0,0,0,254,255,255,255,
987: 255,255,255,255,255,255,255,127,
988: 0,0,0,0,254,255,255,255,
989: 255,255,255,255,255,255,255,255,
990: 0,2,0,0,255,255,255,255,
991: 255,255,255,255,255,255,255,127,
992: 0,0,0,0,255,255,255,255,
993: 255,255,255,255,255,255,255,255,
994: 0,0,0,0,254,255,0,252,
995: 1,0,0,248,1,0,0,120,
996: 0,0,0,0,254,255,255,255,
997: 0,0,128,0,0,0,128,0,
998: 255,255,255,255,0,0,0,0,
999: 0,0,0,0,0,0,0,128,
1000: 255,255,255,255,0,0,0,0,
1001: 0,0,0,0,0,0,0,0,
/* Final 256 entries: per-character type flags. The values for 0-127 match the
default table's flag bytes (e.g. 28 == 0x1c for digits, 26 == 0x1a for the
letters a-f), so the bit meanings are presumably the same; unlike the default
table, some characters above 127 also carry flags here. */
1002: 128,0,0,0,0,0,0,0,
1003: 0,1,1,0,1,1,0,0,
1004: 0,0,0,0,0,0,0,0,
1005: 0,0,0,0,0,0,0,0,
1006: 1,0,0,0,128,0,0,0,
1007: 128,128,128,128,0,0,128,0,
1008: 28,28,28,28,28,28,28,28,
1009: 28,28,0,0,0,0,0,128,
1010: 0,26,26,26,26,26,26,18,
1011: 18,18,18,18,18,18,18,18,
1012: 18,18,18,18,18,18,18,18,
1013: 18,18,18,128,128,0,128,16,
1014: 0,26,26,26,26,26,26,18,
1015: 18,18,18,18,18,18,18,18,
1016: 18,18,18,18,18,18,18,18,
1017: 18,18,18,128,128,0,0,0,
1018: 0,0,0,0,0,1,0,0,
1019: 0,0,0,0,0,0,0,0,
1020: 0,0,0,0,0,0,0,0,
1021: 0,0,0,0,0,0,0,0,
1022: 1,0,0,0,0,0,0,0,
1023: 0,0,18,0,0,0,0,0,
1024: 0,0,20,20,0,18,0,0,
1025: 0,20,18,0,0,0,0,0,
1026: 18,18,18,18,18,18,18,18,
1027: 18,18,18,18,18,18,18,18,
1028: 18,18,18,18,18,18,18,0,
1029: 18,18,18,18,18,18,18,18,
1030: 18,18,18,18,18,18,18,18,
1031: 18,18,18,18,18,18,18,18,
1032: 18,18,18,18,18,18,18,0,
1033: 18,18,18,18,18,18,18,18
1034: };
1035:
1036:
1037:
1038:
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for old systems (SunOS4, for example) whose C library has no
strerror(). The message is looked up in the sys_errlist[] table, using
sys_nerr as the upper bound for valid error numbers.

Argument:   n   the error number
Returns:    the matching message, or a fixed "unknown" text when n is
            negative or not below sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  return sys_errlist[n];

return "unknown error number";
}
#endif /* HAVE_STRERROR */
1058:
1059:
1060: /*************************************************
1061: * JIT memory callback *
1062: *************************************************/
1063:
1064: static pcre_jit_stack* jit_callback(void *arg)
1065: {
1066: return (pcre_jit_stack *)arg;
1067: }
1068:
1069:
1.1.1.2 ! misho 1070: #if !defined NOUTF || defined SUPPORT_PCRE16
! 1071: /*************************************************
! 1072: * Convert UTF-8 string to value *
! 1073: *************************************************/
! 1074:
! 1075: /* This function takes one or more bytes that represents a UTF-8 character,
! 1076: and returns the value of the character.
! 1077:
! 1078: Argument:
! 1079: utf8bytes a pointer to the byte vector
! 1080: vptr a pointer to an int to receive the value
! 1081:
! 1082: Returns: > 0 => the number of bytes consumed
! 1083: -6 to 0 => malformed UTF-8 character at offset = (-return)
! 1084: */
! 1085:
! 1086: static int
! 1087: utf82ord(pcre_uint8 *utf8bytes, int *vptr)
! 1088: {
! 1089: int c = *utf8bytes++;
! 1090: int d = c;
! 1091: int i, j, s;
! 1092:
! 1093: for (i = -1; i < 6; i++) /* i is number of additional bytes */
! 1094: {
! 1095: if ((d & 0x80) == 0) break;
! 1096: d <<= 1;
! 1097: }
! 1098:
! 1099: if (i == -1) { *vptr = c; return 1; } /* ascii character */
! 1100: if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
! 1101:
! 1102: /* i now has a value in the range 1-5 */
! 1103:
! 1104: s = 6*i;
! 1105: d = (c & utf8_table3[i]) << s;
! 1106:
! 1107: for (j = 0; j < i; j++)
! 1108: {
! 1109: c = *utf8bytes++;
! 1110: if ((c & 0xc0) != 0x80) return -(j+1);
! 1111: s -= 6;
! 1112: d |= (c & 0x3f) << s;
! 1113: }
! 1114:
! 1115: /* Check that encoding was the correct unique one */
! 1116:
! 1117: for (j = 0; j < utf8_table1_size; j++)
! 1118: if (d <= utf8_table1[j]) break;
! 1119: if (j != i) return -(i+1);
! 1120:
! 1121: /* Valid value */
! 1122:
! 1123: *vptr = d;
! 1124: return i+1;
! 1125: }
! 1126: #endif /* NOUTF || SUPPORT_PCRE16 */
! 1127:
! 1128:
! 1129:
! 1130: #if !defined NOUTF || defined SUPPORT_PCRE16
! 1131: /*************************************************
! 1132: * Convert character value to UTF-8 *
! 1133: *************************************************/
! 1134:
! 1135: /* This function takes an integer value in the range 0 - 0x7fffffff
! 1136: and encodes it as a UTF-8 character in 0 to 6 bytes.
! 1137:
! 1138: Arguments:
! 1139: cvalue the character value
! 1140: utf8bytes pointer to buffer for result - at least 6 bytes long
! 1141:
! 1142: Returns: number of characters placed in the buffer
! 1143: */
! 1144:
! 1145: static int
! 1146: ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
! 1147: {
! 1148: register int i, j;
! 1149: for (i = 0; i < utf8_table1_size; i++)
! 1150: if (cvalue <= utf8_table1[i]) break;
! 1151: utf8bytes += i;
! 1152: for (j = i; j > 0; j--)
! 1153: {
! 1154: *utf8bytes-- = 0x80 | (cvalue & 0x3f);
! 1155: cvalue >>= 6;
! 1156: }
! 1157: *utf8bytes = utf8_table2[i] | cvalue;
! 1158: return i + 1;
! 1159: }
! 1160: #endif
! 1161:
! 1162:
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Converts a byte string into 16-bit units, leaving the zero-terminated
result in buffer16 (which is reallocated when it is too small). The buffer is
sized at twice the input length plus room for the terminator: a non-UTF string
needs exactly double, and a UTF-8 string never needs more than double when
converted to UTF-16.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed for testing that they are correctly faulted.

Patterns are either plain ASCII or UTF-8; data lines are always UTF-8 so that
values greater than 255 can be expressed.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;

if (buffer16_size < 2*len + 2)
  {
  free(buffer16);                      /* free(NULL) is a no-op */
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (data || utf)
  {
  /* The input is UTF-8: decode one character at a time. */

  int c = 0;
  int remaining = len;

  while (remaining > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    remaining -= used;

    if (c >= 0x10000)
      {
      /* Needs a surrogate pair, which only makes sense in UTF mode. */
      if (!utf) return -3;
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

else
  {
  /* A non-UTF pattern: widen each byte as it stands. */

  int k;
  for (k = 0; k < len; k++) *out++ = p[k];
  }

*out = 0;
return out - buffer16;
}
#endif
! 1241:
! 1242:
1.1 misho 1243: /*************************************************
1244: * Read or extend an input line *
1245: *************************************************/
1246:
1247: /* Input lines are read into buffer, but both patterns and data lines can be
1248: continued over multiple input lines. In addition, if the buffer fills up, we
1249: want to automatically expand it so as to be able to handle extremely large
1250: lines that are needed for certain stress tests. When the input buffer is
1251: expanded, the other two buffers must also be expanded likewise, and the
1252: contents of pbuffer, which are a copy of the input for callouts, must be
1253: preserved (for when expansion happens for a data line). This is not the most
1254: optimal way of handling this, but hey, this is just a test program!
1255:
1256: Arguments:
1257: f the file to read
1258: start where in buffer to start (this *must* be within buffer)
1259: prompt for stdin or readline()
1260:
1261: Returns: pointer to the start of new data
1262: could be a copy of start, or could be moved
1263: NULL if no data read and EOF reached
1264: */
1265:
1.1.1.2 ! misho 1266: static pcre_uint8 *
! 1267: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1.1 misho 1268: {
1.1.1.2 ! misho 1269: pcre_uint8 *here = start;
1.1 misho 1270:
1271: for (;;)
1272: {
1.1.1.2 ! misho 1273: size_t rlen = (size_t)(buffer_size - (here - buffer));
1.1 misho 1274:
1275: if (rlen > 1000)
1276: {
1277: int dlen;
1278:
1279: /* If libreadline support is required, use readline() to read a line if the
1280: input is a terminal. Note that readline() removes the trailing newline, so
1281: we must put it back again, to be compatible with fgets(). */
1282:
1283: #ifdef SUPPORT_LIBREADLINE
1284: if (isatty(fileno(f)))
1285: {
1286: size_t len;
1287: char *s = readline(prompt);
1288: if (s == NULL) return (here == start)? NULL : start;
1289: len = strlen(s);
1290: if (len > 0) add_history(s);
1291: if (len > rlen - 1) len = rlen - 1;
1292: memcpy(here, s, len);
1293: here[len] = '\n';
1294: here[len+1] = 0;
1295: free(s);
1296: }
1297: else
1298: #endif
1299:
1300: /* Read the next line by normal means, prompting if the file is stdin. */
1301:
1302: {
1303: if (f == stdin) printf("%s", prompt);
1304: if (fgets((char *)here, rlen, f) == NULL)
1305: return (here == start)? NULL : start;
1306: }
1307:
1308: dlen = (int)strlen((char *)here);
1309: if (dlen > 0 && here[dlen - 1] == '\n') return start;
1310: here += dlen;
1311: }
1312:
1313: else
1314: {
1315: int new_buffer_size = 2*buffer_size;
1.1.1.2 ! misho 1316: pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
! 1317: pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
! 1318: pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1.1 misho 1319:
1320: if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321: {
1322: fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1323: exit(1);
1324: }
1325:
1326: memcpy(new_buffer, buffer, buffer_size);
1327: memcpy(new_pbuffer, pbuffer, buffer_size);
1328:
1329: buffer_size = new_buffer_size;
1330:
1331: start = new_buffer + (start - buffer);
1332: here = new_buffer + (here - buffer);
1333:
1334: free(buffer);
1335: free(dbuffer);
1336: free(pbuffer);
1337:
1338: buffer = new_buffer;
1339: dbuffer = new_dbuffer;
1340: pbuffer = new_pbuffer;
1341: }
1342: }
1343:
1344: return NULL; /* Control never gets here */
1345: }
1346:
1347:
1348:
1349: /*************************************************
1350: * Read number from string *
1351: *************************************************/
1352:
1353: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354: around with conditional compilation, just do the job by hand. It is only used
1355: for unpicking arguments, so just keep it simple.
1356:
1357: Arguments:
1358: str string to be converted
1359: endptr where to put the end pointer
1360:
1361: Returns: the unsigned long
1362: */
1363:
1364: static int
1.1.1.2 ! misho 1365: get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1.1 misho 1366: {
1367: int result = 0;
1368: while(*str != 0 && isspace(*str)) str++;
1369: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1370: *endptr = str;
1371: return(result);
1372: }
1373:
1374:
1375:
1376: /*************************************************
1.1.1.2 ! misho 1377: * Print one character *
1.1 misho 1378: *************************************************/
1379:
1.1.1.2 ! misho 1380: /* Print a single character either literally, or as a hex escape. */
1.1 misho 1381:
1.1.1.2 ! misho 1382: static int pchar(int c, FILE *f)
1.1 misho 1383: {
1.1.1.2 ! misho 1384: if (PRINTOK(c))
! 1385: {
! 1386: if (f != NULL) fprintf(f, "%c", c);
! 1387: return 1;
! 1388: }
1.1 misho 1389:
1.1.1.2 ! misho 1390: if (c < 0x100)
1.1 misho 1391: {
1.1.1.2 ! misho 1392: if (use_utf)
! 1393: {
! 1394: if (f != NULL) fprintf(f, "\\x{%02x}", c);
! 1395: return 6;
! 1396: }
! 1397: else
! 1398: {
! 1399: if (f != NULL) fprintf(f, "\\x%02x", c);
! 1400: return 4;
! 1401: }
1.1 misho 1402: }
1403:
1.1.1.2 ! misho 1404: if (f != NULL) fprintf(f, "\\x{%02x}", c);
! 1405: return (c <= 0x000000ff)? 6 :
! 1406: (c <= 0x00000fff)? 7 :
! 1407: (c <= 0x0000ffff)? 8 :
! 1408: (c <= 0x000fffff)? 9 : 10;
! 1409: }
1.1 misho 1410:
1411:
1412:
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Prints a byte string one character at a time via pchar(). In UTF mode,
each UTF-8 sequence that decodes cleanly and fits within the remaining length
is printed as one character; any other byte is printed on its own.

Arguments:
  p          the string
  length     number of bytes, or negative to use strlen()
  f          the output file, or NULL to just count without printing

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int ch = 0;
int total = 0;
int remaining = length;

if (remaining < 0)
  remaining = strlen((char *)p);

while (remaining > 0)
  {
#if !defined NOUTF
  if (use_utf)
    {
    int used = utf82ord(p, &ch);
    if (used > 0 && used <= remaining)   /* Mustn't run over the end */
      {
      remaining -= used;
      p += used;
      total += pchar(ch, f);
      continue;
      }
    }
#endif

  /* Non-UTF mode, or an undecodable/overlong sequence: take a single byte. */

  ch = *p++;
  remaining--;
  total += pchar(ch, f);
  }

return total;
}
#endif
1451:
1452:
1453:
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;

while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1.1 misho 1466:
1467:
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Prints a string of 16-bit units one character at a time via pchar(). In
UTF mode a high surrogate (0xD800-0xDBFF) that is followed, within the given
length, by a low surrogate (0xDC00-0xDFFF) is combined into one character;
unpaired surrogates are printed as they stand.

Arguments:
  p          the string
  length     number of 16-bit units, or negative to use strlen16()
  f          the output file, or NULL to just count without printing

Returns:     the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range is inclusive of 0xDFFF. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1.1 misho 1504:
1505:
1.1.1.2 ! misho 1506:
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copies the run of alphanumeric characters at p into the buffer that *pp
points to, follows it with two zero bytes, and reports on outfile if the
compiled pattern has no group of that name.

Arguments:
  p          the text to read the name from
  pp         in: where to store the name; out: advanced to the second of the
               two zero bytes written after the name
  re         the compiled pattern

Returns:     pointer to the first character after the name in the input
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;

out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
! 1530:
! 1531:
! 1532:
#ifdef SUPPORT_PCRE16
/*************************************************
*    Read a capture name (16-bit) and check it   *
*************************************************/

/* Note that the text being read is 8-bit. The run of alphanumeric characters
at p is widened into the 16-bit buffer that *pp points to and followed by two
zero units; a message is written to outfile if the compiled pattern has no
group of that name.

Arguments:
  p          the 8-bit text to read the name from
  pp         in: where to store the name; out: advanced to the second of the
               two zero units written after the name
  re         the compiled pattern

Returns:     pointer to the first character after the name in the input
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;

out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1.1 misho 1557:
1558:
1559:
1560: /*************************************************
1561: * Callout function *
1562: *************************************************/
1563:
1564: /* Called from PCRE as a result of the (?C) item. We print out where we are in
1565: the match. Yield zero unless more callouts than the fail count, or the callout
1566: data is not zero. */
1567:
1568: static int callout(pcre_callout_block *cb)
1569: {
1570: FILE *f = (first_callout | callout_extra)? outfile : NULL;
1571: int i, pre_start, post_start, subject_length;
1572:
1573: if (callout_extra)
1574: {
1575: fprintf(f, "Callout %d: last capture = %d\n",
1576: cb->callout_number, cb->capture_last);
1577:
1578: for (i = 0; i < cb->capture_top * 2; i += 2)
1579: {
1580: if (cb->offset_vector[i] < 0)
1581: fprintf(f, "%2d: <unset>\n", i/2);
1582: else
1583: {
1584: fprintf(f, "%2d: ", i/2);
1.1.1.2 ! misho 1585: PCHARSV(cb->subject, cb->offset_vector[i],
1.1 misho 1586: cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587: fprintf(f, "\n");
1588: }
1589: }
1590: }
1591:
1592: /* Re-print the subject in canonical form, the first time or if giving full
1593: datails. On subsequent calls in the same match, we use pchars just to find the
1594: printed lengths of the substrings. */
1595:
1596: if (f != NULL) fprintf(f, "--->");
1597:
1.1.1.2 ! misho 1598: PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
! 1599: PCHARS(post_start, cb->subject, cb->start_match,
1.1 misho 1600: cb->current_position - cb->start_match, f);
1601:
1.1.1.2 ! misho 1602: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1.1 misho 1603:
1.1.1.2 ! misho 1604: PCHARSV(cb->subject, cb->current_position,
1.1 misho 1605: cb->subject_length - cb->current_position, f);
1606:
1607: if (f != NULL) fprintf(f, "\n");
1608:
1609: /* Always print appropriate indicators, with callout number if not already
1610: shown. For automatic callouts, show the pattern offset. */
1611:
1612: if (cb->callout_number == 255)
1613: {
1614: fprintf(outfile, "%+3d ", cb->pattern_position);
1615: if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1616: }
1617: else
1618: {
1619: if (callout_extra) fprintf(outfile, " ");
1620: else fprintf(outfile, "%3d ", cb->callout_number);
1621: }
1622:
1623: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1624: fprintf(outfile, "^");
1625:
1626: if (post_start > 0)
1627: {
1628: for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1629: fprintf(outfile, "^");
1630: }
1631:
1632: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1633: fprintf(outfile, " ");
1634:
1635: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1636: pbuffer + cb->pattern_position);
1637:
1638: fprintf(outfile, "\n");
1639: first_callout = 0;
1640:
1641: if (cb->mark != last_callout_mark)
1642: {
1.1.1.2 ! misho 1643: if (cb->mark == NULL)
! 1644: fprintf(outfile, "Latest Mark: <unset>\n");
! 1645: else
! 1646: {
! 1647: fprintf(outfile, "Latest Mark: ");
! 1648: PCHARSV(cb->mark, 0, -1, outfile);
! 1649: putc('\n', outfile);
! 1650: }
1.1 misho 1651: last_callout_mark = cb->mark;
1652: }
1653:
1654: if (cb->callout_data != NULL)
1655: {
1656: int callout_data = *((int *)(cb->callout_data));
1657: if (callout_data != 0)
1658: {
1659: fprintf(outfile, "Callout data = %d\n", callout_data);
1660: return callout_data;
1661: }
1662: }
1663:
1664: return (cb->callout_number != callout_fail_id)? 0 :
1665: (++callout_count >= callout_fail_count)? 1 : 0;
1666: }
1667:
1668:
1669: /*************************************************
1670: * Local malloc functions *
1671: *************************************************/
1672:
1673: /* Alternative malloc function, to test functionality and save the size of a
1674: compiled re, which is the first store request that pcre_compile() makes. The
1675: show_malloc variable is set only during matching. */
1676:
1677: static void *new_malloc(size_t size)
1678: {
1679: void *block = malloc(size);
1680: gotten_store = size;
1681: if (first_gotten_store == 0) first_gotten_store = size;
1682: if (show_malloc)
1683: fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1684: return block;
1685: }
1686:
1687: static void new_free(void *block)
1688: {
1689: if (show_malloc)
1690: fprintf(outfile, "free %p\n", block);
1691: free(block);
1692: }
1693:
1694: /* For recursion malloc/free, to test stacking calls */
1695:
1696: static void *stack_malloc(size_t size)
1697: {
1698: void *block = malloc(size);
1699: if (show_malloc)
1700: fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1701: return block;
1702: }
1703:
1704: static void stack_free(void *block)
1705: {
1706: if (show_malloc)
1707: fprintf(outfile, "stack_free %p\n", block);
1708: free(block);
1709: }
1710:
1711:
1.1.1.2 ! misho 1712: /*************************************************
! 1713: * Call pcre_fullinfo() *
! 1714: *************************************************/
! 1715:
! 1716: /* Get one piece of information from the pcre_fullinfo() function. When only
! 1717: one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
! 1718: value, but the code is defensive.
! 1719:
! 1720: Arguments:
! 1721: re compiled regex
! 1722: study study data
! 1723: option PCRE_INFO_xxx option
! 1724: ptr where to put the data
! 1725:
! 1726: Returns: 0 when OK, < 0 on error
! 1727: */
! 1728:
! 1729: static int
! 1730: new_info(pcre *re, pcre_extra *study, int option, void *ptr)
! 1731: {
! 1732: int rc;
! 1733:
! 1734: if (use_pcre16)
! 1735: #ifdef SUPPORT_PCRE16
! 1736: rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
! 1737: #else
! 1738: rc = PCRE_ERROR_BADMODE;
! 1739: #endif
! 1740: else
! 1741: #ifdef SUPPORT_PCRE8
! 1742: rc = pcre_fullinfo(re, study, option, ptr);
! 1743: #else
! 1744: rc = PCRE_ERROR_BADMODE;
! 1745: #endif
! 1746:
! 1747: if (rc < 0)
! 1748: {
! 1749: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
! 1750: use_pcre16? "16" : "", option);
! 1751: if (rc == PCRE_ERROR_BADMODE)
! 1752: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
! 1753: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
! 1754: }
! 1755:
! 1756: return rc;
! 1757: }
! 1758:
! 1759:
! 1760:
! 1761: /*************************************************
! 1762: * Swap byte functions *
! 1763: *************************************************/
! 1764:
! 1765: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
! 1766: value, respectively.
! 1767:
! 1768: Arguments:
! 1769: value any number
! 1770:
! 1771: Returns: the byte swapped value
! 1772: */
! 1773:
! 1774: static pcre_uint32
! 1775: swap_uint32(pcre_uint32 value)
! 1776: {
! 1777: return ((value & 0x000000ff) << 24) |
! 1778: ((value & 0x0000ff00) << 8) |
! 1779: ((value & 0x00ff0000) >> 8) |
! 1780: (value >> 24);
! 1781: }
! 1782:
! 1783: static pcre_uint16
! 1784: swap_uint16(pcre_uint16 value)
! 1785: {
! 1786: return (value >> 8) | (value << 8);
! 1787: }
! 1788:
! 1789:
! 1790:
! 1791: /*************************************************
! 1792: * Flip bytes in a compiled pattern *
! 1793: *************************************************/
! 1794:
! 1795: /* This function is called if the 'F' option was present on a pattern that is
! 1796: to be written to a file. We flip the bytes of all the integer fields in the
! 1797: regex data block and the study block. In 16-bit mode this also flips relevant
! 1798: bytes in the pattern itself. This is to make it possible to test PCRE's
! 1799: ability to reload byte-flipped patterns, e.g. those compiled on a different
! 1800: architecture. */
! 1801:
! 1802: static void
! 1803: regexflip(pcre *ere, pcre_extra *extra)
! 1804: {
! 1805: REAL_PCRE *re = (REAL_PCRE *)ere;
! 1806: #ifdef SUPPORT_PCRE16
! 1807: int op;
! 1808: pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
! 1809: int length = re->name_count * re->name_entry_size;
! 1810: #ifdef SUPPORT_UTF
! 1811: BOOL utf = (re->options & PCRE_UTF16) != 0;
! 1812: BOOL utf16_char = FALSE;
! 1813: #endif /* SUPPORT_UTF */
! 1814: #endif /* SUPPORT_PCRE16 */
! 1815:
! 1816: /* Always flip the bytes in the main data block and study blocks. */
! 1817:
! 1818: re->magic_number = REVERSED_MAGIC_NUMBER;
! 1819: re->size = swap_uint32(re->size);
! 1820: re->options = swap_uint32(re->options);
! 1821: re->flags = swap_uint16(re->flags);
! 1822: re->top_bracket = swap_uint16(re->top_bracket);
! 1823: re->top_backref = swap_uint16(re->top_backref);
! 1824: re->first_char = swap_uint16(re->first_char);
! 1825: re->req_char = swap_uint16(re->req_char);
! 1826: re->name_table_offset = swap_uint16(re->name_table_offset);
! 1827: re->name_entry_size = swap_uint16(re->name_entry_size);
! 1828: re->name_count = swap_uint16(re->name_count);
! 1829:
! 1830: if (extra != NULL)
! 1831: {
! 1832: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
! 1833: rsd->size = swap_uint32(rsd->size);
! 1834: rsd->flags = swap_uint32(rsd->flags);
! 1835: rsd->minlength = swap_uint32(rsd->minlength);
! 1836: }
! 1837:
! 1838: /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
! 1839: in the name table, if present, and then in the pattern itself. */
! 1840:
! 1841: #ifdef SUPPORT_PCRE16
! 1842: if (!use_pcre16) return;
! 1843:
! 1844: while(TRUE)
! 1845: {
! 1846: /* Swap previous characters. */
! 1847: while (length-- > 0)
! 1848: {
! 1849: *ptr = swap_uint16(*ptr);
! 1850: ptr++;
! 1851: }
! 1852: #ifdef SUPPORT_UTF
! 1853: if (utf16_char)
! 1854: {
! 1855: if ((ptr[-1] & 0xfc00) == 0xd800)
! 1856: {
! 1857: /* We know that there is only one extra character in UTF-16. */
! 1858: *ptr = swap_uint16(*ptr);
! 1859: ptr++;
! 1860: }
! 1861: }
! 1862: utf16_char = FALSE;
! 1863: #endif /* SUPPORT_UTF */
! 1864:
! 1865: /* Get next opcode. */
1.1 misho 1866:
1.1.1.2 ! misho 1867: length = 0;
! 1868: op = *ptr;
! 1869: *ptr++ = swap_uint16(op);
1.1 misho 1870:
1.1.1.2 ! misho 1871: switch (op)
! 1872: {
! 1873: case OP_END:
! 1874: return;
1.1 misho 1875:
1.1.1.2 ! misho 1876: #ifdef SUPPORT_UTF
! 1877: case OP_CHAR:
! 1878: case OP_CHARI:
! 1879: case OP_NOT:
! 1880: case OP_NOTI:
! 1881: case OP_STAR:
! 1882: case OP_MINSTAR:
! 1883: case OP_PLUS:
! 1884: case OP_MINPLUS:
! 1885: case OP_QUERY:
! 1886: case OP_MINQUERY:
! 1887: case OP_UPTO:
! 1888: case OP_MINUPTO:
! 1889: case OP_EXACT:
! 1890: case OP_POSSTAR:
! 1891: case OP_POSPLUS:
! 1892: case OP_POSQUERY:
! 1893: case OP_POSUPTO:
! 1894: case OP_STARI:
! 1895: case OP_MINSTARI:
! 1896: case OP_PLUSI:
! 1897: case OP_MINPLUSI:
! 1898: case OP_QUERYI:
! 1899: case OP_MINQUERYI:
! 1900: case OP_UPTOI:
! 1901: case OP_MINUPTOI:
! 1902: case OP_EXACTI:
! 1903: case OP_POSSTARI:
! 1904: case OP_POSPLUSI:
! 1905: case OP_POSQUERYI:
! 1906: case OP_POSUPTOI:
! 1907: case OP_NOTSTAR:
! 1908: case OP_NOTMINSTAR:
! 1909: case OP_NOTPLUS:
! 1910: case OP_NOTMINPLUS:
! 1911: case OP_NOTQUERY:
! 1912: case OP_NOTMINQUERY:
! 1913: case OP_NOTUPTO:
! 1914: case OP_NOTMINUPTO:
! 1915: case OP_NOTEXACT:
! 1916: case OP_NOTPOSSTAR:
! 1917: case OP_NOTPOSPLUS:
! 1918: case OP_NOTPOSQUERY:
! 1919: case OP_NOTPOSUPTO:
! 1920: case OP_NOTSTARI:
! 1921: case OP_NOTMINSTARI:
! 1922: case OP_NOTPLUSI:
! 1923: case OP_NOTMINPLUSI:
! 1924: case OP_NOTQUERYI:
! 1925: case OP_NOTMINQUERYI:
! 1926: case OP_NOTUPTOI:
! 1927: case OP_NOTMINUPTOI:
! 1928: case OP_NOTEXACTI:
! 1929: case OP_NOTPOSSTARI:
! 1930: case OP_NOTPOSPLUSI:
! 1931: case OP_NOTPOSQUERYI:
! 1932: case OP_NOTPOSUPTOI:
! 1933: if (utf) utf16_char = TRUE;
! 1934: #endif
! 1935: /* Fall through. */
1.1 misho 1936:
1.1.1.2 ! misho 1937: default:
! 1938: length = OP_lengths16[op] - 1;
! 1939: break;
! 1940:
! 1941: case OP_CLASS:
! 1942: case OP_NCLASS:
! 1943: /* Skip the character bit map. */
! 1944: ptr += 32/sizeof(pcre_uint16);
! 1945: length = 0;
! 1946: break;
! 1947:
! 1948: case OP_XCLASS:
! 1949: /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
! 1950: if (LINK_SIZE > 1)
! 1951: length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
! 1952: - (1 + LINK_SIZE + 1));
! 1953: else
! 1954: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1.1 misho 1955:
1.1.1.2 ! misho 1956: /* Reverse the size of the XCLASS instance. */
! 1957: *ptr = swap_uint16(*ptr);
! 1958: ptr++;
! 1959: if (LINK_SIZE > 1)
! 1960: {
! 1961: *ptr = swap_uint16(*ptr);
! 1962: ptr++;
! 1963: }
1.1 misho 1964:
1.1.1.2 ! misho 1965: op = *ptr;
! 1966: *ptr = swap_uint16(op);
! 1967: ptr++;
! 1968: if ((op & XCL_MAP) != 0)
! 1969: {
! 1970: /* Skip the character bit map. */
! 1971: ptr += 32/sizeof(pcre_uint16);
! 1972: length -= 32/sizeof(pcre_uint16);
! 1973: }
! 1974: break;
! 1975: }
! 1976: }
! 1977: /* Control should never reach here in 16 bit mode. */
! 1978: #endif /* SUPPORT_PCRE16 */
1.1 misho 1979: }
1980:
1981:
1982:
1983: /*************************************************
1984: * Check match or recursion limit *
1985: *************************************************/
1986:
1987: static int
1.1.1.2 ! misho 1988: check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1.1 misho 1989: int start_offset, int options, int *use_offsets, int use_size_offsets,
1990: int flag, unsigned long int *limit, int errnumber, const char *msg)
1991: {
1992: int count;
1993: int min = 0;
1994: int mid = 64;
1995: int max = -1;
1996:
1997: extra->flags |= flag;
1998:
1999: for (;;)
2000: {
2001: *limit = mid;
2002:
1.1.1.2 ! misho 2003: PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1.1 misho 2004: use_offsets, use_size_offsets);
2005:
2006: if (count == errnumber)
2007: {
2008: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2009: min = mid;
2010: mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2011: }
2012:
2013: else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2014: count == PCRE_ERROR_PARTIAL)
2015: {
2016: if (mid == min + 1)
2017: {
2018: fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2019: break;
2020: }
2021: /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2022: max = mid;
2023: mid = (min + mid)/2;
2024: }
2025: else break; /* Some other error */
2026: }
2027:
2028: extra->flags &= ~flag;
2029: return count;
2030: }
2031:
2032:
2033:
2034: /*************************************************
2035: * Case-independent strncmp() function *
2036: *************************************************/
2037:
2038: /*
2039: Arguments:
2040: s first string
2041: t second string
2042: n number of characters to compare
2043:
2044: Returns: < 0, = 0, or > 0, according to the comparison
2045: */
2046:
2047: static int
1.1.1.2 ! misho 2048: strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1.1 misho 2049: {
2050: while (n--)
2051: {
2052: int c = tolower(*s++) - tolower(*t++);
2053: if (c) return c;
2054: }
2055: return 0;
2056: }
2057:
2058:
2059:
2060: /*************************************************
2061: * Check newline indicator *
2062: *************************************************/
2063:
2064: /* This is used both at compile and run-time to check for <xxx> escapes. Print
2065: a message and return 0 if there is no match.
2066:
2067: Arguments:
2068: p points after the leading '<'
2069: f file for error message
2070:
2071: Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2072: */
2073:
2074: static int
1.1.1.2 ! misho 2075: check_newline(pcre_uint8 *p, FILE *f)
1.1 misho 2076: {
1.1.1.2 ! misho 2077: if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
! 2078: if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
! 2079: if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
! 2080: if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
! 2081: if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
! 2082: if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
! 2083: if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1.1 misho 2084: fprintf(f, "Unknown newline type at: <%s\n", p);
2085: return 0;
2086: }
2087:
2088:
2089:
2090: /*************************************************
2091: * Usage function *
2092: *************************************************/
2093:
/* Write the command-line help text to stdout. Lines describing options that
depend on how pcretest was built (readline, 16-bit library, DFA matching,
POSIX interface) are selected by the corresponding preprocessor symbols. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2140:
2141:
2142:
2143: /*************************************************
2144: * Main Program *
2145: *************************************************/
2146:
2147: /* Read lines from named file or stdin and write to named file or stdout; lines
2148: consist of a regular expression, in delimiters and optionally followed by
2149: options, followed by a set of test data, terminated by an empty line. */
2150:
2151: int main(int argc, char **argv)
2152: {
2153: FILE *infile = stdin;
1.1.1.2 ! misho 2154: const char *version;
1.1 misho 2155: int options = 0;
2156: int study_options = 0;
2157: int default_find_match_limit = FALSE;
2158: int op = 1;
2159: int timeit = 0;
2160: int timeitm = 0;
2161: int showinfo = 0;
2162: int showstore = 0;
2163: int force_study = -1;
2164: int force_study_options = 0;
2165: int quiet = 0;
2166: int size_offsets = 45;
2167: int size_offsets_max;
2168: int *offsets = NULL;
2169: #if !defined NOPOSIX
2170: int posix = 0;
2171: #endif
2172: int debug = 0;
2173: int done = 0;
2174: int all_use_dfa = 0;
2175: int yield = 0;
2176: int stack_size;
2177:
2178: pcre_jit_stack *jit_stack = NULL;
2179:
1.1.1.2 ! misho 2180: /* These vectors store, end-to-end, a list of zero-terminated captured
! 2181: substring names, each list itself being terminated by an empty name. Assume
! 2182: that 1024 is plenty long enough for the few names we'll be testing. It is
! 2183: easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
! 2184: for the actual memory, to ensure alignment. */
! 2185:
! 2186: pcre_uint16 copynames[1024];
! 2187: pcre_uint16 getnames[1024];
! 2188:
! 2189: #ifdef SUPPORT_PCRE16
! 2190: pcre_uint16 *cn16ptr;
! 2191: pcre_uint16 *gn16ptr;
! 2192: #endif
1.1 misho 2193:
1.1.1.2 ! misho 2194: #ifdef SUPPORT_PCRE8
! 2195: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
! 2196: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
! 2197: pcre_uint8 *cn8ptr;
! 2198: pcre_uint8 *gn8ptr;
! 2199: #endif
1.1 misho 2200:
1.1.1.2 ! misho 2201: /* Get buffers from malloc() so that valgrind will check their misuse when
! 2202: debugging. They grow automatically when very long lines are read. The 16-bit
! 2203: buffer (buffer16) is obtained only if needed. */
! 2204:
! 2205: buffer = (pcre_uint8 *)malloc(buffer_size);
! 2206: dbuffer = (pcre_uint8 *)malloc(buffer_size);
! 2207: pbuffer = (pcre_uint8 *)malloc(buffer_size);
1.1 misho 2208:
2209: /* The outfile variable is static so that new_malloc can use it. */
2210:
2211: outfile = stdout;
2212:
2213: /* The following _setmode() stuff is some Windows magic that tells its runtime
2214: library to translate CRLF into a single LF character. At least, that's what
2215: I've been told: never having used Windows I take this all on trust. Originally
2216: it set 0x8000, but then I was advised that _O_BINARY was better. */
2217:
2218: #if defined(_WIN32) || defined(WIN32)
2219: _setmode( _fileno( stdout ), _O_BINARY );
2220: #endif
2221:
1.1.1.2 ! misho 2222: /* Get the version number: both pcre_version() and pcre16_version() give the
! 2223: same answer. We just need to ensure that we call one that is available. */
! 2224:
! 2225: #ifdef SUPPORT_PCRE8
! 2226: version = pcre_version();
! 2227: #else
! 2228: version = pcre16_version();
! 2229: #endif
! 2230:
1.1 misho 2231: /* Scan options */
2232:
2233: while (argc > 1 && argv[op][0] == '-')
2234: {
1.1.1.2 ! misho 2235: pcre_uint8 *endptr;
1.1 misho 2236:
2237: if (strcmp(argv[op], "-m") == 0) showstore = 1;
2238: else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2239: else if (strcmp(argv[op], "-s+") == 0)
2240: {
2241: force_study = 1;
2242: force_study_options = PCRE_STUDY_JIT_COMPILE;
2243: }
1.1.1.2 ! misho 2244: else if (strcmp(argv[op], "-16") == 0)
! 2245: {
! 2246: #ifdef SUPPORT_PCRE16
! 2247: use_pcre16 = 1;
! 2248: #else
! 2249: printf("** This version of PCRE was built without 16-bit support\n");
! 2250: exit(1);
! 2251: #endif
! 2252: }
1.1 misho 2253: else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2254: else if (strcmp(argv[op], "-b") == 0) debug = 1;
2255: else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2256: else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2257: else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2258: #if !defined NODFA
2259: else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2260: #endif
2261: else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1.1.1.2 ! misho 2262: ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 2263: *endptr == 0))
2264: {
2265: op++;
2266: argc--;
2267: }
2268: else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2269: {
2270: int both = argv[op][2] == 0;
2271: int temp;
1.1.1.2 ! misho 2272: if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1.1 misho 2273: *endptr == 0))
2274: {
2275: timeitm = temp;
2276: op++;
2277: argc--;
2278: }
2279: else timeitm = LOOPREPEAT;
2280: if (both) timeit = timeitm;
2281: }
2282: else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1.1.1.2 ! misho 2283: ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1 misho 2284: *endptr == 0))
2285: {
2286: #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2287: printf("PCRE: -S not supported on this OS\n");
2288: exit(1);
2289: #else
2290: int rc;
2291: struct rlimit rlim;
2292: getrlimit(RLIMIT_STACK, &rlim);
2293: rlim.rlim_cur = stack_size * 1024 * 1024;
2294: rc = setrlimit(RLIMIT_STACK, &rlim);
2295: if (rc != 0)
2296: {
2297: printf("PCRE: setrlimit() failed with error %d\n", rc);
2298: exit(1);
2299: }
2300: op++;
2301: argc--;
2302: #endif
2303: }
2304: #if !defined NOPOSIX
2305: else if (strcmp(argv[op], "-p") == 0) posix = 1;
2306: #endif
2307: else if (strcmp(argv[op], "-C") == 0)
2308: {
2309: int rc;
2310: unsigned long int lrc;
1.1.1.2 ! misho 2311:
! 2312: if (argc > 2)
! 2313: {
! 2314: if (strcmp(argv[op + 1], "linksize") == 0)
! 2315: {
! 2316: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
! 2317: printf("%d\n", rc);
! 2318: yield = rc;
! 2319: goto EXIT;
! 2320: }
! 2321: if (strcmp(argv[op + 1], "pcre8") == 0)
! 2322: {
! 2323: #ifdef SUPPORT_PCRE8
! 2324: printf("1\n");
! 2325: yield = 1;
! 2326: #else
! 2327: printf("0\n");
! 2328: yield = 0;
! 2329: #endif
! 2330: goto EXIT;
! 2331: }
! 2332: if (strcmp(argv[op + 1], "pcre16") == 0)
! 2333: {
! 2334: #ifdef SUPPORT_PCRE16
! 2335: printf("1\n");
! 2336: yield = 1;
! 2337: #else
! 2338: printf("0\n");
! 2339: yield = 0;
! 2340: #endif
! 2341: goto EXIT;
! 2342: }
! 2343: if (strcmp(argv[op + 1], "utf") == 0)
! 2344: {
! 2345: #ifdef SUPPORT_PCRE8
! 2346: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
! 2347: printf("%d\n", rc);
! 2348: yield = rc;
! 2349: #else
! 2350: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
! 2351: printf("%d\n", rc);
! 2352: yield = rc;
! 2353: #endif
! 2354: goto EXIT;
! 2355: }
! 2356: if (strcmp(argv[op + 1], "ucp") == 0)
! 2357: {
! 2358: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
! 2359: printf("%d\n", rc);
! 2360: yield = rc;
! 2361: goto EXIT;
! 2362: }
! 2363: if (strcmp(argv[op + 1], "jit") == 0)
! 2364: {
! 2365: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
! 2366: printf("%d\n", rc);
! 2367: yield = rc;
! 2368: goto EXIT;
! 2369: }
! 2370: if (strcmp(argv[op + 1], "newline") == 0)
! 2371: {
! 2372: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
! 2373: /* Note that these values are always the ASCII values, even
! 2374: in EBCDIC environments. CR is 13 and NL is 10. */
! 2375: printf("%s\n", (rc == 13)? "CR" :
! 2376: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
! 2377: (rc == -2)? "ANYCRLF" :
! 2378: (rc == -1)? "ANY" : "???");
! 2379: goto EXIT;
! 2380: }
! 2381: printf("Unknown -C option: %s\n", argv[op + 1]);
! 2382: goto EXIT;
! 2383: }
! 2384:
! 2385: printf("PCRE version %s\n", version);
1.1 misho 2386: printf("Compiled with\n");
1.1.1.2 ! misho 2387:
! 2388: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
! 2389: are set, either both UTFs are supported or both are not supported. */
! 2390:
! 2391: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
! 2392: printf(" 8-bit and 16-bit support\n");
! 2393: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
! 2394: if (rc)
! 2395: printf(" UTF-8 and UTF-16 support\n");
! 2396: else
! 2397: printf(" No UTF-8 or UTF-16 support\n");
! 2398: #elif defined SUPPORT_PCRE8
! 2399: printf(" 8-bit support only\n");
1.1 misho 2400: (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2401: printf(" %sUTF-8 support\n", rc? "" : "No ");
1.1.1.2 ! misho 2402: #else
! 2403: printf(" 16-bit support only\n");
! 2404: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
! 2405: printf(" %sUTF-16 support\n", rc? "" : "No ");
! 2406: #endif
! 2407:
! 2408: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1.1 misho 2409: printf(" %sUnicode properties support\n", rc? "" : "No ");
1.1.1.2 ! misho 2410: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
1.1 misho 2411: if (rc)
1.1.1.2 ! misho 2412: {
! 2413: const char *arch;
! 2414: (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
! 2415: printf(" Just-in-time compiler support: %s\n", arch);
! 2416: }
1.1 misho 2417: else
2418: printf(" No just-in-time compiler support\n");
1.1.1.2 ! misho 2419: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
1.1 misho 2420: /* Note that these values are always the ASCII values, even
2421: in EBCDIC environments. CR is 13 and NL is 10. */
2422: printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2423: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2424: (rc == -2)? "ANYCRLF" :
2425: (rc == -1)? "ANY" : "???");
1.1.1.2 ! misho 2426: (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
1.1 misho 2427: printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2428: "all Unicode newlines");
1.1.1.2 ! misho 2429: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
1.1 misho 2430: printf(" Internal link size = %d\n", rc);
1.1.1.2 ! misho 2431: (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1.1 misho 2432: printf(" POSIX malloc threshold = %d\n", rc);
1.1.1.2 ! misho 2433: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1.1 misho 2434: printf(" Default match limit = %ld\n", lrc);
1.1.1.2 ! misho 2435: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1.1 misho 2436: printf(" Default recursion depth limit = %ld\n", lrc);
1.1.1.2 ! misho 2437: (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
! 2438: printf(" Match recursion uses %s", rc? "stack" : "heap");
! 2439: if (showstore)
! 2440: {
! 2441: PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
! 2442: printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
! 2443: }
! 2444: printf("\n");
1.1 misho 2445: goto EXIT;
2446: }
2447: else if (strcmp(argv[op], "-help") == 0 ||
2448: strcmp(argv[op], "--help") == 0)
2449: {
2450: usage();
2451: goto EXIT;
2452: }
2453: else
2454: {
2455: printf("** Unknown or malformed option %s\n", argv[op]);
2456: usage();
2457: yield = 1;
2458: goto EXIT;
2459: }
2460: op++;
2461: argc--;
2462: }
2463:
2464: /* Get the store for the offsets vector, and remember what it was */
2465:
2466: size_offsets_max = size_offsets;
2467: offsets = (int *)malloc(size_offsets_max * sizeof(int));
2468: if (offsets == NULL)
2469: {
2470: printf("** Failed to get %d bytes of memory for offsets vector\n",
2471: (int)(size_offsets_max * sizeof(int)));
2472: yield = 1;
2473: goto EXIT;
2474: }
2475:
2476: /* Sort out the input and output files */
2477:
2478: if (argc > 1)
2479: {
2480: infile = fopen(argv[op], INPUT_MODE);
2481: if (infile == NULL)
2482: {
2483: printf("** Failed to open %s\n", argv[op]);
2484: yield = 1;
2485: goto EXIT;
2486: }
2487: }
2488:
2489: if (argc > 2)
2490: {
2491: outfile = fopen(argv[op+1], OUTPUT_MODE);
2492: if (outfile == NULL)
2493: {
2494: printf("** Failed to open %s\n", argv[op+1]);
2495: yield = 1;
2496: goto EXIT;
2497: }
2498: }
2499:
2500: /* Set alternative malloc function */
2501:
1.1.1.2 ! misho 2502: #ifdef SUPPORT_PCRE8
1.1 misho 2503: pcre_malloc = new_malloc;
2504: pcre_free = new_free;
2505: pcre_stack_malloc = stack_malloc;
2506: pcre_stack_free = stack_free;
1.1.1.2 ! misho 2507: #endif
! 2508:
! 2509: #ifdef SUPPORT_PCRE16
! 2510: pcre16_malloc = new_malloc;
! 2511: pcre16_free = new_free;
! 2512: pcre16_stack_malloc = stack_malloc;
! 2513: pcre16_stack_free = stack_free;
! 2514: #endif
1.1 misho 2515:
2516: /* Heading line unless quiet, then prompt for first regex if stdin */
2517:
1.1.1.2 ! misho 2518: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
1.1 misho 2519:
2520: /* Main loop */
2521:
2522: while (!done)
2523: {
2524: pcre *re = NULL;
2525: pcre_extra *extra = NULL;
2526:
2527: #if !defined NOPOSIX /* There are still compilers that require no indent */
2528: regex_t preg;
2529: int do_posix = 0;
2530: #endif
2531:
2532: const char *error;
1.1.1.2 ! misho 2533: pcre_uint8 *markptr;
! 2534: pcre_uint8 *p, *pp, *ppp;
! 2535: pcre_uint8 *to_file = NULL;
! 2536: const pcre_uint8 *tables = NULL;
! 2537: unsigned long int get_options;
1.1 misho 2538: unsigned long int true_size, true_study_size = 0;
2539: size_t size, regex_gotten_store;
2540: int do_allcaps = 0;
2541: int do_mark = 0;
2542: int do_study = 0;
2543: int no_force_study = 0;
2544: int do_debug = debug;
2545: int do_G = 0;
2546: int do_g = 0;
2547: int do_showinfo = showinfo;
2548: int do_showrest = 0;
2549: int do_showcaprest = 0;
2550: int do_flip = 0;
2551: int erroroffset, len, delimiter, poffset;
2552:
1.1.1.2 ! misho 2553: use_utf = 0;
1.1 misho 2554: debug_lengths = 1;
2555:
2556: if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2557: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2558: fflush(outfile);
2559:
2560: p = buffer;
2561: while (isspace(*p)) p++;
2562: if (*p == 0) continue;
2563:
2564: /* See if the pattern is to be loaded pre-compiled from a file. */
2565:
2566: if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2567: {
1.1.1.2 ! misho 2568: pcre_uint32 magic;
! 2569: pcre_uint8 sbuf[8];
1.1 misho 2570: FILE *f;
2571:
2572: p++;
1.1.1.2 ! misho 2573: if (*p == '!')
! 2574: {
! 2575: do_debug = TRUE;
! 2576: do_showinfo = TRUE;
! 2577: p++;
! 2578: }
! 2579:
1.1 misho 2580: pp = p + (int)strlen((char *)p);
2581: while (isspace(pp[-1])) pp--;
2582: *pp = 0;
2583:
2584: f = fopen((char *)p, "rb");
2585: if (f == NULL)
2586: {
2587: fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2588: continue;
2589: }
2590:
1.1.1.2 ! misho 2591: first_gotten_store = 0;
1.1 misho 2592: if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2593:
2594: true_size =
2595: (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2596: true_study_size =
2597: (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2598:
1.1.1.2 ! misho 2599: re = (pcre *)new_malloc(true_size);
1.1 misho 2600: regex_gotten_store = first_gotten_store;
2601:
2602: if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2603:
1.1.1.2 ! misho 2604: magic = ((REAL_PCRE *)re)->magic_number;
1.1 misho 2605: if (magic != MAGIC_NUMBER)
2606: {
1.1.1.2 ! misho 2607: if (swap_uint32(magic) == MAGIC_NUMBER)
1.1 misho 2608: {
2609: do_flip = 1;
2610: }
2611: else
2612: {
2613: fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2614: fclose(f);
2615: continue;
2616: }
2617: }
2618:
1.1.1.2 ! misho 2619: /* We hide the byte-invert info for little and big endian tests. */
1.1 misho 2620: fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1.1.1.2 ! misho 2621: do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
1.1 misho 2622:
2623: /* Now see if there is any following study data. */
2624:
2625: if (true_study_size != 0)
2626: {
2627: pcre_study_data *psd;
2628:
2629: extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2630: extra->flags = PCRE_EXTRA_STUDY_DATA;
2631:
2632: psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2633: extra->study_data = psd;
2634:
2635: if (fread(psd, 1, true_study_size, f) != true_study_size)
2636: {
2637: FAIL_READ:
2638: fprintf(outfile, "Failed to read data from %s\n", p);
1.1.1.2 ! misho 2639: if (extra != NULL)
! 2640: {
! 2641: PCRE_FREE_STUDY(extra);
! 2642: }
1.1 misho 2643: if (re != NULL) new_free(re);
2644: fclose(f);
2645: continue;
2646: }
2647: fprintf(outfile, "Study data loaded from %s\n", p);
2648: do_study = 1; /* To get the data output if requested */
2649: }
2650: else fprintf(outfile, "No study data\n");
2651:
1.1.1.2 ! misho 2652: /* Flip the necessary bytes. */
! 2653: if (do_flip)
! 2654: {
! 2655: int rc;
! 2656: PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
! 2657: if (rc == PCRE_ERROR_BADMODE)
! 2658: {
! 2659: /* Simulate the result of the function call below. */
! 2660: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
! 2661: use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
! 2662: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
! 2663: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
! 2664: continue;
! 2665: }
! 2666: }
! 2667:
! 2668: /* Need to know if UTF-8 for printing data strings. */
! 2669:
! 2670: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
! 2671: use_utf = (get_options & PCRE_UTF8) != 0;
! 2672:
1.1 misho 2673: fclose(f);
2674: goto SHOW_INFO;
2675: }
2676:
2677: /* In-line pattern (the usual case). Get the delimiter and seek the end of
1.1.1.2 ! misho 2678: the pattern; if it isn't complete, read more. */
1.1 misho 2679:
2680: delimiter = *p++;
2681:
2682: if (isalnum(delimiter) || delimiter == '\\')
2683: {
2684: fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2685: goto SKIP_DATA;
2686: }
2687:
2688: pp = p;
2689: poffset = (int)(p - buffer);
2690:
2691: for(;;)
2692: {
2693: while (*pp != 0)
2694: {
2695: if (*pp == '\\' && pp[1] != 0) pp++;
2696: else if (*pp == delimiter) break;
2697: pp++;
2698: }
2699: if (*pp != 0) break;
2700: if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2701: {
2702: fprintf(outfile, "** Unexpected EOF\n");
2703: done = 1;
2704: goto CONTINUE;
2705: }
2706: if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2707: }
2708:
2709: /* The buffer may have moved while being extended; reset the start of data
2710: pointer to the correct relative point in the buffer. */
2711:
2712: p = buffer + poffset;
2713:
2714: /* If the first character after the delimiter is backslash, make
2715: the pattern end with backslash. This is purely to provide a way
2716: of testing for the error message when a pattern ends with backslash. */
2717:
2718: if (pp[1] == '\\') *pp++ = '\\';
2719:
2720: /* Terminate the pattern at the delimiter, and save a copy of the pattern
2721: for callouts. */
2722:
2723: *pp++ = 0;
2724: strcpy((char *)pbuffer, (char *)p);
2725:
2726: /* Look for options after final delimiter */
2727:
2728: options = 0;
2729: study_options = 0;
2730: log_store = showstore; /* default from command line */
2731:
2732: while (*pp != 0)
2733: {
2734: switch (*pp++)
2735: {
2736: case 'f': options |= PCRE_FIRSTLINE; break;
2737: case 'g': do_g = 1; break;
2738: case 'i': options |= PCRE_CASELESS; break;
2739: case 'm': options |= PCRE_MULTILINE; break;
2740: case 's': options |= PCRE_DOTALL; break;
2741: case 'x': options |= PCRE_EXTENDED; break;
2742:
2743: case '+':
2744: if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2745: break;
2746:
2747: case '=': do_allcaps = 1; break;
2748: case 'A': options |= PCRE_ANCHORED; break;
2749: case 'B': do_debug = 1; break;
2750: case 'C': options |= PCRE_AUTO_CALLOUT; break;
2751: case 'D': do_debug = do_showinfo = 1; break;
2752: case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2753: case 'F': do_flip = 1; break;
2754: case 'G': do_G = 1; break;
2755: case 'I': do_showinfo = 1; break;
2756: case 'J': options |= PCRE_DUPNAMES; break;
2757: case 'K': do_mark = 1; break;
2758: case 'M': log_store = 1; break;
2759: case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2760:
2761: #if !defined NOPOSIX
2762: case 'P': do_posix = 1; break;
2763: #endif
2764:
2765: case 'S':
2766: if (do_study == 0)
2767: {
2768: do_study = 1;
2769: if (*pp == '+')
2770: {
2771: study_options |= PCRE_STUDY_JIT_COMPILE;
2772: pp++;
2773: }
2774: }
2775: else
2776: {
2777: do_study = 0;
2778: no_force_study = 1;
2779: }
2780: break;
2781:
2782: case 'U': options |= PCRE_UNGREEDY; break;
2783: case 'W': options |= PCRE_UCP; break;
2784: case 'X': options |= PCRE_EXTRA; break;
2785: case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2786: case 'Z': debug_lengths = 0; break;
1.1.1.2 ! misho 2787: case '8': options |= PCRE_UTF8; use_utf = 1; break;
1.1 misho 2788: case '?': options |= PCRE_NO_UTF8_CHECK; break;
2789:
2790: case 'T':
2791: switch (*pp++)
2792: {
2793: case '0': tables = tables0; break;
2794: case '1': tables = tables1; break;
2795:
2796: case '\r':
2797: case '\n':
2798: case ' ':
2799: case 0:
2800: fprintf(outfile, "** Missing table number after /T\n");
2801: goto SKIP_DATA;
2802:
2803: default:
2804: fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2805: goto SKIP_DATA;
2806: }
2807: break;
2808:
2809: case 'L':
2810: ppp = pp;
2811: /* The '\r' test here is so that it works on Windows. */
2812: /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
2813: while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2814: *ppp = 0;
2815: if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2816: {
2817: fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2818: goto SKIP_DATA;
2819: }
2820: locale_set = 1;
1.1.1.2 ! misho 2821: tables = PCRE_MAKETABLES;
1.1 misho 2822: pp = ppp;
2823: break;
2824:
2825: case '>':
2826: to_file = pp;
2827: while (*pp != 0) pp++;
2828: while (isspace(pp[-1])) pp--;
2829: *pp = 0;
2830: break;
2831:
2832: case '<':
2833: {
1.1.1.2 ! misho 2834: if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1.1 misho 2835: {
2836: options |= PCRE_JAVASCRIPT_COMPAT;
2837: pp += 3;
2838: }
2839: else
2840: {
2841: int x = check_newline(pp, outfile);
2842: if (x == 0) goto SKIP_DATA;
2843: options |= x;
2844: while (*pp++ != '>');
2845: }
2846: }
2847: break;
2848:
2849: case '\r': /* So that it works in Windows */
2850: case '\n':
2851: case ' ':
2852: break;
2853:
2854: default:
2855: fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2856: goto SKIP_DATA;
2857: }
2858: }
2859:
2860: /* Handle compiling via the POSIX interface, which doesn't support the
2861: timing, showing, or debugging options, nor the ability to pass over
1.1.1.2 ! misho 2862: local character tables. Neither does it have 16-bit support. */
1.1 misho 2863:
2864: #if !defined NOPOSIX
2865: if (posix || do_posix)
2866: {
2867: int rc;
2868: int cflags = 0;
2869:
2870: if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2871: if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2872: if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2873: if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2874: if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2875: if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2876: if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2877:
2878: first_gotten_store = 0;
2879: rc = regcomp(&preg, (char *)p, cflags);
2880:
2881: /* Compilation failed; go back for another re, skipping to blank line
2882: if non-interactive. */
2883:
2884: if (rc != 0)
2885: {
2886: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2887: fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2888: goto SKIP_DATA;
2889: }
2890: }
2891:
2892: /* Handle compiling via the native interface */
2893:
2894: else
2895: #endif /* !defined NOPOSIX */
2896:
2897: {
1.1.1.2 ! misho 2898: /* In 16-bit mode, convert the input. */
! 2899:
! 2900: #ifdef SUPPORT_PCRE16
! 2901: if (use_pcre16)
! 2902: {
! 2903: switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
! 2904: {
! 2905: case -1:
! 2906: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
! 2907: "converted to UTF-16\n");
! 2908: goto SKIP_DATA;
! 2909:
! 2910: case -2:
! 2911: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
! 2912: "cannot be converted to UTF-16\n");
! 2913: goto SKIP_DATA;
! 2914:
! 2915: case -3: /* "Impossible error" when to16 is called arg1 FALSE */
! 2916: fprintf(outfile, "**Failed: character value greater than 0xffff "
! 2917: "cannot be converted to 16-bit in non-UTF mode\n");
! 2918: goto SKIP_DATA;
! 2919:
! 2920: default:
! 2921: break;
! 2922: }
! 2923: p = (pcre_uint8 *)buffer16;
! 2924: }
! 2925: #endif
! 2926:
! 2927: /* Compile many times when timing */
1.1 misho 2928:
2929: if (timeit > 0)
2930: {
2931: register int i;
2932: clock_t time_taken;
2933: clock_t start_time = clock();
2934: for (i = 0; i < timeit; i++)
2935: {
1.1.1.2 ! misho 2936: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 2937: if (re != NULL) free(re);
2938: }
2939: time_taken = clock() - start_time;
2940: fprintf(outfile, "Compile time %.4f milliseconds\n",
2941: (((double)time_taken * 1000.0) / (double)timeit) /
2942: (double)CLOCKS_PER_SEC);
2943: }
2944:
2945: first_gotten_store = 0;
1.1.1.2 ! misho 2946: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1 misho 2947:
2948: /* Compilation failed; go back for another re, skipping to blank line
2949: if non-interactive. */
2950:
2951: if (re == NULL)
2952: {
2953: fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2954: SKIP_DATA:
2955: if (infile != stdin)
2956: {
2957: for (;;)
2958: {
2959: if (extend_inputline(infile, buffer, NULL) == NULL)
2960: {
2961: done = 1;
2962: goto CONTINUE;
2963: }
2964: len = (int)strlen((char *)buffer);
2965: while (len > 0 && isspace(buffer[len-1])) len--;
2966: if (len == 0) break;
2967: }
2968: fprintf(outfile, "\n");
2969: }
2970: goto CONTINUE;
2971: }
2972:
2973: /* Compilation succeeded. It is now possible to set the UTF-8 option from
2974: within the regex; check for this so that we know how to process the data
2975: lines. */
2976:
1.1.1.2 ! misho 2977: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
! 2978: goto SKIP_DATA;
! 2979: if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
1.1 misho 2980:
2981: /* Extract the size for possible writing before possibly flipping it,
2982: and remember the store that was got. */
2983:
1.1.1.2 ! misho 2984: true_size = ((REAL_PCRE *)re)->size;
1.1 misho 2985: regex_gotten_store = first_gotten_store;
2986:
2987: /* Output code size information if requested */
2988:
2989: if (log_store)
2990: fprintf(outfile, "Memory allocation (code space): %d\n",
2991: (int)(first_gotten_store -
1.1.1.2 ! misho 2992: sizeof(REAL_PCRE) -
! 2993: ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
1.1 misho 2994:
2995: /* If -s or /S was present, study the regex to generate additional info to
2996: help with the matching, unless the pattern has the SS option, which
2997: suppresses the effect of /S (used for a few test patterns where studying is
2998: never sensible). */
2999:
3000: if (do_study || (force_study >= 0 && !no_force_study))
3001: {
3002: if (timeit > 0)
3003: {
3004: register int i;
3005: clock_t time_taken;
3006: clock_t start_time = clock();
3007: for (i = 0; i < timeit; i++)
1.1.1.2 ! misho 3008: {
! 3009: PCRE_STUDY(extra, re, study_options | force_study_options, &error);
! 3010: }
1.1 misho 3011: time_taken = clock() - start_time;
1.1.1.2 ! misho 3012: if (extra != NULL)
! 3013: {
! 3014: PCRE_FREE_STUDY(extra);
! 3015: }
1.1 misho 3016: fprintf(outfile, " Study time %.4f milliseconds\n",
3017: (((double)time_taken * 1000.0) / (double)timeit) /
3018: (double)CLOCKS_PER_SEC);
3019: }
1.1.1.2 ! misho 3020: PCRE_STUDY(extra, re, study_options | force_study_options, &error);
1.1 misho 3021: if (error != NULL)
3022: fprintf(outfile, "Failed to study: %s\n", error);
3023: else if (extra != NULL)
3024: {
3025: true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3026: if (log_store)
3027: {
3028: size_t jitsize;
1.1.1.2 ! misho 3029: if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
! 3030: jitsize != 0)
! 3031: fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
1.1 misho 3032: }
3033: }
3034: }
3035:
3036: /* If /K was present, we set up for handling MARK data. */
3037:
3038: if (do_mark)
3039: {
3040: if (extra == NULL)
3041: {
3042: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3043: extra->flags = 0;
3044: }
3045: extra->mark = &markptr;
3046: extra->flags |= PCRE_EXTRA_MARK;
3047: }
3048:
1.1.1.2 ! misho 3049: /* Extract and display information from the compiled data if required. */
1.1 misho 3050:
3051: SHOW_INFO:
3052:
3053: if (do_debug)
3054: {
3055: fprintf(outfile, "------------------------------------------------------------------\n");
1.1.1.2 ! misho 3056: PCRE_PRINTINT(re, outfile, debug_lengths);
1.1 misho 3057: }
3058:
3059: /* We already have the options in get_options (see above) */
3060:
3061: if (do_showinfo)
3062: {
3063: unsigned long int all_options;
3064: int count, backrefmax, first_char, need_char, okpartial, jchanged,
3065: hascrorlf;
3066: int nameentrysize, namecount;
1.1.1.2 ! misho 3067: const pcre_uint8 *nametable;
1.1 misho 3068:
1.1.1.2 ! misho 3069: if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
! 3070: new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
! 3071: new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
! 3072: new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
! 3073: new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
! 3074: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
! 3075: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
! 3076: new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
! 3077: new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
! 3078: new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
! 3079: new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
! 3080: != 0)
! 3081: goto SKIP_DATA;
1.1 misho 3082:
3083: if (size != regex_gotten_store) fprintf(outfile,
3084: "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3085: (int)size, (int)regex_gotten_store);
3086:
3087: fprintf(outfile, "Capturing subpattern count = %d\n", count);
3088: if (backrefmax > 0)
3089: fprintf(outfile, "Max back reference = %d\n", backrefmax);
3090:
3091: if (namecount > 0)
3092: {
3093: fprintf(outfile, "Named capturing subpatterns:\n");
3094: while (namecount-- > 0)
3095: {
1.1.1.2 ! misho 3096: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
! 3097: int imm2_size = use_pcre16 ? 1 : 2;
! 3098: #else
! 3099: int imm2_size = IMM2_SIZE;
! 3100: #endif
! 3101: int length = (int)STRLEN(nametable + imm2_size);
! 3102: fprintf(outfile, " ");
! 3103: PCHARSV(nametable, imm2_size, length, outfile);
! 3104: while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
! 3105: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
! 3106: fprintf(outfile, "%3d\n", use_pcre16?
! 3107: (int)(((PCRE_SPTR16)nametable)[0])
! 3108: :((int)nametable[0] << 8) | (int)nametable[1]);
! 3109: nametable += nameentrysize * (use_pcre16 ? 2 : 1);
! 3110: #else
! 3111: fprintf(outfile, "%3d\n", GET2(nametable, 0));
! 3112: #ifdef SUPPORT_PCRE8
1.1 misho 3113: nametable += nameentrysize;
1.1.1.2 ! misho 3114: #else
! 3115: nametable += nameentrysize * 2;
! 3116: #endif
! 3117: #endif
1.1 misho 3118: }
3119: }
3120:
3121: if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3122: if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3123:
1.1.1.2 ! misho 3124: all_options = ((REAL_PCRE *)re)->options;
! 3125: if (do_flip) all_options = swap_uint32(all_options);
1.1 misho 3126:
3127: if (get_options == 0) fprintf(outfile, "No options\n");
3128: else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3129: ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3130: ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3131: ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3132: ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3133: ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3134: ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3135: ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3136: ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3137: ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3138: ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3139: ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3140: ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1.1.1.2 ! misho 3141: ((get_options & PCRE_UTF8) != 0)? " utf" : "",
1.1 misho 3142: ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1.1.1.2 ! misho 3143: ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
1.1 misho 3144: ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3145: ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3146:
3147: if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3148:
3149: switch (get_options & PCRE_NEWLINE_BITS)
3150: {
3151: case PCRE_NEWLINE_CR:
3152: fprintf(outfile, "Forced newline sequence: CR\n");
3153: break;
3154:
3155: case PCRE_NEWLINE_LF:
3156: fprintf(outfile, "Forced newline sequence: LF\n");
3157: break;
3158:
3159: case PCRE_NEWLINE_CRLF:
3160: fprintf(outfile, "Forced newline sequence: CRLF\n");
3161: break;
3162:
3163: case PCRE_NEWLINE_ANYCRLF:
3164: fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3165: break;
3166:
3167: case PCRE_NEWLINE_ANY:
3168: fprintf(outfile, "Forced newline sequence: ANY\n");
3169: break;
3170:
3171: default:
3172: break;
3173: }
3174:
3175: if (first_char == -1)
3176: {
3177: fprintf(outfile, "First char at start or follows newline\n");
3178: }
3179: else if (first_char < 0)
3180: {
3181: fprintf(outfile, "No first char\n");
3182: }
3183: else
3184: {
1.1.1.2 ! misho 3185: const char *caseless =
! 3186: ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
1.1 misho 3187: "" : " (caseless)";
1.1.1.2 ! misho 3188:
! 3189: if (PRINTOK(first_char))
! 3190: fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
1.1 misho 3191: else
1.1.1.2 ! misho 3192: {
! 3193: fprintf(outfile, "First char = ");
! 3194: pchar(first_char, outfile);
! 3195: fprintf(outfile, "%s\n", caseless);
! 3196: }
1.1 misho 3197: }
3198:
3199: if (need_char < 0)
3200: {
3201: fprintf(outfile, "No need char\n");
3202: }
3203: else
3204: {
1.1.1.2 ! misho 3205: const char *caseless =
! 3206: ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
1.1 misho 3207: "" : " (caseless)";
1.1.1.2 ! misho 3208:
! 3209: if (PRINTOK(need_char))
! 3210: fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
1.1 misho 3211: else
1.1.1.2 ! misho 3212: {
! 3213: fprintf(outfile, "Need char = ");
! 3214: pchar(need_char, outfile);
! 3215: fprintf(outfile, "%s\n", caseless);
! 3216: }
1.1 misho 3217: }
3218:
3219: /* Don't output study size; at present it is in any case a fixed
3220: value, but it varies, depending on the computer architecture, and
3221: so messes up the test suite. (And with the /F option, it might be
3222: flipped.) If study was forced by an external -s, don't show this
3223: information unless -i or -d was also present. This means that, except
3224: when auto-callouts are involved, the output from runs with and without
3225: -s should be identical. */
3226:
3227: if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3228: {
3229: if (extra == NULL)
3230: fprintf(outfile, "Study returned NULL\n");
3231: else
3232: {
1.1.1.2 ! misho 3233: pcre_uint8 *start_bits = NULL;
1.1 misho 3234: int minlength;
3235:
1.1.1.2 ! misho 3236: if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
! 3237: fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1.1 misho 3238:
1.1.1.2 ! misho 3239: if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
1.1 misho 3240: {
1.1.1.2 ! misho 3241: if (start_bits == NULL)
! 3242: fprintf(outfile, "No set of starting bytes\n");
! 3243: else
1.1 misho 3244: {
1.1.1.2 ! misho 3245: int i;
! 3246: int c = 24;
! 3247: fprintf(outfile, "Starting byte set: ");
! 3248: for (i = 0; i < 256; i++)
1.1 misho 3249: {
1.1.1.2 ! misho 3250: if ((start_bits[i/8] & (1<<(i&7))) != 0)
1.1 misho 3251: {
1.1.1.2 ! misho 3252: if (c > 75)
! 3253: {
! 3254: fprintf(outfile, "\n ");
! 3255: c = 2;
! 3256: }
! 3257: if (PRINTOK(i) && i != ' ')
! 3258: {
! 3259: fprintf(outfile, "%c ", i);
! 3260: c += 2;
! 3261: }
! 3262: else
! 3263: {
! 3264: fprintf(outfile, "\\x%02x ", i);
! 3265: c += 5;
! 3266: }
1.1 misho 3267: }
3268: }
1.1.1.2 ! misho 3269: fprintf(outfile, "\n");
1.1 misho 3270: }
3271: }
3272: }
3273:
3274: /* Show this only if the JIT was set by /S, not by -s. */
3275:
3276: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3277: {
3278: int jit;
1.1.1.2 ! misho 3279: if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
! 3280: {
! 3281: if (jit)
! 3282: fprintf(outfile, "JIT study was successful\n");
! 3283: else
1.1 misho 3284: #ifdef SUPPORT_JIT
1.1.1.2 ! misho 3285: fprintf(outfile, "JIT study was not successful\n");
1.1 misho 3286: #else
1.1.1.2 ! misho 3287: fprintf(outfile, "JIT support is not available in this version of PCRE\n");
1.1 misho 3288: #endif
1.1.1.2 ! misho 3289: }
1.1 misho 3290: }
3291: }
3292: }
3293:
3294: /* If the '>' option was present, we write out the regex to a file, and
3295: that is all. The first 8 bytes of the file are the regex length and then
3296: the study length, in big-endian order. */
3297:
3298: if (to_file != NULL)
3299: {
3300: FILE *f = fopen((char *)to_file, "wb");
3301: if (f == NULL)
3302: {
3303: fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3304: }
3305: else
3306: {
1.1.1.2 ! misho 3307: pcre_uint8 sbuf[8];
! 3308:
! 3309: if (do_flip) regexflip(re, extra);
! 3310: sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
! 3311: sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
! 3312: sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
! 3313: sbuf[3] = (pcre_uint8)((true_size) & 255);
! 3314: sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
! 3315: sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
! 3316: sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
! 3317: sbuf[7] = (pcre_uint8)((true_study_size) & 255);
1.1 misho 3318:
3319: if (fwrite(sbuf, 1, 8, f) < 8 ||
3320: fwrite(re, 1, true_size, f) < true_size)
3321: {
3322: fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3323: }
3324: else
3325: {
3326: fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3327:
3328: /* If there is study data, write it. */
3329:
3330: if (extra != NULL)
3331: {
3332: if (fwrite(extra->study_data, 1, true_study_size, f) <
3333: true_study_size)
3334: {
3335: fprintf(outfile, "Write error on %s: %s\n", to_file,
3336: strerror(errno));
3337: }
3338: else fprintf(outfile, "Study data written to %s\n", to_file);
3339: }
3340: }
3341: fclose(f);
3342: }
3343:
3344: new_free(re);
1.1.1.2 ! misho 3345: if (extra != NULL)
! 3346: {
! 3347: PCRE_FREE_STUDY(extra);
! 3348: }
1.1 misho 3349: if (locale_set)
3350: {
3351: new_free((void *)tables);
3352: setlocale(LC_CTYPE, "C");
3353: locale_set = 0;
3354: }
3355: continue; /* With next regex */
3356: }
3357: } /* End of non-POSIX compile */
3358:
3359: /* Read data lines and test them */
3360:
3361: for (;;)
3362: {
1.1.1.2 ! misho 3363: pcre_uint8 *q;
! 3364: pcre_uint8 *bptr;
1.1 misho 3365: int *use_offsets = offsets;
3366: int use_size_offsets = size_offsets;
3367: int callout_data = 0;
3368: int callout_data_set = 0;
3369: int count, c;
3370: int copystrings = 0;
3371: int find_match_limit = default_find_match_limit;
3372: int getstrings = 0;
3373: int getlist = 0;
3374: int gmatched = 0;
3375: int start_offset = 0;
3376: int start_offset_sign = 1;
3377: int g_notempty = 0;
3378: int use_dfa = 0;
3379:
3380: *copynames = 0;
3381: *getnames = 0;
3382:
1.1.1.2 ! misho 3383: #ifdef SUPPORT_PCRE16
! 3384: cn16ptr = copynames;
! 3385: gn16ptr = getnames;
! 3386: #endif
! 3387: #ifdef SUPPORT_PCRE8
! 3388: cn8ptr = copynames8;
! 3389: gn8ptr = getnames8;
! 3390: #endif
1.1 misho 3391:
1.1.1.2 ! misho 3392: SET_PCRE_CALLOUT(callout);
1.1 misho 3393: first_callout = 1;
3394: last_callout_mark = NULL;
3395: callout_extra = 0;
3396: callout_count = 0;
3397: callout_fail_count = 999999;
3398: callout_fail_id = -1;
3399: show_malloc = 0;
1.1.1.2 ! misho 3400: options = 0;
1.1 misho 3401:
3402: if (extra != NULL) extra->flags &=
3403: ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3404:
3405: len = 0;
3406: for (;;)
3407: {
3408: if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3409: {
3410: if (len > 0) /* Reached EOF without hitting a newline */
3411: {
3412: fprintf(outfile, "\n");
3413: break;
3414: }
3415: done = 1;
3416: goto CONTINUE;
3417: }
3418: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3419: len = (int)strlen((char *)buffer);
3420: if (buffer[len-1] == '\n') break;
3421: }
3422:
3423: while (len > 0 && isspace(buffer[len-1])) len--;
3424: buffer[len] = 0;
3425: if (len == 0) break;
3426:
3427: p = buffer;
3428: while (isspace(*p)) p++;
3429:
3430: bptr = q = dbuffer;
3431: while ((c = *p++) != 0)
3432: {
3433: int i = 0;
3434: int n = 0;
3435:
1.1.1.2 ! misho 3436: /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
! 3437: In non-UTF mode, allow the value of the byte to fall through to later,
! 3438: where values greater than 127 are turned into UTF-8 when running in
! 3439: 16-bit mode. */
! 3440:
! 3441: if (c != '\\')
! 3442: {
! 3443: if (use_utf)
! 3444: {
! 3445: *q++ = c;
! 3446: continue;
! 3447: }
! 3448: }
! 3449:
! 3450: /* Handle backslash escapes */
! 3451:
! 3452: else switch ((c = *p++))
1.1 misho 3453: {
3454: case 'a': c = 7; break;
3455: case 'b': c = '\b'; break;
3456: case 'e': c = 27; break;
3457: case 'f': c = '\f'; break;
3458: case 'n': c = '\n'; break;
3459: case 'r': c = '\r'; break;
3460: case 't': c = '\t'; break;
3461: case 'v': c = '\v'; break;
3462:
3463: case '0': case '1': case '2': case '3':
3464: case '4': case '5': case '6': case '7':
3465: c -= '0';
3466: while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3467: c = c * 8 + *p++ - '0';
3468: break;
3469:
3470: case 'x':
3471: if (*p == '{')
3472: {
1.1.1.2 ! misho 3473: pcre_uint8 *pt = p;
1.1 misho 3474: c = 0;
3475:
3476: /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3477: when isxdigit() is a macro that refers to its argument more than
3478: once. This is banned by the C Standard, but apparently happens in at
3479: least one MacOS environment. */
3480:
3481: for (pt++; isxdigit(*pt); pt++)
1.1.1.2 ! misho 3482: {
! 3483: if (++i == 9)
! 3484: fprintf(outfile, "** Too many hex digits in \\x{...} item; "
! 3485: "using only the first eight.\n");
! 3486: else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
! 3487: }
1.1 misho 3488: if (*pt == '}')
3489: {
3490: p = pt + 1;
3491: break;
3492: }
1.1.1.2 ! misho 3493: /* Not correct form for \x{...}; fall through */
1.1 misho 3494: }
3495:
1.1.1.2 ! misho 3496: /* \x without {} always defines just one byte in 8-bit mode. This
! 3497: allows UTF-8 characters to be constructed byte by byte, and also allows
! 3498: invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
! 3499: Otherwise, pass it down to later code so that it can be turned into
! 3500: UTF-8 when running in 16-bit mode. */
1.1 misho 3501:
3502: c = 0;
3503: while (i++ < 2 && isxdigit(*p))
3504: {
3505: c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3506: p++;
3507: }
1.1.1.2 ! misho 3508: if (use_utf)
! 3509: {
! 3510: *q++ = c;
! 3511: continue;
! 3512: }
1.1 misho 3513: break;
3514:
3515: case 0: /* \ followed by EOF allows for an empty line */
3516: p--;
3517: continue;
3518:
3519: case '>':
3520: if (*p == '-')
3521: {
3522: start_offset_sign = -1;
3523: p++;
3524: }
3525: while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3526: start_offset *= start_offset_sign;
3527: continue;
3528:
3529: case 'A': /* Option setting */
3530: options |= PCRE_ANCHORED;
3531: continue;
3532:
3533: case 'B':
3534: options |= PCRE_NOTBOL;
3535: continue;
3536:
3537: case 'C':
3538: if (isdigit(*p)) /* Set copy string */
3539: {
3540: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3541: copystrings |= 1 << n;
3542: }
3543: else if (isalnum(*p))
3544: {
1.1.1.2 ! misho 3545: READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
1.1 misho 3546: }
3547: else if (*p == '+')
3548: {
3549: callout_extra = 1;
3550: p++;
3551: }
3552: else if (*p == '-')
3553: {
1.1.1.2 ! misho 3554: SET_PCRE_CALLOUT(NULL);
1.1 misho 3555: p++;
3556: }
3557: else if (*p == '!')
3558: {
3559: callout_fail_id = 0;
3560: p++;
3561: while(isdigit(*p))
3562: callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3563: callout_fail_count = 0;
3564: if (*p == '!')
3565: {
3566: p++;
3567: while(isdigit(*p))
3568: callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3569: }
3570: }
3571: else if (*p == '*')
3572: {
3573: int sign = 1;
3574: callout_data = 0;
3575: if (*(++p) == '-') { sign = -1; p++; }
3576: while(isdigit(*p))
3577: callout_data = callout_data * 10 + *p++ - '0';
3578: callout_data *= sign;
3579: callout_data_set = 1;
3580: }
3581: continue;
3582:
3583: #if !defined NODFA
3584: case 'D':
3585: #if !defined NOPOSIX
3586: if (posix || do_posix)
3587: printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3588: else
3589: #endif
3590: use_dfa = 1;
3591: continue;
3592: #endif
3593:
3594: #if !defined NODFA
3595: case 'F':
3596: options |= PCRE_DFA_SHORTEST;
3597: continue;
3598: #endif
3599:
3600: case 'G':
3601: if (isdigit(*p))
3602: {
3603: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3604: getstrings |= 1 << n;
3605: }
3606: else if (isalnum(*p))
3607: {
1.1.1.2 ! misho 3608: READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
1.1 misho 3609: }
3610: continue;
3611:
3612: case 'J':
3613: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3614: if (extra != NULL
3615: && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3616: && extra->executable_jit != NULL)
3617: {
1.1.1.2 ! misho 3618: if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
! 3619: jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
! 3620: PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
1.1 misho 3621: }
3622: continue;
3623:
3624: case 'L':
3625: getlist = 1;
3626: continue;
3627:
3628: case 'M':
3629: find_match_limit = 1;
3630: continue;
3631:
3632: case 'N':
3633: if ((options & PCRE_NOTEMPTY) != 0)
3634: options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3635: else
3636: options |= PCRE_NOTEMPTY;
3637: continue;
3638:
3639: case 'O':
3640: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3641: if (n > size_offsets_max)
3642: {
3643: size_offsets_max = n;
3644: free(offsets);
3645: use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3646: if (offsets == NULL)
3647: {
3648: printf("** Failed to get %d bytes of memory for offsets vector\n",
3649: (int)(size_offsets_max * sizeof(int)));
3650: yield = 1;
3651: goto EXIT;
3652: }
3653: }
3654: use_size_offsets = n;
3655: if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3656: continue;
3657:
3658: case 'P':
3659: options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3660: PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3661: continue;
3662:
3663: case 'Q':
3664: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3665: if (extra == NULL)
3666: {
3667: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3668: extra->flags = 0;
3669: }
3670: extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3671: extra->match_limit_recursion = n;
3672: continue;
3673:
3674: case 'q':
3675: while(isdigit(*p)) n = n * 10 + *p++ - '0';
3676: if (extra == NULL)
3677: {
3678: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3679: extra->flags = 0;
3680: }
3681: extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3682: extra->match_limit = n;
3683: continue;
3684:
3685: #if !defined NODFA
3686: case 'R':
3687: options |= PCRE_DFA_RESTART;
3688: continue;
3689: #endif
3690:
3691: case 'S':
3692: show_malloc = 1;
3693: continue;
3694:
3695: case 'Y':
3696: options |= PCRE_NO_START_OPTIMIZE;
3697: continue;
3698:
3699: case 'Z':
3700: options |= PCRE_NOTEOL;
3701: continue;
3702:
3703: case '?':
3704: options |= PCRE_NO_UTF8_CHECK;
3705: continue;
3706:
3707: case '<':
3708: {
3709: int x = check_newline(p, outfile);
3710: if (x == 0) goto NEXT_DATA;
3711: options |= x;
3712: while (*p++ != '>');
3713: }
3714: continue;
3715: }
1.1.1.2 ! misho 3716:
! 3717: /* We now have a character value in c that may be greater than 255. In
! 3718: 16-bit mode, we always convert characters to UTF-8 so that values greater
! 3719: than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
! 3720: convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
! 3721: mode must have come from \x{...} or octal constructs because values from
! 3722: \x.. get this far only in non-UTF mode. */
! 3723:
! 3724: #if !defined NOUTF || defined SUPPORT_PCRE16
! 3725: if (use_pcre16 || use_utf)
! 3726: {
! 3727: pcre_uint8 buff8[8];
! 3728: int ii, utn;
! 3729: utn = ord2utf8(c, buff8);
! 3730: for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
! 3731: }
! 3732: else
! 3733: #endif
! 3734: {
! 3735: if (c > 255)
! 3736: {
! 3737: fprintf(outfile, "** Character \\x{%x} is greater than 255 "
! 3738: "and UTF-8 mode is not enabled.\n", c);
! 3739: fprintf(outfile, "** Truncation will probably give the wrong "
! 3740: "result.\n");
! 3741: }
! 3742: *q++ = c;
! 3743: }
1.1 misho 3744: }
1.1.1.2 ! misho 3745:
! 3746: /* Reached end of subject string */
! 3747:
1.1 misho 3748: *q = 0;
3749: len = (int)(q - dbuffer);
3750:
3751: /* Move the data to the end of the buffer so that a read over the end of
3752: the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3753: we are using the POSIX interface, we must include the terminating zero. */
3754:
3755: #if !defined NOPOSIX
3756: if (posix || do_posix)
3757: {
3758: memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3759: bptr += buffer_size - len - 1;
3760: }
3761: else
3762: #endif
3763: {
3764: memmove(bptr + buffer_size - len, bptr, len);
3765: bptr += buffer_size - len;
3766: }
3767:
3768: if ((all_use_dfa || use_dfa) && find_match_limit)
3769: {
3770: printf("**Match limit not relevant for DFA matching: ignored\n");
3771: find_match_limit = 0;
3772: }
3773:
3774: /* Handle matching via the POSIX interface, which does not
3775: support timing or playing with the match limit or callout data. */
3776:
3777: #if !defined NOPOSIX
3778: if (posix || do_posix)
3779: {
3780: int rc;
3781: int eflags = 0;
3782: regmatch_t *pmatch = NULL;
3783: if (use_size_offsets > 0)
3784: pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3785: if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3786: if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3787: if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3788:
3789: rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3790:
3791: if (rc != 0)
3792: {
3793: (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3794: fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3795: }
3796: else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3797: != 0)
3798: {
3799: fprintf(outfile, "Matched with REG_NOSUB\n");
3800: }
3801: else
3802: {
3803: size_t i;
3804: for (i = 0; i < (size_t)use_size_offsets; i++)
3805: {
3806: if (pmatch[i].rm_so >= 0)
3807: {
3808: fprintf(outfile, "%2d: ", (int)i);
1.1.1.2 ! misho 3809: PCHARSV(dbuffer, pmatch[i].rm_so,
1.1 misho 3810: pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3811: fprintf(outfile, "\n");
3812: if (do_showcaprest || (i == 0 && do_showrest))
3813: {
3814: fprintf(outfile, "%2d+ ", (int)i);
1.1.1.2 ! misho 3815: PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1.1 misho 3816: outfile);
3817: fprintf(outfile, "\n");
3818: }
3819: }
3820: }
3821: }
3822: free(pmatch);
1.1.1.2 ! misho 3823: goto NEXT_DATA;
1.1 misho 3824: }
3825:
1.1.1.2 ! misho 3826: #endif /* !defined NOPOSIX */
! 3827:
1.1 misho 3828: /* Handle matching via the native interface - repeats for /g and /G */
3829:
1.1.1.2 ! misho 3830: #ifdef SUPPORT_PCRE16
! 3831: if (use_pcre16)
! 3832: {
! 3833: len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
! 3834: switch(len)
! 3835: {
! 3836: case -1:
! 3837: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
! 3838: "converted to UTF-16\n");
! 3839: goto NEXT_DATA;
! 3840:
! 3841: case -2:
! 3842: fprintf(outfile, "**Failed: character value greater than 0x10ffff "
! 3843: "cannot be converted to UTF-16\n");
! 3844: goto NEXT_DATA;
! 3845:
! 3846: case -3:
! 3847: fprintf(outfile, "**Failed: character value greater than 0xffff "
! 3848: "cannot be converted to 16-bit in non-UTF mode\n");
! 3849: goto NEXT_DATA;
! 3850:
! 3851: default:
! 3852: break;
! 3853: }
! 3854: bptr = (pcre_uint8 *)buffer16;
! 3855: }
! 3856: #endif
1.1 misho 3857:
3858: for (;; gmatched++) /* Loop for /g or /G */
3859: {
3860: markptr = NULL;
3861:
3862: if (timeitm > 0)
3863: {
3864: register int i;
3865: clock_t time_taken;
3866: clock_t start_time = clock();
3867:
3868: #if !defined NODFA
3869: if (all_use_dfa || use_dfa)
3870: {
3871: int workspace[1000];
3872: for (i = 0; i < timeitm; i++)
1.1.1.2 ! misho 3873: {
! 3874: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
! 3875: (options | g_notempty), use_offsets, use_size_offsets, workspace,
! 3876: (sizeof(workspace)/sizeof(int)));
! 3877: }
1.1 misho 3878: }
3879: else
3880: #endif
3881:
3882: for (i = 0; i < timeitm; i++)
1.1.1.2 ! misho 3883: {
! 3884: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
! 3885: (options | g_notempty), use_offsets, use_size_offsets);
! 3886: }
1.1 misho 3887: time_taken = clock() - start_time;
3888: fprintf(outfile, "Execute time %.4f milliseconds\n",
3889: (((double)time_taken * 1000.0) / (double)timeitm) /
3890: (double)CLOCKS_PER_SEC);
3891: }
3892:
3893: /* If find_match_limit is set, we want to do repeated matches with
3894: varying limits in order to find the minimum value for the match limit and
3895: for the recursion limit. The match limits are relevant only to the normal
3896: running of pcre_exec(), so disable the JIT optimization. This makes it
3897: possible to run the same set of tests with and without JIT externally
3898: requested. */
3899:
3900: if (find_match_limit)
3901: {
3902: if (extra == NULL)
3903: {
3904: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3905: extra->flags = 0;
3906: }
3907: else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3908:
3909: (void)check_match_limit(re, extra, bptr, len, start_offset,
3910: options|g_notempty, use_offsets, use_size_offsets,
3911: PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3912: PCRE_ERROR_MATCHLIMIT, "match()");
3913:
3914: count = check_match_limit(re, extra, bptr, len, start_offset,
3915: options|g_notempty, use_offsets, use_size_offsets,
3916: PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3917: PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3918: }
3919:
3920: /* If callout_data is set, use the interface with additional data */
3921:
3922: else if (callout_data_set)
3923: {
3924: if (extra == NULL)
3925: {
3926: extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3927: extra->flags = 0;
3928: }
3929: extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3930: extra->callout_data = &callout_data;
1.1.1.2 ! misho 3931: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
1.1 misho 3932: options | g_notempty, use_offsets, use_size_offsets);
3933: extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3934: }
3935:
3936: /* The normal case is just to do the match once, with the default
3937: value of match_limit. */
3938:
3939: #if !defined NODFA
3940: else if (all_use_dfa || use_dfa)
3941: {
3942: int workspace[1000];
1.1.1.2 ! misho 3943: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
! 3944: (options | g_notempty), use_offsets, use_size_offsets, workspace,
! 3945: (sizeof(workspace)/sizeof(int)));
1.1 misho 3946: if (count == 0)
3947: {
3948: fprintf(outfile, "Matched, but too many subsidiary matches\n");
3949: count = use_size_offsets/2;
3950: }
3951: }
3952: #endif
3953:
3954: else
3955: {
1.1.1.2 ! misho 3956: PCRE_EXEC(count, re, extra, bptr, len, start_offset,
! 3957: options | g_notempty, use_offsets, use_size_offsets);
1.1 misho 3958: if (count == 0)
3959: {
3960: fprintf(outfile, "Matched, but too many substrings\n");
3961: count = use_size_offsets/3;
3962: }
3963: }
3964:
3965: /* Matched */
3966:
3967: if (count >= 0)
3968: {
3969: int i, maxcount;
1.1.1.2 ! misho 3970: void *cnptr, *gnptr;
1.1 misho 3971:
3972: #if !defined NODFA
3973: if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3974: #endif
3975: maxcount = use_size_offsets/3;
3976:
3977: /* This is a check against a lunatic return value. */
3978:
3979: if (count > maxcount)
3980: {
3981: fprintf(outfile,
3982: "** PCRE error: returned count %d is too big for offset size %d\n",
3983: count, use_size_offsets);
3984: count = use_size_offsets/3;
3985: if (do_g || do_G)
3986: {
3987: fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3988: do_g = do_G = FALSE; /* Break g/G loop */
3989: }
3990: }
3991:
3992: /* do_allcaps requests showing of all captures in the pattern, to check
3993: unset ones at the end. */
3994:
3995: if (do_allcaps)
3996: {
1.1.1.2 ! misho 3997: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
! 3998: goto SKIP_DATA;
1.1 misho 3999: count++; /* Allow for full match */
4000: if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4001: }
4002:
4003: /* Output the captured substrings */
4004:
4005: for (i = 0; i < count * 2; i += 2)
4006: {
4007: if (use_offsets[i] < 0)
4008: {
4009: if (use_offsets[i] != -1)
4010: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4011: use_offsets[i], i);
4012: if (use_offsets[i+1] != -1)
4013: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4014: use_offsets[i+1], i+1);
4015: fprintf(outfile, "%2d: <unset>\n", i/2);
4016: }
4017: else
4018: {
4019: fprintf(outfile, "%2d: ", i/2);
1.1.1.2 ! misho 4020: PCHARSV(bptr, use_offsets[i],
1.1 misho 4021: use_offsets[i+1] - use_offsets[i], outfile);
4022: fprintf(outfile, "\n");
4023: if (do_showcaprest || (i == 0 && do_showrest))
4024: {
4025: fprintf(outfile, "%2d+ ", i/2);
1.1.1.2 ! misho 4026: PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
1.1 misho 4027: outfile);
4028: fprintf(outfile, "\n");
4029: }
4030: }
4031: }
4032:
1.1.1.2 ! misho 4033: if (markptr != NULL)
! 4034: {
! 4035: fprintf(outfile, "MK: ");
! 4036: PCHARSV(markptr, 0, -1, outfile);
! 4037: fprintf(outfile, "\n");
! 4038: }
1.1 misho 4039:
4040: for (i = 0; i < 32; i++)
4041: {
4042: if ((copystrings & (1 << i)) != 0)
4043: {
1.1.1.2 ! misho 4044: int rc;
1.1 misho 4045: char copybuffer[256];
1.1.1.2 ! misho 4046: PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
! 4047: copybuffer, sizeof(copybuffer));
1.1 misho 4048: if (rc < 0)
4049: fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4050: else
1.1.1.2 ! misho 4051: {
! 4052: fprintf(outfile, "%2dC ", i);
! 4053: PCHARSV(copybuffer, 0, rc, outfile);
! 4054: fprintf(outfile, " (%d)\n", rc);
! 4055: }
1.1 misho 4056: }
4057: }
4058:
1.1.1.2 ! misho 4059: cnptr = copynames;
! 4060: for (;;)
1.1 misho 4061: {
1.1.1.2 ! misho 4062: int rc;
1.1 misho 4063: char copybuffer[256];
1.1.1.2 ! misho 4064:
! 4065: if (use_pcre16)
! 4066: {
! 4067: if (*(pcre_uint16 *)cnptr == 0) break;
! 4068: }
! 4069: else
! 4070: {
! 4071: if (*(pcre_uint8 *)cnptr == 0) break;
! 4072: }
! 4073:
! 4074: PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
! 4075: cnptr, copybuffer, sizeof(copybuffer));
! 4076:
1.1 misho 4077: if (rc < 0)
1.1.1.2 ! misho 4078: {
! 4079: fprintf(outfile, "copy substring ");
! 4080: PCHARSV(cnptr, 0, -1, outfile);
! 4081: fprintf(outfile, " failed %d\n", rc);
! 4082: }
1.1 misho 4083: else
1.1.1.2 ! misho 4084: {
! 4085: fprintf(outfile, " C ");
! 4086: PCHARSV(copybuffer, 0, rc, outfile);
! 4087: fprintf(outfile, " (%d) ", rc);
! 4088: PCHARSV(cnptr, 0, -1, outfile);
! 4089: putc('\n', outfile);
! 4090: }
! 4091:
! 4092: cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
1.1 misho 4093: }
4094:
4095: for (i = 0; i < 32; i++)
4096: {
4097: if ((getstrings & (1 << i)) != 0)
4098: {
1.1.1.2 ! misho 4099: int rc;
1.1 misho 4100: const char *substring;
1.1.1.2 ! misho 4101: PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
1.1 misho 4102: if (rc < 0)
4103: fprintf(outfile, "get substring %d failed %d\n", i, rc);
4104: else
4105: {
1.1.1.2 ! misho 4106: fprintf(outfile, "%2dG ", i);
! 4107: PCHARSV(substring, 0, rc, outfile);
! 4108: fprintf(outfile, " (%d)\n", rc);
! 4109: PCRE_FREE_SUBSTRING(substring);
1.1 misho 4110: }
4111: }
4112: }
4113:
1.1.1.2 ! misho 4114: gnptr = getnames;
! 4115: for (;;)
1.1 misho 4116: {
1.1.1.2 ! misho 4117: int rc;
1.1 misho 4118: const char *substring;
1.1.1.2 ! misho 4119:
! 4120: if (use_pcre16)
! 4121: {
! 4122: if (*(pcre_uint16 *)gnptr == 0) break;
! 4123: }
! 4124: else
! 4125: {
! 4126: if (*(pcre_uint8 *)gnptr == 0) break;
! 4127: }
! 4128:
! 4129: PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
! 4130: gnptr, &substring);
1.1 misho 4131: if (rc < 0)
1.1.1.2 ! misho 4132: {
! 4133: fprintf(outfile, "get substring ");
! 4134: PCHARSV(gnptr, 0, -1, outfile);
! 4135: fprintf(outfile, " failed %d\n", rc);
! 4136: }
1.1 misho 4137: else
4138: {
1.1.1.2 ! misho 4139: fprintf(outfile, " G ");
! 4140: PCHARSV(substring, 0, rc, outfile);
! 4141: fprintf(outfile, " (%d) ", rc);
! 4142: PCHARSV(gnptr, 0, -1, outfile);
! 4143: PCRE_FREE_SUBSTRING(substring);
! 4144: putc('\n', outfile);
1.1 misho 4145: }
1.1.1.2 ! misho 4146:
! 4147: gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
1.1 misho 4148: }
4149:
4150: if (getlist)
4151: {
1.1.1.2 ! misho 4152: int rc;
1.1 misho 4153: const char **stringlist;
1.1.1.2 ! misho 4154: PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
1.1 misho 4155: if (rc < 0)
4156: fprintf(outfile, "get substring list failed %d\n", rc);
4157: else
4158: {
4159: for (i = 0; i < count; i++)
1.1.1.2 ! misho 4160: {
! 4161: fprintf(outfile, "%2dL ", i);
! 4162: PCHARSV(stringlist[i], 0, -1, outfile);
! 4163: putc('\n', outfile);
! 4164: }
1.1 misho 4165: if (stringlist[i] != NULL)
4166: fprintf(outfile, "string list not terminated by NULL\n");
1.1.1.2 ! misho 4167: PCRE_FREE_SUBSTRING_LIST(stringlist);
1.1 misho 4168: }
4169: }
4170: }
4171:
4172: /* There was a partial match */
4173:
4174: else if (count == PCRE_ERROR_PARTIAL)
4175: {
4176: if (markptr == NULL) fprintf(outfile, "Partial match");
1.1.1.2 ! misho 4177: else
! 4178: {
! 4179: fprintf(outfile, "Partial match, mark=");
! 4180: PCHARSV(markptr, 0, -1, outfile);
! 4181: }
1.1 misho 4182: if (use_size_offsets > 1)
4183: {
4184: fprintf(outfile, ": ");
1.1.1.2 ! misho 4185: PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
1.1 misho 4186: outfile);
4187: }
4188: fprintf(outfile, "\n");
4189: break; /* Out of the /g loop */
4190: }
4191:
4192: /* Failed to match. If this is a /g or /G loop and we previously set
4193: g_notempty after a null match, this is not necessarily the end. We want
4194: to advance the start offset, and continue. We won't be at the end of the
4195: string - that was checked before setting g_notempty.
4196:
4197: Complication arises in the case when the newline convention is "any",
4198: "crlf", or "anycrlf". If the previous match was at the end of a line
4199: terminated by CRLF, an advance of one character just passes the \r,
4200: whereas we should prefer the longer newline sequence, as does the code in
4201: pcre_exec(). Fudge the offset value to achieve this. We check for a
1.1.1.2 ! misho 4202: newline setting in the pattern; if none was set, use PCRE_CONFIG() to
1.1 misho 4203: find the default.
4204:
4205: Otherwise, in the case of UTF-8 matching, the advance must be one
4206: character, not one byte. */
4207:
4208: else
4209: {
4210: if (g_notempty != 0)
4211: {
4212: int onechar = 1;
1.1.1.2 ! misho 4213: unsigned int obits = ((REAL_PCRE *)re)->options;
1.1 misho 4214: use_offsets[0] = start_offset;
4215: if ((obits & PCRE_NEWLINE_BITS) == 0)
4216: {
4217: int d;
1.1.1.2 ! misho 4218: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
1.1 misho 4219: /* Note that these values are always the ASCII ones, even in
4220: EBCDIC environments. CR = 13, NL = 10. */
4221: obits = (d == 13)? PCRE_NEWLINE_CR :
4222: (d == 10)? PCRE_NEWLINE_LF :
4223: (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4224: (d == -2)? PCRE_NEWLINE_ANYCRLF :
4225: (d == -1)? PCRE_NEWLINE_ANY : 0;
4226: }
4227: if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4228: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4229: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4230: &&
4231: start_offset < len - 1 &&
1.1.1.2 ! misho 4232: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
! 4233: (use_pcre16?
! 4234: ((PCRE_SPTR16)bptr)[start_offset] == '\r'
! 4235: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
! 4236: :
! 4237: bptr[start_offset] == '\r'
! 4238: && bptr[start_offset + 1] == '\n')
! 4239: #elif defined SUPPORT_PCRE16
! 4240: ((PCRE_SPTR16)bptr)[start_offset] == '\r'
! 4241: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
! 4242: #else
! 4243: bptr[start_offset] == '\r'
! 4244: && bptr[start_offset + 1] == '\n'
! 4245: #endif
! 4246: )
1.1 misho 4247: onechar++;
1.1.1.2 ! misho 4248: else if (use_utf)
1.1 misho 4249: {
4250: while (start_offset + onechar < len)
4251: {
4252: if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4253: onechar++;
4254: }
4255: }
4256: use_offsets[1] = start_offset + onechar;
4257: }
4258: else
4259: {
4260: switch(count)
4261: {
4262: case PCRE_ERROR_NOMATCH:
4263: if (gmatched == 0)
4264: {
1.1.1.2 ! misho 4265: if (markptr == NULL)
! 4266: {
! 4267: fprintf(outfile, "No match\n");
! 4268: }
! 4269: else
! 4270: {
! 4271: fprintf(outfile, "No match, mark = ");
! 4272: PCHARSV(markptr, 0, -1, outfile);
! 4273: putc('\n', outfile);
! 4274: }
1.1 misho 4275: }
4276: break;
4277:
4278: case PCRE_ERROR_BADUTF8:
4279: case PCRE_ERROR_SHORTUTF8:
1.1.1.2 ! misho 4280: fprintf(outfile, "Error %d (%s UTF-%s string)", count,
! 4281: (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
! 4282: use_pcre16? "16" : "8");
1.1 misho 4283: if (use_size_offsets >= 2)
4284: fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4285: use_offsets[1]);
4286: fprintf(outfile, "\n");
4287: break;
4288:
1.1.1.2 ! misho 4289: case PCRE_ERROR_BADUTF8_OFFSET:
! 4290: fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
! 4291: use_pcre16? "16" : "8");
! 4292: break;
! 4293:
1.1 misho 4294: default:
1.1.1.2 ! misho 4295: if (count < 0 &&
! 4296: (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
1.1 misho 4297: fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4298: else
4299: fprintf(outfile, "Error %d (Unexpected value)\n", count);
4300: break;
4301: }
4302:
4303: break; /* Out of the /g loop */
4304: }
4305: }
4306:
4307: /* If not /g or /G we are done */
4308:
4309: if (!do_g && !do_G) break;
4310:
4311: /* If we have matched an empty string, first check to see if we are at
4312: the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4313: Perl's /g options does. This turns out to be rather cunning. First we set
4314: PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4315: same point. If this fails (picked up above) we advance to the next
4316: character. */
4317:
4318: g_notempty = 0;
4319:
4320: if (use_offsets[0] == use_offsets[1])
4321: {
4322: if (use_offsets[0] == len) break;
4323: g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4324: }
4325:
4326: /* For /g, update the start offset, leaving the rest alone */
4327:
4328: if (do_g) start_offset = use_offsets[1];
4329:
4330: /* For /G, update the pointer and length */
4331:
4332: else
4333: {
1.1.1.2 ! misho 4334: bptr += use_offsets[1] * CHAR_SIZE;
1.1 misho 4335: len -= use_offsets[1];
4336: }
4337: } /* End of loop for /g and /G */
4338:
4339: NEXT_DATA: continue;
4340: } /* End of loop for data lines */
4341:
4342: CONTINUE:
4343:
4344: #if !defined NOPOSIX
4345: if (posix || do_posix) regfree(&preg);
4346: #endif
4347:
4348: if (re != NULL) new_free(re);
1.1.1.2 ! misho 4349: if (extra != NULL)
! 4350: {
! 4351: PCRE_FREE_STUDY(extra);
! 4352: }
1.1 misho 4353: if (locale_set)
4354: {
4355: new_free((void *)tables);
4356: setlocale(LC_CTYPE, "C");
4357: locale_set = 0;
4358: }
4359: if (jit_stack != NULL)
4360: {
1.1.1.2 ! misho 4361: PCRE_JIT_STACK_FREE(jit_stack);
1.1 misho 4362: jit_stack = NULL;
4363: }
4364: }
4365:
4366: if (infile == stdin) fprintf(outfile, "\n");
4367:
4368: EXIT:
4369:
4370: if (infile != NULL && infile != stdin) fclose(infile);
4371: if (outfile != NULL && outfile != stdout) fclose(outfile);
4372:
4373: free(buffer);
4374: free(dbuffer);
4375: free(pbuffer);
4376: free(offsets);
4377:
1.1.1.2 ! misho 4378: #ifdef SUPPORT_PCRE16
! 4379: if (buffer16 != NULL) free(buffer16);
! 4380: #endif
! 4381:
1.1 misho 4382: return yield;
4383: }
4384:
4385: /* End of pcretest.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>