embedaddon/pcre/pcretest.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcretest.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:50:25 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_30, HEAD

pcre

1: /************************************************* 2: * PCRE testing program * 3: *************************************************/ 4: 5: /* This program was hacked up as a tester for PCRE. I really should have 6: written it more tidily in the first place. Will I ever learn? It has grown and 7: been extended and consequently is now rather, er, *very* untidy in places. The 8: addition of 16-bit support has made it even worse. :-( 9: 10: ----------------------------------------------------------------------------- 11: Redistribution and use in source and binary forms, with or without 12: modification, are permitted provided that the following conditions are met: 13: 14: * Redistributions of source code must retain the above copyright notice, 15: this list of conditions and the following disclaimer. 16: 17: * Redistributions in binary form must reproduce the above copyright 18: notice, this list of conditions and the following disclaimer in the 19: documentation and/or other materials provided with the distribution. 20: 21: * Neither the name of the University of Cambridge nor the names of its 22: contributors may be used to endorse or promote products derived from 23: this software without specific prior written permission. 24: 25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28: ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35: POSSIBILITY OF SUCH DAMAGE. 36: ----------------------------------------------------------------------------- 37: */ 38: 39: /* This program now supports the testing of both the 8-bit and 16-bit PCRE 40: libraries in a single program. This is different from the modules such as 41: pcre_compile.c in the library itself, which are compiled separately for each 42: mode. If both modes are enabled, for example, pcre_compile.c is compiled twice 43: (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is 44: compiled only once. Therefore, it must not make use of any of the macros from 45: pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does, 46: however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls 47: only supported library functions. */ 48: 49: 50: #ifdef HAVE_CONFIG_H 51: #include "config.h" 52: #endif 53: 54: #include <ctype.h> 55: #include <stdio.h> 56: #include <string.h> 57: #include <stdlib.h> 58: #include <time.h> 59: #include <locale.h> 60: #include <errno.h> 61: 62: #ifdef SUPPORT_LIBREADLINE 63: #ifdef HAVE_UNISTD_H 64: #include <unistd.h> 65: #endif 66: #include <readline/readline.h> 67: #include <readline/history.h> 68: #endif 69: 70: 71: /* A number of things vary for Windows builds. Originally, pcretest opened its 72: input and output without "b"; then I was told that "b" was needed in some 73: environments, so it was added for release 5.0 to both the input and output. 
(It 74: makes no difference on Unix-like systems.) Later I was told that it is wrong 75: for the input on Windows. I've now abstracted the modes into two macros that 76: are set here, to make it easier to fiddle with them, and removed "b" from the 77: input mode under Windows. */ 78: 79: #if defined(_WIN32) || defined(WIN32) 80: #include <io.h> /* For _setmode() */ 81: #include <fcntl.h> /* For _O_BINARY */ 82: #define INPUT_MODE "r" 83: #define OUTPUT_MODE "wb" 84: 85: #ifndef isatty 86: #define isatty _isatty /* This is what Windows calls them, I'm told, */ 87: #endif /* though in some environments they seem to */ 88: /* be already defined, hence the #ifndefs. */ 89: #ifndef fileno 90: #define fileno _fileno 91: #endif 92: 93: /* A user sent this fix for Borland Builder 5 under Windows. */ 94: 95: #ifdef __BORLANDC__ 96: #define _setmode(handle, mode) setmode(handle, mode) 97: #endif 98: 99: /* Not Windows */ 100: 101: #else 102: #include <sys/time.h> /* These two includes are needed */ 103: #include <sys/resource.h> /* for setrlimit(). */ 104: #define INPUT_MODE "rb" 105: #define OUTPUT_MODE "wb" 106: #endif 107: 108: #define PRIV(name) name 109: 110: /* We have to include pcre_internal.h because we need the internal info for 111: displaying the results of pcre_study() and we also need to know about the 112: internal macros, structures, and other internal data values; pcretest has 113: "inside information" compared to a program that strictly follows the PCRE API. 114: 115: Although pcre_internal.h does itself include pcre.h, we explicitly include it 116: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set 117: appropriately for an application, not for building PCRE. */ 118: 119: #include "pcre.h" 120: 121: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 122: /* Configure internal macros to 16 bit mode. 
*/ 123: #define COMPILE_PCRE16 124: #endif 125: 126: #include "pcre_internal.h" 127: 128: /* The pcre_printint() function, which prints the internal form of a compiled 129: regex, is held in a separate file so that (a) it can be compiled in either 130: 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c 131: when that is compiled in debug mode. */ 132: 133: #ifdef SUPPORT_PCRE8 134: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); 135: #endif 136: #ifdef SUPPORT_PCRE16 137: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); 138: #endif 139: 140: /* We need access to some of the data tables that PCRE uses. So as not to have 141: to keep two copies, we include the source file here, changing the names of the 142: external symbols to prevent clashes. */ 143: 144: #define PCRE_INCLUDED 145: 146: #include "pcre_tables.c" 147: 148: /* The definition of the macro PRINTABLE, which determines whether to print an 149: output character as-is or as a hex value when showing compiled patterns, is 150: the same as in the printint.src file. We use it here in cases when the locale 151: has not been explicitly changed, so as to get consistent output from systems 152: that differ in their output from isprint() even in the "C" locale. */ 153: 154: #ifdef EBCDIC 155: #define PRINTABLE(c) ((c) >= 64 && (c) < 255) 156: #else 157: #define PRINTABLE(c) ((c) >= 32 && (c) < 127) 158: #endif 159: 160: #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c)) 161: 162: /* Posix support is disabled in 16 bit only mode. */ 163: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX 164: #define NOPOSIX 165: #endif 166: 167: /* It is possible to compile this test program without including support for 168: testing the POSIX interface, though this is not available via the standard 169: Makefile. 
*/ 170: 171: #if !defined NOPOSIX 172: #include "pcreposix.h" 173: #endif 174: 175: /* It is also possible, originally for the benefit of a version that was 176: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define 177: NOUTF), without the interface to the DFA matcher (NODFA). In fact, we 178: automatically cut out the UTF support if PCRE is built without it. */ 179: 180: #ifndef SUPPORT_UTF 181: #ifndef NOUTF 182: #define NOUTF 183: #endif 184: #endif 185: 186: /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros 187: for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called 188: only from one place and is handled differently). I couldn't dream up any way of 189: using a single macro to do this in a generic way, because of the many different 190: argument requirements. We know that at least one of SUPPORT_PCRE8 and 191: SUPPORT_PCRE16 must be set. First define macros for each individual mode; then 192: use these in the definitions of generic macros. 193: 194: **** Special note about the PCHARSxxx macros: the address of the string to be 195: printed is always given as two arguments: a base address followed by an offset. 196: The base address is cast to the correct data size for 8 or 16 bit data; the 197: offset is in units of this size. If the string were given as base+offset in one 198: argument, the casting might be incorrectly applied. 
*/ 199: 200: #ifdef SUPPORT_PCRE8 201: 202: #define PCHARS8(lv, p, offset, len, f) \ 203: lv = pchars((pcre_uint8 *)(p) + offset, len, f) 204: 205: #define PCHARSV8(p, offset, len, f) \ 206: (void)pchars((pcre_uint8 *)(p) + offset, len, f) 207: 208: #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \ 209: p = read_capture_name8(p, cn8, re) 210: 211: #define STRLEN8(p) ((int)strlen((char *)p)) 212: 213: #define SET_PCRE_CALLOUT8(callout) \ 214: pcre_callout = callout 215: 216: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \ 217: pcre_assign_jit_stack(extra, callback, userdata) 218: 219: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \ 220: re = pcre_compile((char *)pat, options, error, erroffset, tables) 221: 222: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 223: namesptr, cbuffer, size) \ 224: rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \ 225: (char *)namesptr, cbuffer, size) 226: 227: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \ 228: rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size) 229: 230: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 231: offsets, size_offsets, workspace, size_workspace) \ 232: count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \ 233: offsets, size_offsets, workspace, size_workspace) 234: 235: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 236: offsets, size_offsets) \ 237: count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \ 238: offsets, size_offsets) 239: 240: #define PCRE_FREE_STUDY8(extra) \ 241: pcre_free_study(extra) 242: 243: #define PCRE_FREE_SUBSTRING8(substring) \ 244: pcre_free_substring(substring) 245: 246: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \ 247: pcre_free_substring_list(listptr) 248: 249: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 250: getnamesptr, subsptr) \ 251: rc = 
pcre_get_named_substring(re, (char *)bptr, offsets, count, \ 252: (char *)getnamesptr, subsptr) 253: 254: #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \ 255: n = pcre_get_stringnumber(re, (char *)ptr) 256: 257: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \ 258: rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr) 259: 260: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \ 261: rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr) 262: 263: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \ 264: rc = pcre_pattern_to_host_byte_order(re, extra, tables) 265: 266: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \ 267: pcre_printint(re, outfile, debug_lengths) 268: 269: #define PCRE_STUDY8(extra, re, options, error) \ 270: extra = pcre_study(re, options, error) 271: 272: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \ 273: pcre_jit_stack_alloc(startsize, maxsize) 274: 275: #define PCRE_JIT_STACK_FREE8(stack) \ 276: pcre_jit_stack_free(stack) 277: 278: #endif /* SUPPORT_PCRE8 */ 279: 280: /* -----------------------------------------------------------*/ 281: 282: #ifdef SUPPORT_PCRE16 283: 284: #define PCHARS16(lv, p, offset, len, f) \ 285: lv = pchars16((PCRE_SPTR16)(p) + offset, len, f) 286: 287: #define PCHARSV16(p, offset, len, f) \ 288: (void)pchars16((PCRE_SPTR16)(p) + offset, len, f) 289: 290: #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \ 291: p = read_capture_name16(p, cn16, re) 292: 293: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p)) 294: 295: #define SET_PCRE_CALLOUT16(callout) \ 296: pcre16_callout = (int (*)(pcre16_callout_block *))callout 297: 298: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \ 299: pcre16_assign_jit_stack((pcre16_extra *)extra, \ 300: (pcre16_jit_callback)callback, userdata) 301: 302: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \ 303: re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, 
\ 304: tables) 305: 306: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 307: namesptr, cbuffer, size) \ 308: rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 309: count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2) 310: 311: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \ 312: rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 313: (PCRE_UCHAR16 *)cbuffer, size/2) 314: 315: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 316: offsets, size_offsets, workspace, size_workspace) \ 317: count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \ 318: (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \ 319: workspace, size_workspace) 320: 321: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 322: offsets, size_offsets) \ 323: count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \ 324: len, start_offset, options, offsets, size_offsets) 325: 326: #define PCRE_FREE_STUDY16(extra) \ 327: pcre16_free_study((pcre16_extra *)extra) 328: 329: #define PCRE_FREE_SUBSTRING16(substring) \ 330: pcre16_free_substring((PCRE_SPTR16)substring) 331: 332: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \ 333: pcre16_free_substring_list((PCRE_SPTR16 *)listptr) 334: 335: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 336: getnamesptr, subsptr) \ 337: rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 338: count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr) 339: 340: #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \ 341: n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) 342: 343: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \ 344: rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 345: (PCRE_SPTR16 *)(void*)subsptr) 346: 347: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \ 348: rc = 
pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \ 349: (PCRE_SPTR16 **)(void*)listptr) 350: 351: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \ 352: rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \ 353: tables) 354: 355: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \ 356: pcre16_printint(re, outfile, debug_lengths) 357: 358: #define PCRE_STUDY16(extra, re, options, error) \ 359: extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error) 360: 361: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ 362: (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize) 363: 364: #define PCRE_JIT_STACK_FREE16(stack) \ 365: pcre16_jit_stack_free((pcre16_jit_stack *)stack) 366: 367: #endif /* SUPPORT_PCRE16 */ 368: 369: 370: /* ----- Both modes are supported; a runtime test is needed, except for 371: pcre_config(), and the JIT stack functions, when it doesn't matter which 372: version is called. ----- */ 373: 374: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 375: 376: #define CHAR_SIZE (use_pcre16? 2:1) 377: 378: #define PCHARS(lv, p, offset, len, f) \ 379: if (use_pcre16) \ 380: PCHARS16(lv, p, offset, len, f); \ 381: else \ 382: PCHARS8(lv, p, offset, len, f) 383: 384: #define PCHARSV(p, offset, len, f) \ 385: if (use_pcre16) \ 386: PCHARSV16(p, offset, len, f); \ 387: else \ 388: PCHARSV8(p, offset, len, f) 389: 390: #define READ_CAPTURE_NAME(p, cn8, cn16, re) \ 391: if (use_pcre16) \ 392: READ_CAPTURE_NAME16(p, cn8, cn16, re); \ 393: else \ 394: READ_CAPTURE_NAME8(p, cn8, cn16, re) 395: 396: #define SET_PCRE_CALLOUT(callout) \ 397: if (use_pcre16) \ 398: SET_PCRE_CALLOUT16(callout); \ 399: else \ 400: SET_PCRE_CALLOUT8(callout) 401: 402: #define STRLEN(p) (use_pcre16? 
STRLEN16(p) : STRLEN8(p)) 403: 404: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \ 405: if (use_pcre16) \ 406: PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \ 407: else \ 408: PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) 409: 410: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \ 411: if (use_pcre16) \ 412: PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \ 413: else \ 414: PCRE_COMPILE8(re, pat, options, error, erroffset, tables) 415: 416: #define PCRE_CONFIG pcre_config 417: 418: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 419: namesptr, cbuffer, size) \ 420: if (use_pcre16) \ 421: PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 422: namesptr, cbuffer, size); \ 423: else \ 424: PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 425: namesptr, cbuffer, size) 426: 427: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \ 428: if (use_pcre16) \ 429: PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \ 430: else \ 431: PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) 432: 433: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \ 434: offsets, size_offsets, workspace, size_workspace) \ 435: if (use_pcre16) \ 436: PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 437: offsets, size_offsets, workspace, size_workspace); \ 438: else \ 439: PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 440: offsets, size_offsets, workspace, size_workspace) 441: 442: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \ 443: offsets, size_offsets) \ 444: if (use_pcre16) \ 445: PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 446: offsets, size_offsets); \ 447: else \ 448: PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 449: offsets, size_offsets) 450: 451: #define PCRE_FREE_STUDY(extra) \ 452: if (use_pcre16) \ 453: 
PCRE_FREE_STUDY16(extra); \ 454: else \ 455: PCRE_FREE_STUDY8(extra) 456: 457: #define PCRE_FREE_SUBSTRING(substring) \ 458: if (use_pcre16) \ 459: PCRE_FREE_SUBSTRING16(substring); \ 460: else \ 461: PCRE_FREE_SUBSTRING8(substring) 462: 463: #define PCRE_FREE_SUBSTRING_LIST(listptr) \ 464: if (use_pcre16) \ 465: PCRE_FREE_SUBSTRING_LIST16(listptr); \ 466: else \ 467: PCRE_FREE_SUBSTRING_LIST8(listptr) 468: 469: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 470: getnamesptr, subsptr) \ 471: if (use_pcre16) \ 472: PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 473: getnamesptr, subsptr); \ 474: else \ 475: PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 476: getnamesptr, subsptr) 477: 478: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \ 479: if (use_pcre16) \ 480: PCRE_GET_STRINGNUMBER16(n, rc, ptr); \ 481: else \ 482: PCRE_GET_STRINGNUMBER8(n, rc, ptr) 483: 484: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \ 485: if (use_pcre16) \ 486: PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \ 487: else \ 488: PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr) 489: 490: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \ 491: if (use_pcre16) \ 492: PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \ 493: else \ 494: PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) 495: 496: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \ 497: (use_pcre16 ? \ 498: PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ 499: :PCRE_JIT_STACK_ALLOC8(startsize, maxsize)) 500: 501: #define PCRE_JIT_STACK_FREE(stack) \ 502: if (use_pcre16) \ 503: PCRE_JIT_STACK_FREE16(stack); \ 504: else \ 505: PCRE_JIT_STACK_FREE8(stack) 506: 507: #define PCRE_MAKETABLES \ 508: (use_pcre16? 
pcre16_maketables() : pcre_maketables()) 509: 510: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \ 511: if (use_pcre16) \ 512: PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \ 513: else \ 514: PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) 515: 516: #define PCRE_PRINTINT(re, outfile, debug_lengths) \ 517: if (use_pcre16) \ 518: PCRE_PRINTINT16(re, outfile, debug_lengths); \ 519: else \ 520: PCRE_PRINTINT8(re, outfile, debug_lengths) 521: 522: #define PCRE_STUDY(extra, re, options, error) \ 523: if (use_pcre16) \ 524: PCRE_STUDY16(extra, re, options, error); \ 525: else \ 526: PCRE_STUDY8(extra, re, options, error) 527: 528: /* ----- Only 8-bit mode is supported ----- */ 529: 530: #elif defined SUPPORT_PCRE8 531: #define CHAR_SIZE 1 532: #define PCHARS PCHARS8 533: #define PCHARSV PCHARSV8 534: #define READ_CAPTURE_NAME READ_CAPTURE_NAME8 535: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8 536: #define STRLEN STRLEN8 537: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8 538: #define PCRE_COMPILE PCRE_COMPILE8 539: #define PCRE_CONFIG pcre_config 540: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8 541: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8 542: #define PCRE_DFA_EXEC PCRE_DFA_EXEC8 543: #define PCRE_EXEC PCRE_EXEC8 544: #define PCRE_FREE_STUDY PCRE_FREE_STUDY8 545: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8 546: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8 547: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8 548: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8 549: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8 550: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8 551: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8 552: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8 553: #define PCRE_MAKETABLES pcre_maketables() 554: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8 555: #define PCRE_PRINTINT PCRE_PRINTINT8 556: #define PCRE_STUDY 
PCRE_STUDY8 557: 558: /* ----- Only 16-bit mode is supported ----- */ 559: 560: #else 561: #define CHAR_SIZE 2 562: #define PCHARS PCHARS16 563: #define PCHARSV PCHARSV16 564: #define READ_CAPTURE_NAME READ_CAPTURE_NAME16 565: #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16 566: #define STRLEN STRLEN16 567: #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16 568: #define PCRE_COMPILE PCRE_COMPILE16 569: #define PCRE_CONFIG pcre16_config 570: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16 571: #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16 572: #define PCRE_DFA_EXEC PCRE_DFA_EXEC16 573: #define PCRE_EXEC PCRE_EXEC16 574: #define PCRE_FREE_STUDY PCRE_FREE_STUDY16 575: #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16 576: #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16 577: #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16 578: #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16 579: #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16 580: #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16 581: #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16 582: #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16 583: #define PCRE_MAKETABLES pcre16_maketables() 584: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16 585: #define PCRE_PRINTINT PCRE_PRINTINT16 586: #define PCRE_STUDY PCRE_STUDY16 587: #endif 588: 589: /* ----- End of mode-specific function call macros ----- */ 590: 591: 592: /* Other parameters */ 593: 594: #ifndef CLOCKS_PER_SEC 595: #ifdef CLK_TCK 596: #define CLOCKS_PER_SEC CLK_TCK 597: #else 598: #define CLOCKS_PER_SEC 100 599: #endif 600: #endif 601: 602: /* This is the default loop count for timing. 
*/ 603: 604: #define LOOPREPEAT 500000 605: 606: /* Static variables */ 607: 608: static FILE *outfile; 609: static int log_store = 0; 610: static int callout_count; 611: static int callout_extra; 612: static int callout_fail_count; 613: static int callout_fail_id; 614: static int debug_lengths; 615: static int first_callout; 616: static int locale_set = 0; 617: static int show_malloc; 618: static int use_utf; 619: static size_t gotten_store; 620: static size_t first_gotten_store = 0; 621: static const unsigned char *last_callout_mark = NULL; 622: 623: /* The buffers grow automatically if very long input lines are encountered. */ 624: 625: static int buffer_size = 50000; 626: static pcre_uint8 *buffer = NULL; 627: static pcre_uint8 *dbuffer = NULL; 628: static pcre_uint8 *pbuffer = NULL; 629: 630: /* Another buffer is needed for translation to 16-bit character strings. It will 631: be obtained and extended as required. */ 632: 633: #ifdef SUPPORT_PCRE16 634: static int buffer16_size = 0; 635: static pcre_uint16 *buffer16 = NULL; 636: 637: #ifdef SUPPORT_PCRE8 638: 639: /* We need the table of operator lengths that is used for 16-bit compiling, in 640: order to swap bytes in a pattern for saving/reloading testing. Luckily, the 641: data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted 642: appropriately for the 16-bit world. Just as a safety check, make sure that 643: COMPILE_PCRE16 is *not* set. 
*/ 644: 645: #ifdef COMPILE_PCRE16 646: #error COMPILE_PCRE16 must not be set when compiling pcretest.c 647: #endif 648: 649: #if LINK_SIZE == 2 650: #undef LINK_SIZE 651: #define LINK_SIZE 1 652: #elif LINK_SIZE == 3 || LINK_SIZE == 4 653: #undef LINK_SIZE 654: #define LINK_SIZE 2 655: #else 656: #error LINK_SIZE must be either 2, 3, or 4 657: #endif 658: 659: #undef IMM2_SIZE 660: #define IMM2_SIZE 1 661: 662: #endif /* SUPPORT_PCRE8 */ 663: 664: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; 665: #endif /* SUPPORT_PCRE16 */ 666: 667: /* If we have 8-bit support, default use_pcre16 to false; if there is also 668: 16-bit support, it can be changed by an option. If there is no 8-bit support, 669: there must be 16-bit support, so default it to 1. */ 670: 671: #ifdef SUPPORT_PCRE8 672: static int use_pcre16 = 0; 673: #else 674: static int use_pcre16 = 1; 675: #endif 676: 677: /* Textual explanations for runtime error codes */ 678: 679: static const char *errtexts[] = { 680: NULL, /* 0 is no error */ 681: NULL, /* NOMATCH is handled specially */ 682: "NULL argument passed", 683: "bad option value", 684: "magic number missing", 685: "unknown opcode - pattern overwritten?", 686: "no more memory", 687: NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */ 688: "match limit exceeded", 689: "callout error code", 690: NULL, /* BADUTF8/16 is handled specially */ 691: NULL, /* BADUTF8/16 offset is handled specially */ 692: NULL, /* PARTIAL is handled specially */ 693: "not used - internal error", 694: "internal error - pattern overwritten?", 695: "bad count value", 696: "item unsupported for DFA matching", 697: "backreference condition or recursion test not supported for DFA matching", 698: "match limit not supported for DFA matching", 699: "workspace size exceeded in DFA matching", 700: "too much recursion for DFA matching", 701: "recursion limit exceeded", 702: "not used - internal error", 703: "invalid combination of newline options", 704: "bad offset 
value", 705: NULL, /* SHORTUTF8/16 is handled specially */ 706: "nested recursion at the same subject position", 707: "JIT stack limit reached", 708: "pattern compiled in wrong mode: 8-bit/16-bit error" 709: }; 710: 711: 712: /************************************************* 713: * Alternate character tables * 714: *************************************************/ 715: 716: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby 717: using the default tables of the library. However, the T option can be used to 718: select alternate sets of tables, for different kinds of testing. Note also that 719: the L (locale) option also adjusts the tables. */ 720: 721: /* This is the set of tables distributed as default with PCRE. It recognizes 722: only ASCII characters. */ 723: 724: static const pcre_uint8 tables0[] = { 725: 726: /* This table is a lower casing table. */ 727: 728: 0, 1, 2, 3, 4, 5, 6, 7, 729: 8, 9, 10, 11, 12, 13, 14, 15, 730: 16, 17, 18, 19, 20, 21, 22, 23, 731: 24, 25, 26, 27, 28, 29, 30, 31, 732: 32, 33, 34, 35, 36, 37, 38, 39, 733: 40, 41, 42, 43, 44, 45, 46, 47, 734: 48, 49, 50, 51, 52, 53, 54, 55, 735: 56, 57, 58, 59, 60, 61, 62, 63, 736: 64, 97, 98, 99,100,101,102,103, 737: 104,105,106,107,108,109,110,111, 738: 112,113,114,115,116,117,118,119, 739: 120,121,122, 91, 92, 93, 94, 95, 740: 96, 97, 98, 99,100,101,102,103, 741: 104,105,106,107,108,109,110,111, 742: 112,113,114,115,116,117,118,119, 743: 120,121,122,123,124,125,126,127, 744: 128,129,130,131,132,133,134,135, 745: 136,137,138,139,140,141,142,143, 746: 144,145,146,147,148,149,150,151, 747: 152,153,154,155,156,157,158,159, 748: 160,161,162,163,164,165,166,167, 749: 168,169,170,171,172,173,174,175, 750: 176,177,178,179,180,181,182,183, 751: 184,185,186,187,188,189,190,191, 752: 192,193,194,195,196,197,198,199, 753: 200,201,202,203,204,205,206,207, 754: 208,209,210,211,212,213,214,215, 755: 216,217,218,219,220,221,222,223, 756: 224,225,226,227,228,229,230,231, 757: 
232,233,234,235,236,237,238,239, 758: 240,241,242,243,244,245,246,247, 759: 248,249,250,251,252,253,254,255, 760: 761: /* This table is a case flipping table. */ 762: 763: 0, 1, 2, 3, 4, 5, 6, 7, 764: 8, 9, 10, 11, 12, 13, 14, 15, 765: 16, 17, 18, 19, 20, 21, 22, 23, 766: 24, 25, 26, 27, 28, 29, 30, 31, 767: 32, 33, 34, 35, 36, 37, 38, 39, 768: 40, 41, 42, 43, 44, 45, 46, 47, 769: 48, 49, 50, 51, 52, 53, 54, 55, 770: 56, 57, 58, 59, 60, 61, 62, 63, 771: 64, 97, 98, 99,100,101,102,103, 772: 104,105,106,107,108,109,110,111, 773: 112,113,114,115,116,117,118,119, 774: 120,121,122, 91, 92, 93, 94, 95, 775: 96, 65, 66, 67, 68, 69, 70, 71, 776: 72, 73, 74, 75, 76, 77, 78, 79, 777: 80, 81, 82, 83, 84, 85, 86, 87, 778: 88, 89, 90,123,124,125,126,127, 779: 128,129,130,131,132,133,134,135, 780: 136,137,138,139,140,141,142,143, 781: 144,145,146,147,148,149,150,151, 782: 152,153,154,155,156,157,158,159, 783: 160,161,162,163,164,165,166,167, 784: 168,169,170,171,172,173,174,175, 785: 176,177,178,179,180,181,182,183, 786: 184,185,186,187,188,189,190,191, 787: 192,193,194,195,196,197,198,199, 788: 200,201,202,203,204,205,206,207, 789: 208,209,210,211,212,213,214,215, 790: 216,217,218,219,220,221,222,223, 791: 224,225,226,227,228,229,230,231, 792: 232,233,234,235,236,237,238,239, 793: 240,241,242,243,244,245,246,247, 794: 248,249,250,251,252,253,254,255, 795: 796: /* This table contains bit maps for various character classes. Each map is 32 797: bytes long and the bits run from the least significant end of each byte. The 798: classes that have their own maps are: space, xdigit, digit, upper, lower, word, 799: graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 800: 801: 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 802: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 803: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 804: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 805: 806: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 807: 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 808: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 809: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 810: 811: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 812: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 813: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 814: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 815: 816: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 817: 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 818: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 819: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 820: 821: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 822: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 823: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 824: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 825: 826: 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 827: 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 828: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 829: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 830: 831: 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 832: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 833: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 834: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 835: 836: 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 837: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 838: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 839: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 840: 841: 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 842: 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 843: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 844: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 845: 846: 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 847: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 848: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 849: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 850: 851: /* This table identifies various classes of character by individual bits: 852: 0x01 white space 
character 853: 0x02 letter 854: 0x04 decimal digit 855: 0x08 hexadecimal digit 856: 0x10 alphanumeric or '_' 857: 0x80 regular expression metacharacter or binary zero 858: */ 859: 860: 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 861: 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ 862: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 863: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 864: 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 865: 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 866: 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 867: 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 868: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 869: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 870: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 871: 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 872: 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 873: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 874: 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 875: 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 876: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 877: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 878: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 879: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 880: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 881: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 882: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 883: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 884: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 885: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 886: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 887: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 888: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 889: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 890: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 
240-247 */ 891: 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 892: 893: /* This is a set of tables that came orginally from a Windows user. It seems to 894: be at least an approximation of ISO 8859. In particular, there are characters 895: greater than 128 that are marked as spaces, letters, etc. */ 896: 897: static const pcre_uint8 tables1[] = { 898: 0,1,2,3,4,5,6,7, 899: 8,9,10,11,12,13,14,15, 900: 16,17,18,19,20,21,22,23, 901: 24,25,26,27,28,29,30,31, 902: 32,33,34,35,36,37,38,39, 903: 40,41,42,43,44,45,46,47, 904: 48,49,50,51,52,53,54,55, 905: 56,57,58,59,60,61,62,63, 906: 64,97,98,99,100,101,102,103, 907: 104,105,106,107,108,109,110,111, 908: 112,113,114,115,116,117,118,119, 909: 120,121,122,91,92,93,94,95, 910: 96,97,98,99,100,101,102,103, 911: 104,105,106,107,108,109,110,111, 912: 112,113,114,115,116,117,118,119, 913: 120,121,122,123,124,125,126,127, 914: 128,129,130,131,132,133,134,135, 915: 136,137,138,139,140,141,142,143, 916: 144,145,146,147,148,149,150,151, 917: 152,153,154,155,156,157,158,159, 918: 160,161,162,163,164,165,166,167, 919: 168,169,170,171,172,173,174,175, 920: 176,177,178,179,180,181,182,183, 921: 184,185,186,187,188,189,190,191, 922: 224,225,226,227,228,229,230,231, 923: 232,233,234,235,236,237,238,239, 924: 240,241,242,243,244,245,246,215, 925: 248,249,250,251,252,253,254,223, 926: 224,225,226,227,228,229,230,231, 927: 232,233,234,235,236,237,238,239, 928: 240,241,242,243,244,245,246,247, 929: 248,249,250,251,252,253,254,255, 930: 0,1,2,3,4,5,6,7, 931: 8,9,10,11,12,13,14,15, 932: 16,17,18,19,20,21,22,23, 933: 24,25,26,27,28,29,30,31, 934: 32,33,34,35,36,37,38,39, 935: 40,41,42,43,44,45,46,47, 936: 48,49,50,51,52,53,54,55, 937: 56,57,58,59,60,61,62,63, 938: 64,97,98,99,100,101,102,103, 939: 104,105,106,107,108,109,110,111, 940: 112,113,114,115,116,117,118,119, 941: 120,121,122,91,92,93,94,95, 942: 96,65,66,67,68,69,70,71, 943: 72,73,74,75,76,77,78,79, 944: 80,81,82,83,84,85,86,87, 945: 88,89,90,123,124,125,126,127, 946: 
128,129,130,131,132,133,134,135, 947: 136,137,138,139,140,141,142,143, 948: 144,145,146,147,148,149,150,151, 949: 152,153,154,155,156,157,158,159, 950: 160,161,162,163,164,165,166,167, 951: 168,169,170,171,172,173,174,175, 952: 176,177,178,179,180,181,182,183, 953: 184,185,186,187,188,189,190,191, 954: 224,225,226,227,228,229,230,231, 955: 232,233,234,235,236,237,238,239, 956: 240,241,242,243,244,245,246,215, 957: 248,249,250,251,252,253,254,223, 958: 192,193,194,195,196,197,198,199, 959: 200,201,202,203,204,205,206,207, 960: 208,209,210,211,212,213,214,247, 961: 216,217,218,219,220,221,222,255, 962: 0,62,0,0,1,0,0,0, 963: 0,0,0,0,0,0,0,0, 964: 32,0,0,0,1,0,0,0, 965: 0,0,0,0,0,0,0,0, 966: 0,0,0,0,0,0,255,3, 967: 126,0,0,0,126,0,0,0, 968: 0,0,0,0,0,0,0,0, 969: 0,0,0,0,0,0,0,0, 970: 0,0,0,0,0,0,255,3, 971: 0,0,0,0,0,0,0,0, 972: 0,0,0,0,0,0,12,2, 973: 0,0,0,0,0,0,0,0, 974: 0,0,0,0,0,0,0,0, 975: 254,255,255,7,0,0,0,0, 976: 0,0,0,0,0,0,0,0, 977: 255,255,127,127,0,0,0,0, 978: 0,0,0,0,0,0,0,0, 979: 0,0,0,0,254,255,255,7, 980: 0,0,0,0,0,4,32,4, 981: 0,0,0,128,255,255,127,255, 982: 0,0,0,0,0,0,255,3, 983: 254,255,255,135,254,255,255,7, 984: 0,0,0,0,0,4,44,6, 985: 255,255,127,255,255,255,127,255, 986: 0,0,0,0,254,255,255,255, 987: 255,255,255,255,255,255,255,127, 988: 0,0,0,0,254,255,255,255, 989: 255,255,255,255,255,255,255,255, 990: 0,2,0,0,255,255,255,255, 991: 255,255,255,255,255,255,255,127, 992: 0,0,0,0,255,255,255,255, 993: 255,255,255,255,255,255,255,255, 994: 0,0,0,0,254,255,0,252, 995: 1,0,0,248,1,0,0,120, 996: 0,0,0,0,254,255,255,255, 997: 0,0,128,0,0,0,128,0, 998: 255,255,255,255,0,0,0,0, 999: 0,0,0,0,0,0,0,128, 1000: 255,255,255,255,0,0,0,0, 1001: 0,0,0,0,0,0,0,0, 1002: 128,0,0,0,0,0,0,0, 1003: 0,1,1,0,1,1,0,0, 1004: 0,0,0,0,0,0,0,0, 1005: 0,0,0,0,0,0,0,0, 1006: 1,0,0,0,128,0,0,0, 1007: 128,128,128,128,0,0,128,0, 1008: 28,28,28,28,28,28,28,28, 1009: 28,28,0,0,0,0,0,128, 1010: 0,26,26,26,26,26,26,18, 1011: 18,18,18,18,18,18,18,18, 1012: 18,18,18,18,18,18,18,18, 
1013: 18,18,18,128,128,0,128,16, 1014: 0,26,26,26,26,26,26,18, 1015: 18,18,18,18,18,18,18,18, 1016: 18,18,18,18,18,18,18,18, 1017: 18,18,18,128,128,0,0,0, 1018: 0,0,0,0,0,1,0,0, 1019: 0,0,0,0,0,0,0,0, 1020: 0,0,0,0,0,0,0,0, 1021: 0,0,0,0,0,0,0,0, 1022: 1,0,0,0,0,0,0,0, 1023: 0,0,18,0,0,0,0,0, 1024: 0,0,20,20,0,18,0,0, 1025: 0,20,18,0,0,0,0,0, 1026: 18,18,18,18,18,18,18,18, 1027: 18,18,18,18,18,18,18,18, 1028: 18,18,18,18,18,18,18,0, 1029: 18,18,18,18,18,18,18,18, 1030: 18,18,18,18,18,18,18,18, 1031: 18,18,18,18,18,18,18,18, 1032: 18,18,18,18,18,18,18,0, 1033: 18,18,18,18,18,18,18,18 1034: }; 1035: 1036: 1037: 1038: 1039: #ifndef HAVE_STRERROR 1040: /************************************************* 1041: * Provide strerror() for non-ANSI libraries * 1042: *************************************************/ 1043: 1044: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 1045: in their libraries, but can provide the same facility by this simple 1046: alternative function. */ 1047: 1048: extern int sys_nerr; 1049: extern char *sys_errlist[]; 1050: 1051: char * 1052: strerror(int n) 1053: { 1054: if (n < 0 || n >= sys_nerr) return "unknown error number"; 1055: return sys_errlist[n]; 1056: } 1057: #endif /* HAVE_STRERROR */ 1058: 1059: 1060: /************************************************* 1061: * JIT memory callback * 1062: *************************************************/ 1063: 1064: static pcre_jit_stack* jit_callback(void *arg) 1065: { 1066: return (pcre_jit_stack *)arg; 1067: } 1068: 1069: 1070: #if !defined NOUTF || defined SUPPORT_PCRE16 1071: /************************************************* 1072: * Convert UTF-8 string to value * 1073: *************************************************/ 1074: 1075: /* This function takes one or more bytes that represents a UTF-8 character, 1076: and returns the value of the character. 
1077: 1078: Argument: 1079: utf8bytes a pointer to the byte vector 1080: vptr a pointer to an int to receive the value 1081: 1082: Returns: > 0 => the number of bytes consumed 1083: -6 to 0 => malformed UTF-8 character at offset = (-return) 1084: */ 1085: 1086: static int 1087: utf82ord(pcre_uint8 *utf8bytes, int *vptr) 1088: { 1089: int c = *utf8bytes++; 1090: int d = c; 1091: int i, j, s; 1092: 1093: for (i = -1; i < 6; i++) /* i is number of additional bytes */ 1094: { 1095: if ((d & 0x80) == 0) break; 1096: d <<= 1; 1097: } 1098: 1099: if (i == -1) { *vptr = c; return 1; } /* ascii character */ 1100: if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 1101: 1102: /* i now has a value in the range 1-5 */ 1103: 1104: s = 6*i; 1105: d = (c & utf8_table3[i]) << s; 1106: 1107: for (j = 0; j < i; j++) 1108: { 1109: c = *utf8bytes++; 1110: if ((c & 0xc0) != 0x80) return -(j+1); 1111: s -= 6; 1112: d |= (c & 0x3f) << s; 1113: } 1114: 1115: /* Check that encoding was the correct unique one */ 1116: 1117: for (j = 0; j < utf8_table1_size; j++) 1118: if (d <= utf8_table1[j]) break; 1119: if (j != i) return -(i+1); 1120: 1121: /* Valid value */ 1122: 1123: *vptr = d; 1124: return i+1; 1125: } 1126: #endif /* NOUTF || SUPPORT_PCRE16 */ 1127: 1128: 1129: 1130: #if !defined NOUTF || defined SUPPORT_PCRE16 1131: /************************************************* 1132: * Convert character value to UTF-8 * 1133: *************************************************/ 1134: 1135: /* This function takes an integer value in the range 0 - 0x7fffffff 1136: and encodes it as a UTF-8 character in 0 to 6 bytes. 
1137: 1138: Arguments: 1139: cvalue the character value 1140: utf8bytes pointer to buffer for result - at least 6 bytes long 1141: 1142: Returns: number of characters placed in the buffer 1143: */ 1144: 1145: static int 1146: ord2utf8(int cvalue, pcre_uint8 *utf8bytes) 1147: { 1148: register int i, j; 1149: for (i = 0; i < utf8_table1_size; i++) 1150: if (cvalue <= utf8_table1[i]) break; 1151: utf8bytes += i; 1152: for (j = i; j > 0; j--) 1153: { 1154: *utf8bytes-- = 0x80 | (cvalue & 0x3f); 1155: cvalue >>= 6; 1156: } 1157: *utf8bytes = utf8_table2[i] | cvalue; 1158: return i + 1; 1159: } 1160: #endif 1161: 1162: 1163: #ifdef SUPPORT_PCRE16 1164: /************************************************* 1165: * Convert a string to 16-bit * 1166: *************************************************/ 1167: 1168: /* In non-UTF mode, the space needed for a 16-bit string is exactly double the 1169: 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than 1170: double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 1171: in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The 1172: result is always left in buffer16. 1173: 1174: Note that this function does not object to surrogate values. This is 1175: deliberate; it makes it possible to construct UTF-16 strings that are invalid, 1176: for the purpose of testing that they are correctly faulted. 1177: 1178: Patterns to be converted are either plain ASCII or UTF-8; data lines are always 1179: in UTF-8 so that values greater than 255 can be handled. 
1180: 1181: Arguments: 1182: data TRUE if converting a data line; FALSE for a regex 1183: p points to a byte string 1184: utf true if UTF-8 (to be converted to UTF-16) 1185: len number of bytes in the string (excluding trailing zero) 1186: 1187: Returns: number of 16-bit data items used (excluding trailing zero) 1188: OR -1 if a UTF-8 string is malformed 1189: OR -2 if a value > 0x10ffff is encountered 1190: OR -3 if a value > 0xffff is encountered when not in UTF mode 1191: */ 1192: 1193: static int 1194: to16(int data, pcre_uint8 *p, int utf, int len) 1195: { 1196: pcre_uint16 *pp; 1197: 1198: if (buffer16_size < 2*len + 2) 1199: { 1200: if (buffer16 != NULL) free(buffer16); 1201: buffer16_size = 2*len + 2; 1202: buffer16 = (pcre_uint16 *)malloc(buffer16_size); 1203: if (buffer16 == NULL) 1204: { 1205: fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size); 1206: exit(1); 1207: } 1208: } 1209: 1210: pp = buffer16; 1211: 1212: if (!utf && !data) 1213: { 1214: while (len-- > 0) *pp++ = *p++; 1215: } 1216: 1217: else 1218: { 1219: int c = 0; 1220: while (len > 0) 1221: { 1222: int chlen = utf82ord(p, &c); 1223: if (chlen <= 0) return -1; 1224: if (c > 0x10ffff) return -2; 1225: p += chlen; 1226: len -= chlen; 1227: if (c < 0x10000) *pp++ = c; else 1228: { 1229: if (!utf) return -3; 1230: c -= 0x10000; 1231: *pp++ = 0xD800 | (c >> 10); 1232: *pp++ = 0xDC00 | (c & 0x3ff); 1233: } 1234: } 1235: } 1236: 1237: *pp = 0; 1238: return pp - buffer16; 1239: } 1240: #endif 1241: 1242: 1243: /************************************************* 1244: * Read or extend an input line * 1245: *************************************************/ 1246: 1247: /* Input lines are read into buffer, but both patterns and data lines can be 1248: continued over multiple input lines. In addition, if the buffer fills up, we 1249: want to automatically expand it so as to be able to handle extremely large 1250: lines that are needed for certain stress tests. 
When the input buffer is 1251: expanded, the other two buffers must also be expanded likewise, and the 1252: contents of pbuffer, which are a copy of the input for callouts, must be 1253: preserved (for when expansion happens for a data line). This is not the most 1254: optimal way of handling this, but hey, this is just a test program! 1255: 1256: Arguments: 1257: f the file to read 1258: start where in buffer to start (this *must* be within buffer) 1259: prompt for stdin or readline() 1260: 1261: Returns: pointer to the start of new data 1262: could be a copy of start, or could be moved 1263: NULL if no data read and EOF reached 1264: */ 1265: 1266: static pcre_uint8 * 1267: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt) 1268: { 1269: pcre_uint8 *here = start; 1270: 1271: for (;;) 1272: { 1273: size_t rlen = (size_t)(buffer_size - (here - buffer)); 1274: 1275: if (rlen > 1000) 1276: { 1277: int dlen; 1278: 1279: /* If libreadline support is required, use readline() to read a line if the 1280: input is a terminal. Note that readline() removes the trailing newline, so 1281: we must put it back again, to be compatible with fgets(). */ 1282: 1283: #ifdef SUPPORT_LIBREADLINE 1284: if (isatty(fileno(f))) 1285: { 1286: size_t len; 1287: char *s = readline(prompt); 1288: if (s == NULL) return (here == start)? NULL : start; 1289: len = strlen(s); 1290: if (len > 0) add_history(s); 1291: if (len > rlen - 1) len = rlen - 1; 1292: memcpy(here, s, len); 1293: here[len] = '\n'; 1294: here[len+1] = 0; 1295: free(s); 1296: } 1297: else 1298: #endif 1299: 1300: /* Read the next line by normal means, prompting if the file is stdin. */ 1301: 1302: { 1303: if (f == stdin) printf("%s", prompt); 1304: if (fgets((char *)here, rlen, f) == NULL) 1305: return (here == start)? 
NULL : start; 1306: } 1307: 1308: dlen = (int)strlen((char *)here); 1309: if (dlen > 0 && here[dlen - 1] == '\n') return start; 1310: here += dlen; 1311: } 1312: 1313: else 1314: { 1315: int new_buffer_size = 2*buffer_size; 1316: pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size); 1317: pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size); 1318: pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size); 1319: 1320: if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL) 1321: { 1322: fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); 1323: exit(1); 1324: } 1325: 1326: memcpy(new_buffer, buffer, buffer_size); 1327: memcpy(new_pbuffer, pbuffer, buffer_size); 1328: 1329: buffer_size = new_buffer_size; 1330: 1331: start = new_buffer + (start - buffer); 1332: here = new_buffer + (here - buffer); 1333: 1334: free(buffer); 1335: free(dbuffer); 1336: free(pbuffer); 1337: 1338: buffer = new_buffer; 1339: dbuffer = new_dbuffer; 1340: pbuffer = new_pbuffer; 1341: } 1342: } 1343: 1344: return NULL; /* Control never gets here */ 1345: } 1346: 1347: 1348: 1349: /************************************************* 1350: * Read number from string * 1351: *************************************************/ 1352: 1353: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess 1354: around with conditional compilation, just do the job by hand. It is only used 1355: for unpicking arguments, so just keep it simple. 
1356: 1357: Arguments: 1358: str string to be converted 1359: endptr where to put the end pointer 1360: 1361: Returns: the unsigned long 1362: */ 1363: 1364: static int 1365: get_value(pcre_uint8 *str, pcre_uint8 **endptr) 1366: { 1367: int result = 0; 1368: while(*str != 0 && isspace(*str)) str++; 1369: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); 1370: *endptr = str; 1371: return(result); 1372: } 1373: 1374: 1375: 1376: /************************************************* 1377: * Print one character * 1378: *************************************************/ 1379: 1380: /* Print a single character either literally, or as a hex escape. */ 1381: 1382: static int pchar(int c, FILE *f) 1383: { 1384: if (PRINTOK(c)) 1385: { 1386: if (f != NULL) fprintf(f, "%c", c); 1387: return 1; 1388: } 1389: 1390: if (c < 0x100) 1391: { 1392: if (use_utf) 1393: { 1394: if (f != NULL) fprintf(f, "\\x{%02x}", c); 1395: return 6; 1396: } 1397: else 1398: { 1399: if (f != NULL) fprintf(f, "\\x%02x", c); 1400: return 4; 1401: } 1402: } 1403: 1404: if (f != NULL) fprintf(f, "\\x{%02x}", c); 1405: return (c <= 0x000000ff)? 6 : 1406: (c <= 0x00000fff)? 7 : 1407: (c <= 0x0000ffff)? 8 : 1408: (c <= 0x000fffff)? 9 : 10; 1409: } 1410: 1411: 1412: 1413: #ifdef SUPPORT_PCRE8 1414: /************************************************* 1415: * Print 8-bit character string * 1416: *************************************************/ 1417: 1418: /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. 1419: If handed a NULL file, just counts chars without printing. 
*/ 1420: 1421: static int pchars(pcre_uint8 *p, int length, FILE *f) 1422: { 1423: int c = 0; 1424: int yield = 0; 1425: 1426: if (length < 0) 1427: length = strlen((char *)p); 1428: 1429: while (length-- > 0) 1430: { 1431: #if !defined NOUTF 1432: if (use_utf) 1433: { 1434: int rc = utf82ord(p, &c); 1435: if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ 1436: { 1437: length -= rc - 1; 1438: p += rc; 1439: yield += pchar(c, f); 1440: continue; 1441: } 1442: } 1443: #endif 1444: c = *p++; 1445: yield += pchar(c, f); 1446: } 1447: 1448: return yield; 1449: } 1450: #endif 1451: 1452: 1453: 1454: #ifdef SUPPORT_PCRE16 1455: /************************************************* 1456: * Find length of 0-terminated 16-bit string * 1457: *************************************************/ 1458: 1459: static int strlen16(PCRE_SPTR16 p) 1460: { 1461: int len = 0; 1462: while (*p++ != 0) len++; 1463: return len; 1464: } 1465: #endif /* SUPPORT_PCRE16 */ 1466: 1467: 1468: #ifdef SUPPORT_PCRE16 1469: /************************************************* 1470: * Print 16-bit character string * 1471: *************************************************/ 1472: 1473: /* Must handle UTF-16 strings in utf mode. Yields number of characters printed. 1474: If handed a NULL file, just counts chars without printing. 
*/ 1475: 1476: static int pchars16(PCRE_SPTR16 p, int length, FILE *f) 1477: { 1478: int yield = 0; 1479: 1480: if (length < 0) 1481: length = strlen16(p); 1482: 1483: while (length-- > 0) 1484: { 1485: int c = *p++ & 0xffff; 1486: #if !defined NOUTF 1487: if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0) 1488: { 1489: int d = *p & 0xffff; 1490: if (d >= 0xDC00 && d < 0xDFFF) 1491: { 1492: c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; 1493: length--; 1494: p++; 1495: } 1496: } 1497: #endif 1498: yield += pchar(c, f); 1499: } 1500: 1501: return yield; 1502: } 1503: #endif /* SUPPORT_PCRE16 */ 1504: 1505: 1506: 1507: #ifdef SUPPORT_PCRE8 1508: /************************************************* 1509: * Read a capture name (8-bit) and check it * 1510: *************************************************/ 1511: 1512: static pcre_uint8 * 1513: read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re) 1514: { 1515: pcre_uint8 *npp = *pp; 1516: while (isalnum(*p)) *npp++ = *p++; 1517: *npp++ = 0; 1518: *npp = 0; 1519: if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) 1520: { 1521: fprintf(outfile, "no parentheses with name \""); 1522: PCHARSV(*pp, 0, -1, outfile); 1523: fprintf(outfile, "\"\n"); 1524: } 1525: 1526: *pp = npp; 1527: return p; 1528: } 1529: #endif /* SUPPORT_PCRE8 */ 1530: 1531: 1532: 1533: #ifdef SUPPORT_PCRE16 1534: /************************************************* 1535: * Read a capture name (16-bit) and check it * 1536: *************************************************/ 1537: 1538: /* Note that the text being read is 8-bit. 
*/ 1539: 1540: static pcre_uint8 * 1541: read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re) 1542: { 1543: pcre_uint16 *npp = *pp; 1544: while (isalnum(*p)) *npp++ = *p++; 1545: *npp++ = 0; 1546: *npp = 0; 1547: if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0) 1548: { 1549: fprintf(outfile, "no parentheses with name \""); 1550: PCHARSV(*pp, 0, -1, outfile); 1551: fprintf(outfile, "\"\n"); 1552: } 1553: *pp = npp; 1554: return p; 1555: } 1556: #endif /* SUPPORT_PCRE16 */ 1557: 1558: 1559: 1560: /************************************************* 1561: * Callout function * 1562: *************************************************/ 1563: 1564: /* Called from PCRE as a result of the (?C) item. We print out where we are in 1565: the match. Yield zero unless more callouts than the fail count, or the callout 1566: data is not zero. */ 1567: 1568: static int callout(pcre_callout_block *cb) 1569: { 1570: FILE *f = (first_callout | callout_extra)? outfile : NULL; 1571: int i, pre_start, post_start, subject_length; 1572: 1573: if (callout_extra) 1574: { 1575: fprintf(f, "Callout %d: last capture = %d\n", 1576: cb->callout_number, cb->capture_last); 1577: 1578: for (i = 0; i < cb->capture_top * 2; i += 2) 1579: { 1580: if (cb->offset_vector[i] < 0) 1581: fprintf(f, "%2d: <unset>\n", i/2); 1582: else 1583: { 1584: fprintf(f, "%2d: ", i/2); 1585: PCHARSV(cb->subject, cb->offset_vector[i], 1586: cb->offset_vector[i+1] - cb->offset_vector[i], f); 1587: fprintf(f, "\n"); 1588: } 1589: } 1590: } 1591: 1592: /* Re-print the subject in canonical form, the first time or if giving full 1593: datails. On subsequent calls in the same match, we use pchars just to find the 1594: printed lengths of the substrings. 
*/ 1595: 1596: if (f != NULL) fprintf(f, "--->"); 1597: 1598: PCHARS(pre_start, cb->subject, 0, cb->start_match, f); 1599: PCHARS(post_start, cb->subject, cb->start_match, 1600: cb->current_position - cb->start_match, f); 1601: 1602: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); 1603: 1604: PCHARSV(cb->subject, cb->current_position, 1605: cb->subject_length - cb->current_position, f); 1606: 1607: if (f != NULL) fprintf(f, "\n"); 1608: 1609: /* Always print appropriate indicators, with callout number if not already 1610: shown. For automatic callouts, show the pattern offset. */ 1611: 1612: if (cb->callout_number == 255) 1613: { 1614: fprintf(outfile, "%+3d ", cb->pattern_position); 1615: if (cb->pattern_position > 99) fprintf(outfile, "\n "); 1616: } 1617: else 1618: { 1619: if (callout_extra) fprintf(outfile, " "); 1620: else fprintf(outfile, "%3d ", cb->callout_number); 1621: } 1622: 1623: for (i = 0; i < pre_start; i++) fprintf(outfile, " "); 1624: fprintf(outfile, "^"); 1625: 1626: if (post_start > 0) 1627: { 1628: for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); 1629: fprintf(outfile, "^"); 1630: } 1631: 1632: for (i = 0; i < subject_length - pre_start - post_start + 4; i++) 1633: fprintf(outfile, " "); 1634: 1635: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 
1 : cb->next_item_length, 1636: pbuffer + cb->pattern_position); 1637: 1638: fprintf(outfile, "\n"); 1639: first_callout = 0; 1640: 1641: if (cb->mark != last_callout_mark) 1642: { 1643: if (cb->mark == NULL) 1644: fprintf(outfile, "Latest Mark: <unset>\n"); 1645: else 1646: { 1647: fprintf(outfile, "Latest Mark: "); 1648: PCHARSV(cb->mark, 0, -1, outfile); 1649: putc('\n', outfile); 1650: } 1651: last_callout_mark = cb->mark; 1652: } 1653: 1654: if (cb->callout_data != NULL) 1655: { 1656: int callout_data = *((int *)(cb->callout_data)); 1657: if (callout_data != 0) 1658: { 1659: fprintf(outfile, "Callout data = %d\n", callout_data); 1660: return callout_data; 1661: } 1662: } 1663: 1664: return (cb->callout_number != callout_fail_id)? 0 : 1665: (++callout_count >= callout_fail_count)? 1 : 0; 1666: } 1667: 1668: 1669: /************************************************* 1670: * Local malloc functions * 1671: *************************************************/ 1672: 1673: /* Alternative malloc function, to test functionality and save the size of a 1674: compiled re, which is the first store request that pcre_compile() makes. The 1675: show_malloc variable is set only during matching. 
*/ 1676: 1677: static void *new_malloc(size_t size) 1678: { 1679: void *block = malloc(size); 1680: gotten_store = size; 1681: if (first_gotten_store == 0) first_gotten_store = size; 1682: if (show_malloc) 1683: fprintf(outfile, "malloc %3d %p\n", (int)size, block); 1684: return block; 1685: } 1686: 1687: static void new_free(void *block) 1688: { 1689: if (show_malloc) 1690: fprintf(outfile, "free %p\n", block); 1691: free(block); 1692: } 1693: 1694: /* For recursion malloc/free, to test stacking calls */ 1695: 1696: static void *stack_malloc(size_t size) 1697: { 1698: void *block = malloc(size); 1699: if (show_malloc) 1700: fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block); 1701: return block; 1702: } 1703: 1704: static void stack_free(void *block) 1705: { 1706: if (show_malloc) 1707: fprintf(outfile, "stack_free %p\n", block); 1708: free(block); 1709: } 1710: 1711: 1712: /************************************************* 1713: * Call pcre_fullinfo() * 1714: *************************************************/ 1715: 1716: /* Get one piece of information from the pcre_fullinfo() function. When only 1717: one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct 1718: value, but the code is defensive. 
1719: 1720: Arguments: 1721: re compiled regex 1722: study study data 1723: option PCRE_INFO_xxx option 1724: ptr where to put the data 1725: 1726: Returns: 0 when OK, < 0 on error 1727: */ 1728: 1729: static int 1730: new_info(pcre *re, pcre_extra *study, int option, void *ptr) 1731: { 1732: int rc; 1733: 1734: if (use_pcre16) 1735: #ifdef SUPPORT_PCRE16 1736: rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr); 1737: #else 1738: rc = PCRE_ERROR_BADMODE; 1739: #endif 1740: else 1741: #ifdef SUPPORT_PCRE8 1742: rc = pcre_fullinfo(re, study, option, ptr); 1743: #else 1744: rc = PCRE_ERROR_BADMODE; 1745: #endif 1746: 1747: if (rc < 0) 1748: { 1749: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 1750: use_pcre16? "16" : "", option); 1751: if (rc == PCRE_ERROR_BADMODE) 1752: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in " 1753: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16"); 1754: } 1755: 1756: return rc; 1757: } 1758: 1759: 1760: 1761: /************************************************* 1762: * Swap byte functions * 1763: *************************************************/ 1764: 1765: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32 1766: value, respectively. 
1767: 1768: Arguments: 1769: value any number 1770: 1771: Returns: the byte swapped value 1772: */ 1773: 1774: static pcre_uint32 1775: swap_uint32(pcre_uint32 value) 1776: { 1777: return ((value & 0x000000ff) << 24) | 1778: ((value & 0x0000ff00) << 8) | 1779: ((value & 0x00ff0000) >> 8) | 1780: (value >> 24); 1781: } 1782: 1783: static pcre_uint16 1784: swap_uint16(pcre_uint16 value) 1785: { 1786: return (value >> 8) | (value << 8); 1787: } 1788: 1789: 1790: 1791: /************************************************* 1792: * Flip bytes in a compiled pattern * 1793: *************************************************/ 1794: 1795: /* This function is called if the 'F' option was present on a pattern that is 1796: to be written to a file. We flip the bytes of all the integer fields in the 1797: regex data block and the study block. In 16-bit mode this also flips relevant 1798: bytes in the pattern itself. This is to make it possible to test PCRE's 1799: ability to reload byte-flipped patterns, e.g. those compiled on a different 1800: architecture. */ 1801: 1802: static void 1803: regexflip(pcre *ere, pcre_extra *extra) 1804: { 1805: REAL_PCRE *re = (REAL_PCRE *)ere; 1806: #ifdef SUPPORT_PCRE16 1807: int op; 1808: pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset; 1809: int length = re->name_count * re->name_entry_size; 1810: #ifdef SUPPORT_UTF 1811: BOOL utf = (re->options & PCRE_UTF16) != 0; 1812: BOOL utf16_char = FALSE; 1813: #endif /* SUPPORT_UTF */ 1814: #endif /* SUPPORT_PCRE16 */ 1815: 1816: /* Always flip the bytes in the main data block and study blocks. 
*/ 1817: 1818: re->magic_number = REVERSED_MAGIC_NUMBER; 1819: re->size = swap_uint32(re->size); 1820: re->options = swap_uint32(re->options); 1821: re->flags = swap_uint16(re->flags); 1822: re->top_bracket = swap_uint16(re->top_bracket); 1823: re->top_backref = swap_uint16(re->top_backref); 1824: re->first_char = swap_uint16(re->first_char); 1825: re->req_char = swap_uint16(re->req_char); 1826: re->name_table_offset = swap_uint16(re->name_table_offset); 1827: re->name_entry_size = swap_uint16(re->name_entry_size); 1828: re->name_count = swap_uint16(re->name_count); 1829: 1830: if (extra != NULL) 1831: { 1832: pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); 1833: rsd->size = swap_uint32(rsd->size); 1834: rsd->flags = swap_uint32(rsd->flags); 1835: rsd->minlength = swap_uint32(rsd->minlength); 1836: } 1837: 1838: /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes 1839: in the name table, if present, and then in the pattern itself. */ 1840: 1841: #ifdef SUPPORT_PCRE16 1842: if (!use_pcre16) return; 1843: 1844: while(TRUE) 1845: { 1846: /* Swap previous characters. */ 1847: while (length-- > 0) 1848: { 1849: *ptr = swap_uint16(*ptr); 1850: ptr++; 1851: } 1852: #ifdef SUPPORT_UTF 1853: if (utf16_char) 1854: { 1855: if ((ptr[-1] & 0xfc00) == 0xd800) 1856: { 1857: /* We know that there is only one extra character in UTF-16. */ 1858: *ptr = swap_uint16(*ptr); 1859: ptr++; 1860: } 1861: } 1862: utf16_char = FALSE; 1863: #endif /* SUPPORT_UTF */ 1864: 1865: /* Get next opcode. 
*/ 1866: 1867: length = 0; 1868: op = *ptr; 1869: *ptr++ = swap_uint16(op); 1870: 1871: switch (op) 1872: { 1873: case OP_END: 1874: return; 1875: 1876: #ifdef SUPPORT_UTF 1877: case OP_CHAR: 1878: case OP_CHARI: 1879: case OP_NOT: 1880: case OP_NOTI: 1881: case OP_STAR: 1882: case OP_MINSTAR: 1883: case OP_PLUS: 1884: case OP_MINPLUS: 1885: case OP_QUERY: 1886: case OP_MINQUERY: 1887: case OP_UPTO: 1888: case OP_MINUPTO: 1889: case OP_EXACT: 1890: case OP_POSSTAR: 1891: case OP_POSPLUS: 1892: case OP_POSQUERY: 1893: case OP_POSUPTO: 1894: case OP_STARI: 1895: case OP_MINSTARI: 1896: case OP_PLUSI: 1897: case OP_MINPLUSI: 1898: case OP_QUERYI: 1899: case OP_MINQUERYI: 1900: case OP_UPTOI: 1901: case OP_MINUPTOI: 1902: case OP_EXACTI: 1903: case OP_POSSTARI: 1904: case OP_POSPLUSI: 1905: case OP_POSQUERYI: 1906: case OP_POSUPTOI: 1907: case OP_NOTSTAR: 1908: case OP_NOTMINSTAR: 1909: case OP_NOTPLUS: 1910: case OP_NOTMINPLUS: 1911: case OP_NOTQUERY: 1912: case OP_NOTMINQUERY: 1913: case OP_NOTUPTO: 1914: case OP_NOTMINUPTO: 1915: case OP_NOTEXACT: 1916: case OP_NOTPOSSTAR: 1917: case OP_NOTPOSPLUS: 1918: case OP_NOTPOSQUERY: 1919: case OP_NOTPOSUPTO: 1920: case OP_NOTSTARI: 1921: case OP_NOTMINSTARI: 1922: case OP_NOTPLUSI: 1923: case OP_NOTMINPLUSI: 1924: case OP_NOTQUERYI: 1925: case OP_NOTMINQUERYI: 1926: case OP_NOTUPTOI: 1927: case OP_NOTMINUPTOI: 1928: case OP_NOTEXACTI: 1929: case OP_NOTPOSSTARI: 1930: case OP_NOTPOSPLUSI: 1931: case OP_NOTPOSQUERYI: 1932: case OP_NOTPOSUPTOI: 1933: if (utf) utf16_char = TRUE; 1934: #endif 1935: /* Fall through. */ 1936: 1937: default: 1938: length = OP_lengths16[op] - 1; 1939: break; 1940: 1941: case OP_CLASS: 1942: case OP_NCLASS: 1943: /* Skip the character bit map. */ 1944: ptr += 32/sizeof(pcre_uint16); 1945: length = 0; 1946: break; 1947: 1948: case OP_XCLASS: 1949: /* LINK_SIZE can be 1 or 2 in 16 bit mode. 
*/ 1950: if (LINK_SIZE > 1) 1951: length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1])) 1952: - (1 + LINK_SIZE + 1)); 1953: else 1954: length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); 1955: 1956: /* Reverse the size of the XCLASS instance. */ 1957: *ptr = swap_uint16(*ptr); 1958: ptr++; 1959: if (LINK_SIZE > 1) 1960: { 1961: *ptr = swap_uint16(*ptr); 1962: ptr++; 1963: } 1964: 1965: op = *ptr; 1966: *ptr = swap_uint16(op); 1967: ptr++; 1968: if ((op & XCL_MAP) != 0) 1969: { 1970: /* Skip the character bit map. */ 1971: ptr += 32/sizeof(pcre_uint16); 1972: length -= 32/sizeof(pcre_uint16); 1973: } 1974: break; 1975: } 1976: } 1977: /* Control should never reach here in 16 bit mode. */ 1978: #endif /* SUPPORT_PCRE16 */ 1979: }



/*************************************************
*        Check match or recursion limit          *
*************************************************/

/* Find the smallest limit value for which running the match no longer yields
errnumber. The trial value starts at 64 and is doubled for as long as the match
fails with errnumber; once a trial value has been accepted, the gap between the
largest rejected value and the smallest accepted value is bisected until the
two are adjacent. The minimum is then written to outfile.

Arguments:
  re, extra, bptr, len, start_offset, options, use_offsets, use_size_offsets
               handed unchanged to PCRE_EXEC
  flag         bit(s) set in extra->flags while searching, cleared on exit
  limit        each trial value is stored here before PCRE_EXEC is run
               (presumably this points at a limit field inside *extra -
               confirm against the callers)
  errnumber    the PCRE_EXEC result that means "limit was too small"
  msg          name of the limit, used in the "Minimum ... limit" line

Returns:       the result of the final PCRE_EXEC call
*/

static int
check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
  int start_offset, int options, int *use_offsets, int use_size_offsets,
  int flag, unsigned long int *limit, int errnumber, const char *msg)
{
int rc;
int too_small = 0;     /* largest trial value known to be rejected */
int enough = -1;       /* smallest trial value known to work; -1 = none yet */
int trial = 64;

extra->flags |= flag;

for (;;)
  {
  int usable;

  *limit = trial;

  PCRE_EXEC(rc, re, extra, bptr, len, start_offset, options,
    use_offsets, use_size_offsets);

  /* The limit was hit: raise the trial value. Until some value has been
  accepted there is no upper bound, so just double. */

  if (rc == errnumber)
    {
    too_small = trial;
    if (trial == enough - 1)
      trial = enough;
    else if (enough > 0)
      trial = (too_small + enough)/2;
    else
      trial *= 2;
    continue;
    }

  /* A match, a non-match, and a partial match all mean that the limit was
  not the obstacle. Any other result is an unrelated error; give up. */

  usable = rc >= 0 || rc == PCRE_ERROR_NOMATCH || rc == PCRE_ERROR_PARTIAL;
  if (!usable) break;

  if (trial == too_small + 1)
    {
    fprintf(outfile, "Minimum %s limit = %d\n", msg, trial);
    break;
    }

  enough = trial;
  trial = (too_small + trial)/2;
  }

extra->flags &= ~flag;
return rc;
}



/*************************************************
*      Case-independent strncmp() function       *
*************************************************/

/* Compares the first n characters of two strings, folding each character with
tolower() first. The comparison stops only at the first differing pair or after
n characters; a zero character is not treated specially.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
for (; n != 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  }
return 0;
}



/*************************************************
*         Check newline indicator                *
*************************************************/

/* Recognizes the text of a <xxx> escape, compared caselessly and including the
closing '>'. If nothing matches, a message is written to f and 0 is returned.

Arguments:
  p           points after the leading '<'
  f           file for error message

Returns:      appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
*/

static int
check_newline(pcre_uint8 *p, FILE *f)
{
/* The entries are tried in the order given. */

static const struct
  {
  const char *text;
  int length;
  int flag;
  }
known[] =
  {
  { "cr>",           3, PCRE_NEWLINE_CR },
  { "lf>",           3, PCRE_NEWLINE_LF },
  { "crlf>",         5, PCRE_NEWLINE_CRLF },
  { "anycrlf>",      8, PCRE_NEWLINE_ANYCRLF },
  { "any>",          4, PCRE_NEWLINE_ANY },
  { "bsr_anycrlf>", 12, PCRE_BSR_ANYCRLF },
  { "bsr_unicode>", 12, PCRE_BSR_UNICODE }
  };
int i;

for (i = 0; i < (int)(sizeof(known)/sizeof(known[0])); i++)
  {
  if (strncmpic(p, (pcre_uint8 *)known[i].text, known[i].length) == 0)
    return known[i].flag;
  }

fprintf(f, "Unknown newline type at: <%s\n", p);
return 0;
}



/*************************************************
*             Usage function                     *
*************************************************/

/* Writes the command line summary to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX. None of the strings
contains a conversion specification, so they are written with fputs().

NOTE(review): the spacing inside these strings is reproduced exactly as found
in this copy of the file; it looks as if column alignment may have been lost -
compare with the upstream pcretest.c before relying on the layout. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -C arg show a specific compile-time option\n", stdout);
fputs(" and exit with its value. The arg can be:\n", stdout);
fputs(" linksize internal link size [2, 3, 4]\n", stdout);
fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
 2140: 2141: 2142: 2143: /************************************************* 2144: * Main Program * 2145: *************************************************/ 2146: 2147: /* Read lines from named file or stdin and write to named file or stdout; lines 2148: consist of a regular expression, in delimiters and optionally followed by 2149: options, followed by a set of test data, terminated by an empty line. 
*/ 2150: 2151: int main(int argc, char **argv) 2152: { 2153: FILE *infile = stdin; 2154: const char *version; 2155: int options = 0; 2156: int study_options = 0; 2157: int default_find_match_limit = FALSE; 2158: int op = 1; 2159: int timeit = 0; 2160: int timeitm = 0; 2161: int showinfo = 0; 2162: int showstore = 0; 2163: int force_study = -1; 2164: int force_study_options = 0; 2165: int quiet = 0; 2166: int size_offsets = 45; 2167: int size_offsets_max; 2168: int *offsets = NULL; 2169: #if !defined NOPOSIX 2170: int posix = 0; 2171: #endif 2172: int debug = 0; 2173: int done = 0; 2174: int all_use_dfa = 0; 2175: int yield = 0; 2176: int stack_size; 2177: 2178: pcre_jit_stack *jit_stack = NULL; 2179: 2180: /* These vectors store, end-to-end, a list of zero-terminated captured 2181: substring names, each list itself being terminated by an empty name. Assume 2182: that 1024 is plenty long enough for the few names we'll be testing. It is 2183: easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version 2184: for the actual memory, to ensure alignment. */ 2185: 2186: pcre_uint16 copynames[1024]; 2187: pcre_uint16 getnames[1024]; 2188: 2189: #ifdef SUPPORT_PCRE16 2190: pcre_uint16 *cn16ptr; 2191: pcre_uint16 *gn16ptr; 2192: #endif 2193: 2194: #ifdef SUPPORT_PCRE8 2195: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames; 2196: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames; 2197: pcre_uint8 *cn8ptr; 2198: pcre_uint8 *gn8ptr; 2199: #endif 2200: 2201: /* Get buffers from malloc() so that valgrind will check their misuse when 2202: debugging. They grow automatically when very long lines are read. The 16-bit 2203: buffer (buffer16) is obtained only if needed. */ 2204: 2205: buffer = (pcre_uint8 *)malloc(buffer_size); 2206: dbuffer = (pcre_uint8 *)malloc(buffer_size); 2207: pbuffer = (pcre_uint8 *)malloc(buffer_size); 2208: 2209: /* The outfile variable is static so that new_malloc can use it. 
*/ 2210: 2211: outfile = stdout; 2212: 2213: /* The following _setmode() stuff is some Windows magic that tells its runtime 2214: library to translate CRLF into a single LF character. At least, that's what 2215: I've been told: never having used Windows I take this all on trust. Originally 2216: it set 0x8000, but then I was advised that _O_BINARY was better. */ 2217: 2218: #if defined(_WIN32) || defined(WIN32) 2219: _setmode( _fileno( stdout ), _O_BINARY ); 2220: #endif 2221: 2222: /* Get the version number: both pcre_version() and pcre16_version() give the 2223: same answer. We just need to ensure that we call one that is available. */ 2224: 2225: #ifdef SUPPORT_PCRE8 2226: version = pcre_version(); 2227: #else 2228: version = pcre16_version(); 2229: #endif 2230: 2231: /* Scan options */ 2232: 2233: while (argc > 1 && argv[op][0] == '-') 2234: { 2235: pcre_uint8 *endptr; 2236: 2237: if (strcmp(argv[op], "-m") == 0) showstore = 1; 2238: else if (strcmp(argv[op], "-s") == 0) force_study = 0; 2239: else if (strcmp(argv[op], "-s+") == 0) 2240: { 2241: force_study = 1; 2242: force_study_options = PCRE_STUDY_JIT_COMPILE; 2243: } 2244: else if (strcmp(argv[op], "-16") == 0) 2245: { 2246: #ifdef SUPPORT_PCRE16 2247: use_pcre16 = 1; 2248: #else 2249: printf("** This version of PCRE was built without 16-bit support\n"); 2250: exit(1); 2251: #endif 2252: } 2253: else if (strcmp(argv[op], "-q") == 0) quiet = 1; 2254: else if (strcmp(argv[op], "-b") == 0) debug = 1; 2255: else if (strcmp(argv[op], "-i") == 0) showinfo = 1; 2256: else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; 2257: else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE; 2258: #if !defined NODFA 2259: else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1; 2260: #endif 2261: else if (strcmp(argv[op], "-o") == 0 && argc > 2 && 2262: ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)), 2263: *endptr == 0)) 2264: { 2265: op++; 2266: argc--; 2267: } 2268: else if 
(strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0) 2269: { 2270: int both = argv[op][2] == 0; 2271: int temp; 2272: if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr), 2273: *endptr == 0)) 2274: { 2275: timeitm = temp; 2276: op++; 2277: argc--; 2278: } 2279: else timeitm = LOOPREPEAT; 2280: if (both) timeit = timeitm; 2281: } 2282: else if (strcmp(argv[op], "-S") == 0 && argc > 2 && 2283: ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)), 2284: *endptr == 0)) 2285: { 2286: #if defined(_WIN32) || defined(WIN32) || defined(__minix) 2287: printf("PCRE: -S not supported on this OS\n"); 2288: exit(1); 2289: #else 2290: int rc; 2291: struct rlimit rlim; 2292: getrlimit(RLIMIT_STACK, &rlim); 2293: rlim.rlim_cur = stack_size * 1024 * 1024; 2294: rc = setrlimit(RLIMIT_STACK, &rlim); 2295: if (rc != 0) 2296: { 2297: printf("PCRE: setrlimit() failed with error %d\n", rc); 2298: exit(1); 2299: } 2300: op++; 2301: argc--; 2302: #endif 2303: } 2304: #if !defined NOPOSIX 2305: else if (strcmp(argv[op], "-p") == 0) posix = 1; 2306: #endif 2307: else if (strcmp(argv[op], "-C") == 0) 2308: { 2309: int rc; 2310: unsigned long int lrc; 2311: 2312: if (argc > 2) 2313: { 2314: if (strcmp(argv[op + 1], "linksize") == 0) 2315: { 2316: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 2317: printf("%d\n", rc); 2318: yield = rc; 2319: goto EXIT; 2320: } 2321: if (strcmp(argv[op + 1], "pcre8") == 0) 2322: { 2323: #ifdef SUPPORT_PCRE8 2324: printf("1\n"); 2325: yield = 1; 2326: #else 2327: printf("0\n"); 2328: yield = 0; 2329: #endif 2330: goto EXIT; 2331: } 2332: if (strcmp(argv[op + 1], "pcre16") == 0) 2333: { 2334: #ifdef SUPPORT_PCRE16 2335: printf("1\n"); 2336: yield = 1; 2337: #else 2338: printf("0\n"); 2339: yield = 0; 2340: #endif 2341: goto EXIT; 2342: } 2343: if (strcmp(argv[op + 1], "utf") == 0) 2344: { 2345: #ifdef SUPPORT_PCRE8 2346: (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2347: printf("%d\n", rc); 2348: yield = rc; 2349: #else 2350: 
(void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 2351: printf("%d\n", rc); 2352: yield = rc; 2353: #endif 2354: goto EXIT; 2355: } 2356: if (strcmp(argv[op + 1], "ucp") == 0) 2357: { 2358: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 2359: printf("%d\n", rc); 2360: yield = rc; 2361: goto EXIT; 2362: } 2363: if (strcmp(argv[op + 1], "jit") == 0) 2364: { 2365: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 2366: printf("%d\n", rc); 2367: yield = rc; 2368: goto EXIT; 2369: } 2370: if (strcmp(argv[op + 1], "newline") == 0) 2371: { 2372: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 2373: /* Note that these values are always the ASCII values, even 2374: in EBCDIC environments. CR is 13 and NL is 10. */ 2375: printf("%s\n", (rc == 13)? "CR" : 2376: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : 2377: (rc == -2)? "ANYCRLF" : 2378: (rc == -1)? "ANY" : "???"); 2379: goto EXIT; 2380: } 2381: printf("Unknown -C option: %s\n", argv[op + 1]); 2382: goto EXIT; 2383: } 2384: 2385: printf("PCRE version %s\n", version); 2386: printf("Compiled with\n"); 2387: 2388: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both 2389: are set, either both UTFs are supported or both are not supported. */ 2390: 2391: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 2392: printf(" 8-bit and 16-bit support\n"); 2393: (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2394: if (rc) 2395: printf(" UTF-8 and UTF-16 support\n"); 2396: else 2397: printf(" No UTF-8 or UTF-16 support\n"); 2398: #elif defined SUPPORT_PCRE8 2399: printf(" 8-bit support only\n"); 2400: (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2401: printf(" %sUTF-8 support\n", rc? "" : "No "); 2402: #else 2403: printf(" 16-bit support only\n"); 2404: (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 2405: printf(" %sUTF-16 support\n", rc? "" : "No "); 2406: #endif 2407: 2408: (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 2409: printf(" %sUnicode properties support\n", rc? 
"" : "No "); 2410: (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 2411: if (rc) 2412: { 2413: const char *arch; 2414: (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch)); 2415: printf(" Just-in-time compiler support: %s\n", arch); 2416: } 2417: else 2418: printf(" No just-in-time compiler support\n"); 2419: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 2420: /* Note that these values are always the ASCII values, even 2421: in EBCDIC environments. CR is 13 and NL is 10. */ 2422: printf(" Newline sequence is %s\n", (rc == 13)? "CR" : 2423: (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : 2424: (rc == -2)? "ANYCRLF" : 2425: (rc == -1)? "ANY" : "???"); 2426: (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc); 2427: printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" : 2428: "all Unicode newlines"); 2429: (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 2430: printf(" Internal link size = %d\n", rc); 2431: (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); 2432: printf(" POSIX malloc threshold = %d\n", rc); 2433: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc); 2434: printf(" Default match limit = %ld\n", lrc); 2435: (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc); 2436: printf(" Default recursion depth limit = %ld\n", lrc); 2437: (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc); 2438: printf(" Match recursion uses %s", rc? "stack" : "heap"); 2439: if (showstore) 2440: { 2441: PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0); 2442: printf(": %sframe size = %d bytes", rc? 
"approximate " : "", -stack_size); 2443: } 2444: printf("\n"); 2445: goto EXIT; 2446: } 2447: else if (strcmp(argv[op], "-help") == 0 || 2448: strcmp(argv[op], "--help") == 0) 2449: { 2450: usage(); 2451: goto EXIT; 2452: } 2453: else 2454: { 2455: printf("** Unknown or malformed option %s\n", argv[op]); 2456: usage(); 2457: yield = 1; 2458: goto EXIT; 2459: } 2460: op++; 2461: argc--; 2462: } 2463: 2464: /* Get the store for the offsets vector, and remember what it was */ 2465: 2466: size_offsets_max = size_offsets; 2467: offsets = (int *)malloc(size_offsets_max * sizeof(int)); 2468: if (offsets == NULL) 2469: { 2470: printf("** Failed to get %d bytes of memory for offsets vector\n", 2471: (int)(size_offsets_max * sizeof(int))); 2472: yield = 1; 2473: goto EXIT; 2474: } 2475: 2476: /* Sort out the input and output files */ 2477: 2478: if (argc > 1) 2479: { 2480: infile = fopen(argv[op], INPUT_MODE); 2481: if (infile == NULL) 2482: { 2483: printf("** Failed to open %s\n", argv[op]); 2484: yield = 1; 2485: goto EXIT; 2486: } 2487: } 2488: 2489: if (argc > 2) 2490: { 2491: outfile = fopen(argv[op+1], OUTPUT_MODE); 2492: if (outfile == NULL) 2493: { 2494: printf("** Failed to open %s\n", argv[op+1]); 2495: yield = 1; 2496: goto EXIT; 2497: } 2498: } 2499: 2500: /* Set alternative malloc function */ 2501: 2502: #ifdef SUPPORT_PCRE8 2503: pcre_malloc = new_malloc; 2504: pcre_free = new_free; 2505: pcre_stack_malloc = stack_malloc; 2506: pcre_stack_free = stack_free; 2507: #endif 2508: 2509: #ifdef SUPPORT_PCRE16 2510: pcre16_malloc = new_malloc; 2511: pcre16_free = new_free; 2512: pcre16_stack_malloc = stack_malloc; 2513: pcre16_stack_free = stack_free; 2514: #endif 2515: 2516: /* Heading line unless quiet, then prompt for first regex if stdin */ 2517: 2518: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version); 2519: 2520: /* Main loop */ 2521: 2522: while (!done) 2523: { 2524: pcre *re = NULL; 2525: pcre_extra *extra = NULL; 2526: 2527: #if !defined NOPOSIX /* 
There are still compilers that require no indent */ 2528: regex_t preg; 2529: int do_posix = 0; 2530: #endif 2531: 2532: const char *error; 2533: pcre_uint8 *markptr; 2534: pcre_uint8 *p, *pp, *ppp; 2535: pcre_uint8 *to_file = NULL; 2536: const pcre_uint8 *tables = NULL; 2537: unsigned long int get_options; 2538: unsigned long int true_size, true_study_size = 0; 2539: size_t size, regex_gotten_store; 2540: int do_allcaps = 0; 2541: int do_mark = 0; 2542: int do_study = 0; 2543: int no_force_study = 0; 2544: int do_debug = debug; 2545: int do_G = 0; 2546: int do_g = 0; 2547: int do_showinfo = showinfo; 2548: int do_showrest = 0; 2549: int do_showcaprest = 0; 2550: int do_flip = 0; 2551: int erroroffset, len, delimiter, poffset; 2552: 2553: use_utf = 0; 2554: debug_lengths = 1; 2555: 2556: if (extend_inputline(infile, buffer, " re> ") == NULL) break; 2557: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 2558: fflush(outfile); 2559: 2560: p = buffer; 2561: while (isspace(*p)) p++; 2562: if (*p == 0) continue; 2563: 2564: /* See if the pattern is to be loaded pre-compiled from a file. 
*/ 2565: 2566: if (*p == '<' && strchr((char *)(p+1), '<') == NULL) 2567: { 2568: pcre_uint32 magic; 2569: pcre_uint8 sbuf[8]; 2570: FILE *f; 2571: 2572: p++; 2573: if (*p == '!') 2574: { 2575: do_debug = TRUE; 2576: do_showinfo = TRUE; 2577: p++; 2578: } 2579: 2580: pp = p + (int)strlen((char *)p); 2581: while (isspace(pp[-1])) pp--; 2582: *pp = 0; 2583: 2584: f = fopen((char *)p, "rb"); 2585: if (f == NULL) 2586: { 2587: fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno)); 2588: continue; 2589: } 2590: 2591: first_gotten_store = 0; 2592: if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; 2593: 2594: true_size = 2595: (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3]; 2596: true_study_size = 2597: (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; 2598: 2599: re = (pcre *)new_malloc(true_size); 2600: regex_gotten_store = first_gotten_store; 2601: 2602: if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; 2603: 2604: magic = ((REAL_PCRE *)re)->magic_number; 2605: if (magic != MAGIC_NUMBER) 2606: { 2607: if (swap_uint32(magic) == MAGIC_NUMBER) 2608: { 2609: do_flip = 1; 2610: } 2611: else 2612: { 2613: fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); 2614: fclose(f); 2615: continue; 2616: } 2617: } 2618: 2619: /* We hide the byte-invert info for little and big endian tests. */ 2620: fprintf(outfile, "Compiled pattern%s loaded from %s\n", 2621: do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p); 2622: 2623: /* Now see if there is any following study data. 
*/ 2624: 2625: if (true_study_size != 0) 2626: { 2627: pcre_study_data *psd; 2628: 2629: extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size); 2630: extra->flags = PCRE_EXTRA_STUDY_DATA; 2631: 2632: psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra)); 2633: extra->study_data = psd; 2634: 2635: if (fread(psd, 1, true_study_size, f) != true_study_size) 2636: { 2637: FAIL_READ: 2638: fprintf(outfile, "Failed to read data from %s\n", p); 2639: if (extra != NULL) 2640: { 2641: PCRE_FREE_STUDY(extra); 2642: } 2643: if (re != NULL) new_free(re); 2644: fclose(f); 2645: continue; 2646: } 2647: fprintf(outfile, "Study data loaded from %s\n", p); 2648: do_study = 1; /* To get the data output if requested */ 2649: } 2650: else fprintf(outfile, "No study data\n"); 2651: 2652: /* Flip the necessary bytes. */ 2653: if (do_flip) 2654: { 2655: int rc; 2656: PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL); 2657: if (rc == PCRE_ERROR_BADMODE) 2658: { 2659: /* Simulate the result of the function call below. */ 2660: fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 2661: use_pcre16? "16" : "", PCRE_INFO_OPTIONS); 2662: fprintf(outfile, "Running in %s-bit mode but pattern was compiled in " 2663: "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16"); 2664: continue; 2665: } 2666: } 2667: 2668: /* Need to know if UTF-8 for printing data strings. */ 2669: 2670: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue; 2671: use_utf = (get_options & PCRE_UTF8) != 0; 2672: 2673: fclose(f); 2674: goto SHOW_INFO; 2675: } 2676: 2677: /* In-line pattern (the usual case). Get the delimiter and seek the end of 2678: the pattern; if it isn't complete, read more. 
*/ 2679: 2680: delimiter = *p++; 2681: 2682: if (isalnum(delimiter) || delimiter == '\\') 2683: { 2684: fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n"); 2685: goto SKIP_DATA; 2686: } 2687: 2688: pp = p; 2689: poffset = (int)(p - buffer); 2690: 2691: for(;;) 2692: { 2693: while (*pp != 0) 2694: { 2695: if (*pp == '\\' && pp[1] != 0) pp++; 2696: else if (*pp == delimiter) break; 2697: pp++; 2698: } 2699: if (*pp != 0) break; 2700: if ((pp = extend_inputline(infile, pp, " > ")) == NULL) 2701: { 2702: fprintf(outfile, "** Unexpected EOF\n"); 2703: done = 1; 2704: goto CONTINUE; 2705: } 2706: if (infile != stdin) fprintf(outfile, "%s", (char *)pp); 2707: } 2708: 2709: /* The buffer may have moved while being extended; reset the start of data 2710: pointer to the correct relative point in the buffer. */ 2711: 2712: p = buffer + poffset; 2713: 2714: /* If the first character after the delimiter is backslash, make 2715: the pattern end with backslash. This is purely to provide a way 2716: of testing for the error message when a pattern ends with backslash. */ 2717: 2718: if (pp[1] == '\\') *pp++ = '\\'; 2719: 2720: /* Terminate the pattern at the delimiter, and save a copy of the pattern 2721: for callouts. 
*/ 2722: 2723: *pp++ = 0; 2724: strcpy((char *)pbuffer, (char *)p); 2725: 2726: /* Look for options after final delimiter */ 2727: 2728: options = 0; 2729: study_options = 0; 2730: log_store = showstore; /* default from command line */ 2731: 2732: while (*pp != 0) 2733: { 2734: switch (*pp++) 2735: { 2736: case 'f': options |= PCRE_FIRSTLINE; break; 2737: case 'g': do_g = 1; break; 2738: case 'i': options |= PCRE_CASELESS; break; 2739: case 'm': options |= PCRE_MULTILINE; break; 2740: case 's': options |= PCRE_DOTALL; break; 2741: case 'x': options |= PCRE_EXTENDED; break; 2742: 2743: case '+': 2744: if (do_showrest) do_showcaprest = 1; else do_showrest = 1; 2745: break; 2746: 2747: case '=': do_allcaps = 1; break; 2748: case 'A': options |= PCRE_ANCHORED; break; 2749: case 'B': do_debug = 1; break; 2750: case 'C': options |= PCRE_AUTO_CALLOUT; break; 2751: case 'D': do_debug = do_showinfo = 1; break; 2752: case 'E': options |= PCRE_DOLLAR_ENDONLY; break; 2753: case 'F': do_flip = 1; break; 2754: case 'G': do_G = 1; break; 2755: case 'I': do_showinfo = 1; break; 2756: case 'J': options |= PCRE_DUPNAMES; break; 2757: case 'K': do_mark = 1; break; 2758: case 'M': log_store = 1; break; 2759: case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; 2760: 2761: #if !defined NOPOSIX 2762: case 'P': do_posix = 1; break; 2763: #endif 2764: 2765: case 'S': 2766: if (do_study == 0) 2767: { 2768: do_study = 1; 2769: if (*pp == '+') 2770: { 2771: study_options |= PCRE_STUDY_JIT_COMPILE; 2772: pp++; 2773: } 2774: } 2775: else 2776: { 2777: do_study = 0; 2778: no_force_study = 1; 2779: } 2780: break; 2781: 2782: case 'U': options |= PCRE_UNGREEDY; break; 2783: case 'W': options |= PCRE_UCP; break; 2784: case 'X': options |= PCRE_EXTRA; break; 2785: case 'Y': options |= PCRE_NO_START_OPTIMISE; break; 2786: case 'Z': debug_lengths = 0; break; 2787: case '8': options |= PCRE_UTF8; use_utf = 1; break; 2788: case '?': options |= PCRE_NO_UTF8_CHECK; break; 2789: 2790: case 'T': 2791: switch 
(*pp++) 2792: { 2793: case '0': tables = tables0; break; 2794: case '1': tables = tables1; break; 2795: 2796: case '\r': 2797: case '\n': 2798: case ' ': 2799: case 0: 2800: fprintf(outfile, "** Missing table number after /T\n"); 2801: goto SKIP_DATA; 2802: 2803: default: 2804: fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]); 2805: goto SKIP_DATA; 2806: } 2807: break; 2808: 2809: case 'L': 2810: ppp = pp; 2811: /* The '\r' test here is so that it works on Windows. */ 2812: /* The '0' test is just in case this is an unterminated line. */ 2813: while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++; 2814: *ppp = 0; 2815: if (setlocale(LC_CTYPE, (const char *)pp) == NULL) 2816: { 2817: fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); 2818: goto SKIP_DATA; 2819: } 2820: locale_set = 1; 2821: tables = PCRE_MAKETABLES; 2822: pp = ppp; 2823: break; 2824: 2825: case '>': 2826: to_file = pp; 2827: while (*pp != 0) pp++; 2828: while (isspace(pp[-1])) pp--; 2829: *pp = 0; 2830: break; 2831: 2832: case '<': 2833: { 2834: if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0) 2835: { 2836: options |= PCRE_JAVASCRIPT_COMPAT; 2837: pp += 3; 2838: } 2839: else 2840: { 2841: int x = check_newline(pp, outfile); 2842: if (x == 0) goto SKIP_DATA; 2843: options |= x; 2844: while (*pp++ != '>'); 2845: } 2846: } 2847: break; 2848: 2849: case '\r': /* So that it works in Windows */ 2850: case '\n': 2851: case ' ': 2852: break; 2853: 2854: default: 2855: fprintf(outfile, "** Unknown option '%c'\n", pp[-1]); 2856: goto SKIP_DATA; 2857: } 2858: } 2859: 2860: /* Handle compiling via the POSIX interface, which doesn't support the 2861: timing, showing, or debugging options, nor the ability to pass over 2862: local character tables. Neither does it have 16-bit support. 
*/ 2863: 2864: #if !defined NOPOSIX 2865: if (posix || do_posix) 2866: { 2867: int rc; 2868: int cflags = 0; 2869: 2870: if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; 2871: if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; 2872: if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; 2873: if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; 2874: if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; 2875: if ((options & PCRE_UCP) != 0) cflags |= REG_UCP; 2876: if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY; 2877: 2878: first_gotten_store = 0; 2879: rc = regcomp(&preg, (char *)p, cflags); 2880: 2881: /* Compilation failed; go back for another re, skipping to blank line 2882: if non-interactive. */ 2883: 2884: if (rc != 0) 2885: { 2886: (void)regerror(rc, &preg, (char *)buffer, buffer_size); 2887: fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); 2888: goto SKIP_DATA; 2889: } 2890: } 2891: 2892: /* Handle compiling via the native interface */ 2893: 2894: else 2895: #endif /* !defined NOPOSIX */ 2896: 2897: { 2898: /* In 16-bit mode, convert the input. 
*/ 2899: 2900: #ifdef SUPPORT_PCRE16 2901: if (use_pcre16) 2902: { 2903: switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p))) 2904: { 2905: case -1: 2906: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 2907: "converted to UTF-16\n"); 2908: goto SKIP_DATA; 2909: 2910: case -2: 2911: fprintf(outfile, "**Failed: character value greater than 0x10ffff " 2912: "cannot be converted to UTF-16\n"); 2913: goto SKIP_DATA; 2914: 2915: case -3: /* "Impossible error" when to16 is called arg1 FALSE */ 2916: fprintf(outfile, "**Failed: character value greater than 0xffff " 2917: "cannot be converted to 16-bit in non-UTF mode\n"); 2918: goto SKIP_DATA; 2919: 2920: default: 2921: break; 2922: } 2923: p = (pcre_uint8 *)buffer16; 2924: } 2925: #endif 2926: 2927: /* Compile many times when timing */ 2928: 2929: if (timeit > 0) 2930: { 2931: register int i; 2932: clock_t time_taken; 2933: clock_t start_time = clock(); 2934: for (i = 0; i < timeit; i++) 2935: { 2936: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 2937: if (re != NULL) free(re); 2938: } 2939: time_taken = clock() - start_time; 2940: fprintf(outfile, "Compile time %.4f milliseconds\n", 2941: (((double)time_taken * 1000.0) / (double)timeit) / 2942: (double)CLOCKS_PER_SEC); 2943: } 2944: 2945: first_gotten_store = 0; 2946: PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 2947: 2948: /* Compilation failed; go back for another re, skipping to blank line 2949: if non-interactive. 
*/ 2950: 2951: if (re == NULL) 2952: { 2953: fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); 2954: SKIP_DATA: 2955: if (infile != stdin) 2956: { 2957: for (;;) 2958: { 2959: if (extend_inputline(infile, buffer, NULL) == NULL) 2960: { 2961: done = 1; 2962: goto CONTINUE; 2963: } 2964: len = (int)strlen((char *)buffer); 2965: while (len > 0 && isspace(buffer[len-1])) len--; 2966: if (len == 0) break; 2967: } 2968: fprintf(outfile, "\n"); 2969: } 2970: goto CONTINUE; 2971: } 2972: 2973: /* Compilation succeeded. It is now possible to set the UTF-8 option from 2974: within the regex; check for this so that we know how to process the data 2975: lines. */ 2976: 2977: if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) 2978: goto SKIP_DATA; 2979: if ((get_options & PCRE_UTF8) != 0) use_utf = 1; 2980: 2981: /* Extract the size for possible writing before possibly flipping it, 2982: and remember the store that was got. */ 2983: 2984: true_size = ((REAL_PCRE *)re)->size; 2985: regex_gotten_store = first_gotten_store; 2986: 2987: /* Output code size information if requested */ 2988: 2989: if (log_store) 2990: fprintf(outfile, "Memory allocation (code space): %d\n", 2991: (int)(first_gotten_store - 2992: sizeof(REAL_PCRE) - 2993: ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size)); 2994: 2995: /* If -s or /S was present, study the regex to generate additional info to 2996: help with the matching, unless the pattern has the SS option, which 2997: suppresses the effect of /S (used for a few test patterns where studying is 2998: never sensible). 
*/ 2999: 3000: if (do_study || (force_study >= 0 && !no_force_study)) 3001: { 3002: if (timeit > 0) 3003: { 3004: register int i; 3005: clock_t time_taken; 3006: clock_t start_time = clock(); 3007: for (i = 0; i < timeit; i++) 3008: { 3009: PCRE_STUDY(extra, re, study_options | force_study_options, &error); 3010: } 3011: time_taken = clock() - start_time; 3012: if (extra != NULL) 3013: { 3014: PCRE_FREE_STUDY(extra); 3015: } 3016: fprintf(outfile, " Study time %.4f milliseconds\n", 3017: (((double)time_taken * 1000.0) / (double)timeit) / 3018: (double)CLOCKS_PER_SEC); 3019: } 3020: PCRE_STUDY(extra, re, study_options | force_study_options, &error); 3021: if (error != NULL) 3022: fprintf(outfile, "Failed to study: %s\n", error); 3023: else if (extra != NULL) 3024: { 3025: true_study_size = ((pcre_study_data *)(extra->study_data))->size; 3026: if (log_store) 3027: { 3028: size_t jitsize; 3029: if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 && 3030: jitsize != 0) 3031: fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize); 3032: } 3033: } 3034: } 3035: 3036: /* If /K was present, we set up for handling MARK data. */ 3037: 3038: if (do_mark) 3039: { 3040: if (extra == NULL) 3041: { 3042: extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3043: extra->flags = 0; 3044: } 3045: extra->mark = &markptr; 3046: extra->flags |= PCRE_EXTRA_MARK; 3047: } 3048: 3049: /* Extract and display information from the compiled data if required. 
*/ 3050: 3051: SHOW_INFO: 3052: 3053: if (do_debug) 3054: { 3055: fprintf(outfile, "------------------------------------------------------------------\n"); 3056: PCRE_PRINTINT(re, outfile, debug_lengths); 3057: } 3058: 3059: /* We already have the options in get_options (see above) */ 3060: 3061: if (do_showinfo) 3062: { 3063: unsigned long int all_options; 3064: int count, backrefmax, first_char, need_char, okpartial, jchanged, 3065: hascrorlf; 3066: int nameentrysize, namecount; 3067: const pcre_uint8 *nametable; 3068: 3069: if (new_info(re, NULL, PCRE_INFO_SIZE, &size) + 3070: new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) + 3071: new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) + 3072: new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) + 3073: new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) + 3074: new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) + 3075: new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) + 3076: new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) + 3077: new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) + 3078: new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) + 3079: new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) 3080: != 0) 3081: goto SKIP_DATA; 3082: 3083: if (size != regex_gotten_store) fprintf(outfile, 3084: "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n", 3085: (int)size, (int)regex_gotten_store); 3086: 3087: fprintf(outfile, "Capturing subpattern count = %d\n", count); 3088: if (backrefmax > 0) 3089: fprintf(outfile, "Max back reference = %d\n", backrefmax); 3090: 3091: if (namecount > 0) 3092: { 3093: fprintf(outfile, "Named capturing subpatterns:\n"); 3094: while (namecount-- > 0) 3095: { 3096: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 3097: int imm2_size = use_pcre16 ? 
1 : 2; 3098: #else 3099: int imm2_size = IMM2_SIZE; 3100: #endif 3101: int length = (int)STRLEN(nametable + imm2_size); 3102: fprintf(outfile, " "); 3103: PCHARSV(nametable, imm2_size, length, outfile); 3104: while (length++ < nameentrysize - imm2_size) putc(' ', outfile); 3105: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 3106: fprintf(outfile, "%3d\n", use_pcre16? 3107: (int)(((PCRE_SPTR16)nametable)[0]) 3108: :((int)nametable[0] << 8) | (int)nametable[1]); 3109: nametable += nameentrysize * (use_pcre16 ? 2 : 1); 3110: #else 3111: fprintf(outfile, "%3d\n", GET2(nametable, 0)); 3112: #ifdef SUPPORT_PCRE8 3113: nametable += nameentrysize; 3114: #else 3115: nametable += nameentrysize * 2; 3116: #endif 3117: #endif 3118: } 3119: } 3120: 3121: if (!okpartial) fprintf(outfile, "Partial matching not supported\n"); 3122: if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); 3123: 3124: all_options = ((REAL_PCRE *)re)->options; 3125: if (do_flip) all_options = swap_uint32(all_options); 3126: 3127: if (get_options == 0) fprintf(outfile, "No options\n"); 3128: else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", 3129: ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", 3130: ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", 3131: ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", 3132: ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", 3133: ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "", 3134: ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", 3135: ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "", 3136: ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "", 3137: ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 3138: ((get_options & PCRE_EXTRA) != 0)? " extra" : "", 3139: ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", 3140: ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", 3141: ((get_options & PCRE_UTF8) != 0)? 
" utf" : "", 3142: ((get_options & PCRE_UCP) != 0)? " ucp" : "", 3143: ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "", 3144: ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", 3145: ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : ""); 3146: 3147: if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); 3148: 3149: switch (get_options & PCRE_NEWLINE_BITS) 3150: { 3151: case PCRE_NEWLINE_CR: 3152: fprintf(outfile, "Forced newline sequence: CR\n"); 3153: break; 3154: 3155: case PCRE_NEWLINE_LF: 3156: fprintf(outfile, "Forced newline sequence: LF\n"); 3157: break; 3158: 3159: case PCRE_NEWLINE_CRLF: 3160: fprintf(outfile, "Forced newline sequence: CRLF\n"); 3161: break; 3162: 3163: case PCRE_NEWLINE_ANYCRLF: 3164: fprintf(outfile, "Forced newline sequence: ANYCRLF\n"); 3165: break; 3166: 3167: case PCRE_NEWLINE_ANY: 3168: fprintf(outfile, "Forced newline sequence: ANY\n"); 3169: break; 3170: 3171: default: 3172: break; 3173: } 3174: 3175: if (first_char == -1) 3176: { 3177: fprintf(outfile, "First char at start or follows newline\n"); 3178: } 3179: else if (first_char < 0) 3180: { 3181: fprintf(outfile, "No first char\n"); 3182: } 3183: else 3184: { 3185: const char *caseless = 3186: ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)? 3187: "" : " (caseless)"; 3188: 3189: if (PRINTOK(first_char)) 3190: fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless); 3191: else 3192: { 3193: fprintf(outfile, "First char = "); 3194: pchar(first_char, outfile); 3195: fprintf(outfile, "%s\n", caseless); 3196: } 3197: } 3198: 3199: if (need_char < 0) 3200: { 3201: fprintf(outfile, "No need char\n"); 3202: } 3203: else 3204: { 3205: const char *caseless = 3206: ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)? 
3207: "" : " (caseless)"; 3208: 3209: if (PRINTOK(need_char)) 3210: fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless); 3211: else 3212: { 3213: fprintf(outfile, "Need char = "); 3214: pchar(need_char, outfile); 3215: fprintf(outfile, "%s\n", caseless); 3216: } 3217: } 3218: 3219: /* Don't output study size; at present it is in any case a fixed 3220: value, but it varies, depending on the computer architecture, and 3221: so messes up the test suite. (And with the /F option, it might be 3222: flipped.) If study was forced by an external -s, don't show this 3223: information unless -i or -d was also present. This means that, except 3224: when auto-callouts are involved, the output from runs with and without 3225: -s should be identical. */ 3226: 3227: if (do_study || (force_study >= 0 && showinfo && !no_force_study)) 3228: { 3229: if (extra == NULL) 3230: fprintf(outfile, "Study returned NULL\n"); 3231: else 3232: { 3233: pcre_uint8 *start_bits = NULL; 3234: int minlength; 3235: 3236: if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0) 3237: fprintf(outfile, "Subject length lower bound = %d\n", minlength); 3238: 3239: if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0) 3240: { 3241: if (start_bits == NULL) 3242: fprintf(outfile, "No set of starting bytes\n"); 3243: else 3244: { 3245: int i; 3246: int c = 24; 3247: fprintf(outfile, "Starting byte set: "); 3248: for (i = 0; i < 256; i++) 3249: { 3250: if ((start_bits[i/8] & (1<<(i&7))) != 0) 3251: { 3252: if (c > 75) 3253: { 3254: fprintf(outfile, "\n "); 3255: c = 2; 3256: } 3257: if (PRINTOK(i) && i != ' ') 3258: { 3259: fprintf(outfile, "%c ", i); 3260: c += 2; 3261: } 3262: else 3263: { 3264: fprintf(outfile, "\\x%02x ", i); 3265: c += 5; 3266: } 3267: } 3268: } 3269: fprintf(outfile, "\n"); 3270: } 3271: } 3272: } 3273: 3274: /* Show this only if the JIT was set by /S, not by -s. 
*/ 3275: 3276: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0) 3277: { 3278: int jit; 3279: if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0) 3280: { 3281: if (jit) 3282: fprintf(outfile, "JIT study was successful\n"); 3283: else 3284: #ifdef SUPPORT_JIT 3285: fprintf(outfile, "JIT study was not successful\n"); 3286: #else 3287: fprintf(outfile, "JIT support is not available in this version of PCRE\n"); 3288: #endif 3289: } 3290: } 3291: } 3292: } 3293: 3294: /* If the '>' option was present, we write out the regex to a file, and 3295: that is all. The first 8 bytes of the file are the regex length and then 3296: the study length, in big-endian order. */ 3297: 3298: if (to_file != NULL) 3299: { 3300: FILE *f = fopen((char *)to_file, "wb"); 3301: if (f == NULL) 3302: { 3303: fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno)); 3304: } 3305: else 3306: { 3307: pcre_uint8 sbuf[8]; 3308: 3309: if (do_flip) regexflip(re, extra); 3310: sbuf[0] = (pcre_uint8)((true_size >> 24) & 255); 3311: sbuf[1] = (pcre_uint8)((true_size >> 16) & 255); 3312: sbuf[2] = (pcre_uint8)((true_size >> 8) & 255); 3313: sbuf[3] = (pcre_uint8)((true_size) & 255); 3314: sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255); 3315: sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255); 3316: sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255); 3317: sbuf[7] = (pcre_uint8)((true_study_size) & 255); 3318: 3319: if (fwrite(sbuf, 1, 8, f) < 8 || 3320: fwrite(re, 1, true_size, f) < true_size) 3321: { 3322: fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno)); 3323: } 3324: else 3325: { 3326: fprintf(outfile, "Compiled pattern written to %s\n", to_file); 3327: 3328: /* If there is study data, write it. 
*/ 3329: 3330: if (extra != NULL) 3331: { 3332: if (fwrite(extra->study_data, 1, true_study_size, f) < 3333: true_study_size) 3334: { 3335: fprintf(outfile, "Write error on %s: %s\n", to_file, 3336: strerror(errno)); 3337: } 3338: else fprintf(outfile, "Study data written to %s\n", to_file); 3339: } 3340: } 3341: fclose(f); 3342: } 3343: 3344: new_free(re); 3345: if (extra != NULL) 3346: { 3347: PCRE_FREE_STUDY(extra); 3348: } 3349: if (locale_set) 3350: { 3351: new_free((void *)tables); 3352: setlocale(LC_CTYPE, "C"); 3353: locale_set = 0; 3354: } 3355: continue; /* With next regex */ 3356: } 3357: } /* End of non-POSIX compile */ 3358: 3359: /* Read data lines and test them */ 3360: 3361: for (;;) 3362: { 3363: pcre_uint8 *q; 3364: pcre_uint8 *bptr; 3365: int *use_offsets = offsets; 3366: int use_size_offsets = size_offsets; 3367: int callout_data = 0; 3368: int callout_data_set = 0; 3369: int count, c; 3370: int copystrings = 0; 3371: int find_match_limit = default_find_match_limit; 3372: int getstrings = 0; 3373: int getlist = 0; 3374: int gmatched = 0; 3375: int start_offset = 0; 3376: int start_offset_sign = 1; 3377: int g_notempty = 0; 3378: int use_dfa = 0; 3379: 3380: *copynames = 0; 3381: *getnames = 0; 3382: 3383: #ifdef SUPPORT_PCRE16 3384: cn16ptr = copynames; 3385: gn16ptr = getnames; 3386: #endif 3387: #ifdef SUPPORT_PCRE8 3388: cn8ptr = copynames8; 3389: gn8ptr = getnames8; 3390: #endif 3391: 3392: SET_PCRE_CALLOUT(callout); 3393: first_callout = 1; 3394: last_callout_mark = NULL; 3395: callout_extra = 0; 3396: callout_count = 0; 3397: callout_fail_count = 999999; 3398: callout_fail_id = -1; 3399: show_malloc = 0; 3400: options = 0; 3401: 3402: if (extra != NULL) extra->flags &= 3403: ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION); 3404: 3405: len = 0; 3406: for (;;) 3407: { 3408: if (extend_inputline(infile, buffer + len, "data> ") == NULL) 3409: { 3410: if (len > 0) /* Reached EOF without hitting a newline */ 3411: { 3412: 
fprintf(outfile, "\n"); 3413: break; 3414: } 3415: done = 1; 3416: goto CONTINUE; 3417: } 3418: if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 3419: len = (int)strlen((char *)buffer); 3420: if (buffer[len-1] == '\n') break; 3421: } 3422: 3423: while (len > 0 && isspace(buffer[len-1])) len--; 3424: buffer[len] = 0; 3425: if (len == 0) break; 3426: 3427: p = buffer; 3428: while (isspace(*p)) p++; 3429: 3430: bptr = q = dbuffer; 3431: while ((c = *p++) != 0) 3432: { 3433: int i = 0; 3434: int n = 0; 3435: 3436: /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes. 3437: In non-UTF mode, allow the value of the byte to fall through to later, 3438: where values greater than 127 are turned into UTF-8 when running in 3439: 16-bit mode. */ 3440: 3441: if (c != '\\') 3442: { 3443: if (use_utf) 3444: { 3445: *q++ = c; 3446: continue; 3447: } 3448: } 3449: 3450: /* Handle backslash escapes */ 3451: 3452: else switch ((c = *p++)) 3453: { 3454: case 'a': c = 7; break; 3455: case 'b': c = '\b'; break; 3456: case 'e': c = 27; break; 3457: case 'f': c = '\f'; break; 3458: case 'n': c = '\n'; break; 3459: case 'r': c = '\r'; break; 3460: case 't': c = '\t'; break; 3461: case 'v': c = '\v'; break; 3462: 3463: case '0': case '1': case '2': case '3': 3464: case '4': case '5': case '6': case '7': 3465: c -= '0'; 3466: while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 3467: c = c * 8 + *p++ - '0'; 3468: break; 3469: 3470: case 'x': 3471: if (*p == '{') 3472: { 3473: pcre_uint8 *pt = p; 3474: c = 0; 3475: 3476: /* We used to have "while (isxdigit(*(++pt)))" here, but it fails 3477: when isxdigit() is a macro that refers to its argument more than 3478: once. This is banned by the C Standard, but apparently happens in at 3479: least one MacOS environment. 
*/ 3480: 3481: for (pt++; isxdigit(*pt); pt++) 3482: { 3483: if (++i == 9) 3484: fprintf(outfile, "** Too many hex digits in \\x{...} item; " 3485: "using only the first eight.\n"); 3486: else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10); 3487: } 3488: if (*pt == '}') 3489: { 3490: p = pt + 1; 3491: break; 3492: } 3493: /* Not correct form for \x{...}; fall through */ 3494: } 3495: 3496: /* \x without {} always defines just one byte in 8-bit mode. This 3497: allows UTF-8 characters to be constructed byte by byte, and also allows 3498: invalid UTF-8 sequences to be made. Just copy the byte in UTF mode. 3499: Otherwise, pass it down to later code so that it can be turned into 3500: UTF-8 when running in 16-bit mode. */ 3501: 3502: c = 0; 3503: while (i++ < 2 && isxdigit(*p)) 3504: { 3505: c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); 3506: p++; 3507: } 3508: if (use_utf) 3509: { 3510: *q++ = c; 3511: continue; 3512: } 3513: break; 3514: 3515: case 0: /* \ followed by EOF allows for an empty line */ 3516: p--; 3517: continue; 3518: 3519: case '>': 3520: if (*p == '-') 3521: { 3522: start_offset_sign = -1; 3523: p++; 3524: } 3525: while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0'; 3526: start_offset *= start_offset_sign; 3527: continue; 3528: 3529: case 'A': /* Option setting */ 3530: options |= PCRE_ANCHORED; 3531: continue; 3532: 3533: case 'B': 3534: options |= PCRE_NOTBOL; 3535: continue; 3536: 3537: case 'C': 3538: if (isdigit(*p)) /* Set copy string */ 3539: { 3540: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3541: copystrings |= 1 << n; 3542: } 3543: else if (isalnum(*p)) 3544: { 3545: READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re); 3546: } 3547: else if (*p == '+') 3548: { 3549: callout_extra = 1; 3550: p++; 3551: } 3552: else if (*p == '-') 3553: { 3554: SET_PCRE_CALLOUT(NULL); 3555: p++; 3556: } 3557: else if (*p == '!') 3558: { 3559: callout_fail_id = 0; 3560: p++; 3561: while(isdigit(*p)) 3562: callout_fail_id = 
callout_fail_id * 10 + *p++ - '0'; 3563: callout_fail_count = 0; 3564: if (*p == '!') 3565: { 3566: p++; 3567: while(isdigit(*p)) 3568: callout_fail_count = callout_fail_count * 10 + *p++ - '0'; 3569: } 3570: } 3571: else if (*p == '*') 3572: { 3573: int sign = 1; 3574: callout_data = 0; 3575: if (*(++p) == '-') { sign = -1; p++; } 3576: while(isdigit(*p)) 3577: callout_data = callout_data * 10 + *p++ - '0'; 3578: callout_data *= sign; 3579: callout_data_set = 1; 3580: } 3581: continue; 3582: 3583: #if !defined NODFA 3584: case 'D': 3585: #if !defined NOPOSIX 3586: if (posix || do_posix) 3587: printf("** Can't use dfa matching in POSIX mode: \\D ignored\n"); 3588: else 3589: #endif 3590: use_dfa = 1; 3591: continue; 3592: #endif 3593: 3594: #if !defined NODFA 3595: case 'F': 3596: options |= PCRE_DFA_SHORTEST; 3597: continue; 3598: #endif 3599: 3600: case 'G': 3601: if (isdigit(*p)) 3602: { 3603: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3604: getstrings |= 1 << n; 3605: } 3606: else if (isalnum(*p)) 3607: { 3608: READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re); 3609: } 3610: continue; 3611: 3612: case 'J': 3613: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3614: if (extra != NULL 3615: && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 3616: && extra->executable_jit != NULL) 3617: { 3618: if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); } 3619: jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024); 3620: PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); 3621: } 3622: continue; 3623: 3624: case 'L': 3625: getlist = 1; 3626: continue; 3627: 3628: case 'M': 3629: find_match_limit = 1; 3630: continue; 3631: 3632: case 'N': 3633: if ((options & PCRE_NOTEMPTY) != 0) 3634: options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART; 3635: else 3636: options |= PCRE_NOTEMPTY; 3637: continue; 3638: 3639: case 'O': 3640: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3641: if (n > size_offsets_max) 3642: { 3643: size_offsets_max = n; 3644: free(offsets); 3645: use_offsets = 
offsets = (int *)malloc(size_offsets_max * sizeof(int)); 3646: if (offsets == NULL) 3647: { 3648: printf("** Failed to get %d bytes of memory for offsets vector\n", 3649: (int)(size_offsets_max * sizeof(int))); 3650: yield = 1; 3651: goto EXIT; 3652: } 3653: } 3654: use_size_offsets = n; 3655: if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */ 3656: continue; 3657: 3658: case 'P': 3659: options |= ((options & PCRE_PARTIAL_SOFT) == 0)? 3660: PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD; 3661: continue; 3662: 3663: case 'Q': 3664: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3665: if (extra == NULL) 3666: { 3667: extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3668: extra->flags = 0; 3669: } 3670: extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 3671: extra->match_limit_recursion = n; 3672: continue; 3673: 3674: case 'q': 3675: while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3676: if (extra == NULL) 3677: { 3678: extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3679: extra->flags = 0; 3680: } 3681: extra->flags |= PCRE_EXTRA_MATCH_LIMIT; 3682: extra->match_limit = n; 3683: continue; 3684: 3685: #if !defined NODFA 3686: case 'R': 3687: options |= PCRE_DFA_RESTART; 3688: continue; 3689: #endif 3690: 3691: case 'S': 3692: show_malloc = 1; 3693: continue; 3694: 3695: case 'Y': 3696: options |= PCRE_NO_START_OPTIMIZE; 3697: continue; 3698: 3699: case 'Z': 3700: options |= PCRE_NOTEOL; 3701: continue; 3702: 3703: case '?': 3704: options |= PCRE_NO_UTF8_CHECK; 3705: continue; 3706: 3707: case '<': 3708: { 3709: int x = check_newline(p, outfile); 3710: if (x == 0) goto NEXT_DATA; 3711: options |= x; 3712: while (*p++ != '>'); 3713: } 3714: continue; 3715: } 3716: 3717: /* We now have a character value in c that may be greater than 255. In 3718: 16-bit mode, we always convert characters to UTF-8 so that values greater 3719: than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we 3720: convert to UTF-8 if we are in UTF mode. 
Values greater than 127 in UTF 3721: mode must have come from \x{...} or octal constructs because values from 3722: \x.. get this far only in non-UTF mode. */ 3723: 3724: #if !defined NOUTF || defined SUPPORT_PCRE16 3725: if (use_pcre16 || use_utf) 3726: { 3727: pcre_uint8 buff8[8]; 3728: int ii, utn; 3729: utn = ord2utf8(c, buff8); 3730: for (ii = 0; ii < utn; ii++) *q++ = buff8[ii]; 3731: } 3732: else 3733: #endif 3734: { 3735: if (c > 255) 3736: { 3737: fprintf(outfile, "** Character \\x{%x} is greater than 255 " 3738: "and UTF-8 mode is not enabled.\n", c); 3739: fprintf(outfile, "** Truncation will probably give the wrong " 3740: "result.\n"); 3741: } 3742: *q++ = c; 3743: } 3744: } 3745: 3746: /* Reached end of subject string */ 3747: 3748: *q = 0; 3749: len = (int)(q - dbuffer); 3750: 3751: /* Move the data to the end of the buffer so that a read over the end of 3752: the buffer will be seen by valgrind, even if it doesn't cause a crash. If 3753: we are using the POSIX interface, we must include the terminating zero. */ 3754: 3755: #if !defined NOPOSIX 3756: if (posix || do_posix) 3757: { 3758: memmove(bptr + buffer_size - len - 1, bptr, len + 1); 3759: bptr += buffer_size - len - 1; 3760: } 3761: else 3762: #endif 3763: { 3764: memmove(bptr + buffer_size - len, bptr, len); 3765: bptr += buffer_size - len; 3766: } 3767: 3768: if ((all_use_dfa || use_dfa) && find_match_limit) 3769: { 3770: printf("**Match limit not relevant for DFA matching: ignored\n"); 3771: find_match_limit = 0; 3772: } 3773: 3774: /* Handle matching via the POSIX interface, which does not 3775: support timing or playing with the match limit or callout data. 
*/ 3776: 3777: #if !defined NOPOSIX 3778: if (posix || do_posix) 3779: { 3780: int rc; 3781: int eflags = 0; 3782: regmatch_t *pmatch = NULL; 3783: if (use_size_offsets > 0) 3784: pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); 3785: if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; 3786: if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; 3787: if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; 3788: 3789: rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); 3790: 3791: if (rc != 0) 3792: { 3793: (void)regerror(rc, &preg, (char *)buffer, buffer_size); 3794: fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); 3795: } 3796: else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE) 3797: != 0) 3798: { 3799: fprintf(outfile, "Matched with REG_NOSUB\n"); 3800: } 3801: else 3802: { 3803: size_t i; 3804: for (i = 0; i < (size_t)use_size_offsets; i++) 3805: { 3806: if (pmatch[i].rm_so >= 0) 3807: { 3808: fprintf(outfile, "%2d: ", (int)i); 3809: PCHARSV(dbuffer, pmatch[i].rm_so, 3810: pmatch[i].rm_eo - pmatch[i].rm_so, outfile); 3811: fprintf(outfile, "\n"); 3812: if (do_showcaprest || (i == 0 && do_showrest)) 3813: { 3814: fprintf(outfile, "%2d+ ", (int)i); 3815: PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo, 3816: outfile); 3817: fprintf(outfile, "\n"); 3818: } 3819: } 3820: } 3821: } 3822: free(pmatch); 3823: goto NEXT_DATA; 3824: } 3825: 3826: #endif /* !defined NOPOSIX */ 3827: 3828: /* Handle matching via the native interface - repeats for /g and /G */ 3829: 3830: #ifdef SUPPORT_PCRE16 3831: if (use_pcre16) 3832: { 3833: len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len); 3834: switch(len) 3835: { 3836: case -1: 3837: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 3838: "converted to UTF-16\n"); 3839: goto NEXT_DATA; 3840: 3841: case -2: 3842: fprintf(outfile, "**Failed: character value greater than 0x10ffff " 3843: "cannot be converted 
to UTF-16\n"); 3844: goto NEXT_DATA; 3845: 3846: case -3: 3847: fprintf(outfile, "**Failed: character value greater than 0xffff " 3848: "cannot be converted to 16-bit in non-UTF mode\n"); 3849: goto NEXT_DATA; 3850: 3851: default: 3852: break; 3853: } 3854: bptr = (pcre_uint8 *)buffer16; 3855: } 3856: #endif 3857: 3858: for (;; gmatched++) /* Loop for /g or /G */ 3859: { 3860: markptr = NULL; 3861: 3862: if (timeitm > 0) 3863: { 3864: register int i; 3865: clock_t time_taken; 3866: clock_t start_time = clock(); 3867: 3868: #if !defined NODFA 3869: if (all_use_dfa || use_dfa) 3870: { 3871: int workspace[1000]; 3872: for (i = 0; i < timeitm; i++) 3873: { 3874: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 3875: (options | g_notempty), use_offsets, use_size_offsets, workspace, 3876: (sizeof(workspace)/sizeof(int))); 3877: } 3878: } 3879: else 3880: #endif 3881: 3882: for (i = 0; i < timeitm; i++) 3883: { 3884: PCRE_EXEC(count, re, extra, bptr, len, start_offset, 3885: (options | g_notempty), use_offsets, use_size_offsets); 3886: } 3887: time_taken = clock() - start_time; 3888: fprintf(outfile, "Execute time %.4f milliseconds\n", 3889: (((double)time_taken * 1000.0) / (double)timeitm) / 3890: (double)CLOCKS_PER_SEC); 3891: } 3892: 3893: /* If find_match_limit is set, we want to do repeated matches with 3894: varying limits in order to find the minimum value for the match limit and 3895: for the recursion limit. The match limits are relevant only to the normal 3896: running of pcre_exec(), so disable the JIT optimization. This makes it 3897: possible to run the same set of tests with and without JIT externally 3898: requested. 
*/ 3899: 3900: if (find_match_limit) 3901: { 3902: if (extra == NULL) 3903: { 3904: extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3905: extra->flags = 0; 3906: } 3907: else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT; 3908: 3909: (void)check_match_limit(re, extra, bptr, len, start_offset, 3910: options|g_notempty, use_offsets, use_size_offsets, 3911: PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), 3912: PCRE_ERROR_MATCHLIMIT, "match()"); 3913: 3914: count = check_match_limit(re, extra, bptr, len, start_offset, 3915: options|g_notempty, use_offsets, use_size_offsets, 3916: PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion), 3917: PCRE_ERROR_RECURSIONLIMIT, "match() recursion"); 3918: } 3919: 3920: /* If callout_data is set, use the interface with additional data */ 3921: 3922: else if (callout_data_set) 3923: { 3924: if (extra == NULL) 3925: { 3926: extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3927: extra->flags = 0; 3928: } 3929: extra->flags |= PCRE_EXTRA_CALLOUT_DATA; 3930: extra->callout_data = &callout_data; 3931: PCRE_EXEC(count, re, extra, bptr, len, start_offset, 3932: options | g_notempty, use_offsets, use_size_offsets); 3933: extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; 3934: } 3935: 3936: /* The normal case is just to do the match once, with the default 3937: value of match_limit. 
*/ 3938: 3939: #if !defined NODFA 3940: else if (all_use_dfa || use_dfa) 3941: { 3942: int workspace[1000]; 3943: PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 3944: (options | g_notempty), use_offsets, use_size_offsets, workspace, 3945: (sizeof(workspace)/sizeof(int))); 3946: if (count == 0) 3947: { 3948: fprintf(outfile, "Matched, but too many subsidiary matches\n"); 3949: count = use_size_offsets/2; 3950: } 3951: } 3952: #endif 3953: 3954: else 3955: { 3956: PCRE_EXEC(count, re, extra, bptr, len, start_offset, 3957: options | g_notempty, use_offsets, use_size_offsets); 3958: if (count == 0) 3959: { 3960: fprintf(outfile, "Matched, but too many substrings\n"); 3961: count = use_size_offsets/3; 3962: } 3963: } 3964: 3965: /* Matched */ 3966: 3967: if (count >= 0) 3968: { 3969: int i, maxcount; 3970: void *cnptr, *gnptr; 3971: 3972: #if !defined NODFA 3973: if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else 3974: #endif 3975: maxcount = use_size_offsets/3; 3976: 3977: /* This is a check against a lunatic return value. */ 3978: 3979: if (count > maxcount) 3980: { 3981: fprintf(outfile, 3982: "** PCRE error: returned count %d is too big for offset size %d\n", 3983: count, use_size_offsets); 3984: count = use_size_offsets/3; 3985: if (do_g || do_G) 3986: { 3987: fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G'); 3988: do_g = do_G = FALSE; /* Break g/G loop */ 3989: } 3990: } 3991: 3992: /* do_allcaps requests showing of all captures in the pattern, to check 3993: unset ones at the end. 
*/ 3994: 3995: if (do_allcaps) 3996: { 3997: if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0) 3998: goto SKIP_DATA; 3999: count++; /* Allow for full match */ 4000: if (count * 2 > use_size_offsets) count = use_size_offsets/2; 4001: } 4002: 4003: /* Output the captured substrings */ 4004: 4005: for (i = 0; i < count * 2; i += 2) 4006: { 4007: if (use_offsets[i] < 0) 4008: { 4009: if (use_offsets[i] != -1) 4010: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 4011: use_offsets[i], i); 4012: if (use_offsets[i+1] != -1) 4013: fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 4014: use_offsets[i+1], i+1); 4015: fprintf(outfile, "%2d: <unset>\n", i/2); 4016: } 4017: else 4018: { 4019: fprintf(outfile, "%2d: ", i/2); 4020: PCHARSV(bptr, use_offsets[i], 4021: use_offsets[i+1] - use_offsets[i], outfile); 4022: fprintf(outfile, "\n"); 4023: if (do_showcaprest || (i == 0 && do_showrest)) 4024: { 4025: fprintf(outfile, "%2d+ ", i/2); 4026: PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1], 4027: outfile); 4028: fprintf(outfile, "\n"); 4029: } 4030: } 4031: } 4032: 4033: if (markptr != NULL) 4034: { 4035: fprintf(outfile, "MK: "); 4036: PCHARSV(markptr, 0, -1, outfile); 4037: fprintf(outfile, "\n"); 4038: } 4039: 4040: for (i = 0; i < 32; i++) 4041: { 4042: if ((copystrings & (1 << i)) != 0) 4043: { 4044: int rc; 4045: char copybuffer[256]; 4046: PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i, 4047: copybuffer, sizeof(copybuffer)); 4048: if (rc < 0) 4049: fprintf(outfile, "copy substring %d failed %d\n", i, rc); 4050: else 4051: { 4052: fprintf(outfile, "%2dC ", i); 4053: PCHARSV(copybuffer, 0, rc, outfile); 4054: fprintf(outfile, " (%d)\n", rc); 4055: } 4056: } 4057: } 4058: 4059: cnptr = copynames; 4060: for (;;) 4061: { 4062: int rc; 4063: char copybuffer[256]; 4064: 4065: if (use_pcre16) 4066: { 4067: if (*(pcre_uint16 *)cnptr == 0) break; 4068: } 4069: else 4070: { 4071: if (*(pcre_uint8 *)cnptr == 0) break; 4072: } 
4073: 4074: PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 4075: cnptr, copybuffer, sizeof(copybuffer)); 4076: 4077: if (rc < 0) 4078: { 4079: fprintf(outfile, "copy substring "); 4080: PCHARSV(cnptr, 0, -1, outfile); 4081: fprintf(outfile, " failed %d\n", rc); 4082: } 4083: else 4084: { 4085: fprintf(outfile, " C "); 4086: PCHARSV(copybuffer, 0, rc, outfile); 4087: fprintf(outfile, " (%d) ", rc); 4088: PCHARSV(cnptr, 0, -1, outfile); 4089: putc('\n', outfile); 4090: } 4091: 4092: cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE; 4093: } 4094: 4095: for (i = 0; i < 32; i++) 4096: { 4097: if ((getstrings & (1 << i)) != 0) 4098: { 4099: int rc; 4100: const char *substring; 4101: PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring); 4102: if (rc < 0) 4103: fprintf(outfile, "get substring %d failed %d\n", i, rc); 4104: else 4105: { 4106: fprintf(outfile, "%2dG ", i); 4107: PCHARSV(substring, 0, rc, outfile); 4108: fprintf(outfile, " (%d)\n", rc); 4109: PCRE_FREE_SUBSTRING(substring); 4110: } 4111: } 4112: } 4113: 4114: gnptr = getnames; 4115: for (;;) 4116: { 4117: int rc; 4118: const char *substring; 4119: 4120: if (use_pcre16) 4121: { 4122: if (*(pcre_uint16 *)gnptr == 0) break; 4123: } 4124: else 4125: { 4126: if (*(pcre_uint8 *)gnptr == 0) break; 4127: } 4128: 4129: PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 4130: gnptr, &substring); 4131: if (rc < 0) 4132: { 4133: fprintf(outfile, "get substring "); 4134: PCHARSV(gnptr, 0, -1, outfile); 4135: fprintf(outfile, " failed %d\n", rc); 4136: } 4137: else 4138: { 4139: fprintf(outfile, " G "); 4140: PCHARSV(substring, 0, rc, outfile); 4141: fprintf(outfile, " (%d) ", rc); 4142: PCHARSV(gnptr, 0, -1, outfile); 4143: PCRE_FREE_SUBSTRING(substring); 4144: putc('\n', outfile); 4145: } 4146: 4147: gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE; 4148: } 4149: 4150: if (getlist) 4151: { 4152: int rc; 4153: const char **stringlist; 4154: PCRE_GET_SUBSTRING_LIST(rc, bptr, 
use_offsets, count, &stringlist); 4155: if (rc < 0) 4156: fprintf(outfile, "get substring list failed %d\n", rc); 4157: else 4158: { 4159: for (i = 0; i < count; i++) 4160: { 4161: fprintf(outfile, "%2dL ", i); 4162: PCHARSV(stringlist[i], 0, -1, outfile); 4163: putc('\n', outfile); 4164: } 4165: if (stringlist[i] != NULL) 4166: fprintf(outfile, "string list not terminated by NULL\n"); 4167: PCRE_FREE_SUBSTRING_LIST(stringlist); 4168: } 4169: } 4170: } 4171: 4172: /* There was a partial match */ 4173: 4174: else if (count == PCRE_ERROR_PARTIAL) 4175: { 4176: if (markptr == NULL) fprintf(outfile, "Partial match"); 4177: else 4178: { 4179: fprintf(outfile, "Partial match, mark="); 4180: PCHARSV(markptr, 0, -1, outfile); 4181: } 4182: if (use_size_offsets > 1) 4183: { 4184: fprintf(outfile, ": "); 4185: PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0], 4186: outfile); 4187: } 4188: fprintf(outfile, "\n"); 4189: break; /* Out of the /g loop */ 4190: } 4191: 4192: /* Failed to match. If this is a /g or /G loop and we previously set 4193: g_notempty after a null match, this is not necessarily the end. We want 4194: to advance the start offset, and continue. We won't be at the end of the 4195: string - that was checked before setting g_notempty. 4196: 4197: Complication arises in the case when the newline convention is "any", 4198: "crlf", or "anycrlf". If the previous match was at the end of a line 4199: terminated by CRLF, an advance of one character just passes the \r, 4200: whereas we should prefer the longer newline sequence, as does the code in 4201: pcre_exec(). Fudge the offset value to achieve this. We check for a 4202: newline setting in the pattern; if none was set, use PCRE_CONFIG() to 4203: find the default. 4204: 4205: Otherwise, in the case of UTF-8 matching, the advance must be one 4206: character, not one byte. 
*/ 4207: 4208: else 4209: { 4210: if (g_notempty != 0) 4211: { 4212: int onechar = 1; 4213: unsigned int obits = ((REAL_PCRE *)re)->options; 4214: use_offsets[0] = start_offset; 4215: if ((obits & PCRE_NEWLINE_BITS) == 0) 4216: { 4217: int d; 4218: (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d); 4219: /* Note that these values are always the ASCII ones, even in 4220: EBCDIC environments. CR = 13, NL = 10. */ 4221: obits = (d == 13)? PCRE_NEWLINE_CR : 4222: (d == 10)? PCRE_NEWLINE_LF : 4223: (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF : 4224: (d == -2)? PCRE_NEWLINE_ANYCRLF : 4225: (d == -1)? PCRE_NEWLINE_ANY : 0; 4226: } 4227: if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY || 4228: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF || 4229: (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF) 4230: && 4231: start_offset < len - 1 && 4232: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 4233: (use_pcre16? 4234: ((PCRE_SPTR16)bptr)[start_offset] == '\r' 4235: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n' 4236: : 4237: bptr[start_offset] == '\r' 4238: && bptr[start_offset + 1] == '\n') 4239: #elif defined SUPPORT_PCRE16 4240: ((PCRE_SPTR16)bptr)[start_offset] == '\r' 4241: && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n' 4242: #else 4243: bptr[start_offset] == '\r' 4244: && bptr[start_offset + 1] == '\n' 4245: #endif 4246: ) 4247: onechar++; 4248: else if (use_utf) 4249: { 4250: while (start_offset + onechar < len) 4251: { 4252: if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break; 4253: onechar++; 4254: } 4255: } 4256: use_offsets[1] = start_offset + onechar; 4257: } 4258: else 4259: { 4260: switch(count) 4261: { 4262: case PCRE_ERROR_NOMATCH: 4263: if (gmatched == 0) 4264: { 4265: if (markptr == NULL) 4266: { 4267: fprintf(outfile, "No match\n"); 4268: } 4269: else 4270: { 4271: fprintf(outfile, "No match, mark = "); 4272: PCHARSV(markptr, 0, -1, outfile); 4273: putc('\n', outfile); 4274: } 4275: } 4276: break; 4277: 4278: case PCRE_ERROR_BADUTF8: 4279: case 
PCRE_ERROR_SHORTUTF8: 4280: fprintf(outfile, "Error %d (%s UTF-%s string)", count, 4281: (count == PCRE_ERROR_BADUTF8)? "bad" : "short", 4282: use_pcre16? "16" : "8"); 4283: if (use_size_offsets >= 2) 4284: fprintf(outfile, " offset=%d reason=%d", use_offsets[0], 4285: use_offsets[1]); 4286: fprintf(outfile, "\n"); 4287: break; 4288: 4289: case PCRE_ERROR_BADUTF8_OFFSET: 4290: fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count, 4291: use_pcre16? "16" : "8"); 4292: break; 4293: 4294: default: 4295: if (count < 0 && 4296: (-count) < (int)(sizeof(errtexts)/sizeof(const char *))) 4297: fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]); 4298: else 4299: fprintf(outfile, "Error %d (Unexpected value)\n", count); 4300: break; 4301: } 4302: 4303: break; /* Out of the /g loop */ 4304: } 4305: } 4306: 4307: /* If not /g or /G we are done */ 4308: 4309: if (!do_g && !do_G) break; 4310: 4311: /* If we have matched an empty string, first check to see if we are at 4312: the end of the subject. If so, the /g loop is over. Otherwise, mimic what 4313: Perl's /g options does. This turns out to be rather cunning. First we set 4314: PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the 4315: same point. If this fails (picked up above) we advance to the next 4316: character. 
*/ 4317: 4318: g_notempty = 0; 4319: 4320: if (use_offsets[0] == use_offsets[1]) 4321: { 4322: if (use_offsets[0] == len) break; 4323: g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; 4324: } 4325: 4326: /* For /g, update the start offset, leaving the rest alone */ 4327: 4328: if (do_g) start_offset = use_offsets[1]; 4329: 4330: /* For /G, update the pointer and length */ 4331: 4332: else 4333: { 4334: bptr += use_offsets[1] * CHAR_SIZE; 4335: len -= use_offsets[1]; 4336: } 4337: } /* End of loop for /g and /G */ 4338: 4339: NEXT_DATA: continue; 4340: } /* End of loop for data lines */ 4341: 4342: CONTINUE: 4343: 4344: #if !defined NOPOSIX 4345: if (posix || do_posix) regfree(&preg); 4346: #endif 4347: 4348: if (re != NULL) new_free(re); 4349: if (extra != NULL) 4350: { 4351: PCRE_FREE_STUDY(extra); 4352: } 4353: if (locale_set) 4354: { 4355: new_free((void *)tables); 4356: setlocale(LC_CTYPE, "C"); 4357: locale_set = 0; 4358: } 4359: if (jit_stack != NULL) 4360: { 4361: PCRE_JIT_STACK_FREE(jit_stack); 4362: jit_stack = NULL; 4363: } 4364: } 4365: 4366: if (infile == stdin) fprintf(outfile, "\n"); 4367: 4368: EXIT: 4369: 4370: if (infile != NULL && infile != stdin) fclose(infile); 4371: if (outfile != NULL && outfile != stdout) fclose(outfile); 4372: 4373: free(buffer); 4374: free(dbuffer); 4375: free(pbuffer); 4376: free(offsets); 4377: 4378: #ifdef SUPPORT_PCRE16 4379: if (buffer16 != NULL) free(buffer16); 4380: #endif 4381: 4382: return yield; 4383: } 4384: 4385: /* End of pcretest.c */