embedaddon/pcre/pcretest.c - annotate

Return to pcretest.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre
Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.3

1.1       misho       1: /*************************************************
                      2: *             PCRE testing program               *
                      3: *************************************************/
                      4: 
                      5: /* This program was hacked up as a tester for PCRE. I really should have
                      6: written it more tidily in the first place. Will I ever learn? It has grown and
1.1.1.2   misho       7: been extended and consequently is now rather, er, *very* untidy in places. The
                      8: addition of 16-bit support has made it even worse. :-(
1.1       misho       9: 
                     10: -----------------------------------------------------------------------------
                     11: Redistribution and use in source and binary forms, with or without
                     12: modification, are permitted provided that the following conditions are met:
                     13: 
                     14:     * Redistributions of source code must retain the above copyright notice,
                     15:       this list of conditions and the following disclaimer.
                     16: 
                     17:     * Redistributions in binary form must reproduce the above copyright
                     18:       notice, this list of conditions and the following disclaimer in the
                     19:       documentation and/or other materials provided with the distribution.
                     20: 
                     21:     * Neither the name of the University of Cambridge nor the names of its
                     22:       contributors may be used to endorse or promote products derived from
                     23:       this software without specific prior written permission.
                     24: 
                     25: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     26: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     27: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     28: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     29: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     30: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     31: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     32: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     33: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     34: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     35: POSSIBILITY OF SUCH DAMAGE.
                     36: -----------------------------------------------------------------------------
                     37: */
                     38: 
1.1.1.2   misho      39: /* This program now supports the testing of both the 8-bit and 16-bit PCRE
                     40: libraries in a single program. This is different from the modules such as
                     41: pcre_compile.c in the library itself, which are compiled separately for each
                     42: mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
                     43: (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
                     44: compiled only once. Therefore, it must not make use of any of the macros from
                     45: pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
                     46: however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
                     47: only supported library functions. */
                     48: 
1.1       misho      49: #ifdef HAVE_CONFIG_H
                     50: #include "config.h"
                     51: #endif
                     52: 
                     53: #include <ctype.h>
                     54: #include <stdio.h>
                     55: #include <string.h>
                     56: #include <stdlib.h>
                     57: #include <time.h>
                     58: #include <locale.h>
                     59: #include <errno.h>
                     60: 
1.1.1.3 ! misho      61: /* Both libreadline and libedit are optionally supported. The user-supplied
        !            62: original patch uses readline/readline.h for libedit, but in at least one system
        !            63: it is installed as editline/readline.h, so the configuration code now looks for
        !            64: that first, falling back to readline/readline.h. */
        !            65: 
        !            66: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1       misho      67: #ifdef HAVE_UNISTD_H
                     68: #include <unistd.h>
                     69: #endif
1.1.1.3 ! misho      70: #if defined(SUPPORT_LIBREADLINE)
1.1       misho      71: #include <readline/readline.h>
                     72: #include <readline/history.h>
1.1.1.3 ! misho      73: #else
        !            74: #if defined(HAVE_EDITLINE_READLINE_H)
        !            75: #include <editline/readline.h>
        !            76: #else
        !            77: #include <readline/readline.h>
        !            78: #endif
        !            79: #endif
1.1       misho      80: #endif
                     81: 
                     82: /* A number of things vary for Windows builds. Originally, pcretest opened its
                     83: input and output without "b"; then I was told that "b" was needed in some
                     84: environments, so it was added for release 5.0 to both the input and output. (It
                     85: makes no difference on Unix-like systems.) Later I was told that it is wrong
                     86: for the input on Windows. I've now abstracted the modes into two macros that
                     87: are set here, to make it easier to fiddle with them, and removed "b" from the
                     88: input mode under Windows. */
                     89: 
                     90: #if defined(_WIN32) || defined(WIN32)
                     91: #include <io.h>                /* For _setmode() */
                     92: #include <fcntl.h>             /* For _O_BINARY */
                     93: #define INPUT_MODE   "r"
                     94: #define OUTPUT_MODE  "wb"
                     95: 
                     96: #ifndef isatty
                     97: #define isatty _isatty         /* This is what Windows calls them, I'm told, */
                     98: #endif                         /* though in some environments they seem to   */
                     99:                                /* be already defined, hence the #ifndefs.    */
                    100: #ifndef fileno
                    101: #define fileno _fileno
                    102: #endif
                    103: 
                    104: /* A user sent this fix for Borland Builder 5 under Windows. */
                    105: 
                    106: #ifdef __BORLANDC__
                    107: #define _setmode(handle, mode) setmode(handle, mode)
                    108: #endif
                    109: 
                    110: /* Not Windows */
                    111: 
                    112: #else
                    113: #include <sys/time.h>          /* These two includes are needed */
                    114: #include <sys/resource.h>      /* for setrlimit(). */
                    115: #define INPUT_MODE   "rb"
                    116: #define OUTPUT_MODE  "wb"
                    117: #endif
                    118: 
1.1.1.2   misho     119: #define PRIV(name) name
1.1       misho     120: 
                    121: /* We have to include pcre_internal.h because we need the internal info for
                    122: displaying the results of pcre_study() and we also need to know about the
                    123: internal macros, structures, and other internal data values; pcretest has
                    124: "inside information" compared to a program that strictly follows the PCRE API.
                    125: 
                    126: Although pcre_internal.h does itself include pcre.h, we explicitly include it
                    127: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
                    128: appropriately for an application, not for building PCRE. */
                    129: 
                    130: #include "pcre.h"
1.1.1.2   misho     131: 
                    132: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
                    133: /* Configure internal macros to 16 bit mode. */
                    134: #define COMPILE_PCRE16
                    135: #endif
                    136: 
1.1       misho     137: #include "pcre_internal.h"
                    138: 
1.1.1.2   misho     139: /* The pcre_printint() function, which prints the internal form of a compiled
                    140: regex, is held in a separate file so that (a) it can be compiled in either
                    141: 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
                    142: when that is compiled in debug mode. */
                    143: 
                    144: #ifdef SUPPORT_PCRE8
                    145: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
                    146: #endif
                    147: #ifdef SUPPORT_PCRE16
                    148: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
                    149: #endif
                    150: 
1.1       misho     151: /* We need access to some of the data tables that PCRE uses. So as not to have
                    152: to keep two copies, we include the source file here, changing the names of the
                    153: external symbols to prevent clashes. */
                    154: 
1.1.1.2   misho     155: #define PCRE_INCLUDED
1.1       misho     156: 
                    157: #include "pcre_tables.c"
                    158: 
                    159: /* The definition of the macro PRINTABLE, which determines whether to print an
                    160: output character as-is or as a hex value when showing compiled patterns, is
1.1.1.2   misho     161: the same as in the printint.src file. We use it here in cases when the locale
                    162: has not been explicitly changed, so as to get consistent output from systems
                    163: that differ in their output from isprint() even in the "C" locale. */
1.1       misho     164: 
/* PRINTABLE(c) is a locale-independent printability test: it is non-zero when
c lies in the fixed range treated as printable for the build's character set
(64..254 when EBCDIC is defined, 32..126 otherwise). The argument is evaluated
twice, so it must not have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) decides whether a character may be output as-is. When the
locale_set flag (defined elsewhere in this file) is non-zero it defers to
isprint(); otherwise it uses PRINTABLE(). isprint() is only defined for values
representable as unsigned char (or EOF), so any value outside 0..255 is
reported as not printable instead of being handed to isprint(). The argument
may be evaluated more than once. */

#define PRINTOK(c) \
  (locale_set? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
                    172: 
                    173: /* Posix support is disabled in 16 bit only mode. */
                    174: #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
                    175: #define NOPOSIX
                    176: #endif
1.1       misho     177: 
                    178: /* It is possible to compile this test program without including support for
                    179: testing the POSIX interface, though this is not available via the standard
                    180: Makefile. */
                    181: 
                    182: #if !defined NOPOSIX
                    183: #include "pcreposix.h"
                    184: #endif
                    185: 
1.1.1.2   misho     186: /* It is also possible, originally for the benefit of a version that was
                    187: imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
                    188: NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
                    189: automatically cut out the UTF support if PCRE is built without it. */
                    190: 
                    191: #ifndef SUPPORT_UTF
                    192: #ifndef NOUTF
                    193: #define NOUTF
1.1       misho     194: #endif
                    195: #endif
                    196: 
1.1.1.2   misho     197: /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
                    198: for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
                    199: only from one place and is handled differently). I couldn't dream up any way of
                    200: using a single macro to do this in a generic way, because of the many different
                    201: argument requirements. We know that at least one of SUPPORT_PCRE8 and
                    202: SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
                    203: use these in the definitions of generic macros.
                    204: 
                    205: **** Special note about the PCHARSxxx macros: the address of the string to be
                    206: printed is always given as two arguments: a base address followed by an offset.
                    207: The base address is cast to the correct data size for 8 or 16 bit data; the
                    208: offset is in units of this size. If the string were given as base+offset in one
                    209: argument, the casting might be incorrectly applied. */
                    210: 
                    211: #ifdef SUPPORT_PCRE8
                    212: 
                    213: #define PCHARS8(lv, p, offset, len, f) \
                    214:   lv = pchars((pcre_uint8 *)(p) + offset, len, f)
                    215: 
                    216: #define PCHARSV8(p, offset, len, f) \
                    217:   (void)pchars((pcre_uint8 *)(p) + offset, len, f)
                    218: 
                    219: #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
                    220:   p = read_capture_name8(p, cn8, re)
                    221: 
                    222: #define STRLEN8(p) ((int)strlen((char *)p))
                    223: 
                    224: #define SET_PCRE_CALLOUT8(callout) \
                    225:   pcre_callout = callout
                    226: 
                    227: #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
                    228:    pcre_assign_jit_stack(extra, callback, userdata)
                    229: 
                    230: #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
                    231:   re = pcre_compile((char *)pat, options, error, erroffset, tables)
                    232: 
                    233: #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                    234:     namesptr, cbuffer, size) \
                    235:   rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
                    236:     (char *)namesptr, cbuffer, size)
                    237: 
                    238: #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
                    239:   rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
                    240: 
                    241: #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                    242:     offsets, size_offsets, workspace, size_workspace) \
                    243:   count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
                    244:     offsets, size_offsets, workspace, size_workspace)
                    245: 
                    246: #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                    247:     offsets, size_offsets) \
                    248:   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
                    249:     offsets, size_offsets)
                    250: 
                    251: #define PCRE_FREE_STUDY8(extra) \
                    252:   pcre_free_study(extra)
                    253: 
                    254: #define PCRE_FREE_SUBSTRING8(substring) \
                    255:   pcre_free_substring(substring)
                    256: 
                    257: #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
                    258:   pcre_free_substring_list(listptr)
                    259: 
                    260: #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                    261:     getnamesptr, subsptr) \
                    262:   rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
                    263:     (char *)getnamesptr, subsptr)
                    264: 
                    265: #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
                    266:   n = pcre_get_stringnumber(re, (char *)ptr)
                    267: 
                    268: #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
                    269:   rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
                    270: 
                    271: #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
                    272:   rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
                    273: 
                    274: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
                    275:   rc = pcre_pattern_to_host_byte_order(re, extra, tables)
                    276: 
                    277: #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
                    278:   pcre_printint(re, outfile, debug_lengths)
                    279: 
                    280: #define PCRE_STUDY8(extra, re, options, error) \
                    281:   extra = pcre_study(re, options, error)
                    282: 
                    283: #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
                    284:   pcre_jit_stack_alloc(startsize, maxsize)
                    285: 
                    286: #define PCRE_JIT_STACK_FREE8(stack) \
                    287:   pcre_jit_stack_free(stack)
                    288: 
                    289: #endif /* SUPPORT_PCRE8 */
                    290: 
                    291: /* -----------------------------------------------------------*/
                    292: 
                    293: #ifdef SUPPORT_PCRE16
                    294: 
                    295: #define PCHARS16(lv, p, offset, len, f) \
                    296:   lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
                    297: 
                    298: #define PCHARSV16(p, offset, len, f) \
                    299:   (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
                    300: 
                    301: #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
                    302:   p = read_capture_name16(p, cn16, re)
                    303: 
                    304: #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
                    305: 
                    306: #define SET_PCRE_CALLOUT16(callout) \
                    307:   pcre16_callout = (int (*)(pcre16_callout_block *))callout
                    308: 
                    309: #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
                    310:   pcre16_assign_jit_stack((pcre16_extra *)extra, \
                    311:     (pcre16_jit_callback)callback, userdata)
                    312: 
                    313: #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
                    314:   re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
                    315:     tables)
                    316: 
                    317: #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
                    318:     namesptr, cbuffer, size) \
                    319:   rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
                    320:     count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
                    321: 
                    322: #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
                    323:   rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
                    324:     (PCRE_UCHAR16 *)cbuffer, size/2)
                    325: 
                    326: #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
                    327:     offsets, size_offsets, workspace, size_workspace) \
                    328:   count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
                    329:     (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
                    330:     workspace, size_workspace)
                    331: 
                    332: #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
                    333:     offsets, size_offsets) \
                    334:   count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
                    335:     len, start_offset, options, offsets, size_offsets)
                    336: 
                    337: #define PCRE_FREE_STUDY16(extra) \
                    338:   pcre16_free_study((pcre16_extra *)extra)
                    339: 
                    340: #define PCRE_FREE_SUBSTRING16(substring) \
                    341:   pcre16_free_substring((PCRE_SPTR16)substring)
                    342: 
                    343: #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
                    344:   pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
                    345: 
                    346: #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
                    347:     getnamesptr, subsptr) \
                    348:   rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
                    349:     count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
                    350: 
                    351: #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
                    352:   n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
                    353: 
                    354: #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
                    355:   rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
                    356:     (PCRE_SPTR16 *)(void*)subsptr)
                    357: 
                    358: #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
                    359:   rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
                    360:     (PCRE_SPTR16 **)(void*)listptr)
                    361: 
                    362: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
                    363:   rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
                    364:     tables)
                    365: 
                    366: #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
                    367:   pcre16_printint(re, outfile, debug_lengths)
                    368: 
                    369: #define PCRE_STUDY16(extra, re, options, error) \
                    370:   extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
                    371: 
                    372: #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
                    373:   (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
                    374: 
                    375: #define PCRE_JIT_STACK_FREE16(stack) \
                    376:   pcre16_jit_stack_free((pcre16_jit_stack *)stack)
                    377: 
                    378: #endif /* SUPPORT_PCRE16 */
                    379: 
                    380: 
                    381: /* ----- Both modes are supported; a runtime test is needed, except for
                    382: pcre_config(), and the JIT stack functions, when it doesn't matter which
                    383: version is called. ----- */
                    384: 
                    385: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
                    386: 
                    387: #define CHAR_SIZE (use_pcre16? 2:1)
                    388: 
                    389: #define PCHARS(lv, p, offset, len, f) \
                    390:   if (use_pcre16) \
                    391:     PCHARS16(lv, p, offset, len, f); \
                    392:   else \
                    393:     PCHARS8(lv, p, offset, len, f)
                    394: 
                    395: #define PCHARSV(p, offset, len, f) \
                    396:   if (use_pcre16) \
                    397:     PCHARSV16(p, offset, len, f); \
                    398:   else \
                    399:     PCHARSV8(p, offset, len, f)
                    400: 
                    401: #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
                    402:   if (use_pcre16) \
                    403:     READ_CAPTURE_NAME16(p, cn8, cn16, re); \
                    404:   else \
                    405:     READ_CAPTURE_NAME8(p, cn8, cn16, re)
                    406: 
                    407: #define SET_PCRE_CALLOUT(callout) \
                    408:   if (use_pcre16) \
                    409:     SET_PCRE_CALLOUT16(callout); \
                    410:   else \
                    411:     SET_PCRE_CALLOUT8(callout)
                    412: 
                    413: #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
                    414: 
                    415: #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
                    416:   if (use_pcre16) \
                    417:     PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
                    418:   else \
                    419:     PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
                    420: 
                    421: #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
                    422:   if (use_pcre16) \
                    423:     PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
                    424:   else \
                    425:     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
                    426: 
                    427: #define PCRE_CONFIG pcre_config
                    428: 
                    429: #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
                    430:     namesptr, cbuffer, size) \
                    431:   if (use_pcre16) \
                    432:     PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
                    433:       namesptr, cbuffer, size); \
                    434:   else \
                    435:     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                    436:       namesptr, cbuffer, size)
                    437: 
                    438: #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
                    439:   if (use_pcre16) \
                    440:     PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
                    441:   else \
                    442:     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
                    443: 
                    444: #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
                    445:     offsets, size_offsets, workspace, size_workspace) \
                    446:   if (use_pcre16) \
                    447:     PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
                    448:       offsets, size_offsets, workspace, size_workspace); \
                    449:   else \
                    450:     PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                    451:       offsets, size_offsets, workspace, size_workspace)
                    452: 
                    453: #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
                    454:     offsets, size_offsets) \
                    455:   if (use_pcre16) \
                    456:     PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
                    457:       offsets, size_offsets); \
                    458:   else \
                    459:     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
                    460:       offsets, size_offsets)
                    461: 
                    462: #define PCRE_FREE_STUDY(extra) \
                    463:   if (use_pcre16) \
                    464:     PCRE_FREE_STUDY16(extra); \
                    465:   else \
                    466:     PCRE_FREE_STUDY8(extra)
                    467: 
                    468: #define PCRE_FREE_SUBSTRING(substring) \
                    469:   if (use_pcre16) \
                    470:     PCRE_FREE_SUBSTRING16(substring); \
                    471:   else \
                    472:     PCRE_FREE_SUBSTRING8(substring)
                    473: 
                    474: #define PCRE_FREE_SUBSTRING_LIST(listptr) \
                    475:   if (use_pcre16) \
                    476:     PCRE_FREE_SUBSTRING_LIST16(listptr); \
                    477:   else \
                    478:     PCRE_FREE_SUBSTRING_LIST8(listptr)
                    479: 
                    480: #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
                    481:     getnamesptr, subsptr) \
                    482:   if (use_pcre16) \
                    483:     PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
                    484:       getnamesptr, subsptr); \
                    485:   else \
                    486:     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
                    487:       getnamesptr, subsptr)
                    488: 
                    489: #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
                    490:   if (use_pcre16) \
                    491:     PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
                    492:   else \
                    493:     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
                    494: 
                    495: #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
                    496:   if (use_pcre16) \
                    497:     PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
                    498:   else \
                    499:     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
                    500: 
                    501: #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
                    502:   if (use_pcre16) \
                    503:     PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
                    504:   else \
                    505:     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
                    506: 
                    507: #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
                    508:   (use_pcre16 ? \
                    509:      PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
                    510:     :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
                    511: 
                    512: #define PCRE_JIT_STACK_FREE(stack) \
                    513:   if (use_pcre16) \
                    514:     PCRE_JIT_STACK_FREE16(stack); \
                    515:   else \
                    516:     PCRE_JIT_STACK_FREE8(stack)
                    517: 
                         /* Expression that calls pcre16_maketables() when use_pcre16 is non-zero and
                         pcre_maketables() otherwise, yielding whatever the chosen function returns.
                         Note that this macro takes no argument list. */
                    518: #define PCRE_MAKETABLES \
                    519:   (use_pcre16? pcre16_maketables() : pcre_maketables())
                    520: 
                         /* Run-time dispatch for PCRE_PATTERN_TO_HOST_BYTE_ORDER: selects the 16-bit
                         variant when use_pcre16 is non-zero, else the 8-bit variant, with identical
                         arguments. Expands to a bare if/else statement; the caller supplies the final
                         semicolon. */
                    521: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
                    522:   if (use_pcre16) \
                    523:     PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
                    524:   else \
                    525:     PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
                    526: 
                         /* Run-time dispatch for PCRE_PRINTINT: selects the 16-bit variant when
                         use_pcre16 is non-zero, else the 8-bit variant, with identical arguments.
                         Expands to a bare if/else statement; the caller supplies the final
                         semicolon. */
                    527: #define PCRE_PRINTINT(re, outfile, debug_lengths) \
                    528:   if (use_pcre16) \
                    529:     PCRE_PRINTINT16(re, outfile, debug_lengths); \
                    530:   else \
                    531:     PCRE_PRINTINT8(re, outfile, debug_lengths)
                    532: 
                         /* Run-time dispatch for PCRE_STUDY: selects the 16-bit variant when
                         use_pcre16 is non-zero, else the 8-bit variant, with identical arguments.
                         Expands to a bare if/else statement; the caller supplies the final
                         semicolon. */
                    533: #define PCRE_STUDY(extra, re, options, error) \
                    534:   if (use_pcre16) \
                    535:     PCRE_STUDY16(extra, re, options, error); \
                    536:   else \
                    537:     PCRE_STUDY8(extra, re, options, error)
                    538: 
                    539: /* ----- Only 8-bit mode is supported ----- */
                    540: 
                    541: #elif defined SUPPORT_PCRE8
                         /* In this branch every generic name is a direct alias for its 8-bit
                         counterpart, so no test of use_pcre16 is compiled in. CHAR_SIZE is fixed at 1,
                         PCRE_CONFIG names pcre_config itself, and PCRE_MAKETABLES expands to a call of
                         pcre_maketables(). */
                    542: #define CHAR_SIZE                 1
                    543: #define PCHARS                    PCHARS8
                    544: #define PCHARSV                   PCHARSV8
                    545: #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
                    546: #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
                    547: #define STRLEN                    STRLEN8
                    548: #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
                    549: #define PCRE_COMPILE              PCRE_COMPILE8
                    550: #define PCRE_CONFIG               pcre_config
                    551: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
                    552: #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
                    553: #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
                    554: #define PCRE_EXEC                 PCRE_EXEC8
                    555: #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
                    556: #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
                    557: #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
                    558: #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
                    559: #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
                    560: #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
                    561: #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
                    562: #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
                    563: #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
                    564: #define PCRE_MAKETABLES           pcre_maketables()
                    565: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
                    566: #define PCRE_PRINTINT             PCRE_PRINTINT8
                    567: #define PCRE_STUDY                PCRE_STUDY8
                    568: 
                    569: /* ----- Only 16-bit mode is supported ----- */
                    570: 
                    571: #else
                         /* In this branch every generic name is a direct alias for its 16-bit
                         counterpart, so no test of use_pcre16 is compiled in. CHAR_SIZE is fixed at 2,
                         PCRE_CONFIG names pcre16_config itself, and PCRE_MAKETABLES expands to a call
                         of pcre16_maketables(). */
                    572: #define CHAR_SIZE                 2
                    573: #define PCHARS                    PCHARS16
                    574: #define PCHARSV                   PCHARSV16
                    575: #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
                    576: #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
                    577: #define STRLEN                    STRLEN16
                    578: #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
                    579: #define PCRE_COMPILE              PCRE_COMPILE16
                    580: #define PCRE_CONFIG               pcre16_config
                    581: #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
                    582: #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
                    583: #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
                    584: #define PCRE_EXEC                 PCRE_EXEC16
                    585: #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
                    586: #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
                    587: #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
                    588: #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
                    589: #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
                    590: #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
                    591: #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
                    592: #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
                    593: #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
                    594: #define PCRE_MAKETABLES           pcre16_maketables()
                    595: #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
                    596: #define PCRE_PRINTINT             PCRE_PRINTINT16
                    597: #define PCRE_STUDY                PCRE_STUDY16
                    598: #endif
                    599: 
                    600: /* ----- End of mode-specific function call macros ----- */
                    601: 
1.1       misho     602: 
                    603: /* Other parameters */
                    604: 
                         /* Fallback for C libraries that do not define CLOCKS_PER_SEC: use CLK_TCK
                         when that is defined, otherwise assume 100 ticks per second. */
                    605: #ifndef CLOCKS_PER_SEC
                    606: #ifdef CLK_TCK
                    607: #define CLOCKS_PER_SEC CLK_TCK
                    608: #else
                    609: #define CLOCKS_PER_SEC 100
                    610: #endif
                    611: #endif
                    612:
                         /* DFA_WS_DIMENSION is defined only when NODFA is not defined.
                         NOTE(review): presumably the number of elements in the workspace handed to
                         the DFA matcher -- confirm at the point of use. */
1.1.1.3 ! misho     613: #if !defined NODFA
        !           614: #define DFA_WS_DIMENSION 1000
        !           615: #endif
        !           616:
1.1       misho     617: /* This is the default loop count for timing. */
                    618:
                    619: #define LOOPREPEAT 500000
                    620: 
                    621: /* Static variables */
                    622: 
                         /* File-scope state shared by the functions in this file. Only log_store,
                         locale_set, first_gotten_store and last_callout_mark have explicit
                         initializers here; the others start at zero/NULL because they have static
                         storage duration. */
                    623: static FILE *outfile;
                    624: static int log_store = 0;
                    625: static int callout_count;
                    626: static int callout_extra;
                    627: static int callout_fail_count;
                    628: static int callout_fail_id;
                    629: static int debug_lengths;
                    630: static int first_callout;
1.1.1.3 ! misho     631: static int jit_was_used;   /* set to TRUE by jit_callback() */
1.1       misho     632: static int locale_set = 0;
                    633: static int show_malloc;
1.1.1.2   misho     634: static int use_utf;
1.1       misho     635: static size_t gotten_store;
                    636: static size_t first_gotten_store = 0;
                    637: static const unsigned char *last_callout_mark = NULL;
                    638:
                    639: /* The buffers grow automatically if very long input lines are encountered. */
                    640:
                         /* buffer_size is the initial size; all three pointers start out NULL, so the
                         storage itself is obtained elsewhere in the file. */
                    641: static int buffer_size = 50000;
1.1.1.2   misho     642: static pcre_uint8 *buffer = NULL;
                    643: static pcre_uint8 *dbuffer = NULL;
                    644: static pcre_uint8 *pbuffer = NULL;
                    645: 
                    646: /* Another buffer is needed for translation to 16-bit character strings. It
                    647: will be obtained and extended as required. */
                    648: 
                    649: #ifdef SUPPORT_PCRE16
                         /* The 16-bit buffer starts empty (size 0, NULL pointer). */
                    650: static int buffer16_size = 0;
                    651: static pcre_uint16 *buffer16 = NULL;
                    652:
                    653: #ifdef SUPPORT_PCRE8
                    654:
                    655: /* We need the table of operator lengths that is used for 16-bit compiling, in
                    656: order to swap bytes in a pattern for saving/reloading testing. Luckily, the
                    657: data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
                    658: appropriately for the 16-bit world. Just as a safety check, make sure that
                    659: COMPILE_PCRE16 is *not* set. */
                    660:
                    661: #ifdef COMPILE_PCRE16
                    662: #error COMPILE_PCRE16 must not be set when compiling pcretest.c
                    663: #endif
                    664:
                         /* Remap LINK_SIZE: 2 becomes 1, and 3 or 4 becomes 2; any other value stops
                         the build. NOTE(review): presumably this converts a size in bytes into a size
                         in 16-bit units -- confirm against the library's internal header. The new
                         value is not restored within this section. */
                    665: #if LINK_SIZE == 2
                    666: #undef LINK_SIZE
                    667: #define LINK_SIZE 1
                    668: #elif LINK_SIZE == 3 || LINK_SIZE == 4
                    669: #undef LINK_SIZE
                    670: #define LINK_SIZE 2
                    671: #else
                    672: #error LINK_SIZE must be either 2, 3, or 4
                    673: #endif
                    674:
                         /* IMM2_SIZE is likewise forced to 1 for the expansion of OP_LENGTHS below. */
                    675: #undef IMM2_SIZE
                    676: #define IMM2_SIZE 1
                    677:
                    678: #endif /* SUPPORT_PCRE8 */
                    679:
                         /* OP_LENGTHS is expanded here, i.e. after any LINK_SIZE/IMM2_SIZE
                         redefinition made above when both libraries are supported. */
                    680: static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
                    681: #endif  /* SUPPORT_PCRE16 */
                    682: 
                    683: /* If we have 8-bit support, default use_pcre16 to false; if there is also
                    684: 16-bit support, it can be changed by an option. If there is no 8-bit support,
                    685: there must be 16-bit support, so default it to 1. */
                    686: 
                         /* use_pcre16 is the flag tested at run time by the mode-dispatch macros when
                         both libraries are supported: non-zero selects the 16-bit functions. */
                    687: #ifdef SUPPORT_PCRE8
                    688: static int use_pcre16 = 0;
                    689: #else
                    690: static int use_pcre16 = 1;
                    691: #endif
1.1       misho     692: 
1.1.1.3 ! misho     693: /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
        !           694: 
                         /* Seven entries covering every non-empty combination of the three JIT study
                         options. Entry i (counting from 0) corresponds to the 3-bit number i+1, where
                         bit 0 selects PCRE_STUDY_JIT_COMPILE, bit 1 selects
                         PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2 selects
                         PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. NOTE(review): presumably indexed by
                         (n - '1') for the digit n mentioned above -- confirm at the use site. */
1.1.1.3 ! misho     695: static int jit_study_bits[] =
        !           696:   {
        !           697:   PCRE_STUDY_JIT_COMPILE,
        !           698:   PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
        !           699:   PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
        !           700:   PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
        !           701:   PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
        !           702:   PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
        !           703:   PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
        !           704:     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
        !           705: };
        !           706: 
1.1       misho     707: /* Textual explanations for runtime error codes */
                    708: 
                         /* One entry per error code, starting at 0 (no error) and 1 (NOMATCH).
                         NOTE(review): presumably indexed by the negated PCRE return code -- confirm
                         at the use site. NULL entries are the codes that the per-entry comments
                         describe as handled specially or never returned, so an entry has to be
                         checked for NULL before it is printed. */
                    709: static const char *errtexts[] = {
                    710:   NULL,  /* 0 is no error */
                    711:   NULL,  /* NOMATCH is handled specially */
                    712:   "NULL argument passed",
                    713:   "bad option value",
                    714:   "magic number missing",
                    715:   "unknown opcode - pattern overwritten?",
                    716:   "no more memory",
                    717:   NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
                    718:   "match limit exceeded",
                    719:   "callout error code",
1.1.1.2   misho     720:   NULL,  /* BADUTF8/16 is handled specially */
                    721:   NULL,  /* BADUTF8/16 offset is handled specially */
1.1       misho     722:   NULL,  /* PARTIAL is handled specially */
                    723:   "not used - internal error",
                    724:   "internal error - pattern overwritten?",
                    725:   "bad count value",
                    726:   "item unsupported for DFA matching",
                    727:   "backreference condition or recursion test not supported for DFA matching",
                    728:   "match limit not supported for DFA matching",
                    729:   "workspace size exceeded in DFA matching",
                    730:   "too much recursion for DFA matching",
                    731:   "recursion limit exceeded",
                    732:   "not used - internal error",
                    733:   "invalid combination of newline options",
                    734:   "bad offset value",
1.1.1.2   misho     735:   NULL,  /* SHORTUTF8/16 is handled specially */
1.1       misho     736:   "nested recursion at the same subject position",
1.1.1.2   misho     737:   "JIT stack limit reached",
1.1.1.3 ! misho     738:   "pattern compiled in wrong mode: 8-bit/16-bit error",
        !           739:   "pattern compiled with other endianness",
        !           740:   "invalid data in workspace for DFA restart"
1.1       misho     741: };
                    742: 
                    743: 
                    744: /*************************************************
                    745: *         Alternate character tables             *
                    746: *************************************************/
                    747: 
                    748: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
                    749: using the default tables of the library. However, the T option can be used to
                    750: select alternate sets of tables, for different kinds of testing. Note also that
                    751: the L (locale) option also adjusts the tables. */
                    752: 
                    753: /* This is the set of tables distributed as default with PCRE. It recognizes
                    754: only ASCII characters. */
                    755: 
                         /* Layout of tables0, 1088 bytes in all: a 256-byte lower-casing table, a
                         256-byte case-flipping table, ten 32-byte class bit maps (320 bytes), and a
                         256-byte table of character-type flag bits. */
1.1.1.2   misho     756: static const pcre_uint8 tables0[] = {
1.1       misho     757:
                    758: /* This table is a lower casing table. */
                    759:
                    760:     0,  1,  2,  3,  4,  5,  6,  7,
                    761:     8,  9, 10, 11, 12, 13, 14, 15,
                    762:    16, 17, 18, 19, 20, 21, 22, 23,
                    763:    24, 25, 26, 27, 28, 29, 30, 31,
                    764:    32, 33, 34, 35, 36, 37, 38, 39,
                    765:    40, 41, 42, 43, 44, 45, 46, 47,
                    766:    48, 49, 50, 51, 52, 53, 54, 55,
                    767:    56, 57, 58, 59, 60, 61, 62, 63,
                    768:    64, 97, 98, 99,100,101,102,103,
                    769:   104,105,106,107,108,109,110,111,
                    770:   112,113,114,115,116,117,118,119,
                    771:   120,121,122, 91, 92, 93, 94, 95,
                    772:    96, 97, 98, 99,100,101,102,103,
                    773:   104,105,106,107,108,109,110,111,
                    774:   112,113,114,115,116,117,118,119,
                    775:   120,121,122,123,124,125,126,127,
                    776:   128,129,130,131,132,133,134,135,
                    777:   136,137,138,139,140,141,142,143,
                    778:   144,145,146,147,148,149,150,151,
                    779:   152,153,154,155,156,157,158,159,
                    780:   160,161,162,163,164,165,166,167,
                    781:   168,169,170,171,172,173,174,175,
                    782:   176,177,178,179,180,181,182,183,
                    783:   184,185,186,187,188,189,190,191,
                    784:   192,193,194,195,196,197,198,199,
                    785:   200,201,202,203,204,205,206,207,
                    786:   208,209,210,211,212,213,214,215,
                    787:   216,217,218,219,220,221,222,223,
                    788:   224,225,226,227,228,229,230,231,
                    789:   232,233,234,235,236,237,238,239,
                    790:   240,241,242,243,244,245,246,247,
                    791:   248,249,250,251,252,253,254,255,
                    792:
                    793: /* This table is a case flipping table. */
                    794:
                    795:     0,  1,  2,  3,  4,  5,  6,  7,
                    796:     8,  9, 10, 11, 12, 13, 14, 15,
                    797:    16, 17, 18, 19, 20, 21, 22, 23,
                    798:    24, 25, 26, 27, 28, 29, 30, 31,
                    799:    32, 33, 34, 35, 36, 37, 38, 39,
                    800:    40, 41, 42, 43, 44, 45, 46, 47,
                    801:    48, 49, 50, 51, 52, 53, 54, 55,
                    802:    56, 57, 58, 59, 60, 61, 62, 63,
                    803:    64, 97, 98, 99,100,101,102,103,
                    804:   104,105,106,107,108,109,110,111,
                    805:   112,113,114,115,116,117,118,119,
                    806:   120,121,122, 91, 92, 93, 94, 95,
                    807:    96, 65, 66, 67, 68, 69, 70, 71,
                    808:    72, 73, 74, 75, 76, 77, 78, 79,
                    809:    80, 81, 82, 83, 84, 85, 86, 87,
                    810:    88, 89, 90,123,124,125,126,127,
                    811:   128,129,130,131,132,133,134,135,
                    812:   136,137,138,139,140,141,142,143,
                    813:   144,145,146,147,148,149,150,151,
                    814:   152,153,154,155,156,157,158,159,
                    815:   160,161,162,163,164,165,166,167,
                    816:   168,169,170,171,172,173,174,175,
                    817:   176,177,178,179,180,181,182,183,
                    818:   184,185,186,187,188,189,190,191,
                    819:   192,193,194,195,196,197,198,199,
                    820:   200,201,202,203,204,205,206,207,
                    821:   208,209,210,211,212,213,214,215,
                    822:   216,217,218,219,220,221,222,223,
                    823:   224,225,226,227,228,229,230,231,
                    824:   232,233,234,235,236,237,238,239,
                    825:   240,241,242,243,244,245,246,247,
                    826:   248,249,250,251,252,253,254,255,
                    827:
                    828: /* This table contains bit maps for various character classes. Each map is 32
                    829: bytes long and the bits run from the least significant end of each byte. The
                    830: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
                    831: graph, print, punct, and cntrl. Other classes are built from combinations. */
                    832:
                           /* space: characters 9-13 and 32 */
                    833:   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
                    834:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    835:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    836:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    837:
                           /* xdigit: 0-9, A-F, a-f */
                    838:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
                    839:   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
                    840:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    841:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    842:
                           /* digit: 0-9 */
                    843:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
                    844:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    845:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    846:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    847:
                           /* upper: A-Z */
                    848:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    849:   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
                    850:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    851:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    852:
                           /* lower: a-z */
                    853:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    854:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
                    855:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    856:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    857:
                           /* word: 0-9, A-Z, '_', a-z */
                    858:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
                    859:   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
                    860:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    861:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    862:
                           /* graph: characters 33-126 */
                    863:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
                    864:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
                    865:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    866:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    867:
                           /* print: characters 32-126 */
                    868:   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
                    869:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
                    870:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    871:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    872:
                           /* punct: 33-47, 58-64, 91-96, 123-126 */
                    873:   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
                    874:   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
                    875:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    876:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    877:
                           /* cntrl: characters 0-31 and 127 */
                    878:   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
                    879:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
                    880:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    881:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                    882:
                    883: /* This table identifies various classes of character by individual bits:
                    884:   0x01   white space character
                    885:   0x02   letter
                    886:   0x04   decimal digit
                    887:   0x08   hexadecimal digit
                    888:   0x10   alphanumeric or '_'
                    889:   0x80   regular expression metacharacter or binary zero
                    890: */
                    891:
                    892:   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
                    893:   0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
                    894:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
                    895:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
                    896:   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
                    897:   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
                    898:   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
                    899:   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
                    900:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
                    901:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
                    902:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
                    903:   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
                    904:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
                    905:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
                    906:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
                    907:   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
                    908:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
                    909:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
                    910:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
                    911:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
                    912:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
                    913:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
                    914:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
                    915:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
                    916:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
                    917:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
                    918:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
                    919:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
                    920:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
                    921:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
                    922:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
                    923:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
                    924: 
                    925: /* This is a set of tables that came originally from a Windows user. It seems to
                    926: be at least an approximation of ISO 8859. In particular, there are characters
                    927: greater than 128 that are marked as spaces, letters, etc. */
                    928: 
                         /* tables1 holds 1088 bytes, the same total as tables0, and the section
                         comments below assume the same four-part layout (256 + 256 + 320 + 256
                         bytes). The section boundaries are inferred from those sizes. */
1.1.1.2   misho     929: static const pcre_uint8 tables1[] = {
                         /* Bytes 0-255: lower-casing table. */
1.1       misho     930: 0,1,2,3,4,5,6,7,
                    931: 8,9,10,11,12,13,14,15,
                    932: 16,17,18,19,20,21,22,23,
                    933: 24,25,26,27,28,29,30,31,
                    934: 32,33,34,35,36,37,38,39,
                    935: 40,41,42,43,44,45,46,47,
                    936: 48,49,50,51,52,53,54,55,
                    937: 56,57,58,59,60,61,62,63,
                    938: 64,97,98,99,100,101,102,103,
                    939: 104,105,106,107,108,109,110,111,
                    940: 112,113,114,115,116,117,118,119,
                    941: 120,121,122,91,92,93,94,95,
                    942: 96,97,98,99,100,101,102,103,
                    943: 104,105,106,107,108,109,110,111,
                    944: 112,113,114,115,116,117,118,119,
                    945: 120,121,122,123,124,125,126,127,
                    946: 128,129,130,131,132,133,134,135,
                    947: 136,137,138,139,140,141,142,143,
                    948: 144,145,146,147,148,149,150,151,
                    949: 152,153,154,155,156,157,158,159,
                    950: 160,161,162,163,164,165,166,167,
                    951: 168,169,170,171,172,173,174,175,
                    952: 176,177,178,179,180,181,182,183,
                    953: 184,185,186,187,188,189,190,191,
                    954: 224,225,226,227,228,229,230,231,
                    955: 232,233,234,235,236,237,238,239,
                    956: 240,241,242,243,244,245,246,215,
                    957: 248,249,250,251,252,253,254,223,
                    958: 224,225,226,227,228,229,230,231,
                    959: 232,233,234,235,236,237,238,239,
                    960: 240,241,242,243,244,245,246,247,
                    961: 248,249,250,251,252,253,254,255,
                         /* Bytes 256-511: case-flipping table. */
                    962: 0,1,2,3,4,5,6,7,
                    963: 8,9,10,11,12,13,14,15,
                    964: 16,17,18,19,20,21,22,23,
                    965: 24,25,26,27,28,29,30,31,
                    966: 32,33,34,35,36,37,38,39,
                    967: 40,41,42,43,44,45,46,47,
                    968: 48,49,50,51,52,53,54,55,
                    969: 56,57,58,59,60,61,62,63,
                    970: 64,97,98,99,100,101,102,103,
                    971: 104,105,106,107,108,109,110,111,
                    972: 112,113,114,115,116,117,118,119,
                    973: 120,121,122,91,92,93,94,95,
                    974: 96,65,66,67,68,69,70,71,
                    975: 72,73,74,75,76,77,78,79,
                    976: 80,81,82,83,84,85,86,87,
                    977: 88,89,90,123,124,125,126,127,
                    978: 128,129,130,131,132,133,134,135,
                    979: 136,137,138,139,140,141,142,143,
                    980: 144,145,146,147,148,149,150,151,
                    981: 152,153,154,155,156,157,158,159,
                    982: 160,161,162,163,164,165,166,167,
                    983: 168,169,170,171,172,173,174,175,
                    984: 176,177,178,179,180,181,182,183,
                    985: 184,185,186,187,188,189,190,191,
                    986: 224,225,226,227,228,229,230,231,
                    987: 232,233,234,235,236,237,238,239,
                    988: 240,241,242,243,244,245,246,215,
                    989: 248,249,250,251,252,253,254,223,
                    990: 192,193,194,195,196,197,198,199,
                    991: 200,201,202,203,204,205,206,207,
                    992: 208,209,210,211,212,213,214,247,
                    993: 216,217,218,219,220,221,222,255,
                         /* Bytes 512-831: ten class bit maps of 32 bytes (four rows) each.
                         NOTE(review): presumably in the same class order as tables0 -- confirm. */
                    994: 0,62,0,0,1,0,0,0,
                    995: 0,0,0,0,0,0,0,0,
                    996: 32,0,0,0,1,0,0,0,
                    997: 0,0,0,0,0,0,0,0,
                    998: 0,0,0,0,0,0,255,3,
                    999: 126,0,0,0,126,0,0,0,
                   1000: 0,0,0,0,0,0,0,0,
                   1001: 0,0,0,0,0,0,0,0,
                   1002: 0,0,0,0,0,0,255,3,
                   1003: 0,0,0,0,0,0,0,0,
                   1004: 0,0,0,0,0,0,12,2,
                   1005: 0,0,0,0,0,0,0,0,
                   1006: 0,0,0,0,0,0,0,0,
                   1007: 254,255,255,7,0,0,0,0,
                   1008: 0,0,0,0,0,0,0,0,
                   1009: 255,255,127,127,0,0,0,0,
                   1010: 0,0,0,0,0,0,0,0,
                   1011: 0,0,0,0,254,255,255,7,
                   1012: 0,0,0,0,0,4,32,4,
                   1013: 0,0,0,128,255,255,127,255,
                   1014: 0,0,0,0,0,0,255,3,
                   1015: 254,255,255,135,254,255,255,7,
                   1016: 0,0,0,0,0,4,44,6,
                   1017: 255,255,127,255,255,255,127,255,
                   1018: 0,0,0,0,254,255,255,255,
                   1019: 255,255,255,255,255,255,255,127,
                   1020: 0,0,0,0,254,255,255,255,
                   1021: 255,255,255,255,255,255,255,255,
                   1022: 0,2,0,0,255,255,255,255,
                   1023: 255,255,255,255,255,255,255,127,
                   1024: 0,0,0,0,255,255,255,255,
                   1025: 255,255,255,255,255,255,255,255,
                   1026: 0,0,0,0,254,255,0,252,
                   1027: 1,0,0,248,1,0,0,120,
                   1028: 0,0,0,0,254,255,255,255,
                   1029: 0,0,128,0,0,0,128,0,
                   1030: 255,255,255,255,0,0,0,0,
                   1031: 0,0,0,0,0,0,0,128,
                   1032: 255,255,255,255,0,0,0,0,
                   1033: 0,0,0,0,0,0,0,0,
                         /* Bytes 832-1087: character-type flag bits, one byte per character. */
                   1034: 128,0,0,0,0,0,0,0,
                   1035: 0,1,1,0,1,1,0,0,
                   1036: 0,0,0,0,0,0,0,0,
                   1037: 0,0,0,0,0,0,0,0,
                   1038: 1,0,0,0,128,0,0,0,
                   1039: 128,128,128,128,0,0,128,0,
                   1040: 28,28,28,28,28,28,28,28,
                   1041: 28,28,0,0,0,0,0,128,
                   1042: 0,26,26,26,26,26,26,18,
                   1043: 18,18,18,18,18,18,18,18,
                   1044: 18,18,18,18,18,18,18,18,
                   1045: 18,18,18,128,128,0,128,16,
                   1046: 0,26,26,26,26,26,26,18,
                   1047: 18,18,18,18,18,18,18,18,
                   1048: 18,18,18,18,18,18,18,18,
                   1049: 18,18,18,128,128,0,0,0,
                   1050: 0,0,0,0,0,1,0,0,
                   1051: 0,0,0,0,0,0,0,0,
                   1052: 0,0,0,0,0,0,0,0,
                   1053: 0,0,0,0,0,0,0,0,
                   1054: 1,0,0,0,0,0,0,0,
                   1055: 0,0,18,0,0,0,0,0,
                   1056: 0,0,20,20,0,18,0,0,
                   1057: 0,20,18,0,0,0,0,0,
                   1058: 18,18,18,18,18,18,18,18,
                   1059: 18,18,18,18,18,18,18,18,
                   1060: 18,18,18,18,18,18,18,0,
                   1061: 18,18,18,18,18,18,18,18,
                   1062: 18,18,18,18,18,18,18,18,
                   1063: 18,18,18,18,18,18,18,18,
                   1064: 18,18,18,18,18,18,18,0,
                   1065: 18,18,18,18,18,18,18,18
                   1066: };
                   1067: 
                   1068: 
                   1069: 
                   1070: 
                   1071: #ifndef HAVE_STRERROR
                   1072: /*************************************************
                   1073: *     Provide strerror() for non-ANSI libraries  *
                   1074: *************************************************/
                   1075: 
                   1076: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
                   1077: in their libraries, but can provide the same facility by this simple
                   1078: alternative function. */
                   1079: 
                   1080: extern int   sys_nerr;
                   1081: extern char *sys_errlist[];
                   1082: 
                   1083: char *
                   1084: strerror(int n)
                   1085: {
                   1086: if (n < 0 || n >= sys_nerr) return "unknown error number";
                   1087: return sys_errlist[n];
                   1088: }
                   1089: #endif /* HAVE_STRERROR */
                   1090: 
                   1091: 
                   1092: /*************************************************
                   1093: *         JIT memory callback                    *
                   1094: *************************************************/
                   1095: 
                   1096: static pcre_jit_stack* jit_callback(void *arg)
                   1097: {
1.1.1.3 ! misho    1098: jit_was_used = TRUE;
1.1       misho    1099: return (pcre_jit_stack *)arg;
                   1100: }
                   1101: 
                   1102: 
1.1.1.2   misho    1103: #if !defined NOUTF || defined SUPPORT_PCRE16
                   1104: /*************************************************
                   1105: *            Convert UTF-8 string to value       *
                   1106: *************************************************/
                   1107: 
                   1108: /* This function takes one or more bytes that represents a UTF-8 character,
                   1109: and returns the value of the character.
                   1110: 
                   1111: Argument:
                   1112:   utf8bytes   a pointer to the byte vector
                   1113:   vptr        a pointer to an int to receive the value
                   1114: 
                   1115: Returns:      >  0 => the number of bytes consumed
                   1116:               -6 to 0 => malformed UTF-8 character at offset = (-return)
                   1117: */
                   1118: 
                   1119: static int
                   1120: utf82ord(pcre_uint8 *utf8bytes, int *vptr)
                   1121: {
                   1122: int c = *utf8bytes++;
                   1123: int d = c;
                   1124: int i, j, s;
                   1125: 
                   1126: for (i = -1; i < 6; i++)               /* i is number of additional bytes */
                   1127:   {
                   1128:   if ((d & 0x80) == 0) break;
                   1129:   d <<= 1;
                   1130:   }
                   1131: 
                   1132: if (i == -1) { *vptr = c; return 1; }  /* ascii character */
                   1133: if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
                   1134: 
                   1135: /* i now has a value in the range 1-5 */
                   1136: 
                   1137: s = 6*i;
                   1138: d = (c & utf8_table3[i]) << s;
                   1139: 
                   1140: for (j = 0; j < i; j++)
                   1141:   {
                   1142:   c = *utf8bytes++;
                   1143:   if ((c & 0xc0) != 0x80) return -(j+1);
                   1144:   s -= 6;
                   1145:   d |= (c & 0x3f) << s;
                   1146:   }
                   1147: 
                   1148: /* Check that encoding was the correct unique one */
                   1149: 
                   1150: for (j = 0; j < utf8_table1_size; j++)
                   1151:   if (d <= utf8_table1[j]) break;
                   1152: if (j != i) return -(i+1);
                   1153: 
                   1154: /* Valid value */
                   1155: 
                   1156: *vptr = d;
                   1157: return i+1;
                   1158: }
                   1159: #endif /* NOUTF || SUPPORT_PCRE16 */
                   1160: 
                   1161: 
                   1162: 
                   1163: #if !defined NOUTF || defined SUPPORT_PCRE16
                   1164: /*************************************************
                   1165: *       Convert character value to UTF-8         *
                   1166: *************************************************/
                   1167: 
                   1168: /* This function takes an integer value in the range 0 - 0x7fffffff
                   1169: and encodes it as a UTF-8 character in 0 to 6 bytes.
                   1170: 
                   1171: Arguments:
                   1172:   cvalue     the character value
                   1173:   utf8bytes  pointer to buffer for result - at least 6 bytes long
                   1174: 
                   1175: Returns:     number of characters placed in the buffer
                   1176: */
                   1177: 
                   1178: static int
                   1179: ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
                   1180: {
                   1181: register int i, j;
                   1182: for (i = 0; i < utf8_table1_size; i++)
                   1183:   if (cvalue <= utf8_table1[i]) break;
                   1184: utf8bytes += i;
                   1185: for (j = i; j > 0; j--)
                   1186:  {
                   1187:  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
                   1188:  cvalue >>= 6;
                   1189:  }
                   1190: *utf8bytes = utf8_table2[i] | cvalue;
                   1191: return i + 1;
                   1192: }
                   1193: #endif
                   1194: 
                   1195: 
                   1196: #ifdef SUPPORT_PCRE16
                   1197: /*************************************************
                   1198: *         Convert a string to 16-bit             *
                   1199: *************************************************/
                   1200: 
                   1201: /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
                   1202: 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
                   1203: double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
                   1204: in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
                   1205: result is always left in buffer16.
                   1206: 
                   1207: Note that this function does not object to surrogate values. This is
                   1208: deliberate; it makes it possible to construct UTF-16 strings that are invalid,
                   1209: for the purpose of testing that they are correctly faulted.
                   1210: 
                   1211: Patterns to be converted are either plain ASCII or UTF-8; data lines are always
                   1212: in UTF-8 so that values greater than 255 can be handled.
                   1213: 
                   1214: Arguments:
                   1215:   data       TRUE if converting a data line; FALSE for a regex
                   1216:   p          points to a byte string
                   1217:   utf        true if UTF-8 (to be converted to UTF-16)
                   1218:   len        number of bytes in the string (excluding trailing zero)
                   1219: 
                   1220: Returns:     number of 16-bit data items used (excluding trailing zero)
                   1221:              OR -1 if a UTF-8 string is malformed
                   1222:              OR -2 if a value > 0x10ffff is encountered
                   1223:              OR -3 if a value > 0xffff is encountered when not in UTF mode
                   1224: */
                   1225: 
                   1226: static int
                   1227: to16(int data, pcre_uint8 *p, int utf, int len)
                   1228: {
                   1229: pcre_uint16 *pp;
                   1230: 
                   1231: if (buffer16_size < 2*len + 2)
                   1232:   {
                   1233:   if (buffer16 != NULL) free(buffer16);
                   1234:   buffer16_size = 2*len + 2;
                   1235:   buffer16 = (pcre_uint16 *)malloc(buffer16_size);
                   1236:   if (buffer16 == NULL)
                   1237:     {
                   1238:     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
                   1239:     exit(1);
                   1240:     }
                   1241:   }
                   1242: 
                   1243: pp = buffer16;
                   1244: 
                   1245: if (!utf && !data)
                   1246:   {
                   1247:   while (len-- > 0) *pp++ = *p++;
                   1248:   }
                   1249: 
                   1250: else
                   1251:   {
                   1252:   int c = 0;
                   1253:   while (len > 0)
                   1254:     {
                   1255:     int chlen = utf82ord(p, &c);
                   1256:     if (chlen <= 0) return -1;
                   1257:     if (c > 0x10ffff) return -2;
                   1258:     p += chlen;
                   1259:     len -= chlen;
                   1260:     if (c < 0x10000) *pp++ = c; else
                   1261:       {
                   1262:       if (!utf) return -3;
                   1263:       c -= 0x10000;
                   1264:       *pp++ = 0xD800 | (c >> 10);
                   1265:       *pp++ = 0xDC00 | (c & 0x3ff);
                   1266:       }
                   1267:     }
                   1268:   }
                   1269: 
                   1270: *pp = 0;
                   1271: return pp - buffer16;
                   1272: }
                   1273: #endif
                   1274: 
                   1275: 
1.1       misho    1276: /*************************************************
                   1277: *        Read or extend an input line            *
                   1278: *************************************************/
                   1279: 
                   1280: /* Input lines are read into buffer, but both patterns and data lines can be
                   1281: continued over multiple input lines. In addition, if the buffer fills up, we
                   1282: want to automatically expand it so as to be able to handle extremely large
                   1283: lines that are needed for certain stress tests. When the input buffer is
                   1284: expanded, the other two buffers must also be expanded likewise, and the
                   1285: contents of pbuffer, which are a copy of the input for callouts, must be
                   1286: preserved (for when expansion happens for a data line). This is not the most
                   1287: optimal way of handling this, but hey, this is just a test program!
                   1288: 
                   1289: Arguments:
                   1290:   f            the file to read
                   1291:   start        where in buffer to start (this *must* be within buffer)
                   1292:   prompt       for stdin or readline()
                   1293: 
                   1294: Returns:       pointer to the start of new data
                   1295:                could be a copy of start, or could be moved
                   1296:                NULL if no data read and EOF reached
                   1297: */
                   1298: 
1.1.1.2   misho    1299: static pcre_uint8 *
                   1300: extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1.1       misho    1301: {
1.1.1.2   misho    1302: pcre_uint8 *here = start;
1.1       misho    1303: 
                   1304: for (;;)
                   1305:   {
1.1.1.2   misho    1306:   size_t rlen = (size_t)(buffer_size - (here - buffer));
1.1       misho    1307: 
                   1308:   if (rlen > 1000)
                   1309:     {
                   1310:     int dlen;
                   1311: 
1.1.1.3 ! misho    1312:     /* If libreadline or libedit support is required, use readline() to read a
        !          1313:     line if the input is a terminal. Note that readline() removes the trailing
        !          1314:     newline, so we must put it back again, to be compatible with fgets(). */
1.1       misho    1315: 
1.1.1.3 ! misho    1316: #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1.1       misho    1317:     if (isatty(fileno(f)))
                   1318:       {
                   1319:       size_t len;
                   1320:       char *s = readline(prompt);
                   1321:       if (s == NULL) return (here == start)? NULL : start;
                   1322:       len = strlen(s);
                   1323:       if (len > 0) add_history(s);
                   1324:       if (len > rlen - 1) len = rlen - 1;
                   1325:       memcpy(here, s, len);
                   1326:       here[len] = '\n';
                   1327:       here[len+1] = 0;
                   1328:       free(s);
                   1329:       }
                   1330:     else
                   1331: #endif
                   1332: 
                   1333:     /* Read the next line by normal means, prompting if the file is stdin. */
                   1334: 
                   1335:       {
                   1336:       if (f == stdin) printf("%s", prompt);
                   1337:       if (fgets((char *)here, rlen,  f) == NULL)
                   1338:         return (here == start)? NULL : start;
                   1339:       }
                   1340: 
                   1341:     dlen = (int)strlen((char *)here);
                   1342:     if (dlen > 0 && here[dlen - 1] == '\n') return start;
                   1343:     here += dlen;
                   1344:     }
                   1345: 
                   1346:   else
                   1347:     {
                   1348:     int new_buffer_size = 2*buffer_size;
1.1.1.2   misho    1349:     pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
                   1350:     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
                   1351:     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1.1       misho    1352: 
                   1353:     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
                   1354:       {
                   1355:       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
                   1356:       exit(1);
                   1357:       }
                   1358: 
                   1359:     memcpy(new_buffer, buffer, buffer_size);
                   1360:     memcpy(new_pbuffer, pbuffer, buffer_size);
                   1361: 
                   1362:     buffer_size = new_buffer_size;
                   1363: 
                   1364:     start = new_buffer + (start - buffer);
                   1365:     here = new_buffer + (here - buffer);
                   1366: 
                   1367:     free(buffer);
                   1368:     free(dbuffer);
                   1369:     free(pbuffer);
                   1370: 
                   1371:     buffer = new_buffer;
                   1372:     dbuffer = new_dbuffer;
                   1373:     pbuffer = new_pbuffer;
                   1374:     }
                   1375:   }
                   1376: 
                   1377: return NULL;  /* Control never gets here */
                   1378: }
                   1379: 
                   1380: 
                   1381: 
                   1382: /*************************************************
                   1383: *          Read number from string               *
                   1384: *************************************************/
                   1385: 
                   1386: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
                   1387: around with conditional compilation, just do the job by hand. It is only used
                   1388: for unpicking arguments, so just keep it simple.
                   1389: 
                   1390: Arguments:
                   1391:   str           string to be converted
                   1392:   endptr        where to put the end pointer
                   1393: 
                   1394: Returns:        the unsigned long
                   1395: */
                   1396: 
                   1397: static int
1.1.1.2   misho    1398: get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1.1       misho    1399: {
                   1400: int result = 0;
                   1401: while(*str != 0 && isspace(*str)) str++;
                   1402: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
                   1403: *endptr = str;
                   1404: return(result);
                   1405: }
                   1406: 
                   1407: 
                   1408: 
                   1409: /*************************************************
1.1.1.2   misho    1410: *             Print one character                *
1.1       misho    1411: *************************************************/
                   1412: 
1.1.1.2   misho    1413: /* Print a single character either literally, or as a hex escape. */
1.1       misho    1414: 
1.1.1.2   misho    1415: static int pchar(int c, FILE *f)
1.1       misho    1416: {
1.1.1.2   misho    1417: if (PRINTOK(c))
                   1418:   {
                   1419:   if (f != NULL) fprintf(f, "%c", c);
                   1420:   return 1;
                   1421:   }
1.1       misho    1422: 
1.1.1.2   misho    1423: if (c < 0x100)
1.1       misho    1424:   {
1.1.1.2   misho    1425:   if (use_utf)
                   1426:     {
                   1427:     if (f != NULL) fprintf(f, "\\x{%02x}", c);
                   1428:     return 6;
                   1429:     }
                   1430:   else
                   1431:     {
                   1432:     if (f != NULL) fprintf(f, "\\x%02x", c);
                   1433:     return 4;
                   1434:     }
1.1       misho    1435:   }
                   1436: 
1.1.1.2   misho    1437: if (f != NULL) fprintf(f, "\\x{%02x}", c);
                   1438: return (c <= 0x000000ff)? 6 :
                   1439:        (c <= 0x00000fff)? 7 :
                   1440:        (c <= 0x0000ffff)? 8 :
                   1441:        (c <= 0x000fffff)? 9 : 10;
                   1442: }
1.1       misho    1443: 
                   1444: 
                   1445: 
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Outputs a byte string one character at a time by means of pchar(). When
use_utf is set (and UTF support is compiled in), each valid UTF-8 sequence
that fits within the remaining length is printed as a single character; any
other byte is printed on its own.

Arguments:
  p           the string
  length      its length in bytes, or negative for a zero-terminated string
  f           the output file, or NULL just to count without printing

Returns:      the number of characters printed (or that would be printed)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int printed = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int ch = 0;
  int step = 0;               /* bytes consumed by this character */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= remaining) step = rc;   /* Mustn't run over the end */
    }
#endif

  if (step == 0)              /* not decoded as UTF-8: take a single byte */
    {
    ch = *p;
    step = 1;
    }

  p += step;
  remaining -= step;
  printed += pchar(ch, f);
  }

return printed;
}
#endif
                   1484: 
                   1485: 
                   1486: 
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Counts the 16-bit units that precede the terminating zero unit.

Argument:
  p           the string

Returns:      the number of units, excluding the terminator
*/

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
1.1       misho    1499: 
                   1500: 
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Outputs a string of 16-bit units one character at a time by means of
pchar(). When use_utf is set (and UTF support is compiled in), a high
surrogate (0xD800-0xDBFF) that is followed by a low surrogate (0xDC00-0xDFFF)
is combined with it into a single character; an unpaired surrogate is printed
as it stands.

Arguments:
  p           the string
  length      its length in 16-bit units, or negative if zero-terminated
  f           the output file, or NULL just to count without printing

Returns:      the number of characters printed (or that would be printed)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1.1       misho    1537: 
                   1538: 
1.1.1.2   misho    1539: 
                   1540: #ifdef SUPPORT_PCRE8
                   1541: /*************************************************
                   1542: *     Read a capture name (8-bit) and check it   *
                   1543: *************************************************/
                   1544: 
                   1545: static pcre_uint8 *
                   1546: read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
                   1547: {
                   1548: pcre_uint8 *npp = *pp;
                   1549: while (isalnum(*p)) *npp++ = *p++;
                   1550: *npp++ = 0;
                   1551: *npp = 0;
                   1552: if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
                   1553:   {
                   1554:   fprintf(outfile, "no parentheses with name \"");
                   1555:   PCHARSV(*pp, 0, -1, outfile);
                   1556:   fprintf(outfile, "\"\n");
1.1       misho    1557:   }
                   1558: 
1.1.1.2   misho    1559: *pp = npp;
                   1560: return p;
1.1       misho    1561: }
1.1.1.2   misho    1562: #endif  /* SUPPORT_PCRE8 */
                   1563: 
                   1564: 
                   1565: 
                   1566: #ifdef SUPPORT_PCRE16
                   1567: /*************************************************
                   1568: *     Read a capture name (16-bit) and check it  *
                   1569: *************************************************/
                   1570: 
                   1571: /* Note that the text being read is 8-bit. */
                   1572: 
                   1573: static pcre_uint8 *
                   1574: read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
                   1575: {
                   1576: pcre_uint16 *npp = *pp;
                   1577: while (isalnum(*p)) *npp++ = *p++;
                   1578: *npp++ = 0;
                   1579: *npp = 0;
                   1580: if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
                   1581:   {
                   1582:   fprintf(outfile, "no parentheses with name \"");
                   1583:   PCHARSV(*pp, 0, -1, outfile);
                   1584:   fprintf(outfile, "\"\n");
                   1585:   }
                   1586: *pp = npp;
                   1587: return p;
                   1588: }
                   1589: #endif  /* SUPPORT_PCRE16 */
1.1       misho    1590: 
                   1591: 
                   1592: 
                   1593: /*************************************************
                   1594: *              Callout function                  *
                   1595: *************************************************/
                   1596: 
                   1597: /* Called from PCRE as a result of the (?C) item. We print out where we are in
                   1598: the match. Yield zero unless more callouts than the fail count, or the callout
                   1599: data is not zero. */
                   1600: 
                   1601: static int callout(pcre_callout_block *cb)
                   1602: {
                   1603: FILE *f = (first_callout | callout_extra)? outfile : NULL;
                   1604: int i, pre_start, post_start, subject_length;
                   1605: 
                   1606: if (callout_extra)
                   1607:   {
                   1608:   fprintf(f, "Callout %d: last capture = %d\n",
                   1609:     cb->callout_number, cb->capture_last);
                   1610: 
                   1611:   for (i = 0; i < cb->capture_top * 2; i += 2)
                   1612:     {
                   1613:     if (cb->offset_vector[i] < 0)
                   1614:       fprintf(f, "%2d: <unset>\n", i/2);
                   1615:     else
                   1616:       {
                   1617:       fprintf(f, "%2d: ", i/2);
1.1.1.2   misho    1618:       PCHARSV(cb->subject, cb->offset_vector[i],
1.1       misho    1619:         cb->offset_vector[i+1] - cb->offset_vector[i], f);
                   1620:       fprintf(f, "\n");
                   1621:       }
                   1622:     }
                   1623:   }
                   1624: 
                   1625: /* Re-print the subject in canonical form, the first time or if giving full
                   1626: datails. On subsequent calls in the same match, we use pchars just to find the
                   1627: printed lengths of the substrings. */
                   1628: 
                   1629: if (f != NULL) fprintf(f, "--->");
                   1630: 
1.1.1.2   misho    1631: PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
                   1632: PCHARS(post_start, cb->subject, cb->start_match,
1.1       misho    1633:   cb->current_position - cb->start_match, f);
                   1634: 
1.1.1.2   misho    1635: PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1.1       misho    1636: 
1.1.1.2   misho    1637: PCHARSV(cb->subject, cb->current_position,
1.1       misho    1638:   cb->subject_length - cb->current_position, f);
                   1639: 
                   1640: if (f != NULL) fprintf(f, "\n");
                   1641: 
                   1642: /* Always print appropriate indicators, with callout number if not already
                   1643: shown. For automatic callouts, show the pattern offset. */
                   1644: 
                   1645: if (cb->callout_number == 255)
                   1646:   {
                   1647:   fprintf(outfile, "%+3d ", cb->pattern_position);
                   1648:   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
                   1649:   }
                   1650: else
                   1651:   {
                   1652:   if (callout_extra) fprintf(outfile, "    ");
                   1653:     else fprintf(outfile, "%3d ", cb->callout_number);
                   1654:   }
                   1655: 
                   1656: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
                   1657: fprintf(outfile, "^");
                   1658: 
                   1659: if (post_start > 0)
                   1660:   {
                   1661:   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
                   1662:   fprintf(outfile, "^");
                   1663:   }
                   1664: 
                   1665: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
                   1666:   fprintf(outfile, " ");
                   1667: 
                   1668: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
                   1669:   pbuffer + cb->pattern_position);
                   1670: 
                   1671: fprintf(outfile, "\n");
                   1672: first_callout = 0;
                   1673: 
                   1674: if (cb->mark != last_callout_mark)
                   1675:   {
1.1.1.2   misho    1676:   if (cb->mark == NULL)
                   1677:     fprintf(outfile, "Latest Mark: <unset>\n");
                   1678:   else
                   1679:     {
                   1680:     fprintf(outfile, "Latest Mark: ");
                   1681:     PCHARSV(cb->mark, 0, -1, outfile);
                   1682:     putc('\n', outfile);
                   1683:     }
1.1       misho    1684:   last_callout_mark = cb->mark;
                   1685:   }
                   1686: 
                   1687: if (cb->callout_data != NULL)
                   1688:   {
                   1689:   int callout_data = *((int *)(cb->callout_data));
                   1690:   if (callout_data != 0)
                   1691:     {
                   1692:     fprintf(outfile, "Callout data = %d\n", callout_data);
                   1693:     return callout_data;
                   1694:     }
                   1695:   }
                   1696: 
                   1697: return (cb->callout_number != callout_fail_id)? 0 :
                   1698:        (++callout_count >= callout_fail_count)? 1 : 0;
                   1699: }
                   1700: 
                   1701: 
                   1702: /*************************************************
                   1703: *            Local malloc functions              *
                   1704: *************************************************/
                   1705: 
                   1706: /* Alternative malloc function, to test functionality and save the size of a
                   1707: compiled re, which is the first store request that pcre_compile() makes. The
                   1708: show_malloc variable is set only during matching. */
                   1709: 
                   1710: static void *new_malloc(size_t size)
                   1711: {
                   1712: void *block = malloc(size);
                   1713: gotten_store = size;
                   1714: if (first_gotten_store == 0) first_gotten_store = size;
                   1715: if (show_malloc)
                   1716:   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
                   1717: return block;
                   1718: }
                   1719: 
                   1720: static void new_free(void *block)
                   1721: {
                   1722: if (show_malloc)
                   1723:   fprintf(outfile, "free             %p\n", block);
                   1724: free(block);
                   1725: }
                   1726: 
                   1727: /* For recursion malloc/free, to test stacking calls */
                   1728: 
                   1729: static void *stack_malloc(size_t size)
                   1730: {
                   1731: void *block = malloc(size);
                   1732: if (show_malloc)
                   1733:   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
                   1734: return block;
                   1735: }
                   1736: 
                   1737: static void stack_free(void *block)
                   1738: {
                   1739: if (show_malloc)
                   1740:   fprintf(outfile, "stack_free       %p\n", block);
                   1741: free(block);
                   1742: }
                   1743: 
                   1744: 
1.1.1.2   misho    1745: /*************************************************
                   1746: *          Call pcre_fullinfo()                  *
                   1747: *************************************************/
                   1748: 
                   1749: /* Get one piece of information from the pcre_fullinfo() function. When only
                   1750: one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
                   1751: value, but the code is defensive.
                   1752: 
                   1753: Arguments:
                   1754:   re        compiled regex
                   1755:   study     study data
                   1756:   option    PCRE_INFO_xxx option
                   1757:   ptr       where to put the data
                   1758: 
                   1759: Returns:    0 when OK, < 0 on error
                   1760: */
                   1761: 
                   1762: static int
                   1763: new_info(pcre *re, pcre_extra *study, int option, void *ptr)
                   1764: {
                   1765: int rc;
                   1766: 
                   1767: if (use_pcre16)
                   1768: #ifdef SUPPORT_PCRE16
                   1769:   rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
                   1770: #else
                   1771:   rc = PCRE_ERROR_BADMODE;
                   1772: #endif
                   1773: else
                   1774: #ifdef SUPPORT_PCRE8
                   1775:   rc = pcre_fullinfo(re, study, option, ptr);
                   1776: #else
                   1777:   rc = PCRE_ERROR_BADMODE;
                   1778: #endif
                   1779: 
                   1780: if (rc < 0)
                   1781:   {
                   1782:   fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
                   1783:     use_pcre16? "16" : "", option);
                   1784:   if (rc == PCRE_ERROR_BADMODE)
                   1785:     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
                   1786:       "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
                   1787:   }
                   1788: 
                   1789: return rc;
                   1790: }
                   1791: 
                   1792: 
                   1793: 
                   1794: /*************************************************
                   1795: *             Swap byte functions                *
                   1796: *************************************************/
                   1797: 
                   1798: /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
                   1799: value, respectively.
                   1800: 
                   1801: Arguments:
                   1802:   value        any number
                   1803: 
                   1804: Returns:       the byte swapped value
                   1805: */
                   1806: 
                   1807: static pcre_uint32
                   1808: swap_uint32(pcre_uint32 value)
                   1809: {
                   1810: return ((value & 0x000000ff) << 24) |
                   1811:        ((value & 0x0000ff00) <<  8) |
                   1812:        ((value & 0x00ff0000) >>  8) |
                   1813:        (value >> 24);
                   1814: }
                   1815: 
                   1816: static pcre_uint16
                   1817: swap_uint16(pcre_uint16 value)
                   1818: {
                   1819: return (value >> 8) | (value << 8);
                   1820: }
                   1821: 
                   1822: 
                   1823: 
                   1824: /*************************************************
                   1825: *        Flip bytes in a compiled pattern        *
                   1826: *************************************************/
                   1827: 
                   1828: /* This function is called if the 'F' option was present on a pattern that is
                   1829: to be written to a file. We flip the bytes of all the integer fields in the
                   1830: regex data block and the study block. In 16-bit mode this also flips relevant
                   1831: bytes in the pattern itself. This is to make it possible to test PCRE's
                   1832: ability to reload byte-flipped patterns, e.g. those compiled on a different
                   1833: architecture. */
                   1834: 
                   1835: static void
                   1836: regexflip(pcre *ere, pcre_extra *extra)
                   1837: {
                   1838: REAL_PCRE *re = (REAL_PCRE *)ere;
                   1839: #ifdef SUPPORT_PCRE16
                   1840: int op;
                   1841: pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
                   1842: int length = re->name_count * re->name_entry_size;
                   1843: #ifdef SUPPORT_UTF
                   1844: BOOL utf = (re->options & PCRE_UTF16) != 0;
                   1845: BOOL utf16_char = FALSE;
                   1846: #endif /* SUPPORT_UTF */
                   1847: #endif /* SUPPORT_PCRE16 */
                   1848: 
                   1849: /* Always flip the bytes in the main data block and study blocks. */
                   1850: 
                   1851: re->magic_number = REVERSED_MAGIC_NUMBER;
                   1852: re->size = swap_uint32(re->size);
                   1853: re->options = swap_uint32(re->options);
                   1854: re->flags = swap_uint16(re->flags);
                   1855: re->top_bracket = swap_uint16(re->top_bracket);
                   1856: re->top_backref = swap_uint16(re->top_backref);
                   1857: re->first_char = swap_uint16(re->first_char);
                   1858: re->req_char = swap_uint16(re->req_char);
                   1859: re->name_table_offset = swap_uint16(re->name_table_offset);
                   1860: re->name_entry_size = swap_uint16(re->name_entry_size);
                   1861: re->name_count = swap_uint16(re->name_count);
                   1862: 
                   1863: if (extra != NULL)
                   1864:   {
                   1865:   pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
                   1866:   rsd->size = swap_uint32(rsd->size);
                   1867:   rsd->flags = swap_uint32(rsd->flags);
                   1868:   rsd->minlength = swap_uint32(rsd->minlength);
                   1869:   }
                   1870: 
                   1871: /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
                   1872: in the name table, if present, and then in the pattern itself. */
                   1873: 
                   1874: #ifdef SUPPORT_PCRE16
                   1875: if (!use_pcre16) return;
                   1876: 
                   1877: while(TRUE)
                   1878:   {
                   1879:   /* Swap previous characters. */
                   1880:   while (length-- > 0)
                   1881:     {
                   1882:     *ptr = swap_uint16(*ptr);
                   1883:     ptr++;
                   1884:     }
                   1885: #ifdef SUPPORT_UTF
                   1886:   if (utf16_char)
                   1887:     {
                   1888:     if ((ptr[-1] & 0xfc00) == 0xd800)
                   1889:       {
                   1890:       /* We know that there is only one extra character in UTF-16. */
                   1891:       *ptr = swap_uint16(*ptr);
                   1892:       ptr++;
                   1893:       }
                   1894:     }
                   1895:   utf16_char = FALSE;
                   1896: #endif /* SUPPORT_UTF */
                   1897: 
                   1898:   /* Get next opcode. */
1.1       misho    1899: 
1.1.1.2   misho    1900:   length = 0;
                   1901:   op = *ptr;
                   1902:   *ptr++ = swap_uint16(op);
1.1       misho    1903: 
1.1.1.2   misho    1904:   switch (op)
                   1905:     {
                   1906:     case OP_END:
                   1907:     return;
1.1       misho    1908: 
1.1.1.2   misho    1909: #ifdef SUPPORT_UTF
                   1910:     case OP_CHAR:
                   1911:     case OP_CHARI:
                   1912:     case OP_NOT:
                   1913:     case OP_NOTI:
                   1914:     case OP_STAR:
                   1915:     case OP_MINSTAR:
                   1916:     case OP_PLUS:
                   1917:     case OP_MINPLUS:
                   1918:     case OP_QUERY:
                   1919:     case OP_MINQUERY:
                   1920:     case OP_UPTO:
                   1921:     case OP_MINUPTO:
                   1922:     case OP_EXACT:
                   1923:     case OP_POSSTAR:
                   1924:     case OP_POSPLUS:
                   1925:     case OP_POSQUERY:
                   1926:     case OP_POSUPTO:
                   1927:     case OP_STARI:
                   1928:     case OP_MINSTARI:
                   1929:     case OP_PLUSI:
                   1930:     case OP_MINPLUSI:
                   1931:     case OP_QUERYI:
                   1932:     case OP_MINQUERYI:
                   1933:     case OP_UPTOI:
                   1934:     case OP_MINUPTOI:
                   1935:     case OP_EXACTI:
                   1936:     case OP_POSSTARI:
                   1937:     case OP_POSPLUSI:
                   1938:     case OP_POSQUERYI:
                   1939:     case OP_POSUPTOI:
                   1940:     case OP_NOTSTAR:
                   1941:     case OP_NOTMINSTAR:
                   1942:     case OP_NOTPLUS:
                   1943:     case OP_NOTMINPLUS:
                   1944:     case OP_NOTQUERY:
                   1945:     case OP_NOTMINQUERY:
                   1946:     case OP_NOTUPTO:
                   1947:     case OP_NOTMINUPTO:
                   1948:     case OP_NOTEXACT:
                   1949:     case OP_NOTPOSSTAR:
                   1950:     case OP_NOTPOSPLUS:
                   1951:     case OP_NOTPOSQUERY:
                   1952:     case OP_NOTPOSUPTO:
                   1953:     case OP_NOTSTARI:
                   1954:     case OP_NOTMINSTARI:
                   1955:     case OP_NOTPLUSI:
                   1956:     case OP_NOTMINPLUSI:
                   1957:     case OP_NOTQUERYI:
                   1958:     case OP_NOTMINQUERYI:
                   1959:     case OP_NOTUPTOI:
                   1960:     case OP_NOTMINUPTOI:
                   1961:     case OP_NOTEXACTI:
                   1962:     case OP_NOTPOSSTARI:
                   1963:     case OP_NOTPOSPLUSI:
                   1964:     case OP_NOTPOSQUERYI:
                   1965:     case OP_NOTPOSUPTOI:
                   1966:     if (utf) utf16_char = TRUE;
                   1967: #endif
                   1968:     /* Fall through. */
1.1       misho    1969: 
1.1.1.2   misho    1970:     default:
                   1971:     length = OP_lengths16[op] - 1;
                   1972:     break;
                   1973: 
                   1974:     case OP_CLASS:
                   1975:     case OP_NCLASS:
                   1976:     /* Skip the character bit map. */
                   1977:     ptr += 32/sizeof(pcre_uint16);
                   1978:     length = 0;
                   1979:     break;
                   1980: 
                   1981:     case OP_XCLASS:
                   1982:     /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
                   1983:     if (LINK_SIZE > 1)
                   1984:       length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
                   1985:         - (1 + LINK_SIZE + 1));
                   1986:     else
                   1987:       length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1.1       misho    1988: 
1.1.1.2   misho    1989:     /* Reverse the size of the XCLASS instance. */
                   1990:     *ptr = swap_uint16(*ptr);
                   1991:     ptr++;
                   1992:     if (LINK_SIZE > 1)
                   1993:       {
                   1994:       *ptr = swap_uint16(*ptr);
                   1995:       ptr++;
                   1996:       }
1.1       misho    1997: 
1.1.1.2   misho    1998:     op = *ptr;
                   1999:     *ptr = swap_uint16(op);
                   2000:     ptr++;
                   2001:     if ((op & XCL_MAP) != 0)
                   2002:       {
                   2003:       /* Skip the character bit map. */
                   2004:       ptr += 32/sizeof(pcre_uint16);
                   2005:       length -= 32/sizeof(pcre_uint16);
                   2006:       }
                   2007:     break;
                   2008:     }
                   2009:   }
                   2010: /* Control should never reach here in 16 bit mode. */
                   2011: #endif /* SUPPORT_PCRE16 */
1.1       misho    2012: }
                   2013: 
                   2014: 
                   2015: 
                   2016: /*************************************************
                   2017: *        Check match or recursion limit          *
                   2018: *************************************************/
                   2019: 
                   2020: static int
1.1.1.2   misho    2021: check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1.1       misho    2022:   int start_offset, int options, int *use_offsets, int use_size_offsets,
                   2023:   int flag, unsigned long int *limit, int errnumber, const char *msg)
                   2024: {
                   2025: int count;
                   2026: int min = 0;
                   2027: int mid = 64;
                   2028: int max = -1;
                   2029: 
                   2030: extra->flags |= flag;
                   2031: 
                   2032: for (;;)
                   2033:   {
                   2034:   *limit = mid;
                   2035: 
1.1.1.2   misho    2036:   PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1.1       misho    2037:     use_offsets, use_size_offsets);
                   2038: 
                   2039:   if (count == errnumber)
                   2040:     {
                   2041:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
                   2042:     min = mid;
                   2043:     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
                   2044:     }
                   2045: 
                   2046:   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
                   2047:                          count == PCRE_ERROR_PARTIAL)
                   2048:     {
                   2049:     if (mid == min + 1)
                   2050:       {
                   2051:       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
                   2052:       break;
                   2053:       }
                   2054:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
                   2055:     max = mid;
                   2056:     mid = (min + mid)/2;
                   2057:     }
                   2058:   else break;    /* Some other error */
                   2059:   }
                   2060: 
                   2061: extra->flags &= ~flag;
                   2062: return count;
                   2063: }
                   2064: 
                   2065: 
                   2066: 
                   2067: /*************************************************
                   2068: *         Case-independent strncmp() function    *
                   2069: *************************************************/
                   2070: 
                   2071: /*
                   2072: Arguments:
                   2073:   s         first string
                   2074:   t         second string
                   2075:   n         number of characters to compare
                   2076: 
                   2077: Returns:    < 0, = 0, or > 0, according to the comparison
                   2078: */
                   2079: 
                   2080: static int
1.1.1.2   misho    2081: strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1.1       misho    2082: {
                   2083: while (n--)
                   2084:   {
                   2085:   int c = tolower(*s++) - tolower(*t++);
                   2086:   if (c) return c;
                   2087:   }
                   2088: return 0;
                   2089: }
                   2090: 
                   2091: 
                   2092: 
                   2093: /*************************************************
                   2094: *         Check newline indicator                *
                   2095: *************************************************/
                   2096: 
                   2097: /* This is used both at compile and run-time to check for <xxx> escapes. Print
                   2098: a message and return 0 if there is no match.
                   2099: 
                   2100: Arguments:
                   2101:   p           points after the leading '<'
                   2102:   f           file for error message
                   2103: 
                   2104: Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
                   2105: */
                   2106: 
                   2107: static int
1.1.1.2   misho    2108: check_newline(pcre_uint8 *p, FILE *f)
1.1       misho    2109: {
1.1.1.2   misho    2110: if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
                   2111: if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
                   2112: if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
                   2113: if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
                   2114: if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
                   2115: if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
                   2116: if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1.1       misho    2117: fprintf(f, "Unknown newline type at: <%s\n", p);
                   2118: return 0;
                   2119: }
                   2120: 
                   2121: 
                   2122: 
                   2123: /*************************************************
                   2124: *             Usage function                     *
                   2125: *************************************************/
                   2126: 
                   static void
                   usage(void)
                   {
                   /* The help text is written to stdout in a few large pieces; the pieces are
                   split only where a build-time option decides whether a line is shown. */

                   printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
                          "Input and output default to stdin and stdout.\n");

                   #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
                   printf("If input is a terminal, readline() is used to read from it.\n");
                   #else
                   printf("This version of pcretest is not linked with readline().\n");
                   #endif

                   printf("\nOptions:\n");

                   #ifdef SUPPORT_PCRE16
                   printf("  -16      use the 16-bit library\n");
                   #endif

                   printf("  -b       show compiled code\n"
                          "  -C       show PCRE compile-time options and exit\n"
                          "  -C arg   show a specific compile-time option\n"
                          "           and exit with its value. The arg can be:\n"
                          "     linksize     internal link size [2, 3, 4]\n"
                          "     pcre8        8 bit library support enabled [0, 1]\n"
                          "     pcre16       16 bit library support enabled [0, 1]\n"
                          "     utf          Unicode Transformation Format supported [0, 1]\n"
                          "     ucp          Unicode Properties supported [0, 1]\n"
                          "     jit          Just-in-time compiler supported [0, 1]\n"
                          "     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
                          "  -d       debug: show compiled code and information (-b and -i)\n");

                   #if !defined NODFA
                   printf("  -dfa     force DFA matching for all subjects\n");
                   #endif

                   printf("  -help    show usage information\n"
                          "  -i       show information about compiled patterns\n"
                          "  -M       find MATCH_LIMIT minimum for each subject\n"
                          "  -m       output memory used information\n"
                          "  -o <n>   set size of offsets vector to <n>\n");

                   #if !defined NOPOSIX
                   printf("  -p       use POSIX interface\n");
                   #endif

                   printf("  -q       quiet: do not output PCRE version number at start\n"
                          "  -S <n>   set stack size to <n> megabytes\n"
                          "  -s       force each pattern to be studied at basic level\n"
                          "  -s+      force each pattern to be studied, using JIT if available\n"
                          "  -s++     ditto, verifying when JIT was actually used\n"
                          "  -s+n     force each pattern to be studied, using JIT if available,\n"
                          "             where 1 <= n <= 7 selects JIT options\n"
                          "  -s++n    ditto, verifying when JIT was actually used\n"
                          "  -t       time compilation and execution\n"
                          "  -t <n>   time compilation and execution, repeating <n> times\n"
                          "  -tm      time execution (matching) only\n"
                          "  -tm <n>  time execution (matching) only, repeating <n> times\n");
                   }
                   2177: 
                   2178: 
                   2179: 
                   2180: /*************************************************
                   2181: *                Main Program                    *
                   2182: *************************************************/
                   2183: 
                   2184: /* Read lines from named file or stdin and write to named file or stdout; lines
                   2185: consist of a regular expression, in delimiters and optionally followed by
                   2186: options, followed by a set of test data, terminated by an empty line. */
                   2187: 
                   2188: int main(int argc, char **argv)
                   2189: {
                   2190: FILE *infile = stdin;
1.1.1.2   misho    2191: const char *version;
1.1       misho    2192: int options = 0;
                   2193: int study_options = 0;
                   2194: int default_find_match_limit = FALSE;
                   2195: int op = 1;
                   2196: int timeit = 0;
                   2197: int timeitm = 0;
                   2198: int showinfo = 0;
                   2199: int showstore = 0;
                   2200: int force_study = -1;
                   2201: int force_study_options = 0;
                   2202: int quiet = 0;
                   2203: int size_offsets = 45;
                   2204: int size_offsets_max;
                   2205: int *offsets = NULL;
                   2206: int debug = 0;
                   2207: int done = 0;
                   2208: int all_use_dfa = 0;
1.1.1.3 ! misho    2209: int verify_jit = 0;
1.1       misho    2210: int yield = 0;
                   2211: int stack_size;
                   2212: 
1.1.1.3 ! misho    2213: #if !defined NOPOSIX
        !          2214: int posix = 0;
        !          2215: #endif
        !          2216: #if !defined NODFA
        !          2217: int *dfa_workspace = NULL;
        !          2218: #endif
        !          2219: 
1.1       misho    2220: pcre_jit_stack *jit_stack = NULL;
                   2221: 
1.1.1.2   misho    2222: /* These vectors store, end-to-end, a list of zero-terminated captured
                   2223: substring names, each list itself being terminated by an empty name. Assume
                   2224: that 1024 is plenty long enough for the few names we'll be testing. It is
                   2225: easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
                   2226: for the actual memory, to ensure alignment. */
                   2227: 
                   2228: pcre_uint16 copynames[1024];
                   2229: pcre_uint16 getnames[1024];
                   2230: 
                   2231: #ifdef SUPPORT_PCRE16
                   2232: pcre_uint16 *cn16ptr;
                   2233: pcre_uint16 *gn16ptr;
                   2234: #endif
1.1       misho    2235: 
1.1.1.2   misho    2236: #ifdef SUPPORT_PCRE8
                   2237: pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
                   2238: pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
                   2239: pcre_uint8 *cn8ptr;
                   2240: pcre_uint8 *gn8ptr;
                   2241: #endif
1.1       misho    2242: 
1.1.1.2   misho    2243: /* Get buffers from malloc() so that valgrind will check their misuse when
                   2244: debugging. They grow automatically when very long lines are read. The 16-bit
                   2245: buffer (buffer16) is obtained only if needed. */
                   2246: 
                   2247: buffer = (pcre_uint8 *)malloc(buffer_size);
                   2248: dbuffer = (pcre_uint8 *)malloc(buffer_size);
                   2249: pbuffer = (pcre_uint8 *)malloc(buffer_size);
1.1       misho    2250: 
                   2251: /* The outfile variable is static so that new_malloc can use it. */
                   2252: 
                   2253: outfile = stdout;
                   2254: 
                   2255: /* The following  _setmode() stuff is some Windows magic that tells its runtime
                   2256: library to translate CRLF into a single LF character. At least, that's what
                   2257: I've been told: never having used Windows I take this all on trust. Originally
                   2258: it set 0x8000, but then I was advised that _O_BINARY was better. */
                   2259: 
                   2260: #if defined(_WIN32) || defined(WIN32)
                   2261: _setmode( _fileno( stdout ), _O_BINARY );
                   2262: #endif
                   2263: 
1.1.1.2   misho    2264: /* Get the version number: both pcre_version() and pcre16_version() give the
                   2265: same answer. We just need to ensure that we call one that is available. */
                   2266: 
                   2267: #ifdef SUPPORT_PCRE8
                   2268: version = pcre_version();
                   2269: #else
                   2270: version = pcre16_version();
                   2271: #endif
                   2272: 
1.1       misho    2273: /* Scan options */
                   2274: 
                   2275: while (argc > 1 && argv[op][0] == '-')
                   2276:   {
1.1.1.2   misho    2277:   pcre_uint8 *endptr;
1.1.1.3 ! misho    2278:   char *arg = argv[op];
1.1       misho    2279: 
1.1.1.3 ! misho    2280:   if (strcmp(arg, "-m") == 0) showstore = 1;
        !          2281:   else if (strcmp(arg, "-s") == 0) force_study = 0;
        !          2282: 
        !          2283:   else if (strncmp(arg, "-s+", 3) == 0)
1.1       misho    2284:     {
1.1.1.3 ! misho    2285:     arg += 3;
        !          2286:     if (*arg == '+') { arg++; verify_jit = TRUE; }
1.1       misho    2287:     force_study = 1;
1.1.1.3 ! misho    2288:     if (*arg == 0)
        !          2289:       force_study_options = jit_study_bits[6];
        !          2290:     else if (*arg >= '1' && *arg <= '7')
        !          2291:       force_study_options = jit_study_bits[*arg - '1'];
        !          2292:     else goto BAD_ARG;
1.1       misho    2293:     }
1.1.1.3 ! misho    2294:   else if (strcmp(arg, "-16") == 0)
1.1.1.2   misho    2295:     {
                   2296: #ifdef SUPPORT_PCRE16
                   2297:     use_pcre16 = 1;
                   2298: #else
                   2299:     printf("** This version of PCRE was built without 16-bit support\n");
                   2300:     exit(1);
                   2301: #endif
                   2302:     }
1.1.1.3 ! misho    2303:   else if (strcmp(arg, "-q") == 0) quiet = 1;
        !          2304:   else if (strcmp(arg, "-b") == 0) debug = 1;
        !          2305:   else if (strcmp(arg, "-i") == 0) showinfo = 1;
        !          2306:   else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
        !          2307:   else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
1.1       misho    2308: #if !defined NODFA
1.1.1.3 ! misho    2309:   else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
1.1       misho    2310: #endif
1.1.1.3 ! misho    2311:   else if (strcmp(arg, "-o") == 0 && argc > 2 &&
1.1.1.2   misho    2312:       ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1       misho    2313:         *endptr == 0))
                   2314:     {
                   2315:     op++;
                   2316:     argc--;
                   2317:     }
1.1.1.3 ! misho    2318:   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
1.1       misho    2319:     {
1.1.1.3 ! misho    2320:     int both = arg[2] == 0;
1.1       misho    2321:     int temp;
1.1.1.2   misho    2322:     if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1.1       misho    2323:                      *endptr == 0))
                   2324:       {
                   2325:       timeitm = temp;
                   2326:       op++;
                   2327:       argc--;
                   2328:       }
                   2329:     else timeitm = LOOPREPEAT;
                   2330:     if (both) timeit = timeitm;
                   2331:     }
1.1.1.3 ! misho    2332:   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
1.1.1.2   misho    2333:       ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1.1       misho    2334:         *endptr == 0))
                   2335:     {
                   2336: #if defined(_WIN32) || defined(WIN32) || defined(__minix)
                   2337:     printf("PCRE: -S not supported on this OS\n");
                   2338:     exit(1);
                   2339: #else
                   2340:     int rc;
                   2341:     struct rlimit rlim;
                   2342:     getrlimit(RLIMIT_STACK, &rlim);
                   2343:     rlim.rlim_cur = stack_size * 1024 * 1024;
                   2344:     rc = setrlimit(RLIMIT_STACK, &rlim);
                   2345:     if (rc != 0)
                   2346:       {
                   2347:     printf("PCRE: setrlimit() failed with error %d\n", rc);
                   2348:     exit(1);
                   2349:       }
                   2350:     op++;
                   2351:     argc--;
                   2352: #endif
                   2353:     }
                   2354: #if !defined NOPOSIX
1.1.1.3 ! misho    2355:   else if (strcmp(arg, "-p") == 0) posix = 1;
1.1       misho    2356: #endif
1.1.1.3 ! misho    2357:   else if (strcmp(arg, "-C") == 0)
1.1       misho    2358:     {
                   2359:     int rc;
                   2360:     unsigned long int lrc;
1.1.1.2   misho    2361: 
                   2362:     if (argc > 2)
                   2363:       {
                   2364:       if (strcmp(argv[op + 1], "linksize") == 0)
                   2365:         {
                   2366:         (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
                   2367:         printf("%d\n", rc);
                   2368:         yield = rc;
                   2369:         goto EXIT;
                   2370:         }
                   2371:       if (strcmp(argv[op + 1], "pcre8") == 0)
                   2372:         {
                   2373: #ifdef SUPPORT_PCRE8
                   2374:         printf("1\n");
                   2375:         yield = 1;
                   2376: #else
                   2377:         printf("0\n");
                   2378:         yield = 0;
                   2379: #endif
                   2380:         goto EXIT;
                   2381:         }
                   2382:       if (strcmp(argv[op + 1], "pcre16") == 0)
                   2383:         {
                   2384: #ifdef SUPPORT_PCRE16
                   2385:         printf("1\n");
                   2386:         yield = 1;
                   2387: #else
                   2388:         printf("0\n");
                   2389:         yield = 0;
                   2390: #endif
                   2391:         goto EXIT;
                   2392:         }
                   2393:       if (strcmp(argv[op + 1], "utf") == 0)
                   2394:         {
                   2395: #ifdef SUPPORT_PCRE8
                   2396:         (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                   2397:         printf("%d\n", rc);
                   2398:         yield = rc;
                   2399: #else
                   2400:         (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
                   2401:         printf("%d\n", rc);
                   2402:         yield = rc;
                   2403: #endif
                   2404:         goto EXIT;
                   2405:         }
                   2406:       if (strcmp(argv[op + 1], "ucp") == 0)
                   2407:         {
                   2408:         (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
                   2409:         printf("%d\n", rc);
                   2410:         yield = rc;
                   2411:         goto EXIT;
                   2412:         }
                   2413:       if (strcmp(argv[op + 1], "jit") == 0)
                   2414:         {
                   2415:         (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
                   2416:         printf("%d\n", rc);
                   2417:         yield = rc;
                   2418:         goto EXIT;
                   2419:         }
                   2420:       if (strcmp(argv[op + 1], "newline") == 0)
                   2421:         {
                   2422:         (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
                   2423:         /* Note that these values are always the ASCII values, even
                   2424:         in EBCDIC environments. CR is 13 and NL is 10. */
                   2425:         printf("%s\n", (rc == 13)? "CR" :
                   2426:           (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
                   2427:           (rc == -2)? "ANYCRLF" :
                   2428:           (rc == -1)? "ANY" : "???");
                   2429:         goto EXIT;
                   2430:         }
                   2431:       printf("Unknown -C option: %s\n", argv[op + 1]);
                   2432:       goto EXIT;
                   2433:       }
                   2434: 
                   2435:     printf("PCRE version %s\n", version);
1.1       misho    2436:     printf("Compiled with\n");
1.1.1.2   misho    2437: 
                   2438: /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
                   2439: are set, either both UTFs are supported or both are not supported. */
                   2440: 
                   2441: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
                   2442:     printf("  8-bit and 16-bit support\n");
                   2443:     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                   2444:     if (rc)
                   2445:       printf("  UTF-8 and UTF-16 support\n");
                   2446:     else
                   2447:       printf("  No UTF-8 or UTF-16 support\n");
                   2448: #elif defined SUPPORT_PCRE8
                   2449:     printf("  8-bit support only\n");
1.1       misho    2450:     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                   2451:     printf("  %sUTF-8 support\n", rc? "" : "No ");
1.1.1.2   misho    2452: #else
                   2453:     printf("  16-bit support only\n");
                   2454:     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
                   2455:     printf("  %sUTF-16 support\n", rc? "" : "No ");
                   2456: #endif
                   2457: 
                   2458:     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1.1       misho    2459:     printf("  %sUnicode properties support\n", rc? "" : "No ");
1.1.1.2   misho    2460:     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
1.1       misho    2461:     if (rc)
1.1.1.2   misho    2462:       {
                   2463:       const char *arch;
                   2464:       (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
                   2465:       printf("  Just-in-time compiler support: %s\n", arch);
                   2466:       }
1.1       misho    2467:     else
                   2468:       printf("  No just-in-time compiler support\n");
1.1.1.2   misho    2469:     (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
1.1       misho    2470:     /* Note that these values are always the ASCII values, even
                   2471:     in EBCDIC environments. CR is 13 and NL is 10. */
                   2472:     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
                   2473:       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
                   2474:       (rc == -2)? "ANYCRLF" :
                   2475:       (rc == -1)? "ANY" : "???");
1.1.1.2   misho    2476:     (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
1.1       misho    2477:     printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
                   2478:                                      "all Unicode newlines");
1.1.1.2   misho    2479:     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
1.1       misho    2480:     printf("  Internal link size = %d\n", rc);
1.1.1.2   misho    2481:     (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1.1       misho    2482:     printf("  POSIX malloc threshold = %d\n", rc);
1.1.1.2   misho    2483:     (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1.1       misho    2484:     printf("  Default match limit = %ld\n", lrc);
1.1.1.2   misho    2485:     (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1.1       misho    2486:     printf("  Default recursion depth limit = %ld\n", lrc);
1.1.1.2   misho    2487:     (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
                   2488:     printf("  Match recursion uses %s", rc? "stack" : "heap");
                   2489:     if (showstore)
                   2490:       {
                   2491:       PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
                   2492:       printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
                   2493:       }
                   2494:     printf("\n");
1.1       misho    2495:     goto EXIT;
                   2496:     }
1.1.1.3 ! misho    2497:   else if (strcmp(arg, "-help") == 0 ||
        !          2498:            strcmp(arg, "--help") == 0)
1.1       misho    2499:     {
                   2500:     usage();
                   2501:     goto EXIT;
                   2502:     }
                   2503:   else
                   2504:     {
1.1.1.3 ! misho    2505:     BAD_ARG:
        !          2506:     printf("** Unknown or malformed option %s\n", arg);
1.1       misho    2507:     usage();
                   2508:     yield = 1;
                   2509:     goto EXIT;
                   2510:     }
                   2511:   op++;
                   2512:   argc--;
                   2513:   }
                   2514: 
                   2515: /* Get the store for the offsets vector, and remember what it was */
                   2516: 
                   2517: size_offsets_max = size_offsets;
                   2518: offsets = (int *)malloc(size_offsets_max * sizeof(int));
                   2519: if (offsets == NULL)
                   2520:   {
                   2521:   printf("** Failed to get %d bytes of memory for offsets vector\n",
                   2522:     (int)(size_offsets_max * sizeof(int)));
                   2523:   yield = 1;
                   2524:   goto EXIT;
                   2525:   }
                   2526: 
                   2527: /* Sort out the input and output files */
                   2528: 
                   2529: if (argc > 1)
                   2530:   {
                   2531:   infile = fopen(argv[op], INPUT_MODE);
                   2532:   if (infile == NULL)
                   2533:     {
                   2534:     printf("** Failed to open %s\n", argv[op]);
                   2535:     yield = 1;
                   2536:     goto EXIT;
                   2537:     }
                   2538:   }
                   2539: 
                   2540: if (argc > 2)
                   2541:   {
                   2542:   outfile = fopen(argv[op+1], OUTPUT_MODE);
                   2543:   if (outfile == NULL)
                   2544:     {
                   2545:     printf("** Failed to open %s\n", argv[op+1]);
                   2546:     yield = 1;
                   2547:     goto EXIT;
                   2548:     }
                   2549:   }
                   2550: 
                   2551: /* Set alternative malloc function */
                   2552: 
1.1.1.2   misho    2553: #ifdef SUPPORT_PCRE8
1.1       misho    2554: pcre_malloc = new_malloc;
                   2555: pcre_free = new_free;
                   2556: pcre_stack_malloc = stack_malloc;
                   2557: pcre_stack_free = stack_free;
1.1.1.2   misho    2558: #endif
                   2559: 
                   2560: #ifdef SUPPORT_PCRE16
                   2561: pcre16_malloc = new_malloc;
                   2562: pcre16_free = new_free;
                   2563: pcre16_stack_malloc = stack_malloc;
                   2564: pcre16_stack_free = stack_free;
                   2565: #endif
1.1       misho    2566: 
                   2567: /* Heading line unless quiet, then prompt for first regex if stdin */
                   2568: 
1.1.1.2   misho    2569: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
1.1       misho    2570: 
                   2571: /* Main loop */
                   2572: 
                   2573: while (!done)
                   2574:   {
                   2575:   pcre *re = NULL;
                   2576:   pcre_extra *extra = NULL;
                   2577: 
                   2578: #if !defined NOPOSIX  /* There are still compilers that require no indent */
                   2579:   regex_t preg;
                   2580:   int do_posix = 0;
                   2581: #endif
                   2582: 
                   2583:   const char *error;
1.1.1.2   misho    2584:   pcre_uint8 *markptr;
                   2585:   pcre_uint8 *p, *pp, *ppp;
                   2586:   pcre_uint8 *to_file = NULL;
                   2587:   const pcre_uint8 *tables = NULL;
                   2588:   unsigned long int get_options;
1.1       misho    2589:   unsigned long int true_size, true_study_size = 0;
                   2590:   size_t size, regex_gotten_store;
                   2591:   int do_allcaps = 0;
                   2592:   int do_mark = 0;
                   2593:   int do_study = 0;
                   2594:   int no_force_study = 0;
                   2595:   int do_debug = debug;
                   2596:   int do_G = 0;
                   2597:   int do_g = 0;
                   2598:   int do_showinfo = showinfo;
                   2599:   int do_showrest = 0;
                   2600:   int do_showcaprest = 0;
                   2601:   int do_flip = 0;
                   2602:   int erroroffset, len, delimiter, poffset;
                   2603: 
1.1.1.3 ! misho    2604: #if !defined NODFA
        !          2605:   int dfa_matched = 0;
        !          2606: #endif
        !          2607: 
1.1.1.2   misho    2608:   use_utf = 0;
1.1       misho    2609:   debug_lengths = 1;
                   2610: 
                   2611:   if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
                   2612:   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
                   2613:   fflush(outfile);
                   2614: 
                   2615:   p = buffer;
                   2616:   while (isspace(*p)) p++;
                   2617:   if (*p == 0) continue;
                   2618: 
                   2619:   /* See if the pattern is to be loaded pre-compiled from a file. */
                   2620: 
                   2621:   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
                   2622:     {
1.1.1.2   misho    2623:     pcre_uint32 magic;
                   2624:     pcre_uint8 sbuf[8];
1.1       misho    2625:     FILE *f;
                   2626: 
                   2627:     p++;
1.1.1.2   misho    2628:     if (*p == '!')
                   2629:       {
                   2630:       do_debug = TRUE;
                   2631:       do_showinfo = TRUE;
                   2632:       p++;
                   2633:       }
                   2634: 
1.1       misho    2635:     pp = p + (int)strlen((char *)p);
                   2636:     while (isspace(pp[-1])) pp--;
                   2637:     *pp = 0;
                   2638: 
                   2639:     f = fopen((char *)p, "rb");
                   2640:     if (f == NULL)
                   2641:       {
                   2642:       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
                   2643:       continue;
                   2644:       }
                   2645: 
1.1.1.2   misho    2646:     first_gotten_store = 0;
1.1       misho    2647:     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
                   2648: 
                   2649:     true_size =
                   2650:       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
                   2651:     true_study_size =
                   2652:       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
                   2653: 
1.1.1.2   misho    2654:     re = (pcre *)new_malloc(true_size);
1.1       misho    2655:     regex_gotten_store = first_gotten_store;
                   2656: 
                   2657:     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
                   2658: 
1.1.1.2   misho    2659:     magic = ((REAL_PCRE *)re)->magic_number;
1.1       misho    2660:     if (magic != MAGIC_NUMBER)
                   2661:       {
1.1.1.2   misho    2662:       if (swap_uint32(magic) == MAGIC_NUMBER)
1.1       misho    2663:         {
                   2664:         do_flip = 1;
                   2665:         }
                   2666:       else
                   2667:         {
                   2668:         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
                   2669:         fclose(f);
                   2670:         continue;
                   2671:         }
                   2672:       }
                   2673: 
1.1.1.2   misho    2674:     /* We hide the byte-invert info for little and big endian tests. */
1.1       misho    2675:     fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1.1.1.2   misho    2676:       do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
1.1       misho    2677: 
                   2678:     /* Now see if there is any following study data. */
                   2679: 
                   2680:     if (true_study_size != 0)
                   2681:       {
                   2682:       pcre_study_data *psd;
                   2683: 
                   2684:       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
                   2685:       extra->flags = PCRE_EXTRA_STUDY_DATA;
                   2686: 
                   2687:       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
                   2688:       extra->study_data = psd;
                   2689: 
                   2690:       if (fread(psd, 1, true_study_size, f) != true_study_size)
                   2691:         {
                   2692:         FAIL_READ:
                   2693:         fprintf(outfile, "Failed to read data from %s\n", p);
1.1.1.2   misho    2694:         if (extra != NULL)
                   2695:           {
                   2696:           PCRE_FREE_STUDY(extra);
                   2697:           }
1.1       misho    2698:         if (re != NULL) new_free(re);
                   2699:         fclose(f);
                   2700:         continue;
                   2701:         }
                   2702:       fprintf(outfile, "Study data loaded from %s\n", p);
                   2703:       do_study = 1;     /* To get the data output if requested */
                   2704:       }
                   2705:     else fprintf(outfile, "No study data\n");
                   2706: 
1.1.1.2   misho    2707:     /* Flip the necessary bytes. */
                   2708:     if (do_flip)
                   2709:       {
                   2710:       int rc;
                   2711:       PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
                   2712:       if (rc == PCRE_ERROR_BADMODE)
                   2713:         {
                   2714:         /* Simulate the result of the function call below. */
                   2715:         fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
                   2716:           use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
                   2717:         fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
                   2718:           "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
                   2719:         continue;
                   2720:         }
                   2721:       }
                   2722: 
                   2723:     /* Need to know if UTF-8 for printing data strings. */
                   2724: 
                   2725:     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
                   2726:     use_utf = (get_options & PCRE_UTF8) != 0;
                   2727: 
1.1       misho    2728:     fclose(f);
                   2729:     goto SHOW_INFO;
                   2730:     }
                   2731: 
                   2732:   /* In-line pattern (the usual case). Get the delimiter and seek the end of
1.1.1.2   misho    2733:   the pattern; if it isn't complete, read more. */
1.1       misho    2734: 
                   2735:   delimiter = *p++;
                   2736: 
                   2737:   if (isalnum(delimiter) || delimiter == '\\')
                   2738:     {
                   2739:     fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
                   2740:     goto SKIP_DATA;
                   2741:     }
                   2742: 
                   2743:   pp = p;
                   2744:   poffset = (int)(p - buffer);
                   2745: 
                   2746:   for(;;)
                   2747:     {
                   2748:     while (*pp != 0)
                   2749:       {
                   2750:       if (*pp == '\\' && pp[1] != 0) pp++;
                   2751:         else if (*pp == delimiter) break;
                   2752:       pp++;
                   2753:       }
                   2754:     if (*pp != 0) break;
                   2755:     if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
                   2756:       {
                   2757:       fprintf(outfile, "** Unexpected EOF\n");
                   2758:       done = 1;
                   2759:       goto CONTINUE;
                   2760:       }
                   2761:     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
                   2762:     }
                   2763: 
                   2764:   /* The buffer may have moved while being extended; reset the start of data
                   2765:   pointer to the correct relative point in the buffer. */
                   2766: 
                   2767:   p = buffer + poffset;
                   2768: 
                   2769:   /* If the first character after the delimiter is backslash, make
                   2770:   the pattern end with backslash. This is purely to provide a way
                   2771:   of testing for the error message when a pattern ends with backslash. */
                   2772: 
                   2773:   if (pp[1] == '\\') *pp++ = '\\';
                   2774: 
                   2775:   /* Terminate the pattern at the delimiter, and save a copy of the pattern
                   2776:   for callouts. */
                   2777: 
                   2778:   *pp++ = 0;
                   2779:   strcpy((char *)pbuffer, (char *)p);
                   2780: 
                   2781:   /* Look for options after final delimiter */
                   2782: 
                   2783:   options = 0;
                   2784:   study_options = 0;
                   2785:   log_store = showstore;  /* default from command line */
                   2786: 
                   2787:   while (*pp != 0)
                   2788:     {
                   2789:     switch (*pp++)
                   2790:       {
                   2791:       case 'f': options |= PCRE_FIRSTLINE; break;
                   2792:       case 'g': do_g = 1; break;
                   2793:       case 'i': options |= PCRE_CASELESS; break;
                   2794:       case 'm': options |= PCRE_MULTILINE; break;
                   2795:       case 's': options |= PCRE_DOTALL; break;
                   2796:       case 'x': options |= PCRE_EXTENDED; break;
                   2797: 
                   2798:       case '+':
                   2799:       if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
                   2800:       break;
                   2801: 
                   2802:       case '=': do_allcaps = 1; break;
                   2803:       case 'A': options |= PCRE_ANCHORED; break;
                   2804:       case 'B': do_debug = 1; break;
                   2805:       case 'C': options |= PCRE_AUTO_CALLOUT; break;
                   2806:       case 'D': do_debug = do_showinfo = 1; break;
                   2807:       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
                   2808:       case 'F': do_flip = 1; break;
                   2809:       case 'G': do_G = 1; break;
                   2810:       case 'I': do_showinfo = 1; break;
                   2811:       case 'J': options |= PCRE_DUPNAMES; break;
                   2812:       case 'K': do_mark = 1; break;
                   2813:       case 'M': log_store = 1; break;
                   2814:       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
                   2815: 
                   2816: #if !defined NOPOSIX
                   2817:       case 'P': do_posix = 1; break;
                   2818: #endif
                   2819: 
                   2820:       case 'S':
                   2821:       if (do_study == 0)
                   2822:         {
                   2823:         do_study = 1;
                   2824:         if (*pp == '+')
                   2825:           {
1.1.1.3 ! misho    2826:           if (*(++pp) == '+')
        !          2827:             {
        !          2828:             verify_jit = TRUE;
        !          2829:             pp++;
        !          2830:             }
        !          2831:           if (*pp >= '1' && *pp <= '7')
        !          2832:             study_options |= jit_study_bits[*pp++ - '1'];
        !          2833:           else
        !          2834:             study_options |= jit_study_bits[6];
1.1       misho    2835:           }
                   2836:         }
                   2837:       else
                   2838:         {
                   2839:         do_study = 0;
                   2840:         no_force_study = 1;
                   2841:         }
                   2842:       break;
                   2843: 
                   2844:       case 'U': options |= PCRE_UNGREEDY; break;
                   2845:       case 'W': options |= PCRE_UCP; break;
                   2846:       case 'X': options |= PCRE_EXTRA; break;
                   2847:       case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
                   2848:       case 'Z': debug_lengths = 0; break;
1.1.1.2   misho    2849:       case '8': options |= PCRE_UTF8; use_utf = 1; break;
1.1       misho    2850:       case '?': options |= PCRE_NO_UTF8_CHECK; break;
                   2851: 
                   2852:       case 'T':
                   2853:       switch (*pp++)
                   2854:         {
                   2855:         case '0': tables = tables0; break;
                   2856:         case '1': tables = tables1; break;
                   2857: 
                   2858:         case '\r':
                   2859:         case '\n':
                   2860:         case ' ':
                   2861:         case 0:
                   2862:         fprintf(outfile, "** Missing table number after /T\n");
                   2863:         goto SKIP_DATA;
                   2864: 
                   2865:         default:
                   2866:         fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
                   2867:         goto SKIP_DATA;
                   2868:         }
                   2869:       break;
                   2870: 
                   2871:       case 'L':
                   2872:       ppp = pp;
                   2873:       /* The '\r' test here is so that it works on Windows. */
                   2874:       /* The '0' test is just in case this is an unterminated line. */
                   2875:       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
                   2876:       *ppp = 0;
                   2877:       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
                   2878:         {
                   2879:         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
                   2880:         goto SKIP_DATA;
                   2881:         }
                   2882:       locale_set = 1;
1.1.1.2   misho    2883:       tables = PCRE_MAKETABLES;
1.1       misho    2884:       pp = ppp;
                   2885:       break;
                   2886: 
                   2887:       case '>':
                   2888:       to_file = pp;
                   2889:       while (*pp != 0) pp++;
                   2890:       while (isspace(pp[-1])) pp--;
                   2891:       *pp = 0;
                   2892:       break;
                   2893: 
                   2894:       case '<':
                   2895:         {
1.1.1.2   misho    2896:         if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1.1       misho    2897:           {
                   2898:           options |= PCRE_JAVASCRIPT_COMPAT;
                   2899:           pp += 3;
                   2900:           }
                   2901:         else
                   2902:           {
                   2903:           int x = check_newline(pp, outfile);
                   2904:           if (x == 0) goto SKIP_DATA;
                   2905:           options |= x;
                   2906:           while (*pp++ != '>');
                   2907:           }
                   2908:         }
                   2909:       break;
                   2910: 
                   2911:       case '\r':                      /* So that it works in Windows */
                   2912:       case '\n':
                   2913:       case ' ':
                   2914:       break;
                   2915: 
                   2916:       default:
                   2917:       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
                   2918:       goto SKIP_DATA;
                   2919:       }
                   2920:     }
                   2921: 
                   2922:   /* Handle compiling via the POSIX interface, which doesn't support the
                   2923:   timing, showing, or debugging options, nor the ability to pass over
1.1.1.2   misho    2924:   local character tables. Neither does it have 16-bit support. */
1.1       misho    2925: 
                   2926: #if !defined NOPOSIX
                   2927:   if (posix || do_posix)
                   2928:     {
                   2929:     int rc;
                   2930:     int cflags = 0;
                   2931: 
                   2932:     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
                   2933:     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
                   2934:     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
                   2935:     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
                   2936:     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
                   2937:     if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
                   2938:     if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
                   2939: 
                   2940:     first_gotten_store = 0;
                   2941:     rc = regcomp(&preg, (char *)p, cflags);
                   2942: 
                   2943:     /* Compilation failed; go back for another re, skipping to blank line
                   2944:     if non-interactive. */
                   2945: 
                   2946:     if (rc != 0)
                   2947:       {
                   2948:       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
                   2949:       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
                   2950:       goto SKIP_DATA;
                   2951:       }
                   2952:     }
                   2953: 
                   2954:   /* Handle compiling via the native interface */
                   2955: 
                   2956:   else
                   2957: #endif  /* !defined NOPOSIX */
                   2958: 
                   2959:     {
1.1.1.2   misho    2960:     /* In 16-bit mode, convert the input. */
                   2961: 
                   2962: #ifdef SUPPORT_PCRE16
                   2963:     if (use_pcre16)
                   2964:       {
                   2965:       switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
                   2966:         {
                   2967:         case -1:
                   2968:         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
                   2969:           "converted to UTF-16\n");
                   2970:         goto SKIP_DATA;
                   2971: 
                   2972:         case -2:
                   2973:         fprintf(outfile, "**Failed: character value greater than 0x10ffff "
                   2974:           "cannot be converted to UTF-16\n");
                   2975:         goto SKIP_DATA;
                   2976: 
                   2977:         case -3: /* "Impossible error" when to16 is called arg1 FALSE */
                   2978:         fprintf(outfile, "**Failed: character value greater than 0xffff "
                   2979:           "cannot be converted to 16-bit in non-UTF mode\n");
                   2980:         goto SKIP_DATA;
                   2981: 
                   2982:         default:
                   2983:         break;
                   2984:         }
                   2985:       p = (pcre_uint8 *)buffer16;
                   2986:       }
                   2987: #endif
                   2988: 
                   2989:     /* Compile many times when timing */
1.1       misho    2990: 
                   2991:     if (timeit > 0)
                   2992:       {
                   2993:       register int i;
                   2994:       clock_t time_taken;
                   2995:       clock_t start_time = clock();
                   2996:       for (i = 0; i < timeit; i++)
                   2997:         {
1.1.1.2   misho    2998:         PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1       misho    2999:         if (re != NULL) free(re);
                   3000:         }
                   3001:       time_taken = clock() - start_time;
                   3002:       fprintf(outfile, "Compile time %.4f milliseconds\n",
                   3003:         (((double)time_taken * 1000.0) / (double)timeit) /
                   3004:           (double)CLOCKS_PER_SEC);
                   3005:       }
                   3006: 
                   3007:     first_gotten_store = 0;
1.1.1.2   misho    3008:     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
1.1       misho    3009: 
                   3010:     /* Compilation failed; go back for another re, skipping to blank line
                   3011:     if non-interactive. */
                   3012: 
                   3013:     if (re == NULL)
                   3014:       {
                   3015:       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
                   3016:       SKIP_DATA:
                   3017:       if (infile != stdin)
                   3018:         {
                   3019:         for (;;)
                   3020:           {
                   3021:           if (extend_inputline(infile, buffer, NULL) == NULL)
                   3022:             {
                   3023:             done = 1;
                   3024:             goto CONTINUE;
                   3025:             }
                   3026:           len = (int)strlen((char *)buffer);
                   3027:           while (len > 0 && isspace(buffer[len-1])) len--;
                   3028:           if (len == 0) break;
                   3029:           }
                   3030:         fprintf(outfile, "\n");
                   3031:         }
                   3032:       goto CONTINUE;
                   3033:       }
                   3034: 
                   3035:     /* Compilation succeeded. It is now possible to set the UTF-8 option from
                   3036:     within the regex; check for this so that we know how to process the data
                   3037:     lines. */
                   3038: 
1.1.1.2   misho    3039:     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
                   3040:       goto SKIP_DATA;
                   3041:     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
1.1       misho    3042: 
                   3043:     /* Extract the size for possible writing before possibly flipping it,
                   3044:     and remember the store that was got. */
                   3045: 
1.1.1.2   misho    3046:     true_size = ((REAL_PCRE *)re)->size;
1.1       misho    3047:     regex_gotten_store = first_gotten_store;
                   3048: 
                   3049:     /* Output code size information if requested */
                   3050: 
                   3051:     if (log_store)
                   3052:       fprintf(outfile, "Memory allocation (code space): %d\n",
                   3053:         (int)(first_gotten_store -
1.1.1.2   misho    3054:               sizeof(REAL_PCRE) -
                   3055:               ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
1.1       misho    3056: 
                   3057:     /* If -s or /S was present, study the regex to generate additional info to
                   3058:     help with the matching, unless the pattern has the SS option, which
                   3059:     suppresses the effect of /S (used for a few test patterns where studying is
                   3060:     never sensible). */
                   3061: 
                   3062:     if (do_study || (force_study >= 0 && !no_force_study))
                   3063:       {
                   3064:       if (timeit > 0)
                   3065:         {
                   3066:         register int i;
                   3067:         clock_t time_taken;
                   3068:         clock_t start_time = clock();
                   3069:         for (i = 0; i < timeit; i++)
1.1.1.2   misho    3070:           {
                   3071:           PCRE_STUDY(extra, re, study_options | force_study_options, &error);
                   3072:           }
1.1       misho    3073:         time_taken = clock() - start_time;
1.1.1.2   misho    3074:         if (extra != NULL)
                   3075:           {
                   3076:           PCRE_FREE_STUDY(extra);
                   3077:           }
1.1       misho    3078:         fprintf(outfile, "  Study time %.4f milliseconds\n",
                   3079:           (((double)time_taken * 1000.0) / (double)timeit) /
                   3080:             (double)CLOCKS_PER_SEC);
                   3081:         }
1.1.1.2   misho    3082:       PCRE_STUDY(extra, re, study_options | force_study_options, &error);
1.1       misho    3083:       if (error != NULL)
                   3084:         fprintf(outfile, "Failed to study: %s\n", error);
                   3085:       else if (extra != NULL)
                   3086:         {
                   3087:         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
                   3088:         if (log_store)
                   3089:           {
                   3090:           size_t jitsize;
1.1.1.2   misho    3091:           if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
                   3092:               jitsize != 0)
                   3093:             fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
1.1       misho    3094:           }
                   3095:         }
                   3096:       }
                   3097: 
                   3098:     /* If /K was present, we set up for handling MARK data. */
                   3099: 
                   3100:     if (do_mark)
                   3101:       {
                   3102:       if (extra == NULL)
                   3103:         {
                   3104:         extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   3105:         extra->flags = 0;
                   3106:         }
                   3107:       extra->mark = &markptr;
                   3108:       extra->flags |= PCRE_EXTRA_MARK;
                   3109:       }
                   3110: 
1.1.1.2   misho    3111:     /* Extract and display information from the compiled data if required. */
1.1       misho    3112: 
                   3113:     SHOW_INFO:
                   3114: 
                   3115:     if (do_debug)
                   3116:       {
                   3117:       fprintf(outfile, "------------------------------------------------------------------\n");
1.1.1.2   misho    3118:       PCRE_PRINTINT(re, outfile, debug_lengths);
1.1       misho    3119:       }
                   3120: 
                   3121:     /* We already have the options in get_options (see above) */
                   3122: 
                   3123:     if (do_showinfo)
                   3124:       {
                   3125:       unsigned long int all_options;
                   3126:       int count, backrefmax, first_char, need_char, okpartial, jchanged,
1.1.1.3 ! misho    3127:         hascrorlf, maxlookbehind;
1.1       misho    3128:       int nameentrysize, namecount;
1.1.1.2   misho    3129:       const pcre_uint8 *nametable;
1.1       misho    3130: 
1.1.1.2   misho    3131:       if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
                   3132:           new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
                   3133:           new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
                   3134:           new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
                   3135:           new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
                   3136:           new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
                   3137:           new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
                   3138:           new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
                   3139:           new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
                   3140:           new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
1.1.1.3 ! misho    3141:           new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
        !          3142:           new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
1.1.1.2   misho    3143:           != 0)
                   3144:         goto SKIP_DATA;
1.1       misho    3145: 
                   3146:       if (size != regex_gotten_store) fprintf(outfile,
                   3147:         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
                   3148:         (int)size, (int)regex_gotten_store);
                   3149: 
                   3150:       fprintf(outfile, "Capturing subpattern count = %d\n", count);
                   3151:       if (backrefmax > 0)
                   3152:         fprintf(outfile, "Max back reference = %d\n", backrefmax);
                   3153: 
                   3154:       if (namecount > 0)
                   3155:         {
                   3156:         fprintf(outfile, "Named capturing subpatterns:\n");
                   3157:         while (namecount-- > 0)
                   3158:           {
1.1.1.2   misho    3159: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
                   3160:           int imm2_size = use_pcre16 ? 1 : 2;
                   3161: #else
                   3162:           int imm2_size = IMM2_SIZE;
                   3163: #endif
                   3164:           int length = (int)STRLEN(nametable + imm2_size);
                   3165:           fprintf(outfile, "  ");
                   3166:           PCHARSV(nametable, imm2_size, length, outfile);
                   3167:           while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
                   3168: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
                   3169:           fprintf(outfile, "%3d\n", use_pcre16?
                   3170:              (int)(((PCRE_SPTR16)nametable)[0])
                   3171:             :((int)nametable[0] << 8) | (int)nametable[1]);
                   3172:           nametable += nameentrysize * (use_pcre16 ? 2 : 1);
                   3173: #else
                   3174:           fprintf(outfile, "%3d\n", GET2(nametable, 0));
                   3175: #ifdef SUPPORT_PCRE8
1.1       misho    3176:           nametable += nameentrysize;
1.1.1.2   misho    3177: #else
                   3178:           nametable += nameentrysize * 2;
                   3179: #endif
                   3180: #endif
1.1       misho    3181:           }
                   3182:         }
                   3183: 
                   3184:       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
                   3185:       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
                   3186: 
1.1.1.2   misho    3187:       all_options = ((REAL_PCRE *)re)->options;
                   3188:       if (do_flip) all_options = swap_uint32(all_options);
1.1       misho    3189: 
                   3190:       if (get_options == 0) fprintf(outfile, "No options\n");
                   3191:         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
                   3192:           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
                   3193:           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
                   3194:           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
                   3195:           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
                   3196:           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
                   3197:           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
                   3198:           ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
                   3199:           ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
                   3200:           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
                   3201:           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
                   3202:           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
                   3203:           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1.1.1.2   misho    3204:           ((get_options & PCRE_UTF8) != 0)? " utf" : "",
1.1       misho    3205:           ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1.1.1.2   misho    3206:           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
1.1       misho    3207:           ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
                   3208:           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
                   3209: 
                   3210:       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
                   3211: 
                   3212:       switch (get_options & PCRE_NEWLINE_BITS)
                   3213:         {
                   3214:         case PCRE_NEWLINE_CR:
                   3215:         fprintf(outfile, "Forced newline sequence: CR\n");
                   3216:         break;
                   3217: 
                   3218:         case PCRE_NEWLINE_LF:
                   3219:         fprintf(outfile, "Forced newline sequence: LF\n");
                   3220:         break;
                   3221: 
                   3222:         case PCRE_NEWLINE_CRLF:
                   3223:         fprintf(outfile, "Forced newline sequence: CRLF\n");
                   3224:         break;
                   3225: 
                   3226:         case PCRE_NEWLINE_ANYCRLF:
                   3227:         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
                   3228:         break;
                   3229: 
                   3230:         case PCRE_NEWLINE_ANY:
                   3231:         fprintf(outfile, "Forced newline sequence: ANY\n");
                   3232:         break;
                   3233: 
                   3234:         default:
                   3235:         break;
                   3236:         }
                   3237: 
                   3238:       if (first_char == -1)
                   3239:         {
                   3240:         fprintf(outfile, "First char at start or follows newline\n");
                   3241:         }
                   3242:       else if (first_char < 0)
                   3243:         {
                   3244:         fprintf(outfile, "No first char\n");
                   3245:         }
                   3246:       else
                   3247:         {
1.1.1.2   misho    3248:         const char *caseless =
                   3249:           ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
1.1       misho    3250:           "" : " (caseless)";
1.1.1.2   misho    3251: 
                   3252:         if (PRINTOK(first_char))
                   3253:           fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
1.1       misho    3254:         else
1.1.1.2   misho    3255:           {
                   3256:           fprintf(outfile, "First char = ");
                   3257:           pchar(first_char, outfile);
                   3258:           fprintf(outfile, "%s\n", caseless);
                   3259:           }
1.1       misho    3260:         }
                   3261: 
                   3262:       if (need_char < 0)
                   3263:         {
                   3264:         fprintf(outfile, "No need char\n");
                   3265:         }
                   3266:       else
                   3267:         {
1.1.1.2   misho    3268:         const char *caseless =
                   3269:           ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
1.1       misho    3270:           "" : " (caseless)";
1.1.1.2   misho    3271: 
                   3272:         if (PRINTOK(need_char))
                   3273:           fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
1.1       misho    3274:         else
1.1.1.2   misho    3275:           {
                   3276:           fprintf(outfile, "Need char = ");
                   3277:           pchar(need_char, outfile);
                   3278:           fprintf(outfile, "%s\n", caseless);
                   3279:           }
1.1       misho    3280:         }
                   3281: 
1.1.1.3 ! misho    3282:       if (maxlookbehind > 0)
        !          3283:         fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
        !          3284: 
1.1       misho    3285:       /* Don't output study size; at present it is in any case a fixed
                   3286:       value, but it varies, depending on the computer architecture, and
                   3287:       so messes up the test suite. (And with the /F option, it might be
                   3288:       flipped.) If study was forced by an external -s, don't show this
                   3289:       information unless -i or -d was also present. This means that, except
                   3290:       when auto-callouts are involved, the output from runs with and without
                   3291:       -s should be identical. */
                   3292: 
                   3293:       if (do_study || (force_study >= 0 && showinfo && !no_force_study))
                   3294:         {
                   3295:         if (extra == NULL)
                   3296:           fprintf(outfile, "Study returned NULL\n");
                   3297:         else
                   3298:           {
1.1.1.2   misho    3299:           pcre_uint8 *start_bits = NULL;
1.1       misho    3300:           int minlength;
                   3301: 
1.1.1.2   misho    3302:           if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
                   3303:             fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1.1       misho    3304: 
1.1.1.2   misho    3305:           if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
1.1       misho    3306:             {
1.1.1.2   misho    3307:             if (start_bits == NULL)
                   3308:               fprintf(outfile, "No set of starting bytes\n");
                   3309:             else
1.1       misho    3310:               {
1.1.1.2   misho    3311:               int i;
                   3312:               int c = 24;
                   3313:               fprintf(outfile, "Starting byte set: ");
                   3314:               for (i = 0; i < 256; i++)
1.1       misho    3315:                 {
1.1.1.2   misho    3316:                 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1.1       misho    3317:                   {
1.1.1.2   misho    3318:                   if (c > 75)
                   3319:                     {
                   3320:                     fprintf(outfile, "\n  ");
                   3321:                     c = 2;
                   3322:                     }
                   3323:                   if (PRINTOK(i) && i != ' ')
                   3324:                     {
                   3325:                     fprintf(outfile, "%c ", i);
                   3326:                     c += 2;
                   3327:                     }
                   3328:                   else
                   3329:                     {
                   3330:                     fprintf(outfile, "\\x%02x ", i);
                   3331:                     c += 5;
                   3332:                     }
1.1       misho    3333:                   }
                   3334:                 }
1.1.1.2   misho    3335:               fprintf(outfile, "\n");
1.1       misho    3336:               }
                   3337:             }
                   3338:           }
                   3339: 
                   3340:         /* Show this only if the JIT was set by /S, not by -s. */
                   3341: 
                   3342:         if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
                   3343:           {
                   3344:           int jit;
1.1.1.2   misho    3345:           if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
                   3346:             {
                   3347:             if (jit)
                   3348:               fprintf(outfile, "JIT study was successful\n");
                   3349:             else
1.1       misho    3350: #ifdef SUPPORT_JIT
1.1.1.2   misho    3351:               fprintf(outfile, "JIT study was not successful\n");
1.1       misho    3352: #else
1.1.1.2   misho    3353:               fprintf(outfile, "JIT support is not available in this version of PCRE\n");
1.1       misho    3354: #endif
1.1.1.2   misho    3355:             }
1.1       misho    3356:           }
                   3357:         }
                   3358:       }
                   3359: 
                   3360:     /* If the '>' option was present, we write out the regex to a file, and
                   3361:     that is all. The first 8 bytes of the file are the regex length and then
                   3362:     the study length, in big-endian order. */
                   3363: 
                   3364:     if (to_file != NULL)
                   3365:       {
                   3366:       FILE *f = fopen((char *)to_file, "wb");
                   3367:       if (f == NULL)
                   3368:         {
                   3369:         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
                   3370:         }
                   3371:       else
                   3372:         {
1.1.1.2   misho    3373:         pcre_uint8 sbuf[8];
                   3374: 
                   3375:         if (do_flip) regexflip(re, extra);
                   3376:         sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
                   3377:         sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
                   3378:         sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
                   3379:         sbuf[3] = (pcre_uint8)((true_size) & 255);
                   3380:         sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
                   3381:         sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
                   3382:         sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
                   3383:         sbuf[7] = (pcre_uint8)((true_study_size) & 255);
1.1       misho    3384: 
                   3385:         if (fwrite(sbuf, 1, 8, f) < 8 ||
                   3386:             fwrite(re, 1, true_size, f) < true_size)
                   3387:           {
                   3388:           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
                   3389:           }
                   3390:         else
                   3391:           {
                   3392:           fprintf(outfile, "Compiled pattern written to %s\n", to_file);
                   3393: 
                   3394:           /* If there is study data, write it. */
                   3395: 
                   3396:           if (extra != NULL)
                   3397:             {
                   3398:             if (fwrite(extra->study_data, 1, true_study_size, f) <
                   3399:                 true_study_size)
                   3400:               {
                   3401:               fprintf(outfile, "Write error on %s: %s\n", to_file,
                   3402:                 strerror(errno));
                   3403:               }
                   3404:             else fprintf(outfile, "Study data written to %s\n", to_file);
                   3405:             }
                   3406:           }
                   3407:         fclose(f);
                   3408:         }
                   3409: 
                   3410:       new_free(re);
1.1.1.2   misho    3411:       if (extra != NULL)
                   3412:         {
                   3413:         PCRE_FREE_STUDY(extra);
                   3414:         }
1.1       misho    3415:       if (locale_set)
                   3416:         {
                   3417:         new_free((void *)tables);
                   3418:         setlocale(LC_CTYPE, "C");
                   3419:         locale_set = 0;
                   3420:         }
                   3421:       continue;  /* With next regex */
                   3422:       }
                   3423:     }        /* End of non-POSIX compile */
                   3424: 
                   3425:   /* Read data lines and test them */
                   3426: 
                   3427:   for (;;)
                   3428:     {
1.1.1.2   misho    3429:     pcre_uint8 *q;
                   3430:     pcre_uint8 *bptr;
1.1       misho    3431:     int *use_offsets = offsets;
                   3432:     int use_size_offsets = size_offsets;
                   3433:     int callout_data = 0;
                   3434:     int callout_data_set = 0;
                   3435:     int count, c;
                   3436:     int copystrings = 0;
                   3437:     int find_match_limit = default_find_match_limit;
                   3438:     int getstrings = 0;
                   3439:     int getlist = 0;
                   3440:     int gmatched = 0;
                   3441:     int start_offset = 0;
                   3442:     int start_offset_sign = 1;
                   3443:     int g_notempty = 0;
                   3444:     int use_dfa = 0;
                   3445: 
                   3446:     *copynames = 0;
                   3447:     *getnames = 0;
                   3448: 
1.1.1.2   misho    3449: #ifdef SUPPORT_PCRE16
                   3450:     cn16ptr = copynames;
                   3451:     gn16ptr = getnames;
                   3452: #endif
                   3453: #ifdef SUPPORT_PCRE8
                   3454:     cn8ptr = copynames8;
                   3455:     gn8ptr = getnames8;
                   3456: #endif
1.1       misho    3457: 
1.1.1.2   misho    3458:     SET_PCRE_CALLOUT(callout);
1.1       misho    3459:     first_callout = 1;
                   3460:     last_callout_mark = NULL;
                   3461:     callout_extra = 0;
                   3462:     callout_count = 0;
                   3463:     callout_fail_count = 999999;
                   3464:     callout_fail_id = -1;
                   3465:     show_malloc = 0;
1.1.1.2   misho    3466:     options = 0;
1.1       misho    3467: 
                   3468:     if (extra != NULL) extra->flags &=
                   3469:       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
                   3470: 
                   3471:     len = 0;
                   3472:     for (;;)
                   3473:       {
                   3474:       if (extend_inputline(infile, buffer + len, "data> ") == NULL)
                   3475:         {
                   3476:         if (len > 0)    /* Reached EOF without hitting a newline */
                   3477:           {
                   3478:           fprintf(outfile, "\n");
                   3479:           break;
                   3480:           }
                   3481:         done = 1;
                   3482:         goto CONTINUE;
                   3483:         }
                   3484:       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
                   3485:       len = (int)strlen((char *)buffer);
                   3486:       if (buffer[len-1] == '\n') break;
                   3487:       }
                   3488: 
                   3489:     while (len > 0 && isspace(buffer[len-1])) len--;
                   3490:     buffer[len] = 0;
                   3491:     if (len == 0) break;
                   3492: 
                   3493:     p = buffer;
                   3494:     while (isspace(*p)) p++;
                   3495: 
                   3496:     bptr = q = dbuffer;
                   3497:     while ((c = *p++) != 0)
                   3498:       {
                   3499:       int i = 0;
                   3500:       int n = 0;
                   3501: 
1.1.1.2   misho    3502:       /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
                   3503:       In non-UTF mode, allow the value of the byte to fall through to later,
                   3504:       where values greater than 127 are turned into UTF-8 when running in
                   3505:       16-bit mode. */
                   3506: 
                   3507:       if (c != '\\')
                   3508:         {
                   3509:         if (use_utf)
                   3510:           {
                   3511:           *q++ = c;
                   3512:           continue;
                   3513:           }
                   3514:         }
                   3515: 
                   3516:       /* Handle backslash escapes */
                   3517: 
                   3518:       else switch ((c = *p++))
1.1       misho    3519:         {
                   3520:         case 'a': c =    7; break;
                   3521:         case 'b': c = '\b'; break;
                   3522:         case 'e': c =   27; break;
                   3523:         case 'f': c = '\f'; break;
                   3524:         case 'n': c = '\n'; break;
                   3525:         case 'r': c = '\r'; break;
                   3526:         case 't': c = '\t'; break;
                   3527:         case 'v': c = '\v'; break;
                   3528: 
                   3529:         case '0': case '1': case '2': case '3':
                   3530:         case '4': case '5': case '6': case '7':
                   3531:         c -= '0';
                   3532:         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
                   3533:           c = c * 8 + *p++ - '0';
                   3534:         break;
                   3535: 
                   3536:         case 'x':
                   3537:         if (*p == '{')
                   3538:           {
1.1.1.2   misho    3539:           pcre_uint8 *pt = p;
1.1       misho    3540:           c = 0;
                   3541: 
                   3542:           /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
                   3543:           when isxdigit() is a macro that refers to its argument more than
                   3544:           once. This is banned by the C Standard, but apparently happens in at
                   3545:           least one MacOS environment. */
                   3546: 
                   3547:           for (pt++; isxdigit(*pt); pt++)
1.1.1.2   misho    3548:             {
                   3549:             if (++i == 9)
                   3550:               fprintf(outfile, "** Too many hex digits in \\x{...} item; "
                   3551:                                "using only the first eight.\n");
                   3552:             else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
                   3553:             }
1.1       misho    3554:           if (*pt == '}')
                   3555:             {
                   3556:             p = pt + 1;
                   3557:             break;
                   3558:             }
1.1.1.2   misho    3559:           /* Not correct form for \x{...}; fall through */
1.1       misho    3560:           }
                   3561: 
1.1.1.2   misho    3562:         /* \x without {} always defines just one byte in 8-bit mode. This
                   3563:         allows UTF-8 characters to be constructed byte by byte, and also allows
                   3564:         invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
                   3565:         Otherwise, pass it down to later code so that it can be turned into
                   3566:         UTF-8 when running in 16-bit mode. */
1.1       misho    3567: 
                   3568:         c = 0;
                   3569:         while (i++ < 2 && isxdigit(*p))
                   3570:           {
                   3571:           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
                   3572:           p++;
                   3573:           }
1.1.1.2   misho    3574:         if (use_utf)
                   3575:           {
                   3576:           *q++ = c;
                   3577:           continue;
                   3578:           }
1.1       misho    3579:         break;
                   3580: 
                   3581:         case 0:   /* \ followed by end of line (not EOF) allows for an empty subject */
                   3582:         p--;
                   3583:         continue;
                   3584: 
                   3585:         case '>':
                   3586:         if (*p == '-')
                   3587:           {
                   3588:           start_offset_sign = -1;
                   3589:           p++;
                   3590:           }
                   3591:         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
                   3592:         start_offset *= start_offset_sign;
                   3593:         continue;
                   3594: 
                   3595:         case 'A':  /* Option setting */
                   3596:         options |= PCRE_ANCHORED;
                   3597:         continue;
                   3598: 
                   3599:         case 'B':
                   3600:         options |= PCRE_NOTBOL;
                   3601:         continue;
                   3602: 
                   3603:         case 'C':
                   3604:         if (isdigit(*p))    /* Set copy string */
                   3605:           {
                   3606:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3607:           copystrings |= 1 << n;
                   3608:           }
                   3609:         else if (isalnum(*p))
                   3610:           {
1.1.1.2   misho    3611:           READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
1.1       misho    3612:           }
                   3613:         else if (*p == '+')
                   3614:           {
                   3615:           callout_extra = 1;
                   3616:           p++;
                   3617:           }
                   3618:         else if (*p == '-')
                   3619:           {
1.1.1.2   misho    3620:           SET_PCRE_CALLOUT(NULL);
1.1       misho    3621:           p++;
                   3622:           }
                   3623:         else if (*p == '!')
                   3624:           {
                   3625:           callout_fail_id = 0;
                   3626:           p++;
                   3627:           while(isdigit(*p))
                   3628:             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
                   3629:           callout_fail_count = 0;
                   3630:           if (*p == '!')
                   3631:             {
                   3632:             p++;
                   3633:             while(isdigit(*p))
                   3634:               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
                   3635:             }
                   3636:           }
                   3637:         else if (*p == '*')
                   3638:           {
                   3639:           int sign = 1;
                   3640:           callout_data = 0;
                   3641:           if (*(++p) == '-') { sign = -1; p++; }
                   3642:           while(isdigit(*p))
                   3643:             callout_data = callout_data * 10 + *p++ - '0';
                   3644:           callout_data *= sign;
                   3645:           callout_data_set = 1;
                   3646:           }
                   3647:         continue;
                   3648: 
                   3649: #if !defined NODFA
                   3650:         case 'D':
                   3651: #if !defined NOPOSIX
                   3652:         if (posix || do_posix)
                   3653:           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
                   3654:         else
                   3655: #endif
                   3656:           use_dfa = 1;
                   3657:         continue;
                   3658: #endif
                   3659: 
                   3660: #if !defined NODFA
                   3661:         case 'F':
                   3662:         options |= PCRE_DFA_SHORTEST;
                   3663:         continue;
                   3664: #endif
                   3665: 
                   3666:         case 'G':
                   3667:         if (isdigit(*p))
                   3668:           {
                   3669:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3670:           getstrings |= 1 << n;
                   3671:           }
                   3672:         else if (isalnum(*p))
                   3673:           {
1.1.1.2   misho    3674:           READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
1.1       misho    3675:           }
                   3676:         continue;
                   3677: 
                   3678:         case 'J':
                   3679:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3680:         if (extra != NULL
                   3681:             && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
                   3682:             && extra->executable_jit != NULL)
                   3683:           {
1.1.1.2   misho    3684:           if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
                   3685:           jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
                   3686:           PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
1.1       misho    3687:           }
                   3688:         continue;
                   3689: 
                   3690:         case 'L':
                   3691:         getlist = 1;
                   3692:         continue;
                   3693: 
                   3694:         case 'M':
                   3695:         find_match_limit = 1;
                   3696:         continue;
                   3697: 
                   3698:         case 'N':
                   3699:         if ((options & PCRE_NOTEMPTY) != 0)
                   3700:           options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
                   3701:         else
                   3702:           options |= PCRE_NOTEMPTY;
                   3703:         continue;
                   3704: 
                   3705:         case 'O':
                   3706:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3707:         if (n > size_offsets_max)
                   3708:           {
                   3709:           size_offsets_max = n;
                   3710:           free(offsets);
                   3711:           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
                   3712:           if (offsets == NULL)
                   3713:             {
                   3714:             printf("** Failed to get %d bytes of memory for offsets vector\n",
                   3715:               (int)(size_offsets_max * sizeof(int)));
                   3716:             yield = 1;
                   3717:             goto EXIT;
                   3718:             }
                   3719:           }
                   3720:         use_size_offsets = n;
                   3721:         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1.1.1.3 ! misho    3722:           else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
1.1       misho    3723:         continue;
                   3724: 
                   3725:         case 'P':
                   3726:         options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
                   3727:           PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
                   3728:         continue;
                   3729: 
                   3730:         case 'Q':
                   3731:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3732:         if (extra == NULL)
                   3733:           {
                   3734:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   3735:           extra->flags = 0;
                   3736:           }
                   3737:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                   3738:         extra->match_limit_recursion = n;
                   3739:         continue;
                   3740: 
                   3741:         case 'q':
                   3742:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   3743:         if (extra == NULL)
                   3744:           {
                   3745:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   3746:           extra->flags = 0;
                   3747:           }
                   3748:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
                   3749:         extra->match_limit = n;
                   3750:         continue;
                   3751: 
                   3752: #if !defined NODFA
                   3753:         case 'R':
                   3754:         options |= PCRE_DFA_RESTART;
                   3755:         continue;
                   3756: #endif
                   3757: 
                   3758:         case 'S':
                   3759:         show_malloc = 1;
                   3760:         continue;
                   3761: 
                   3762:         case 'Y':
                   3763:         options |= PCRE_NO_START_OPTIMIZE;
                   3764:         continue;
                   3765: 
                   3766:         case 'Z':
                   3767:         options |= PCRE_NOTEOL;
                   3768:         continue;
                   3769: 
                   3770:         case '?':
                   3771:         options |= PCRE_NO_UTF8_CHECK;
                   3772:         continue;
                   3773: 
                   3774:         case '<':
                   3775:           {
                   3776:           int x = check_newline(p, outfile);
                   3777:           if (x == 0) goto NEXT_DATA;
                   3778:           options |= x;
                   3779:           while (*p++ != '>');
                   3780:           }
                   3781:         continue;
                   3782:         }
1.1.1.2   misho    3783: 
                   3784:       /* We now have a character value in c that may be greater than 255. In
                   3785:       16-bit mode, we always convert characters to UTF-8 so that values greater
                   3786:       than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
                   3787:       convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
                   3788:       mode must have come from \x{...} or octal constructs because values from
                   3789:       \x.. get this far only in non-UTF mode. */
                   3790: 
                   3791: #if !defined NOUTF || defined SUPPORT_PCRE16
                   3792:       if (use_pcre16 || use_utf)
                   3793:         {
                   3794:         pcre_uint8 buff8[8];
                   3795:         int ii, utn;
                   3796:         utn = ord2utf8(c, buff8);
                   3797:         for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
                   3798:         }
                   3799:       else
                   3800: #endif
                   3801:         {
                   3802:         if (c > 255)
                   3803:           {
                   3804:           fprintf(outfile, "** Character \\x{%x} is greater than 255 "
                   3805:             "and UTF-8 mode is not enabled.\n", c);
                   3806:           fprintf(outfile, "** Truncation will probably give the wrong "
                   3807:             "result.\n");
                   3808:           }
                   3809:         *q++ = c;
                   3810:         }
1.1       misho    3811:       }
1.1.1.2   misho    3812: 
                   3813:     /* Reached end of subject string */
                   3814: 
1.1       misho    3815:     *q = 0;
                   3816:     len = (int)(q - dbuffer);
                   3817: 
                   3818:     /* Move the data to the end of the buffer so that a read over the end of
                   3819:     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
                   3820:     we are using the POSIX interface, we must include the terminating zero. */
                   3821: 
                   3822: #if !defined NOPOSIX
                   3823:     if (posix || do_posix)
                   3824:       {
                   3825:       memmove(bptr + buffer_size - len - 1, bptr, len + 1);
                   3826:       bptr += buffer_size - len - 1;
                   3827:       }
                   3828:     else
                   3829: #endif
                   3830:       {
                   3831:       memmove(bptr + buffer_size - len, bptr, len);
                   3832:       bptr += buffer_size - len;
                   3833:       }
                   3834: 
                   3835:     if ((all_use_dfa || use_dfa) && find_match_limit)
                   3836:       {
                   3837:       printf("**Match limit not relevant for DFA matching: ignored\n");
                   3838:       find_match_limit = 0;
                   3839:       }
                   3840: 
                   3841:     /* Handle matching via the POSIX interface, which does not
                   3842:     support timing or playing with the match limit or callout data. */
                   3843: 
                   3844: #if !defined NOPOSIX
                   3845:     if (posix || do_posix)
                   3846:       {
                   3847:       int rc;
                   3848:       int eflags = 0;
                   3849:       regmatch_t *pmatch = NULL;
                   3850:       if (use_size_offsets > 0)
                   3851:         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
                   3852:       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
                   3853:       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
                   3854:       if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
                   3855: 
                   3856:       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
                   3857: 
                   3858:       if (rc != 0)
                   3859:         {
                   3860:         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
                   3861:         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
                   3862:         }
                   3863:       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
                   3864:               != 0)
                   3865:         {
                   3866:         fprintf(outfile, "Matched with REG_NOSUB\n");
                   3867:         }
                   3868:       else
                   3869:         {
                   3870:         size_t i;
                   3871:         for (i = 0; i < (size_t)use_size_offsets; i++)
                   3872:           {
                   3873:           if (pmatch[i].rm_so >= 0)
                   3874:             {
                   3875:             fprintf(outfile, "%2d: ", (int)i);
1.1.1.2   misho    3876:             PCHARSV(dbuffer, pmatch[i].rm_so,
1.1       misho    3877:               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
                   3878:             fprintf(outfile, "\n");
                   3879:             if (do_showcaprest || (i == 0 && do_showrest))
                   3880:               {
                   3881:               fprintf(outfile, "%2d+ ", (int)i);
1.1.1.2   misho    3882:               PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1.1       misho    3883:                 outfile);
                   3884:               fprintf(outfile, "\n");
                   3885:               }
                   3886:             }
                   3887:           }
                   3888:         }
                   3889:       free(pmatch);
1.1.1.2   misho    3890:       goto NEXT_DATA;
1.1       misho    3891:       }
                   3892: 
1.1.1.2   misho    3893: #endif  /* !defined NOPOSIX */
                   3894: 
1.1       misho    3895:     /* Handle matching via the native interface - repeats for /g and /G */
                   3896: 
1.1.1.2   misho    3897: #ifdef SUPPORT_PCRE16
                   3898:     if (use_pcre16)
                   3899:       {
                   3900:       len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
                   3901:       switch(len)
                   3902:         {
                   3903:         case -1:
                   3904:         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
                   3905:           "converted to UTF-16\n");
                   3906:         goto NEXT_DATA;
                   3907: 
                   3908:         case -2:
                   3909:         fprintf(outfile, "**Failed: character value greater than 0x10ffff "
                   3910:           "cannot be converted to UTF-16\n");
                   3911:         goto NEXT_DATA;
                   3912: 
                   3913:         case -3:
                   3914:         fprintf(outfile, "**Failed: character value greater than 0xffff "
                   3915:           "cannot be converted to 16-bit in non-UTF mode\n");
                   3916:         goto NEXT_DATA;
                   3917: 
                   3918:         default:
                   3919:         break;
                   3920:         }
                   3921:       bptr = (pcre_uint8 *)buffer16;
                   3922:       }
                   3923: #endif
1.1       misho    3924: 
1.1.1.3 ! misho    3925:     /* Ensure that there is a JIT callback if we want to verify that JIT was
        !          3926:     actually used. If jit_stack == NULL, no stack has yet been assigned. */
        !          3927: 
        !          3928:     if (verify_jit && jit_stack == NULL && extra != NULL)
        !          3929:        { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
        !          3930: 
1.1       misho    3931:     for (;; gmatched++)    /* Loop for /g or /G */
                   3932:       {
                   3933:       markptr = NULL;
1.1.1.3 ! misho    3934:       jit_was_used = FALSE;
1.1       misho    3935: 
                   3936:       if (timeitm > 0)
                   3937:         {
                   3938:         register int i;
                   3939:         clock_t time_taken;
                   3940:         clock_t start_time = clock();
                   3941: 
                   3942: #if !defined NODFA
                   3943:         if (all_use_dfa || use_dfa)
                   3944:           {
1.1.1.3 ! misho    3945:           if ((options & PCRE_DFA_RESTART) != 0)
        !          3946:             {
        !          3947:             fprintf(outfile, "Timing DFA restarts is not supported\n");
        !          3948:             break;
        !          3949:             }
        !          3950:           if (dfa_workspace == NULL)
        !          3951:             dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
1.1       misho    3952:           for (i = 0; i < timeitm; i++)
1.1.1.2   misho    3953:             {
                   3954:             PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 ! misho    3955:               (options | g_notempty), use_offsets, use_size_offsets,
        !          3956:               dfa_workspace, DFA_WS_DIMENSION);
1.1.1.2   misho    3957:             }
1.1       misho    3958:           }
                   3959:         else
                   3960: #endif
                   3961: 
                   3962:         for (i = 0; i < timeitm; i++)
1.1.1.2   misho    3963:           {
                   3964:           PCRE_EXEC(count, re, extra, bptr, len, start_offset,
                   3965:             (options | g_notempty), use_offsets, use_size_offsets);
                   3966:           }
1.1       misho    3967:         time_taken = clock() - start_time;
                   3968:         fprintf(outfile, "Execute time %.4f milliseconds\n",
                   3969:           (((double)time_taken * 1000.0) / (double)timeitm) /
                   3970:             (double)CLOCKS_PER_SEC);
                   3971:         }
                   3972: 
                   3973:       /* If find_match_limit is set, we want to do repeated matches with
                   3974:       varying limits in order to find the minimum value for the match limit and
                   3975:       for the recursion limit. The match limits are relevant only to the normal
                   3976:       running of pcre_exec(), so disable the JIT optimization. This makes it
                   3977:       possible to run the same set of tests with and without JIT externally
                   3978:       requested. */
                   3979: 
                   3980:       if (find_match_limit)
                   3981:         {
                   3982:         if (extra == NULL)
                   3983:           {
                   3984:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   3985:           extra->flags = 0;
                   3986:           }
                   3987:         else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
                   3988: 
                   3989:         (void)check_match_limit(re, extra, bptr, len, start_offset,
                   3990:           options|g_notempty, use_offsets, use_size_offsets,
                   3991:           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
                   3992:           PCRE_ERROR_MATCHLIMIT, "match()");
                   3993: 
                   3994:         count = check_match_limit(re, extra, bptr, len, start_offset,
                   3995:           options|g_notempty, use_offsets, use_size_offsets,
                   3996:           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
                   3997:           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
                   3998:         }
                   3999: 
                   4000:       /* If callout_data is set, use the interface with additional data */
                   4001: 
                   4002:       else if (callout_data_set)
                   4003:         {
                   4004:         if (extra == NULL)
                   4005:           {
                   4006:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   4007:           extra->flags = 0;
                   4008:           }
                   4009:         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
                   4010:         extra->callout_data = &callout_data;
1.1.1.2   misho    4011:         PCRE_EXEC(count, re, extra, bptr, len, start_offset,
1.1       misho    4012:           options | g_notempty, use_offsets, use_size_offsets);
                   4013:         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
                   4014:         }
                   4015: 
                   4016:       /* The normal case is just to do the match once, with the default
                   4017:       value of match_limit. */
                   4018: 
                   4019: #if !defined NODFA
                   4020:       else if (all_use_dfa || use_dfa)
                   4021:         {
1.1.1.3 ! misho    4022:         if (dfa_workspace == NULL)
        !          4023:           dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
        !          4024:         if (dfa_matched++ == 0)
        !          4025:           dfa_workspace[0] = -1;  /* To catch bad restart */
1.1.1.2   misho    4026:         PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
1.1.1.3 ! misho    4027:           (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
        !          4028:           DFA_WS_DIMENSION);
1.1       misho    4029:         if (count == 0)
                   4030:           {
                   4031:           fprintf(outfile, "Matched, but too many subsidiary matches\n");
                   4032:           count = use_size_offsets/2;
                   4033:           }
                   4034:         }
                   4035: #endif
                   4036: 
                   4037:       else
                   4038:         {
1.1.1.2   misho    4039:         PCRE_EXEC(count, re, extra, bptr, len, start_offset,
                   4040:           options | g_notempty, use_offsets, use_size_offsets);
1.1       misho    4041:         if (count == 0)
                   4042:           {
                   4043:           fprintf(outfile, "Matched, but too many substrings\n");
                   4044:           count = use_size_offsets/3;
                   4045:           }
                   4046:         }
                   4047: 
                   4048:       /* Matched */
                   4049: 
                   4050:       if (count >= 0)
                   4051:         {
                   4052:         int i, maxcount;
1.1.1.2   misho    4053:         void *cnptr, *gnptr;
1.1       misho    4054: 
                   4055: #if !defined NODFA
                   4056:         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
                   4057: #endif
                   4058:           maxcount = use_size_offsets/3;
                   4059: 
                   4060:         /* This is a check against a lunatic return value. */
                   4061: 
                   4062:         if (count > maxcount)
                   4063:           {
                   4064:           fprintf(outfile,
                   4065:             "** PCRE error: returned count %d is too big for offset size %d\n",
                   4066:             count, use_size_offsets);
                   4067:           count = use_size_offsets/3;
                   4068:           if (do_g || do_G)
                   4069:             {
                   4070:             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
                   4071:             do_g = do_G = FALSE;        /* Break g/G loop */
                   4072:             }
                   4073:           }
                   4074: 
                   4075:         /* do_allcaps requests showing of all captures in the pattern, to check
                   4076:         unset ones at the end. */
                   4077: 
                   4078:         if (do_allcaps)
                   4079:           {
1.1.1.2   misho    4080:           if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
                   4081:             goto SKIP_DATA;
1.1       misho    4082:           count++;   /* Allow for full match */
                   4083:           if (count * 2 > use_size_offsets) count = use_size_offsets/2;
                   4084:           }
                   4085: 
                   4086:         /* Output the captured substrings */
                   4087: 
                   4088:         for (i = 0; i < count * 2; i += 2)
                   4089:           {
                   4090:           if (use_offsets[i] < 0)
                   4091:             {
                   4092:             if (use_offsets[i] != -1)
                   4093:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
                   4094:                 use_offsets[i], i);
                   4095:             if (use_offsets[i+1] != -1)
                   4096:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
                   4097:                 use_offsets[i+1], i+1);
                   4098:             fprintf(outfile, "%2d: <unset>\n", i/2);
                   4099:             }
                   4100:           else
                   4101:             {
                   4102:             fprintf(outfile, "%2d: ", i/2);
1.1.1.2   misho    4103:             PCHARSV(bptr, use_offsets[i],
1.1       misho    4104:               use_offsets[i+1] - use_offsets[i], outfile);
1.1.1.3 ! misho    4105:             if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1       misho    4106:             fprintf(outfile, "\n");
                   4107:             if (do_showcaprest || (i == 0 && do_showrest))
                   4108:               {
                   4109:               fprintf(outfile, "%2d+ ", i/2);
1.1.1.2   misho    4110:               PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
1.1       misho    4111:                 outfile);
                   4112:               fprintf(outfile, "\n");
                   4113:               }
                   4114:             }
                   4115:           }
                   4116: 
1.1.1.2   misho    4117:         if (markptr != NULL)
                   4118:           {
                   4119:           fprintf(outfile, "MK: ");
                   4120:           PCHARSV(markptr, 0, -1, outfile);
                   4121:           fprintf(outfile, "\n");
                   4122:           }
1.1       misho    4123: 
                   4124:         for (i = 0; i < 32; i++)
                   4125:           {
                   4126:           if ((copystrings & (1 << i)) != 0)
                   4127:             {
1.1.1.2   misho    4128:             int rc;
1.1       misho    4129:             char copybuffer[256];
1.1.1.2   misho    4130:             PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
                   4131:               copybuffer, sizeof(copybuffer));
1.1       misho    4132:             if (rc < 0)
                   4133:               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
                   4134:             else
1.1.1.2   misho    4135:               {
                   4136:               fprintf(outfile, "%2dC ", i);
                   4137:               PCHARSV(copybuffer, 0, rc, outfile);
                   4138:               fprintf(outfile, " (%d)\n", rc);
                   4139:               }
1.1       misho    4140:             }
                   4141:           }
                   4142: 
1.1.1.2   misho    4143:         cnptr = copynames;
                   4144:         for (;;)
1.1       misho    4145:           {
1.1.1.2   misho    4146:           int rc;
1.1       misho    4147:           char copybuffer[256];
1.1.1.2   misho    4148: 
                   4149:           if (use_pcre16)
                   4150:             {
                   4151:             if (*(pcre_uint16 *)cnptr == 0) break;
                   4152:             }
                   4153:           else
                   4154:             {
                   4155:             if (*(pcre_uint8 *)cnptr == 0) break;
                   4156:             }
                   4157: 
                   4158:           PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
                   4159:             cnptr, copybuffer, sizeof(copybuffer));
                   4160: 
1.1       misho    4161:           if (rc < 0)
1.1.1.2   misho    4162:             {
                   4163:             fprintf(outfile, "copy substring ");
                   4164:             PCHARSV(cnptr, 0, -1, outfile);
                   4165:             fprintf(outfile, " failed %d\n", rc);
                   4166:             }
1.1       misho    4167:           else
1.1.1.2   misho    4168:             {
                   4169:             fprintf(outfile, "  C ");
                   4170:             PCHARSV(copybuffer, 0, rc, outfile);
                   4171:             fprintf(outfile, " (%d) ", rc);
                   4172:             PCHARSV(cnptr, 0, -1, outfile);
                   4173:             putc('\n', outfile);
                   4174:             }
                   4175: 
                   4176:           cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
1.1       misho    4177:           }
                   4178: 
                   4179:         for (i = 0; i < 32; i++)
                   4180:           {
                   4181:           if ((getstrings & (1 << i)) != 0)
                   4182:             {
1.1.1.2   misho    4183:             int rc;
1.1       misho    4184:             const char *substring;
1.1.1.2   misho    4185:             PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
1.1       misho    4186:             if (rc < 0)
                   4187:               fprintf(outfile, "get substring %d failed %d\n", i, rc);
                   4188:             else
                   4189:               {
1.1.1.2   misho    4190:               fprintf(outfile, "%2dG ", i);
                   4191:               PCHARSV(substring, 0, rc, outfile);
                   4192:               fprintf(outfile, " (%d)\n", rc);
                   4193:               PCRE_FREE_SUBSTRING(substring);
1.1       misho    4194:               }
                   4195:             }
                   4196:           }
                   4197: 
1.1.1.2   misho    4198:         gnptr = getnames;
                   4199:         for (;;)
1.1       misho    4200:           {
1.1.1.2   misho    4201:           int rc;
1.1       misho    4202:           const char *substring;
1.1.1.2   misho    4203: 
                   4204:           if (use_pcre16)
                   4205:             {
                   4206:             if (*(pcre_uint16 *)gnptr == 0) break;
                   4207:             }
                   4208:           else
                   4209:             {
                   4210:             if (*(pcre_uint8 *)gnptr == 0) break;
                   4211:             }
                   4212: 
                   4213:           PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
                   4214:             gnptr, &substring);
1.1       misho    4215:           if (rc < 0)
1.1.1.2   misho    4216:             {
                   4217:             fprintf(outfile, "get substring ");
                   4218:             PCHARSV(gnptr, 0, -1, outfile);
                   4219:             fprintf(outfile, " failed %d\n", rc);
                   4220:             }
1.1       misho    4221:           else
                   4222:             {
1.1.1.2   misho    4223:             fprintf(outfile, "  G ");
                   4224:             PCHARSV(substring, 0, rc, outfile);
                   4225:             fprintf(outfile, " (%d) ", rc);
                   4226:             PCHARSV(gnptr, 0, -1, outfile);
                   4227:             PCRE_FREE_SUBSTRING(substring);
                   4228:             putc('\n', outfile);
1.1       misho    4229:             }
1.1.1.2   misho    4230: 
                   4231:           gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
1.1       misho    4232:           }
                   4233: 
                   4234:         if (getlist)
                   4235:           {
1.1.1.2   misho    4236:           int rc;
1.1       misho    4237:           const char **stringlist;
1.1.1.2   misho    4238:           PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
1.1       misho    4239:           if (rc < 0)
                   4240:             fprintf(outfile, "get substring list failed %d\n", rc);
                   4241:           else
                   4242:             {
                   4243:             for (i = 0; i < count; i++)
1.1.1.2   misho    4244:               {
                   4245:               fprintf(outfile, "%2dL ", i);
                   4246:               PCHARSV(stringlist[i], 0, -1, outfile);
                   4247:               putc('\n', outfile);
                   4248:               }
1.1       misho    4249:             if (stringlist[i] != NULL)
                   4250:               fprintf(outfile, "string list not terminated by NULL\n");
1.1.1.2   misho    4251:             PCRE_FREE_SUBSTRING_LIST(stringlist);
1.1       misho    4252:             }
                   4253:           }
                   4254:         }
                   4255: 
                   4256:       /* There was a partial match */
                   4257: 
                   4258:       else if (count == PCRE_ERROR_PARTIAL)
                   4259:         {
                   4260:         if (markptr == NULL) fprintf(outfile, "Partial match");
1.1.1.2   misho    4261:         else
                   4262:           {
                   4263:           fprintf(outfile, "Partial match, mark=");
                   4264:           PCHARSV(markptr, 0, -1, outfile);
                   4265:           }
1.1       misho    4266:         if (use_size_offsets > 1)
                   4267:           {
                   4268:           fprintf(outfile, ": ");
1.1.1.2   misho    4269:           PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
1.1       misho    4270:             outfile);
                   4271:           }
1.1.1.3 ! misho    4272:         if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
1.1       misho    4273:         fprintf(outfile, "\n");
                   4274:         break;  /* Out of the /g loop */
                   4275:         }
                   4276: 
                   4277:       /* Failed to match. If this is a /g or /G loop and we previously set
                   4278:       g_notempty after a null match, this is not necessarily the end. We want
                   4279:       to advance the start offset, and continue. We won't be at the end of the
                   4280:       string - that was checked before setting g_notempty.
                   4281: 
                   4282:       Complication arises in the case when the newline convention is "any",
                   4283:       "crlf", or "anycrlf". If the previous match was at the end of a line
                   4284:       terminated by CRLF, an advance of one character just passes the \r,
                   4285:       whereas we should prefer the longer newline sequence, as does the code in
                   4286:       pcre_exec(). Fudge the offset value to achieve this. We check for a
1.1.1.2   misho    4287:       newline setting in the pattern; if none was set, use PCRE_CONFIG() to
1.1       misho    4288:       find the default.
                   4289: 
                   4290:       Otherwise, in the case of UTF-8 matching, the advance must be one
                   4291:       character, not one byte. */
                   4292: 
                   4293:       else
                   4294:         {
                   4295:         if (g_notempty != 0)
                   4296:           {
                   4297:           int onechar = 1;
1.1.1.2   misho    4298:           unsigned int obits = ((REAL_PCRE *)re)->options;
1.1       misho    4299:           use_offsets[0] = start_offset;
                   4300:           if ((obits & PCRE_NEWLINE_BITS) == 0)
                   4301:             {
                   4302:             int d;
1.1.1.2   misho    4303:             (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
1.1       misho    4304:             /* Note that these values are always the ASCII ones, even in
                   4305:             EBCDIC environments. CR = 13, NL = 10. */
                   4306:             obits = (d == 13)? PCRE_NEWLINE_CR :
                   4307:                     (d == 10)? PCRE_NEWLINE_LF :
                   4308:                     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
                   4309:                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
                   4310:                     (d == -1)? PCRE_NEWLINE_ANY : 0;
                   4311:             }
                   4312:           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
                   4313:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
                   4314:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
                   4315:               &&
                   4316:               start_offset < len - 1 &&
1.1.1.2   misho    4317: #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
                   4318:               (use_pcre16?
                   4319:                    ((PCRE_SPTR16)bptr)[start_offset] == '\r'
                   4320:                 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
                   4321:               :
                   4322:                    bptr[start_offset] == '\r'
                   4323:                 && bptr[start_offset + 1] == '\n')
                   4324: #elif defined SUPPORT_PCRE16
                   4325:                  ((PCRE_SPTR16)bptr)[start_offset] == '\r'
                   4326:               && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
                   4327: #else
                   4328:                  bptr[start_offset] == '\r'
                   4329:               && bptr[start_offset + 1] == '\n'
                   4330: #endif
                   4331:               )
1.1       misho    4332:             onechar++;
1.1.1.2   misho    4333:           else if (use_utf)
1.1       misho    4334:             {
                   4335:             while (start_offset + onechar < len)
                   4336:               {
                   4337:               if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
                   4338:               onechar++;
                   4339:               }
                   4340:             }
                   4341:           use_offsets[1] = start_offset + onechar;
                   4342:           }
                   4343:         else
                   4344:           {
                   4345:           switch(count)
                   4346:             {
                   4347:             case PCRE_ERROR_NOMATCH:
                   4348:             if (gmatched == 0)
                   4349:               {
1.1.1.2   misho    4350:               if (markptr == NULL)
                   4351:                 {
1.1.1.3 ! misho    4352:                 fprintf(outfile, "No match");
1.1.1.2   misho    4353:                 }
                   4354:               else
                   4355:                 {
                   4356:                 fprintf(outfile, "No match, mark = ");
                   4357:                 PCHARSV(markptr, 0, -1, outfile);
                   4358:                 }
1.1.1.3 ! misho    4359:               if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
        !          4360:               putc('\n', outfile);
1.1       misho    4361:               }
                   4362:             break;
                   4363: 
                   4364:             case PCRE_ERROR_BADUTF8:
                   4365:             case PCRE_ERROR_SHORTUTF8:
1.1.1.2   misho    4366:             fprintf(outfile, "Error %d (%s UTF-%s string)", count,
                   4367:               (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
                   4368:               use_pcre16? "16" : "8");
1.1       misho    4369:             if (use_size_offsets >= 2)
                   4370:               fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
                   4371:                 use_offsets[1]);
                   4372:             fprintf(outfile, "\n");
                   4373:             break;
                   4374: 
1.1.1.2   misho    4375:             case PCRE_ERROR_BADUTF8_OFFSET:
                   4376:             fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
                   4377:               use_pcre16? "16" : "8");
                   4378:             break;
                   4379: 
1.1       misho    4380:             default:
1.1.1.2   misho    4381:             if (count < 0 &&
                   4382:                 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
1.1       misho    4383:               fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
                   4384:             else
                   4385:               fprintf(outfile, "Error %d (Unexpected value)\n", count);
                   4386:             break;
                   4387:             }
                   4388: 
                   4389:           break;  /* Out of the /g loop */
                   4390:           }
                   4391:         }
                   4392: 
                   4393:       /* If not /g or /G we are done */
                   4394: 
                   4395:       if (!do_g && !do_G) break;
                   4396: 
                   4397:       /* If we have matched an empty string, first check to see if we are at
                   4398:       the end of the subject. If so, the /g loop is over. Otherwise, mimic what
                   4399:       Perl's /g option does. This turns out to be rather cunning. First we set
                   4400:       PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
                   4401:       same point. If this fails (picked up above) we advance to the next
                   4402:       character. */
                   4403: 
                   4404:       g_notempty = 0;
                   4405: 
                   4406:       if (use_offsets[0] == use_offsets[1])
                   4407:         {
                   4408:         if (use_offsets[0] == len) break;
                   4409:         g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
                   4410:         }
                   4411: 
                   4412:       /* For /g, update the start offset, leaving the rest alone */
                   4413: 
                   4414:       if (do_g) start_offset = use_offsets[1];
                   4415: 
                   4416:       /* For /G, update the pointer and length */
                   4417: 
                   4418:       else
                   4419:         {
1.1.1.2   misho    4420:         bptr += use_offsets[1] * CHAR_SIZE;
1.1       misho    4421:         len -= use_offsets[1];
                   4422:         }
                   4423:       }  /* End of loop for /g and /G */
                   4424: 
                   4425:     NEXT_DATA: continue;
                   4426:     }    /* End of loop for data lines */
                   4427: 
                   4428:   CONTINUE:
                   4429: 
                   4430: #if !defined NOPOSIX
                   4431:   if (posix || do_posix) regfree(&preg);
                   4432: #endif
                   4433: 
                   4434:   if (re != NULL) new_free(re);
1.1.1.2   misho    4435:   if (extra != NULL)
                   4436:     {
                   4437:     PCRE_FREE_STUDY(extra);
                   4438:     }
1.1       misho    4439:   if (locale_set)
                   4440:     {
                   4441:     new_free((void *)tables);
                   4442:     setlocale(LC_CTYPE, "C");
                   4443:     locale_set = 0;
                   4444:     }
                   4445:   if (jit_stack != NULL)
                   4446:     {
1.1.1.2   misho    4447:     PCRE_JIT_STACK_FREE(jit_stack);
1.1       misho    4448:     jit_stack = NULL;
                   4449:     }
                   4450:   }
                   4451: 
                   4452: if (infile == stdin) fprintf(outfile, "\n");
                   4453: 
                   4454: EXIT:
                   4455: 
                   4456: if (infile != NULL && infile != stdin) fclose(infile);
                   4457: if (outfile != NULL && outfile != stdout) fclose(outfile);
                   4458: 
                   4459: free(buffer);
                   4460: free(dbuffer);
                   4461: free(pbuffer);
                   4462: free(offsets);
                   4463: 
1.1.1.2   misho    4464: #ifdef SUPPORT_PCRE16
                   4465: if (buffer16 != NULL) free(buffer16);
                   4466: #endif
                   4467: 
1.1       misho    4468: return yield;
                   4469: }
                   4470: 
                   4471: /* End of pcretest.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>