Annotation of embedaddon/pcre/pcretest.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *             PCRE testing program               *
                      3: *************************************************/
                      4: 
                      5: /* This program was hacked up as a tester for PCRE. I really should have
                      6: written it more tidily in the first place. Will I ever learn? It has grown and
                      7: been extended and consequently is now rather, er, *very* untidy in places.
                      8: 
                      9: -----------------------------------------------------------------------------
                     10: Redistribution and use in source and binary forms, with or without
                     11: modification, are permitted provided that the following conditions are met:
                     12: 
                     13:     * Redistributions of source code must retain the above copyright notice,
                     14:       this list of conditions and the following disclaimer.
                     15: 
                     16:     * Redistributions in binary form must reproduce the above copyright
                     17:       notice, this list of conditions and the following disclaimer in the
                     18:       documentation and/or other materials provided with the distribution.
                     19: 
                     20:     * Neither the name of the University of Cambridge nor the names of its
                     21:       contributors may be used to endorse or promote products derived from
                     22:       this software without specific prior written permission.
                     23: 
                     24: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     25: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     26: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     27: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     28: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     29: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     30: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     31: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     32: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     33: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     34: POSSIBILITY OF SUCH DAMAGE.
                     35: -----------------------------------------------------------------------------
                     36: */
                     37: 
                     38: 
                     39: #ifdef HAVE_CONFIG_H
                     40: #include "config.h"
                     41: #endif
                     42: 
                     43: #include <ctype.h>
                     44: #include <stdio.h>
                     45: #include <string.h>
                     46: #include <stdlib.h>
                     47: #include <time.h>
                     48: #include <locale.h>
                     49: #include <errno.h>
                     50: 
                     51: #ifdef SUPPORT_LIBREADLINE
                     52: #ifdef HAVE_UNISTD_H
                     53: #include <unistd.h>
                     54: #endif
                     55: #include <readline/readline.h>
                     56: #include <readline/history.h>
                     57: #endif
                     58: 
                     59: 
                     60: /* A number of things vary for Windows builds. Originally, pcretest opened its
                     61: input and output without "b"; then I was told that "b" was needed in some
                     62: environments, so it was added for release 5.0 to both the input and output. (It
                     63: makes no difference on Unix-like systems.) Later I was told that it is wrong
                     64: for the input on Windows. I've now abstracted the modes into two macros that
                     65: are set here, to make it easier to fiddle with them, and removed "b" from the
                     66: input mode under Windows. */
                     67: 
                     68: #if defined(_WIN32) || defined(WIN32)
                     69: #include <io.h>                /* For _setmode() */
                     70: #include <fcntl.h>             /* For _O_BINARY */
                     71: #define INPUT_MODE   "r"
                     72: #define OUTPUT_MODE  "wb"
                     73: 
                     74: #ifndef isatty
                     75: #define isatty _isatty         /* This is what Windows calls them, I'm told, */
                     76: #endif                         /* though in some environments they seem to   */
                     77:                                /* be already defined, hence the #ifndefs.    */
                     78: #ifndef fileno
                     79: #define fileno _fileno
                     80: #endif
                     81: 
                     82: /* A user sent this fix for Borland Builder 5 under Windows. */
                     83: 
                     84: #ifdef __BORLANDC__
                     85: #define _setmode(handle, mode) setmode(handle, mode)
                     86: #endif
                     87: 
                     88: /* Not Windows */
                     89: 
                     90: #else
                     91: #include <sys/time.h>          /* These two includes are needed */
                     92: #include <sys/resource.h>      /* for setrlimit(). */
                     93: #define INPUT_MODE   "rb"
                     94: #define OUTPUT_MODE  "wb"
                     95: #endif
                     96: 
                     97: 
                     98: /* We have to include pcre_internal.h because we need the internal info for
                     99: displaying the results of pcre_study() and we also need to know about the
                    100: internal macros, structures, and other internal data values; pcretest has
                    101: "inside information" compared to a program that strictly follows the PCRE API.
                    102: 
                    103: Although pcre_internal.h does itself include pcre.h, we explicitly include it
                    104: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
                    105: appropriately for an application, not for building PCRE. */
                    106: 
                    107: #include "pcre.h"
                    108: #include "pcre_internal.h"
                    109: 
                    110: /* We need access to some of the data tables that PCRE uses. So as not to have
                    111: to keep two copies, we include the source file here, changing the names of the
                    112: external symbols to prevent clashes. */
                    113: 
                    114: #define _pcre_ucp_gentype      ucp_gentype
                    115: #define _pcre_ucp_typerange    ucp_typerange
                    116: #define _pcre_utf8_table1      utf8_table1
                    117: #define _pcre_utf8_table1_size utf8_table1_size
                    118: #define _pcre_utf8_table2      utf8_table2
                    119: #define _pcre_utf8_table3      utf8_table3
                    120: #define _pcre_utf8_table4      utf8_table4
                    121: #define _pcre_utf8_char_sizes  utf8_char_sizes
                    122: #define _pcre_utt              utt
                    123: #define _pcre_utt_size         utt_size
                    124: #define _pcre_utt_names        utt_names
                    125: #define _pcre_OP_lengths       OP_lengths
                    126: 
                    127: #include "pcre_tables.c"
                    128: 
                    129: /* We also need the pcre_printint() function for printing out compiled
                    130: patterns. This function is in a separate file so that it can be included in
                    131: pcre_compile.c when that module is compiled with debugging enabled. It needs to
                    132: know which case is being compiled. */
                    133: 
                    134: #define COMPILING_PCRETEST
                    135: #include "pcre_printint.src"
                    136: 
                    137: /* The definition of the macro PRINTABLE, which determines whether to print an
                    138: output character as-is or as a hex value when showing compiled patterns, is
                     139: contained in the pcre_printint.src file. We use it here also, in cases when the
                    140: locale has not been explicitly changed, so as to get consistent output from
                    141: systems that differ in their output from isprint() even in the "C" locale. */
                    142: 
                     143: #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c)) /* NOTE(review): despite the name, a nonzero result appears to mean "print c as-is" (isprint semantics) - confirm at the call sites */
                    144: 
                    145: /* It is possible to compile this test program without including support for
                    146: testing the POSIX interface, though this is not available via the standard
                    147: Makefile. */
                    148: 
                    149: #if !defined NOPOSIX
                    150: #include "pcreposix.h"
                    151: #endif
                    152: 
                    153: /* It is also possible, for the benefit of the version currently imported into
                    154: Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
                    155: interface to the DFA matcher (NODFA), and without the doublecheck of the old
                    156: "info" function (define NOINFOCHECK). In fact, we automatically cut out the
                    157: UTF8 support if PCRE is built without it. */
                    158: 
                    159: #ifndef SUPPORT_UTF8
                    160: #ifndef NOUTF8
                    161: #define NOUTF8
                    162: #endif
                    163: #endif
                    164: 
                    165: 
                    166: /* Other parameters */
                    167: 
                    168: #ifndef CLOCKS_PER_SEC
                    169: #ifdef CLK_TCK
                    170: #define CLOCKS_PER_SEC CLK_TCK
                    171: #else
                    172: #define CLOCKS_PER_SEC 100
                    173: #endif
                    174: #endif
                    175: 
                    176: /* This is the default loop count for timing. */
                    177: 
                    178: #define LOOPREPEAT 500000
                    179: 
                    180: /* Static variables */
                    181: 
                     182: static FILE *outfile;
                     183: static int log_store = 0;
                     184: static int callout_count;          /* no initializer: starts at 0 (static storage duration) */
                     185: static int callout_extra;
                     186: static int callout_fail_count;
                     187: static int callout_fail_id;
                     188: static int debug_lengths;
                     189: static int first_callout;
                     190: static int locale_set = 0;         /* tested by PRINTHEX: nonzero selects isprint() over PRINTABLE() */
                     191: static int show_malloc;
                     192: static int use_utf8;
                     193: static size_t gotten_store;
                     194: static size_t first_gotten_store = 0;
                     195: static const unsigned char *last_callout_mark = NULL;
                     196: 
                     197: /* The buffers grow automatically if very long input lines are encountered. */
                     198: 
                     199: static int buffer_size = 50000;    /* NOTE(review): presumably the current size of the buffers below - confirm */
                     200: static uschar *buffer = NULL;
                     201: static uschar *dbuffer = NULL;
                     202: static uschar *pbuffer = NULL;
                    203: 
                    204: /* Textual explanations for runtime error codes */
                    205: 
                     206: static const char *errtexts[] = {  /* NOTE(review): presumably indexed by the negated PCRE error code (NOMATCH -> 1) - confirm at the use site; NULL entries have no text here */
                     207:   NULL,  /* 0 is no error */
                     208:   NULL,  /* NOMATCH is handled specially */
                     209:   "NULL argument passed",                   /* index 2 */
                     210:   "bad option value",                       /* index 3 */
                     211:   "magic number missing",                   /* index 4 */
                     212:   "unknown opcode - pattern overwritten?",  /* index 5 */
                     213:   "no more memory",                         /* index 6 */
                     214:   NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
                     215:   "match limit exceeded",                   /* index 8 */
                     216:   "callout error code",                     /* index 9 */
                     217:   NULL,  /* BADUTF8 is handled specially */
                     218:   "bad UTF-8 offset",                       /* index 11 */
                     219:   NULL,  /* PARTIAL is handled specially */
                     220:   "not used - internal error",              /* index 13 */
                     221:   "internal error - pattern overwritten?",  /* index 14 */
                     222:   "bad count value",                        /* index 15 */
                     223:   "item unsupported for DFA matching",      /* index 16 */
                     224:   "backreference condition or recursion test not supported for DFA matching",  /* index 17 */
                     225:   "match limit not supported for DFA matching",  /* index 18 */
                     226:   "workspace size exceeded in DFA matching",     /* index 19 */
                     227:   "too much recursion for DFA matching",         /* index 20 */
                     228:   "recursion limit exceeded",                    /* index 21 */
                     229:   "not used - internal error",                   /* index 22 */
                     230:   "invalid combination of newline options",      /* index 23 */
                     231:   "bad offset value",                            /* index 24 */
                     232:   NULL,  /* SHORTUTF8 is handled specially */
                     233:   "nested recursion at the same subject position",  /* index 26 */
                     234:   "JIT stack limit reached"                         /* index 27 */
                     235: };
                    236: 
                    237: 
                    238: /*************************************************
                    239: *         Alternate character tables             *
                    240: *************************************************/
                    241: 
                    242: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
                    243: using the default tables of the library. However, the T option can be used to
                    244: select alternate sets of tables, for different kinds of testing. Note also that
                    245: the L (locale) option also adjusts the tables. */
                    246: 
                    247: /* This is the set of tables distributed as default with PCRE. It recognizes
                    248: only ASCII characters. */
                    249: 
                     250: static const unsigned char tables0[] = {
                     251: 
                     252: /* This table is a lower casing table: 256 entries, indexed by character value. */
                     253: 
                     254:     0,  1,  2,  3,  4,  5,  6,  7,
                     255:     8,  9, 10, 11, 12, 13, 14, 15,
                     256:    16, 17, 18, 19, 20, 21, 22, 23,
                     257:    24, 25, 26, 27, 28, 29, 30, 31,
                     258:    32, 33, 34, 35, 36, 37, 38, 39,
                     259:    40, 41, 42, 43, 44, 45, 46, 47,
                     260:    48, 49, 50, 51, 52, 53, 54, 55,
                     261:    56, 57, 58, 59, 60, 61, 62, 63,
                     262:    64, 97, 98, 99,100,101,102,103,
                     263:   104,105,106,107,108,109,110,111,
                     264:   112,113,114,115,116,117,118,119,
                     265:   120,121,122, 91, 92, 93, 94, 95,
                     266:    96, 97, 98, 99,100,101,102,103,
                     267:   104,105,106,107,108,109,110,111,
                     268:   112,113,114,115,116,117,118,119,
                     269:   120,121,122,123,124,125,126,127,
                     270:   128,129,130,131,132,133,134,135,
                     271:   136,137,138,139,140,141,142,143,
                     272:   144,145,146,147,148,149,150,151,
                     273:   152,153,154,155,156,157,158,159,
                     274:   160,161,162,163,164,165,166,167,
                     275:   168,169,170,171,172,173,174,175,
                     276:   176,177,178,179,180,181,182,183,
                     277:   184,185,186,187,188,189,190,191,
                     278:   192,193,194,195,196,197,198,199,
                     279:   200,201,202,203,204,205,206,207,
                     280:   208,209,210,211,212,213,214,215,
                     281:   216,217,218,219,220,221,222,223,
                     282:   224,225,226,227,228,229,230,231,
                     283:   232,233,234,235,236,237,238,239,
                     284:   240,241,242,243,244,245,246,247,
                     285:   248,249,250,251,252,253,254,255,
                     286: 
                     287: /* This table is a case flipping table: 256 entries, indexed by character value. */
                     288: 
                     289:     0,  1,  2,  3,  4,  5,  6,  7,
                     290:     8,  9, 10, 11, 12, 13, 14, 15,
                     291:    16, 17, 18, 19, 20, 21, 22, 23,
                     292:    24, 25, 26, 27, 28, 29, 30, 31,
                     293:    32, 33, 34, 35, 36, 37, 38, 39,
                     294:    40, 41, 42, 43, 44, 45, 46, 47,
                     295:    48, 49, 50, 51, 52, 53, 54, 55,
                     296:    56, 57, 58, 59, 60, 61, 62, 63,
                     297:    64, 97, 98, 99,100,101,102,103,
                     298:   104,105,106,107,108,109,110,111,
                     299:   112,113,114,115,116,117,118,119,
                     300:   120,121,122, 91, 92, 93, 94, 95,
                     301:    96, 65, 66, 67, 68, 69, 70, 71,
                     302:    72, 73, 74, 75, 76, 77, 78, 79,
                     303:    80, 81, 82, 83, 84, 85, 86, 87,
                     304:    88, 89, 90,123,124,125,126,127,
                     305:   128,129,130,131,132,133,134,135,
                     306:   136,137,138,139,140,141,142,143,
                     307:   144,145,146,147,148,149,150,151,
                     308:   152,153,154,155,156,157,158,159,
                     309:   160,161,162,163,164,165,166,167,
                     310:   168,169,170,171,172,173,174,175,
                     311:   176,177,178,179,180,181,182,183,
                     312:   184,185,186,187,188,189,190,191,
                     313:   192,193,194,195,196,197,198,199,
                     314:   200,201,202,203,204,205,206,207,
                     315:   208,209,210,211,212,213,214,215,
                     316:   216,217,218,219,220,221,222,223,
                     317:   224,225,226,227,228,229,230,231,
                     318:   232,233,234,235,236,237,238,239,
                     319:   240,241,242,243,244,245,246,247,
                     320:   248,249,250,251,252,253,254,255,
                     321: 
                     322: /* This table contains bit maps for various character classes. Each map is 32
                     323: bytes long and the bits run from the least significant end of each byte. The
                     324: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
                     325: graph, print, punct, and cntrl. Other classes are built from combinations. */
                     326: 
                     327:   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space: chars 9-13 and 32 */
                     328:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     329:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     330:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     331: 
                     332:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit: 0-9, A-F, a-f */
                     333:   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
                     334:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     335:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     336: 
                     337:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit: 0-9 */
                     338:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     339:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     340:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     341: 
                     342:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper: A-Z */
                     343:   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
                     344:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     345:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     346: 
                     347:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower: a-z */
                     348:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
                     349:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     350:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     351: 
                     352:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word: 0-9, A-Z, '_', a-z */
                     353:   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
                     354:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     355:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     356: 
                     357:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph: chars 33-126 */
                     358:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
                     359:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     360:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     361: 
                     362:   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print: chars 32-126 */
                     363:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
                     364:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     365:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     366: 
                     367:   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct: 33-47, 58-64, 91-96, 123-126 */
                     368:   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
                     369:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     370:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     371: 
                     372:   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl: chars 0-31 and 127 */
                     373:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
                     374:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     375:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                     376: 
                     377: /* This table identifies various classes of character by individual bits:
                     378:   0x01   white space character
                     379:   0x02   letter
                     380:   0x04   decimal digit
                     381:   0x08   hexadecimal digit
                     382:   0x10   alphanumeric or '_'
                     383:   0x80   regular expression metacharacter or binary zero
                     384: */
                     385: 
                     386:   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
                     387:   0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
                     388:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
                     389:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
                     390:   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
                     391:   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
                     392:   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
                     393:   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
                     394:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
                     395:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
                     396:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
                     397:   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
                     398:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
                     399:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
                     400:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
                     401:   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
                     402:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
                     403:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
                     404:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
                     405:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
                     406:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
                     407:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
                     408:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
                     409:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
                     410:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
                     411:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
                     412:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
                     413:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
                     414:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
                     415:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
                     416:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
                     417:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
                    418: 
                     419: /* This is a set of tables that came originally from a Windows user. It seems to
                    420: be at least an approximation of ISO 8859. In particular, there are characters
                    421: greater than 128 that are marked as spaces, letters, etc. */
                    422: 
                     423: static const unsigned char tables1[] = {
                     424: 0,1,2,3,4,5,6,7, /* lower casing table: 256 entries, indexed by character value */
                     425: 8,9,10,11,12,13,14,15,
                     426: 16,17,18,19,20,21,22,23,
                     427: 24,25,26,27,28,29,30,31,
                     428: 32,33,34,35,36,37,38,39,
                     429: 40,41,42,43,44,45,46,47,
                     430: 48,49,50,51,52,53,54,55,
                     431: 56,57,58,59,60,61,62,63,
                     432: 64,97,98,99,100,101,102,103,
                     433: 104,105,106,107,108,109,110,111,
                     434: 112,113,114,115,116,117,118,119,
                     435: 120,121,122,91,92,93,94,95,
                     436: 96,97,98,99,100,101,102,103,
                     437: 104,105,106,107,108,109,110,111,
                     438: 112,113,114,115,116,117,118,119,
                     439: 120,121,122,123,124,125,126,127,
                     440: 128,129,130,131,132,133,134,135,
                     441: 136,137,138,139,140,141,142,143,
                     442: 144,145,146,147,148,149,150,151,
                     443: 152,153,154,155,156,157,158,159,
                     444: 160,161,162,163,164,165,166,167,
                     445: 168,169,170,171,172,173,174,175,
                     446: 176,177,178,179,180,181,182,183,
                     447: 184,185,186,187,188,189,190,191,
                     448: 224,225,226,227,228,229,230,231, /* 192-222 map to 224-254, except 215 which maps to itself */
                     449: 232,233,234,235,236,237,238,239,
                     450: 240,241,242,243,244,245,246,215,
                     451: 248,249,250,251,252,253,254,223,
                     452: 224,225,226,227,228,229,230,231,
                     453: 232,233,234,235,236,237,238,239,
                     454: 240,241,242,243,244,245,246,247,
                     455: 248,249,250,251,252,253,254,255,
                     456: 0,1,2,3,4,5,6,7, /* case flipping table: 256 entries, indexed by character value */
                     457: 8,9,10,11,12,13,14,15,
                     458: 16,17,18,19,20,21,22,23,
                     459: 24,25,26,27,28,29,30,31,
                     460: 32,33,34,35,36,37,38,39,
                     461: 40,41,42,43,44,45,46,47,
                     462: 48,49,50,51,52,53,54,55,
                     463: 56,57,58,59,60,61,62,63,
                     464: 64,97,98,99,100,101,102,103,
                     465: 104,105,106,107,108,109,110,111,
                     466: 112,113,114,115,116,117,118,119,
                     467: 120,121,122,91,92,93,94,95,
                     468: 96,65,66,67,68,69,70,71,
                     469: 72,73,74,75,76,77,78,79,
                     470: 80,81,82,83,84,85,86,87,
                     471: 88,89,90,123,124,125,126,127,
                     472: 128,129,130,131,132,133,134,135,
                     473: 136,137,138,139,140,141,142,143,
                     474: 144,145,146,147,148,149,150,151,
                     475: 152,153,154,155,156,157,158,159,
                     476: 160,161,162,163,164,165,166,167,
                     477: 168,169,170,171,172,173,174,175,
                     478: 176,177,178,179,180,181,182,183,
                     479: 184,185,186,187,188,189,190,191,
                     480: 224,225,226,227,228,229,230,231,
                     481: 232,233,234,235,236,237,238,239,
                     482: 240,241,242,243,244,245,246,215,
                     483: 248,249,250,251,252,253,254,223,
                     484: 192,193,194,195,196,197,198,199,
                     485: 200,201,202,203,204,205,206,207,
                     486: 208,209,210,211,212,213,214,247,
                     487: 216,217,218,219,220,221,222,255,
                     488: 0,62,0,0,1,0,0,0, /* class bit maps start here: 10 maps of 32 bytes each, in the same order as in tables0; this one is space */
                     489: 0,0,0,0,0,0,0,0,
                     490: 32,0,0,0,1,0,0,0,
                     491: 0,0,0,0,0,0,0,0,
                     492: 0,0,0,0,0,0,255,3, /* xdigit */
                     493: 126,0,0,0,126,0,0,0,
                     494: 0,0,0,0,0,0,0,0,
                     495: 0,0,0,0,0,0,0,0,
                     496: 0,0,0,0,0,0,255,3, /* digit */
                     497: 0,0,0,0,0,0,0,0,
                     498: 0,0,0,0,0,0,12,2,
                     499: 0,0,0,0,0,0,0,0,
                     500: 0,0,0,0,0,0,0,0, /* upper */
                     501: 254,255,255,7,0,0,0,0,
                     502: 0,0,0,0,0,0,0,0,
                     503: 255,255,127,127,0,0,0,0,
                     504: 0,0,0,0,0,0,0,0, /* lower */
                     505: 0,0,0,0,254,255,255,7,
                     506: 0,0,0,0,0,4,32,4,
                     507: 0,0,0,128,255,255,127,255,
                     508: 0,0,0,0,0,0,255,3, /* word */
                     509: 254,255,255,135,254,255,255,7,
                     510: 0,0,0,0,0,4,44,6,
                     511: 255,255,127,255,255,255,127,255,
                     512: 0,0,0,0,254,255,255,255, /* graph */
                     513: 255,255,255,255,255,255,255,127,
                     514: 0,0,0,0,254,255,255,255,
                     515: 255,255,255,255,255,255,255,255,
                     516: 0,2,0,0,255,255,255,255, /* print */
                     517: 255,255,255,255,255,255,255,127,
                     518: 0,0,0,0,255,255,255,255,
                     519: 255,255,255,255,255,255,255,255,
                     520: 0,0,0,0,254,255,0,252, /* punct */
                     521: 1,0,0,248,1,0,0,120,
                     522: 0,0,0,0,254,255,255,255,
                     523: 0,0,128,0,0,0,128,0,
                     524: 255,255,255,255,0,0,0,0, /* cntrl */
                     525: 0,0,0,0,0,0,0,128,
                     526: 255,255,255,255,0,0,0,0,
                     527: 0,0,0,0,0,0,0,0,
                     528: 128,0,0,0,0,0,0,0, /* character type flags: 256 entries, same bit meanings as the last table in tables0 */
                     529: 0,1,1,0,1,1,0,0,
                     530: 0,0,0,0,0,0,0,0,
                     531: 0,0,0,0,0,0,0,0,
                     532: 1,0,0,0,128,0,0,0,
                     533: 128,128,128,128,0,0,128,0,
                     534: 28,28,28,28,28,28,28,28,
                     535: 28,28,0,0,0,0,0,128,
                     536: 0,26,26,26,26,26,26,18,
                     537: 18,18,18,18,18,18,18,18,
                     538: 18,18,18,18,18,18,18,18,
                     539: 18,18,18,128,128,0,128,16,
                     540: 0,26,26,26,26,26,26,18,
                     541: 18,18,18,18,18,18,18,18,
                     542: 18,18,18,18,18,18,18,18,
                     543: 18,18,18,128,128,0,0,0,
                     544: 0,0,0,0,0,1,0,0,
                     545: 0,0,0,0,0,0,0,0,
                     546: 0,0,0,0,0,0,0,0,
                     547: 0,0,0,0,0,0,0,0,
                     548: 1,0,0,0,0,0,0,0,
                     549: 0,0,18,0,0,0,0,0,
                     550: 0,0,20,20,0,18,0,0,
                     551: 0,20,18,0,0,0,0,0,
                     552: 18,18,18,18,18,18,18,18,
                     553: 18,18,18,18,18,18,18,18,
                     554: 18,18,18,18,18,18,18,0,
                     555: 18,18,18,18,18,18,18,18,
                     556: 18,18,18,18,18,18,18,18,
                     557: 18,18,18,18,18,18,18,18,
                     558: 18,18,18,18,18,18,18,0,
                     559: 18,18,18,18,18,18,18,18
                     560: };
                    561: 
                    562: 
                    563: 
                    564: 
                    565: #ifndef HAVE_STRERROR
                    566: /*************************************************
                    567: *     Provide strerror() for non-ANSI libraries  *
                    568: *************************************************/
                    569: 
                    570: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
                    571: in their libraries, but can provide the same facility by this simple
                    572: alternative function. */
                    573: 
                    574: extern int   sys_nerr;
                    575: extern char *sys_errlist[];
                    576: 
                    577: char *
                    578: strerror(int n)
                    579: {
                    580: if (n < 0 || n >= sys_nerr) return "unknown error number";
                    581: return sys_errlist[n];
                    582: }
                    583: #endif /* HAVE_STRERROR */
                    584: 
                    585: 
                    586: /*************************************************
                    587: *         JIT memory callback                    *
                    588: *************************************************/
                    589: 
                    590: static pcre_jit_stack* jit_callback(void *arg)
                    591: {
                    592: return (pcre_jit_stack *)arg;
                    593: }
                    594: 
                    595: 
                    596: /*************************************************
                    597: *        Read or extend an input line            *
                    598: *************************************************/
                    599: 
                    600: /* Input lines are read into buffer, but both patterns and data lines can be
                    601: continued over multiple input lines. In addition, if the buffer fills up, we
                    602: want to automatically expand it so as to be able to handle extremely large
                    603: lines that are needed for certain stress tests. When the input buffer is
                    604: expanded, the other two buffers must also be expanded likewise, and the
                    605: contents of pbuffer, which are a copy of the input for callouts, must be
                    606: preserved (for when expansion happens for a data line). This is not the most
                    607: optimal way of handling this, but hey, this is just a test program!
                    608: 
                    609: Arguments:
                    610:   f            the file to read
                    611:   start        where in buffer to start (this *must* be within buffer)
                    612:   prompt       for stdin or readline()
                    613: 
                    614: Returns:       pointer to the start of new data
                    615:                could be a copy of start, or could be moved
                    616:                NULL if no data read and EOF reached
                    617: */
                    618: 
                    619: static uschar *
                    620: extend_inputline(FILE *f, uschar *start, const char *prompt)
                    621: {
                    622: uschar *here = start;
                    623: 
                    624: for (;;)
                    625:   {
                    626:   int rlen = (int)(buffer_size - (here - buffer));
                    627: 
                    628:   if (rlen > 1000)
                    629:     {
                    630:     int dlen;
                    631: 
                    632:     /* If libreadline support is required, use readline() to read a line if the
                    633:     input is a terminal. Note that readline() removes the trailing newline, so
                    634:     we must put it back again, to be compatible with fgets(). */
                    635: 
                    636: #ifdef SUPPORT_LIBREADLINE
                    637:     if (isatty(fileno(f)))
                    638:       {
                    639:       size_t len;
                    640:       char *s = readline(prompt);
                    641:       if (s == NULL) return (here == start)? NULL : start;
                    642:       len = strlen(s);
                    643:       if (len > 0) add_history(s);
                    644:       if (len > rlen - 1) len = rlen - 1;
                    645:       memcpy(here, s, len);
                    646:       here[len] = '\n';
                    647:       here[len+1] = 0;
                    648:       free(s);
                    649:       }
                    650:     else
                    651: #endif
                    652: 
                    653:     /* Read the next line by normal means, prompting if the file is stdin. */
                    654: 
                    655:       {
                    656:       if (f == stdin) printf("%s", prompt);
                    657:       if (fgets((char *)here, rlen,  f) == NULL)
                    658:         return (here == start)? NULL : start;
                    659:       }
                    660: 
                    661:     dlen = (int)strlen((char *)here);
                    662:     if (dlen > 0 && here[dlen - 1] == '\n') return start;
                    663:     here += dlen;
                    664:     }
                    665: 
                    666:   else
                    667:     {
                    668:     int new_buffer_size = 2*buffer_size;
                    669:     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
                    670:     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
                    671:     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
                    672: 
                    673:     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
                    674:       {
                    675:       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
                    676:       exit(1);
                    677:       }
                    678: 
                    679:     memcpy(new_buffer, buffer, buffer_size);
                    680:     memcpy(new_pbuffer, pbuffer, buffer_size);
                    681: 
                    682:     buffer_size = new_buffer_size;
                    683: 
                    684:     start = new_buffer + (start - buffer);
                    685:     here = new_buffer + (here - buffer);
                    686: 
                    687:     free(buffer);
                    688:     free(dbuffer);
                    689:     free(pbuffer);
                    690: 
                    691:     buffer = new_buffer;
                    692:     dbuffer = new_dbuffer;
                    693:     pbuffer = new_pbuffer;
                    694:     }
                    695:   }
                    696: 
                    697: return NULL;  /* Control never gets here */
                    698: }
                    699: 
                    700: 
                    701: 
                    702: 
                    703: 
                    704: 
                    705: 
                    706: /*************************************************
                    707: *          Read number from string               *
                    708: *************************************************/
                    709: 
                    710: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
                    711: around with conditional compilation, just do the job by hand. It is only used
                    712: for unpicking arguments, so just keep it simple.
                    713: 
                    714: Arguments:
                    715:   str           string to be converted
                    716:   endptr        where to put the end pointer
                    717: 
                    718: Returns:        the unsigned long
                    719: */
                    720: 
                    721: static int
                    722: get_value(unsigned char *str, unsigned char **endptr)
                    723: {
                    724: int result = 0;
                    725: while(*str != 0 && isspace(*str)) str++;
                    726: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
                    727: *endptr = str;
                    728: return(result);
                    729: }
                    730: 
                    731: 
                    732: 
                    733: 
                    734: /*************************************************
                    735: *            Convert UTF-8 string to value       *
                    736: *************************************************/
                    737: 
                    738: /* This function takes one or more bytes that represents a UTF-8 character,
                    739: and returns the value of the character.
                    740: 
                    741: Argument:
                    742:   utf8bytes   a pointer to the byte vector
                    743:   vptr        a pointer to an int to receive the value
                    744: 
                    745: Returns:      >  0 => the number of bytes consumed
                    746:               -6 to 0 => malformed UTF-8 character at offset = (-return)
                    747: */
                    748: 
                    749: #if !defined NOUTF8
                    750: 
                    751: static int
                    752: utf82ord(unsigned char *utf8bytes, int *vptr)
                    753: {
                    754: int c = *utf8bytes++;
                    755: int d = c;
                    756: int i, j, s;
                    757: 
                    758: for (i = -1; i < 6; i++)               /* i is number of additional bytes */
                    759:   {
                    760:   if ((d & 0x80) == 0) break;
                    761:   d <<= 1;
                    762:   }
                    763: 
                    764: if (i == -1) { *vptr = c; return 1; }  /* ascii character */
                    765: if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
                    766: 
                    767: /* i now has a value in the range 1-5 */
                    768: 
                    769: s = 6*i;
                    770: d = (c & utf8_table3[i]) << s;
                    771: 
                    772: for (j = 0; j < i; j++)
                    773:   {
                    774:   c = *utf8bytes++;
                    775:   if ((c & 0xc0) != 0x80) return -(j+1);
                    776:   s -= 6;
                    777:   d |= (c & 0x3f) << s;
                    778:   }
                    779: 
                    780: /* Check that encoding was the correct unique one */
                    781: 
                    782: for (j = 0; j < utf8_table1_size; j++)
                    783:   if (d <= utf8_table1[j]) break;
                    784: if (j != i) return -(i+1);
                    785: 
                    786: /* Valid value */
                    787: 
                    788: *vptr = d;
                    789: return i+1;
                    790: }
                    791: 
                    792: #endif
                    793: 
                    794: 
                    795: 
                    796: /*************************************************
                    797: *       Convert character value to UTF-8         *
                    798: *************************************************/
                    799: 
                    800: /* This function takes an integer value in the range 0 - 0x7fffffff
                    801: and encodes it as a UTF-8 character in 0 to 6 bytes.
                    802: 
                    803: Arguments:
                    804:   cvalue     the character value
                    805:   utf8bytes  pointer to buffer for result - at least 6 bytes long
                    806: 
                    807: Returns:     number of characters placed in the buffer
                    808: */
                    809: 
                    810: #if !defined NOUTF8
                    811: 
                    812: static int
                    813: ord2utf8(int cvalue, uschar *utf8bytes)
                    814: {
                    815: register int i, j;
                    816: for (i = 0; i < utf8_table1_size; i++)
                    817:   if (cvalue <= utf8_table1[i]) break;
                    818: utf8bytes += i;
                    819: for (j = i; j > 0; j--)
                    820:  {
                    821:  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
                    822:  cvalue >>= 6;
                    823:  }
                    824: *utf8bytes = utf8_table2[i] | cvalue;
                    825: return i + 1;
                    826: }
                    827: 
                    828: #endif
                    829: 
                    830: 
                    831: 
                    832: /*************************************************
                    833: *             Print character string             *
                    834: *************************************************/
                    835: 
                    836: /* Character string printing function. Must handle UTF-8 strings in utf8
                    837: mode. Yields number of characters printed. If handed a NULL file, just counts
                    838: chars without printing. */
                    839: 
                    840: static int pchars(unsigned char *p, int length, FILE *f)
                    841: {
                    842: int c = 0;
                    843: int yield = 0;
                    844: 
                    845: while (length-- > 0)
                    846:   {
                    847: #if !defined NOUTF8
                    848:   if (use_utf8)
                    849:     {
                    850:     int rc = utf82ord(p, &c);
                    851: 
                    852:     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
                    853:       {
                    854:       length -= rc - 1;
                    855:       p += rc;
                    856:       if (PRINTHEX(c))
                    857:         {
                    858:         if (f != NULL) fprintf(f, "%c", c);
                    859:         yield++;
                    860:         }
                    861:       else
                    862:         {
                    863:         int n = 4;
                    864:         if (f != NULL) fprintf(f, "\\x{%02x}", c);
                    865:         yield += (n <= 0x000000ff)? 2 :
                    866:                  (n <= 0x00000fff)? 3 :
                    867:                  (n <= 0x0000ffff)? 4 :
                    868:                  (n <= 0x000fffff)? 5 : 6;
                    869:         }
                    870:       continue;
                    871:       }
                    872:     }
                    873: #endif
                    874: 
                    875:    /* Not UTF-8, or malformed UTF-8  */
                    876: 
                    877:   c = *p++;
                    878:   if (PRINTHEX(c))
                    879:     {
                    880:     if (f != NULL) fprintf(f, "%c", c);
                    881:     yield++;
                    882:     }
                    883:   else
                    884:     {
                    885:     if (f != NULL) fprintf(f, "\\x%02x", c);
                    886:     yield += 4;
                    887:     }
                    888:   }
                    889: 
                    890: return yield;
                    891: }
                    892: 
                    893: 
                    894: 
                    895: /*************************************************
                    896: *              Callout function                  *
                    897: *************************************************/
                    898: 
                    899: /* Called from PCRE as a result of the (?C) item. We print out where we are in
                    900: the match. Yield zero unless more callouts than the fail count, or the callout
                    901: data is not zero. */
                    902: 
                    903: static int callout(pcre_callout_block *cb)
                    904: {
                    905: FILE *f = (first_callout | callout_extra)? outfile : NULL;
                    906: int i, pre_start, post_start, subject_length;
                    907: 
                    908: if (callout_extra)
                    909:   {
                    910:   fprintf(f, "Callout %d: last capture = %d\n",
                    911:     cb->callout_number, cb->capture_last);
                    912: 
                    913:   for (i = 0; i < cb->capture_top * 2; i += 2)
                    914:     {
                    915:     if (cb->offset_vector[i] < 0)
                    916:       fprintf(f, "%2d: <unset>\n", i/2);
                    917:     else
                    918:       {
                    919:       fprintf(f, "%2d: ", i/2);
                    920:       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
                    921:         cb->offset_vector[i+1] - cb->offset_vector[i], f);
                    922:       fprintf(f, "\n");
                    923:       }
                    924:     }
                    925:   }
                    926: 
                    927: /* Re-print the subject in canonical form, the first time or if giving full
                    928: datails. On subsequent calls in the same match, we use pchars just to find the
                    929: printed lengths of the substrings. */
                    930: 
                    931: if (f != NULL) fprintf(f, "--->");
                    932: 
                    933: pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
                    934: post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
                    935:   cb->current_position - cb->start_match, f);
                    936: 
                    937: subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
                    938: 
                    939: (void)pchars((unsigned char *)(cb->subject + cb->current_position),
                    940:   cb->subject_length - cb->current_position, f);
                    941: 
                    942: if (f != NULL) fprintf(f, "\n");
                    943: 
                    944: /* Always print appropriate indicators, with callout number if not already
                    945: shown. For automatic callouts, show the pattern offset. */
                    946: 
                    947: if (cb->callout_number == 255)
                    948:   {
                    949:   fprintf(outfile, "%+3d ", cb->pattern_position);
                    950:   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
                    951:   }
                    952: else
                    953:   {
                    954:   if (callout_extra) fprintf(outfile, "    ");
                    955:     else fprintf(outfile, "%3d ", cb->callout_number);
                    956:   }
                    957: 
                    958: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
                    959: fprintf(outfile, "^");
                    960: 
                    961: if (post_start > 0)
                    962:   {
                    963:   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
                    964:   fprintf(outfile, "^");
                    965:   }
                    966: 
                    967: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
                    968:   fprintf(outfile, " ");
                    969: 
                    970: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
                    971:   pbuffer + cb->pattern_position);
                    972: 
                    973: fprintf(outfile, "\n");
                    974: first_callout = 0;
                    975: 
                    976: if (cb->mark != last_callout_mark)
                    977:   {
                    978:   fprintf(outfile, "Latest Mark: %s\n",
                    979:     (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
                    980:   last_callout_mark = cb->mark;
                    981:   }
                    982: 
                    983: if (cb->callout_data != NULL)
                    984:   {
                    985:   int callout_data = *((int *)(cb->callout_data));
                    986:   if (callout_data != 0)
                    987:     {
                    988:     fprintf(outfile, "Callout data = %d\n", callout_data);
                    989:     return callout_data;
                    990:     }
                    991:   }
                    992: 
                    993: return (cb->callout_number != callout_fail_id)? 0 :
                    994:        (++callout_count >= callout_fail_count)? 1 : 0;
                    995: }
                    996: 
                    997: 
                    998: /*************************************************
                    999: *            Local malloc functions              *
                   1000: *************************************************/
                   1001: 
                   1002: /* Alternative malloc function, to test functionality and save the size of a
                   1003: compiled re, which is the first store request that pcre_compile() makes. The
                   1004: show_malloc variable is set only during matching. */
                   1005: 
                   1006: static void *new_malloc(size_t size)
                   1007: {
                   1008: void *block = malloc(size);
                   1009: gotten_store = size;
                   1010: if (first_gotten_store == 0) first_gotten_store = size;
                   1011: if (show_malloc)
                   1012:   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
                   1013: return block;
                   1014: }
                   1015: 
                   1016: static void new_free(void *block)
                   1017: {
                   1018: if (show_malloc)
                   1019:   fprintf(outfile, "free             %p\n", block);
                   1020: free(block);
                   1021: }
                   1022: 
                   1023: /* For recursion malloc/free, to test stacking calls */
                   1024: 
                   1025: static void *stack_malloc(size_t size)
                   1026: {
                   1027: void *block = malloc(size);
                   1028: if (show_malloc)
                   1029:   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
                   1030: return block;
                   1031: }
                   1032: 
                   1033: static void stack_free(void *block)
                   1034: {
                   1035: if (show_malloc)
                   1036:   fprintf(outfile, "stack_free       %p\n", block);
                   1037: free(block);
                   1038: }
                   1039: 
                   1040: 
                   1041: /*************************************************
                   1042: *          Call pcre_fullinfo()                  *
                   1043: *************************************************/
                   1044: 
                   1045: /* Get one piece of information from the pcre_fullinfo() function */
                   1046: 
                   1047: static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
                   1048: {
                   1049: int rc;
                   1050: if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
                   1051:   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
                   1052: }
                   1053: 
                   1054: 
                   1055: 
                   1056: /*************************************************
                   1057: *         Byte flipping function                 *
                   1058: *************************************************/
                   1059: 
                   1060: static unsigned long int
                   1061: byteflip(unsigned long int value, int n)
                   1062: {
                   1063: if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
                   1064: return ((value & 0x000000ff) << 24) |
                   1065:        ((value & 0x0000ff00) <<  8) |
                   1066:        ((value & 0x00ff0000) >>  8) |
                   1067:        ((value & 0xff000000) >> 24);
                   1068: }
                   1069: 
                   1070: 
                   1071: 
                   1072: 
                   1073: /*************************************************
                   1074: *        Check match or recursion limit          *
                   1075: *************************************************/
                   1076: 
                   1077: static int
                   1078: check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
                   1079:   int start_offset, int options, int *use_offsets, int use_size_offsets,
                   1080:   int flag, unsigned long int *limit, int errnumber, const char *msg)
                   1081: {
                   1082: int count;
                   1083: int min = 0;
                   1084: int mid = 64;
                   1085: int max = -1;
                   1086: 
                   1087: extra->flags |= flag;
                   1088: 
                   1089: for (;;)
                   1090:   {
                   1091:   *limit = mid;
                   1092: 
                   1093:   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
                   1094:     use_offsets, use_size_offsets);
                   1095: 
                   1096:   if (count == errnumber)
                   1097:     {
                   1098:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
                   1099:     min = mid;
                   1100:     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
                   1101:     }
                   1102: 
                   1103:   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
                   1104:                          count == PCRE_ERROR_PARTIAL)
                   1105:     {
                   1106:     if (mid == min + 1)
                   1107:       {
                   1108:       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
                   1109:       break;
                   1110:       }
                   1111:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
                   1112:     max = mid;
                   1113:     mid = (min + mid)/2;
                   1114:     }
                   1115:   else break;    /* Some other error */
                   1116:   }
                   1117: 
                   1118: extra->flags &= ~flag;
                   1119: return count;
                   1120: }
                   1121: 
                   1122: 
                   1123: 
                   1124: /*************************************************
                   1125: *         Case-independent strncmp() function    *
                   1126: *************************************************/
                   1127: 
                   1128: /*
                   1129: Arguments:
                   1130:   s         first string
                   1131:   t         second string
                   1132:   n         number of characters to compare
                   1133: 
                   1134: Returns:    < 0, = 0, or > 0, according to the comparison
                   1135: */
                   1136: 
                   1137: static int
                   1138: strncmpic(uschar *s, uschar *t, int n)
                   1139: {
                   1140: while (n--)
                   1141:   {
                   1142:   int c = tolower(*s++) - tolower(*t++);
                   1143:   if (c) return c;
                   1144:   }
                   1145: return 0;
                   1146: }
                   1147: 
                   1148: 
                   1149: 
                   1150: /*************************************************
                   1151: *         Check newline indicator                *
                   1152: *************************************************/
                   1153: 
                   1154: /* This is used both at compile and run-time to check for <xxx> escapes. Print
                   1155: a message and return 0 if there is no match.
                   1156: 
                   1157: Arguments:
                   1158:   p           points after the leading '<'
                   1159:   f           file for error message
                   1160: 
                   1161: Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
                   1162: */
                   1163: 
                   1164: static int
                   1165: check_newline(uschar *p, FILE *f)
                   1166: {
                   1167: if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
                   1168: if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
                   1169: if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
                   1170: if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
                   1171: if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
                   1172: if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
                   1173: if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
                   1174: fprintf(f, "Unknown newline type at: <%s\n", p);
                   1175: return 0;
                   1176: }
                   1177: 
                   1178: 
                   1179: 
                   /*************************************************
                   *             Usage function                     *
                   *************************************************/

                   /* Write the command-line summary to stdout. The text is held as a table of
                   fragments that are output in order; the fragments for readline, DFA matching,
                   and the POSIX interface depend on how the program was built.

                   Arguments:    none
                   Returns:      nothing
                   */

                   static void
                   usage(void)
                   {
                   static const char *const fragments[] = {
                     "Usage:     pcretest [options] [<input file> [<output file>]]\n\n",
                     "Input and output default to stdin and stdout.\n",
                   #ifdef SUPPORT_LIBREADLINE
                     "If input is a terminal, readline() is used to read from it.\n",
                   #else
                     "This version of pcretest is not linked with readline().\n",
                   #endif
                     "\nOptions:\n",
                     "  -b       show compiled code (bytecode)\n",
                     "  -C       show PCRE compile-time options and exit\n",
                     "  -d       debug: show compiled code and information (-b and -i)\n",
                   #if !defined NODFA
                     "  -dfa     force DFA matching for all subjects\n",
                   #endif
                     "  -help    show usage information\n",
                     "  -i       show information about compiled patterns\n",
                     "  -M       find MATCH_LIMIT minimum for each subject\n",
                     "  -m       output memory used information\n",
                     "  -o <n>   set size of offsets vector to <n>\n",
                   #if !defined NOPOSIX
                     "  -p       use POSIX interface\n",
                   #endif
                     "  -q       quiet: do not output PCRE version number at start\n",
                     "  -S <n>   set stack size to <n> megabytes\n",
                     "  -s       force each pattern to be studied at basic level\n",
                     "  -s+      force each pattern to be studied, using JIT if available\n",
                     "  -t       time compilation and execution\n",
                     "  -t <n>   time compilation and execution, repeating <n> times\n",
                     "  -tm      time execution (matching) only\n",
                     "  -tm <n>  time execution (matching) only, repeating <n> times\n"
                   };

                   size_t k;

                   /* None of the fragments contains a conversion specification, so they can be
                   written verbatim. */

                   for (k = 0; k < sizeof(fragments)/sizeof(fragments[0]); k++)
                     fputs(fragments[k], stdout);
                   }
                   1218: 
                   1219: 
                   1220: 
                   1221: /*************************************************
                   1222: *                Main Program                    *
                   1223: *************************************************/
                   1224: 
                   1225: /* Read lines from named file or stdin and write to named file or stdout; lines
                   1226: consist of a regular expression, in delimiters and optionally followed by
                   1227: options, followed by a set of test data, terminated by an empty line. */
                   1228: 
                   1229: int main(int argc, char **argv)
                   1230: {
                   1231: FILE *infile = stdin;
                   1232: int options = 0;
                   1233: int study_options = 0;
                   1234: int default_find_match_limit = FALSE;
                   1235: int op = 1;
                   1236: int timeit = 0;
                   1237: int timeitm = 0;
                   1238: int showinfo = 0;
                   1239: int showstore = 0;
                   1240: int force_study = -1;
                   1241: int force_study_options = 0;
                   1242: int quiet = 0;
                   1243: int size_offsets = 45;
                   1244: int size_offsets_max;
                   1245: int *offsets = NULL;
                   1246: #if !defined NOPOSIX
                   1247: int posix = 0;
                   1248: #endif
                   1249: int debug = 0;
                   1250: int done = 0;
                   1251: int all_use_dfa = 0;
                   1252: int yield = 0;
                   1253: int stack_size;
                   1254: 
                   1255: pcre_jit_stack *jit_stack = NULL;
                   1256: 
                   1257: 
                   1258: /* These vectors store, end-to-end, a list of captured substring names. Assume
                   1259: that 1024 is plenty long enough for the few names we'll be testing. */
                   1260: 
                   1261: uschar copynames[1024];
                   1262: uschar getnames[1024];
                   1263: 
                   1264: uschar *copynamesptr;
                   1265: uschar *getnamesptr;
                   1266: 
                   1267: /* Get buffers from malloc() so that Electric Fence will check their misuse
                   1268: when I am debugging. They grow automatically when very long lines are read. */
                   1269: 
                   1270: buffer = (unsigned char *)malloc(buffer_size);
                   1271: dbuffer = (unsigned char *)malloc(buffer_size);
                   1272: pbuffer = (unsigned char *)malloc(buffer_size);
                   1273: 
                   1274: /* The outfile variable is static so that new_malloc can use it. */
                   1275: 
                   1276: outfile = stdout;
                   1277: 
                   1278: /* The following  _setmode() stuff is some Windows magic that tells its runtime
                   1279: library to translate CRLF into a single LF character. At least, that's what
                   1280: I've been told: never having used Windows I take this all on trust. Originally
                   1281: it set 0x8000, but then I was advised that _O_BINARY was better. */
                   1282: 
                   1283: #if defined(_WIN32) || defined(WIN32)
                   1284: _setmode( _fileno( stdout ), _O_BINARY );
                   1285: #endif
                   1286: 
                   1287: /* Scan options */
                   1288: 
                   1289: while (argc > 1 && argv[op][0] == '-')
                   1290:   {
                   1291:   unsigned char *endptr;
                   1292: 
                   1293:   if (strcmp(argv[op], "-m") == 0) showstore = 1;
                   1294:   else if (strcmp(argv[op], "-s") == 0) force_study = 0;
                   1295:   else if (strcmp(argv[op], "-s+") == 0)
                   1296:     {
                   1297:     force_study = 1;
                   1298:     force_study_options = PCRE_STUDY_JIT_COMPILE;
                   1299:     }
                   1300:   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
                   1301:   else if (strcmp(argv[op], "-b") == 0) debug = 1;
                   1302:   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
                   1303:   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
                   1304:   else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
                   1305: #if !defined NODFA
                   1306:   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
                   1307: #endif
                   1308:   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
                   1309:       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
                   1310:         *endptr == 0))
                   1311:     {
                   1312:     op++;
                   1313:     argc--;
                   1314:     }
                   1315:   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
                   1316:     {
                   1317:     int both = argv[op][2] == 0;
                   1318:     int temp;
                   1319:     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
                   1320:                      *endptr == 0))
                   1321:       {
                   1322:       timeitm = temp;
                   1323:       op++;
                   1324:       argc--;
                   1325:       }
                   1326:     else timeitm = LOOPREPEAT;
                   1327:     if (both) timeit = timeitm;
                   1328:     }
                   1329:   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
                   1330:       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
                   1331:         *endptr == 0))
                   1332:     {
                   1333: #if defined(_WIN32) || defined(WIN32) || defined(__minix)
                   1334:     printf("PCRE: -S not supported on this OS\n");
                   1335:     exit(1);
                   1336: #else
                   1337:     int rc;
                   1338:     struct rlimit rlim;
                   1339:     getrlimit(RLIMIT_STACK, &rlim);
                   1340:     rlim.rlim_cur = stack_size * 1024 * 1024;
                   1341:     rc = setrlimit(RLIMIT_STACK, &rlim);
                   1342:     if (rc != 0)
                   1343:       {
                   1344:     printf("PCRE: setrlimit() failed with error %d\n", rc);
                   1345:     exit(1);
                   1346:       }
                   1347:     op++;
                   1348:     argc--;
                   1349: #endif
                   1350:     }
                   1351: #if !defined NOPOSIX
                   1352:   else if (strcmp(argv[op], "-p") == 0) posix = 1;
                   1353: #endif
                   1354:   else if (strcmp(argv[op], "-C") == 0)
                   1355:     {
                   1356:     int rc;
                   1357:     unsigned long int lrc;
                   1358:     printf("PCRE version %s\n", pcre_version());
                   1359:     printf("Compiled with\n");
                   1360:     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                   1361:     printf("  %sUTF-8 support\n", rc? "" : "No ");
                   1362:     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
                   1363:     printf("  %sUnicode properties support\n", rc? "" : "No ");
                   1364:     (void)pcre_config(PCRE_CONFIG_JIT, &rc);
                   1365:     if (rc)
                   1366:       printf("  Just-in-time compiler support\n");
                   1367:     else
                   1368:       printf("  No just-in-time compiler support\n");
                   1369:     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
                   1370:     /* Note that these values are always the ASCII values, even
                   1371:     in EBCDIC environments. CR is 13 and NL is 10. */
                   1372:     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
                   1373:       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
                   1374:       (rc == -2)? "ANYCRLF" :
                   1375:       (rc == -1)? "ANY" : "???");
                   1376:     (void)pcre_config(PCRE_CONFIG_BSR, &rc);
                   1377:     printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
                   1378:                                      "all Unicode newlines");
                   1379:     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
                   1380:     printf("  Internal link size = %d\n", rc);
                   1381:     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
                   1382:     printf("  POSIX malloc threshold = %d\n", rc);
                   1383:     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
                   1384:     printf("  Default match limit = %ld\n", lrc);
                   1385:     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
                   1386:     printf("  Default recursion depth limit = %ld\n", lrc);
                   1387:     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
                   1388:     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
                   1389:     goto EXIT;
                   1390:     }
                   1391:   else if (strcmp(argv[op], "-help") == 0 ||
                   1392:            strcmp(argv[op], "--help") == 0)
                   1393:     {
                   1394:     usage();
                   1395:     goto EXIT;
                   1396:     }
                   1397:   else
                   1398:     {
                   1399:     printf("** Unknown or malformed option %s\n", argv[op]);
                   1400:     usage();
                   1401:     yield = 1;
                   1402:     goto EXIT;
                   1403:     }
                   1404:   op++;
                   1405:   argc--;
                   1406:   }
                   1407: 
                   1408: /* Get the store for the offsets vector, and remember what it was */
                   1409: 
                   1410: size_offsets_max = size_offsets;
                   1411: offsets = (int *)malloc(size_offsets_max * sizeof(int));
                   1412: if (offsets == NULL)
                   1413:   {
                   1414:   printf("** Failed to get %d bytes of memory for offsets vector\n",
                   1415:     (int)(size_offsets_max * sizeof(int)));
                   1416:   yield = 1;
                   1417:   goto EXIT;
                   1418:   }
                   1419: 
                   1420: /* Sort out the input and output files */
                   1421: 
                   1422: if (argc > 1)
                   1423:   {
                   1424:   infile = fopen(argv[op], INPUT_MODE);
                   1425:   if (infile == NULL)
                   1426:     {
                   1427:     printf("** Failed to open %s\n", argv[op]);
                   1428:     yield = 1;
                   1429:     goto EXIT;
                   1430:     }
                   1431:   }
                   1432: 
                   1433: if (argc > 2)
                   1434:   {
                   1435:   outfile = fopen(argv[op+1], OUTPUT_MODE);
                   1436:   if (outfile == NULL)
                   1437:     {
                   1438:     printf("** Failed to open %s\n", argv[op+1]);
                   1439:     yield = 1;
                   1440:     goto EXIT;
                   1441:     }
                   1442:   }
                   1443: 
                   1444: /* Set alternative malloc function */
                   1445: 
                   1446: pcre_malloc = new_malloc;
                   1447: pcre_free = new_free;
                   1448: pcre_stack_malloc = stack_malloc;
                   1449: pcre_stack_free = stack_free;
                   1450: 
                   1451: /* Heading line unless quiet, then prompt for first regex if stdin */
                   1452: 
                   1453: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
                   1454: 
                   1455: /* Main loop */
                   1456: 
                   1457: while (!done)
                   1458:   {
                   1459:   pcre *re = NULL;
                   1460:   pcre_extra *extra = NULL;
                   1461: 
                   1462: #if !defined NOPOSIX  /* There are still compilers that require no indent */
                   1463:   regex_t preg;
                   1464:   int do_posix = 0;
                   1465: #endif
                   1466: 
                   1467:   const char *error;
                   1468:   unsigned char *markptr;
                   1469:   unsigned char *p, *pp, *ppp;
                   1470:   unsigned char *to_file = NULL;
                   1471:   const unsigned char *tables = NULL;
                   1472:   unsigned long int true_size, true_study_size = 0;
                   1473:   size_t size, regex_gotten_store;
                   1474:   int do_allcaps = 0;
                   1475:   int do_mark = 0;
                   1476:   int do_study = 0;
                   1477:   int no_force_study = 0;
                   1478:   int do_debug = debug;
                   1479:   int do_G = 0;
                   1480:   int do_g = 0;
                   1481:   int do_showinfo = showinfo;
                   1482:   int do_showrest = 0;
                   1483:   int do_showcaprest = 0;
                   1484:   int do_flip = 0;
                   1485:   int erroroffset, len, delimiter, poffset;
                   1486: 
                   1487:   use_utf8 = 0;
                   1488:   debug_lengths = 1;
                   1489: 
                   1490:   if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
                   1491:   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
                   1492:   fflush(outfile);
                   1493: 
                   1494:   p = buffer;
                   1495:   while (isspace(*p)) p++;
                   1496:   if (*p == 0) continue;
                   1497: 
                   1498:   /* See if the pattern is to be loaded pre-compiled from a file. */
                   1499: 
                   1500:   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
                   1501:     {
                   1502:     unsigned long int magic, get_options;
                   1503:     uschar sbuf[8];
                   1504:     FILE *f;
                   1505: 
                   1506:     p++;
                   1507:     pp = p + (int)strlen((char *)p);
                   1508:     while (isspace(pp[-1])) pp--;
                   1509:     *pp = 0;
                   1510: 
                   1511:     f = fopen((char *)p, "rb");
                   1512:     if (f == NULL)
                   1513:       {
                   1514:       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
                   1515:       continue;
                   1516:       }
                   1517: 
                   1518:     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
                   1519: 
                   1520:     true_size =
                   1521:       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
                   1522:     true_study_size =
                   1523:       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
                   1524: 
                   1525:     re = (real_pcre *)new_malloc(true_size);
                   1526:     regex_gotten_store = first_gotten_store;
                   1527: 
                   1528:     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
                   1529: 
                   1530:     magic = ((real_pcre *)re)->magic_number;
                   1531:     if (magic != MAGIC_NUMBER)
                   1532:       {
                   1533:       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
                   1534:         {
                   1535:         do_flip = 1;
                   1536:         }
                   1537:       else
                   1538:         {
                   1539:         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
                   1540:         fclose(f);
                   1541:         continue;
                   1542:         }
                   1543:       }
                   1544: 
                   1545:     fprintf(outfile, "Compiled pattern%s loaded from %s\n",
                   1546:       do_flip? " (byte-inverted)" : "", p);
                   1547: 
                   1548:     /* Need to know if UTF-8 for printing data strings */
                   1549: 
                   1550:     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
                   1551:     use_utf8 = (get_options & PCRE_UTF8) != 0;
                   1552: 
                   1553:     /* Now see if there is any following study data. */
                   1554: 
                   1555:     if (true_study_size != 0)
                   1556:       {
                   1557:       pcre_study_data *psd;
                   1558: 
                   1559:       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
                   1560:       extra->flags = PCRE_EXTRA_STUDY_DATA;
                   1561: 
                   1562:       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
                   1563:       extra->study_data = psd;
                   1564: 
                   1565:       if (fread(psd, 1, true_study_size, f) != true_study_size)
                   1566:         {
                   1567:         FAIL_READ:
                   1568:         fprintf(outfile, "Failed to read data from %s\n", p);
                   1569:         if (extra != NULL) pcre_free_study(extra);
                   1570:         if (re != NULL) new_free(re);
                   1571:         fclose(f);
                   1572:         continue;
                   1573:         }
                   1574:       fprintf(outfile, "Study data loaded from %s\n", p);
                   1575:       do_study = 1;     /* To get the data output if requested */
                   1576:       }
                   1577:     else fprintf(outfile, "No study data\n");
                   1578: 
                   1579:     fclose(f);
                   1580:     goto SHOW_INFO;
                   1581:     }
                   1582: 
                   1583:   /* In-line pattern (the usual case). Get the delimiter and seek the end of
                   1584:   the pattern; if it isn't complete, read more. */
                   1585: 
                   1586:   delimiter = *p++;
                   1587: 
                   1588:   if (isalnum(delimiter) || delimiter == '\\')
                   1589:     {
                   1590:     fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
                   1591:     goto SKIP_DATA;
                   1592:     }
                   1593: 
                   1594:   pp = p;
                   1595:   poffset = (int)(p - buffer);
                   1596: 
                   1597:   for(;;)
                   1598:     {
                   1599:     while (*pp != 0)
                   1600:       {
                   1601:       if (*pp == '\\' && pp[1] != 0) pp++;
                   1602:         else if (*pp == delimiter) break;
                   1603:       pp++;
                   1604:       }
                   1605:     if (*pp != 0) break;
                   1606:     if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
                   1607:       {
                   1608:       fprintf(outfile, "** Unexpected EOF\n");
                   1609:       done = 1;
                   1610:       goto CONTINUE;
                   1611:       }
                   1612:     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
                   1613:     }
                   1614: 
                   1615:   /* The buffer may have moved while being extended; reset the start of data
                   1616:   pointer to the correct relative point in the buffer. */
                   1617: 
                   1618:   p = buffer + poffset;
                   1619: 
                   1620:   /* If the first character after the delimiter is backslash, make
                   1621:   the pattern end with backslash. This is purely to provide a way
                   1622:   of testing for the error message when a pattern ends with backslash. */
                   1623: 
                   1624:   if (pp[1] == '\\') *pp++ = '\\';
                   1625: 
                   1626:   /* Terminate the pattern at the delimiter, and save a copy of the pattern
                   1627:   for callouts. */
                   1628: 
                   1629:   *pp++ = 0;
                   1630:   strcpy((char *)pbuffer, (char *)p);
                   1631: 
                   1632:   /* Look for options after final delimiter */
                   1633: 
                   1634:   options = 0;
                   1635:   study_options = 0;
                   1636:   log_store = showstore;  /* default from command line */
                   1637: 
                   1638:   while (*pp != 0)
                   1639:     {
                   1640:     switch (*pp++)
                   1641:       {
                   1642:       case 'f': options |= PCRE_FIRSTLINE; break;
                   1643:       case 'g': do_g = 1; break;
                   1644:       case 'i': options |= PCRE_CASELESS; break;
                   1645:       case 'm': options |= PCRE_MULTILINE; break;
                   1646:       case 's': options |= PCRE_DOTALL; break;
                   1647:       case 'x': options |= PCRE_EXTENDED; break;
                   1648: 
                   1649:       case '+':
                   1650:       if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
                   1651:       break;
                   1652: 
                   1653:       case '=': do_allcaps = 1; break;
                   1654:       case 'A': options |= PCRE_ANCHORED; break;
                   1655:       case 'B': do_debug = 1; break;
                   1656:       case 'C': options |= PCRE_AUTO_CALLOUT; break;
                   1657:       case 'D': do_debug = do_showinfo = 1; break;
                   1658:       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
                   1659:       case 'F': do_flip = 1; break;
                   1660:       case 'G': do_G = 1; break;
                   1661:       case 'I': do_showinfo = 1; break;
                   1662:       case 'J': options |= PCRE_DUPNAMES; break;
                   1663:       case 'K': do_mark = 1; break;
                   1664:       case 'M': log_store = 1; break;
                   1665:       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
                   1666: 
                   1667: #if !defined NOPOSIX
                   1668:       case 'P': do_posix = 1; break;
                   1669: #endif
                   1670: 
                   1671:       case 'S':
                   1672:       if (do_study == 0)
                   1673:         {
                   1674:         do_study = 1;
                   1675:         if (*pp == '+')
                   1676:           {
                   1677:           study_options |= PCRE_STUDY_JIT_COMPILE;
                   1678:           pp++;
                   1679:           }
                   1680:         }
                   1681:       else
                   1682:         {
                   1683:         do_study = 0;
                   1684:         no_force_study = 1;
                   1685:         }
                   1686:       break;
                   1687: 
                   1688:       case 'U': options |= PCRE_UNGREEDY; break;
                   1689:       case 'W': options |= PCRE_UCP; break;
                   1690:       case 'X': options |= PCRE_EXTRA; break;
                   1691:       case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
                   1692:       case 'Z': debug_lengths = 0; break;
                   1693:       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
                   1694:       case '?': options |= PCRE_NO_UTF8_CHECK; break;
                   1695: 
                   1696:       case 'T':
                   1697:       switch (*pp++)
                   1698:         {
                   1699:         case '0': tables = tables0; break;
                   1700:         case '1': tables = tables1; break;
                   1701: 
                   1702:         case '\r':
                   1703:         case '\n':
                   1704:         case ' ':
                   1705:         case 0:
                   1706:         fprintf(outfile, "** Missing table number after /T\n");
                   1707:         goto SKIP_DATA;
                   1708: 
                   1709:         default:
                   1710:         fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
                   1711:         goto SKIP_DATA;
                   1712:         }
                   1713:       break;
                   1714: 
                   1715:       case 'L':
                   1716:       ppp = pp;
                   1717:       /* The '\r' test here is so that it works on Windows. */
                   1718:       /* The '0' test is just in case this is an unterminated line. */
                   1719:       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
                   1720:       *ppp = 0;
                   1721:       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
                   1722:         {
                   1723:         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
                   1724:         goto SKIP_DATA;
                   1725:         }
                   1726:       locale_set = 1;
                   1727:       tables = pcre_maketables();
                   1728:       pp = ppp;
                   1729:       break;
                   1730: 
                   1731:       case '>':
                   1732:       to_file = pp;
                   1733:       while (*pp != 0) pp++;
                   1734:       while (isspace(pp[-1])) pp--;
                   1735:       *pp = 0;
                   1736:       break;
                   1737: 
                   1738:       case '<':
                   1739:         {
                   1740:         if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
                   1741:           {
                   1742:           options |= PCRE_JAVASCRIPT_COMPAT;
                   1743:           pp += 3;
                   1744:           }
                   1745:         else
                   1746:           {
                   1747:           int x = check_newline(pp, outfile);
                   1748:           if (x == 0) goto SKIP_DATA;
                   1749:           options |= x;
                   1750:           while (*pp++ != '>');
                   1751:           }
                   1752:         }
                   1753:       break;
                   1754: 
                   1755:       case '\r':                      /* So that it works in Windows */
                   1756:       case '\n':
                   1757:       case ' ':
                   1758:       break;
                   1759: 
                   1760:       default:
                   1761:       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
                   1762:       goto SKIP_DATA;
                   1763:       }
                   1764:     }
                   1765: 
                   1766:   /* Handle compiling via the POSIX interface, which doesn't support the
                   1767:   timing, showing, or debugging options, nor the ability to pass over
                   1768:   local character tables. */
                   1769: 
                   1770: #if !defined NOPOSIX
                   1771:   if (posix || do_posix)
                   1772:     {
                   1773:     int rc;
                   1774:     int cflags = 0;
                   1775: 
                   1776:     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
                   1777:     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
                   1778:     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
                   1779:     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
                   1780:     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
                   1781:     if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
                   1782:     if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
                   1783: 
                   1784:     first_gotten_store = 0;
                   1785:     rc = regcomp(&preg, (char *)p, cflags);
                   1786: 
                   1787:     /* Compilation failed; go back for another re, skipping to blank line
                   1788:     if non-interactive. */
                   1789: 
                   1790:     if (rc != 0)
                   1791:       {
                   1792:       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
                   1793:       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
                   1794:       goto SKIP_DATA;
                   1795:       }
                   1796:     }
                   1797: 
                   1798:   /* Handle compiling via the native interface */
                   1799: 
                   1800:   else
                   1801: #endif  /* !defined NOPOSIX */
                   1802: 
                   1803:     {
                   1804:     unsigned long int get_options;
                   1805: 
                   1806:     if (timeit > 0)
                   1807:       {
                   1808:       register int i;
                   1809:       clock_t time_taken;
                   1810:       clock_t start_time = clock();
                   1811:       for (i = 0; i < timeit; i++)
                   1812:         {
                   1813:         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
                   1814:         if (re != NULL) free(re);
                   1815:         }
                   1816:       time_taken = clock() - start_time;
                   1817:       fprintf(outfile, "Compile time %.4f milliseconds\n",
                   1818:         (((double)time_taken * 1000.0) / (double)timeit) /
                   1819:           (double)CLOCKS_PER_SEC);
                   1820:       }
                   1821: 
                   1822:     first_gotten_store = 0;
                   1823:     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
                   1824: 
                   1825:     /* Compilation failed; go back for another re, skipping to blank line
                   1826:     if non-interactive. */
                   1827: 
                   1828:     if (re == NULL)
                   1829:       {
                   1830:       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
                   1831:       SKIP_DATA:
                   1832:       if (infile != stdin)
                   1833:         {
                   1834:         for (;;)
                   1835:           {
                   1836:           if (extend_inputline(infile, buffer, NULL) == NULL)
                   1837:             {
                   1838:             done = 1;
                   1839:             goto CONTINUE;
                   1840:             }
                   1841:           len = (int)strlen((char *)buffer);
                   1842:           while (len > 0 && isspace(buffer[len-1])) len--;
                   1843:           if (len == 0) break;
                   1844:           }
                   1845:         fprintf(outfile, "\n");
                   1846:         }
                   1847:       goto CONTINUE;
                   1848:       }
                   1849: 
                   1850:     /* Compilation succeeded. It is now possible to set the UTF-8 option from
                   1851:     within the regex; check for this so that we know how to process the data
                   1852:     lines. */
                   1853: 
                   1854:     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
                   1855:     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
                   1856: 
                   1857:     /* Extract the size for possible writing before possibly flipping it,
                   1858:     and remember the store that was got. */
                   1859: 
                   1860:     true_size = ((real_pcre *)re)->size;
                   1861:     regex_gotten_store = first_gotten_store;
                   1862: 
                   1863:     /* Output code size information if requested */
                   1864: 
                   1865:     if (log_store)
                   1866:       fprintf(outfile, "Memory allocation (code space): %d\n",
                   1867:         (int)(first_gotten_store -
                   1868:               sizeof(real_pcre) -
                   1869:               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
                   1870: 
                   1871:     /* If -s or /S was present, study the regex to generate additional info to
                   1872:     help with the matching, unless the pattern has the SS option, which
                   1873:     suppresses the effect of /S (used for a few test patterns where studying is
                   1874:     never sensible). */
                   1875: 
                   1876:     if (do_study || (force_study >= 0 && !no_force_study))
                   1877:       {
                   1878:       if (timeit > 0)
                   1879:         {
                   1880:         register int i;
                   1881:         clock_t time_taken;
                   1882:         clock_t start_time = clock();
                   1883:         for (i = 0; i < timeit; i++)
                   1884:           extra = pcre_study(re, study_options | force_study_options, &error);
                   1885:         time_taken = clock() - start_time;
                   1886:         if (extra != NULL) pcre_free_study(extra);
                   1887:         fprintf(outfile, "  Study time %.4f milliseconds\n",
                   1888:           (((double)time_taken * 1000.0) / (double)timeit) /
                   1889:             (double)CLOCKS_PER_SEC);
                   1890:         }
                   1891:       extra = pcre_study(re, study_options | force_study_options, &error);
                   1892:       if (error != NULL)
                   1893:         fprintf(outfile, "Failed to study: %s\n", error);
                   1894:       else if (extra != NULL)
                   1895:         {
                   1896:         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
                   1897:         if (log_store)
                   1898:           {
                   1899:           size_t jitsize;
                   1900:           new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
                   1901:           if (jitsize != 0)
                   1902:             fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
                   1903:           }
                   1904:         }
                   1905:       }
                   1906: 
                   1907:     /* If /K was present, we set up for handling MARK data. */
                   1908: 
                   1909:     if (do_mark)
                   1910:       {
                   1911:       if (extra == NULL)
                   1912:         {
                   1913:         extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   1914:         extra->flags = 0;
                   1915:         }
                   1916:       extra->mark = &markptr;
                   1917:       extra->flags |= PCRE_EXTRA_MARK;
                   1918:       }
                   1919: 
                   1920:     /* If the 'F' option was present, we flip the bytes of all the integer
                   1921:     fields in the regex data block and the study block. This is to make it
                   1922:     possible to test PCRE's handling of byte-flipped patterns, e.g. those
                   1923:     compiled on a different architecture. */
                   1924: 
                   1925:     if (do_flip)
                   1926:       {
                   1927:       real_pcre *rre = (real_pcre *)re;
                   1928:       rre->magic_number =
                   1929:         byteflip(rre->magic_number, sizeof(rre->magic_number));
                   1930:       rre->size = byteflip(rre->size, sizeof(rre->size));
                   1931:       rre->options = byteflip(rre->options, sizeof(rre->options));
                   1932:       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
                   1933:       rre->top_bracket =
                   1934:         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
                   1935:       rre->top_backref =
                   1936:         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
                   1937:       rre->first_byte =
                   1938:         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
                   1939:       rre->req_byte =
                   1940:         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
                   1941:       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
                   1942:         sizeof(rre->name_table_offset));
                   1943:       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
                   1944:         sizeof(rre->name_entry_size));
                   1945:       rre->name_count = (pcre_uint16)byteflip(rre->name_count,
                   1946:         sizeof(rre->name_count));
                   1947: 
                   1948:       if (extra != NULL)
                   1949:         {
                   1950:         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
                   1951:         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
                   1952:         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
                   1953:         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
                   1954:         }
                   1955:       }
                   1956: 
                   1957:     /* Extract information from the compiled data if required. There are now
                   1958:     two info-returning functions. The old one has a limited interface and
                   1959:     returns only limited data. Check that it agrees with the newer one. */
                   1960: 
                   1961:     SHOW_INFO:
                   1962: 
                   1963:     if (do_debug)
                   1964:       {
                   1965:       fprintf(outfile, "------------------------------------------------------------------\n");
                   1966:       pcre_printint(re, outfile, debug_lengths);
                   1967:       }
                   1968: 
                   1969:     /* We already have the options in get_options (see above) */
                   1970: 
                   1971:     if (do_showinfo)
                   1972:       {
                   1973:       unsigned long int all_options;
                   1974: #if !defined NOINFOCHECK
                   1975:       int old_first_char, old_options, old_count;
                   1976: #endif
                   1977:       int count, backrefmax, first_char, need_char, okpartial, jchanged,
                   1978:         hascrorlf;
                   1979:       int nameentrysize, namecount;
                   1980:       const uschar *nametable;
                   1981: 
                   1982:       new_info(re, NULL, PCRE_INFO_SIZE, &size);
                   1983:       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
                   1984:       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
                   1985:       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
                   1986:       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
                   1987:       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
                   1988:       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
                   1989:       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
                   1990:       new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
                   1991:       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
                   1992:       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
                   1993: 
                   1994: #if !defined NOINFOCHECK
                   1995:       old_count = pcre_info(re, &old_options, &old_first_char);
                   1996:       if (count < 0) fprintf(outfile,
                   1997:         "Error %d from pcre_info()\n", count);
                   1998:       else
                   1999:         {
                   2000:         if (old_count != count) fprintf(outfile,
                   2001:           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
                   2002:             old_count);
                   2003: 
                   2004:         if (old_first_char != first_char) fprintf(outfile,
                   2005:           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
                   2006:             first_char, old_first_char);
                   2007: 
                   2008:         if (old_options != (int)get_options) fprintf(outfile,
                   2009:           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
                   2010:             get_options, old_options);
                   2011:         }
                   2012: #endif
                   2013: 
                   2014:       if (size != regex_gotten_store) fprintf(outfile,
                   2015:         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
                   2016:         (int)size, (int)regex_gotten_store);
                   2017: 
                   2018:       fprintf(outfile, "Capturing subpattern count = %d\n", count);
                   2019:       if (backrefmax > 0)
                   2020:         fprintf(outfile, "Max back reference = %d\n", backrefmax);
                   2021: 
                   2022:       if (namecount > 0)
                   2023:         {
                   2024:         fprintf(outfile, "Named capturing subpatterns:\n");
                   2025:         while (namecount-- > 0)
                   2026:           {
                   2027:           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
                   2028:             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
                   2029:             GET2(nametable, 0));
                   2030:           nametable += nameentrysize;
                   2031:           }
                   2032:         }
                   2033: 
                   2034:       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
                   2035:       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
                   2036: 
                   2037:       all_options = ((real_pcre *)re)->options;
                   2038:       if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
                   2039: 
                   2040:       if (get_options == 0) fprintf(outfile, "No options\n");
                   2041:         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
                   2042:           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
                   2043:           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
                   2044:           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
                   2045:           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
                   2046:           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
                   2047:           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
                   2048:           ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
                   2049:           ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
                   2050:           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
                   2051:           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
                   2052:           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
                   2053:           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
                   2054:           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
                   2055:           ((get_options & PCRE_UCP) != 0)? " ucp" : "",
                   2056:           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
                   2057:           ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
                   2058:           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
                   2059: 
                   2060:       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
                   2061: 
                   2062:       switch (get_options & PCRE_NEWLINE_BITS)
                   2063:         {
                   2064:         case PCRE_NEWLINE_CR:
                   2065:         fprintf(outfile, "Forced newline sequence: CR\n");
                   2066:         break;
                   2067: 
                   2068:         case PCRE_NEWLINE_LF:
                   2069:         fprintf(outfile, "Forced newline sequence: LF\n");
                   2070:         break;
                   2071: 
                   2072:         case PCRE_NEWLINE_CRLF:
                   2073:         fprintf(outfile, "Forced newline sequence: CRLF\n");
                   2074:         break;
                   2075: 
                   2076:         case PCRE_NEWLINE_ANYCRLF:
                   2077:         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
                   2078:         break;
                   2079: 
                   2080:         case PCRE_NEWLINE_ANY:
                   2081:         fprintf(outfile, "Forced newline sequence: ANY\n");
                   2082:         break;
                   2083: 
                   2084:         default:
                   2085:         break;
                   2086:         }
                   2087: 
                   2088:       if (first_char == -1)
                   2089:         {
                   2090:         fprintf(outfile, "First char at start or follows newline\n");
                   2091:         }
                   2092:       else if (first_char < 0)
                   2093:         {
                   2094:         fprintf(outfile, "No first char\n");
                   2095:         }
                   2096:       else
                   2097:         {
                   2098:         int ch = first_char & 255;
                   2099:         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
                   2100:           "" : " (caseless)";
                   2101:         if (PRINTHEX(ch))
                   2102:           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
                   2103:         else
                   2104:           fprintf(outfile, "First char = %d%s\n", ch, caseless);
                   2105:         }
                   2106: 
                   2107:       if (need_char < 0)
                   2108:         {
                   2109:         fprintf(outfile, "No need char\n");
                   2110:         }
                   2111:       else
                   2112:         {
                   2113:         int ch = need_char & 255;
                   2114:         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
                   2115:           "" : " (caseless)";
                   2116:         if (PRINTHEX(ch))
                   2117:           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
                   2118:         else
                   2119:           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
                   2120:         }
                   2121: 
                   2122:       /* Don't output study size; at present it is in any case a fixed
                   2123:       value, but it varies, depending on the computer architecture, and
                   2124:       so messes up the test suite. (And with the /F option, it might be
                   2125:       flipped.) If study was forced by an external -s, don't show this
                   2126:       information unless -i or -d was also present. This means that, except
                   2127:       when auto-callouts are involved, the output from runs with and without
                   2128:       -s should be identical. */
                   2129: 
                   2130:       if (do_study || (force_study >= 0 && showinfo && !no_force_study))
                   2131:         {
                   2132:         if (extra == NULL)
                   2133:           fprintf(outfile, "Study returned NULL\n");
                   2134:         else
                   2135:           {
                   2136:           uschar *start_bits = NULL;
                   2137:           int minlength;
                   2138: 
                   2139:           new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
                   2140:           fprintf(outfile, "Subject length lower bound = %d\n", minlength);
                   2141: 
                   2142:           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
                   2143:           if (start_bits == NULL)
                   2144:             fprintf(outfile, "No set of starting bytes\n");
                   2145:           else
                   2146:             {
                   2147:             int i;
                   2148:             int c = 24;
                   2149:             fprintf(outfile, "Starting byte set: ");
                   2150:             for (i = 0; i < 256; i++)
                   2151:               {
                   2152:               if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   2153:                 {
                   2154:                 if (c > 75)
                   2155:                   {
                   2156:                   fprintf(outfile, "\n  ");
                   2157:                   c = 2;
                   2158:                   }
                   2159:                 if (PRINTHEX(i) && i != ' ')
                   2160:                   {
                   2161:                   fprintf(outfile, "%c ", i);
                   2162:                   c += 2;
                   2163:                   }
                   2164:                 else
                   2165:                   {
                   2166:                   fprintf(outfile, "\\x%02x ", i);
                   2167:                   c += 5;
                   2168:                   }
                   2169:                 }
                   2170:               }
                   2171:             fprintf(outfile, "\n");
                   2172:             }
                   2173:           }
                   2174: 
                   2175:         /* Show this only if the JIT was set by /S, not by -s. */
                   2176: 
                   2177:         if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
                   2178:           {
                   2179:           int jit;
                   2180:           new_info(re, extra, PCRE_INFO_JIT, &jit);
                   2181:           if (jit)
                   2182:             fprintf(outfile, "JIT study was successful\n");
                   2183:           else
                   2184: #ifdef SUPPORT_JIT
                   2185:             fprintf(outfile, "JIT study was not successful\n");
                   2186: #else
                   2187:             fprintf(outfile, "JIT support is not available in this version of PCRE\n");
                   2188: #endif
                   2189:           }
                   2190:         }
                   2191:       }
                   2192: 
                   2193:     /* If the '>' option was present, we write out the regex to a file, and
                   2194:     that is all. The first 8 bytes of the file are the regex length and then
                   2195:     the study length, in big-endian order. */
                   2196: 
                   2197:     if (to_file != NULL)
                   2198:       {
                   2199:       FILE *f = fopen((char *)to_file, "wb");
                   2200:       if (f == NULL)
                   2201:         {
                   2202:         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
                   2203:         }
                   2204:       else
                   2205:         {
                   2206:         uschar sbuf[8];
                   2207:         sbuf[0] = (uschar)((true_size >> 24) & 255);
                   2208:         sbuf[1] = (uschar)((true_size >> 16) & 255);
                   2209:         sbuf[2] = (uschar)((true_size >>  8) & 255);
                   2210:         sbuf[3] = (uschar)((true_size) & 255);
                   2211: 
                   2212:         sbuf[4] = (uschar)((true_study_size >> 24) & 255);
                   2213:         sbuf[5] = (uschar)((true_study_size >> 16) & 255);
                   2214:         sbuf[6] = (uschar)((true_study_size >>  8) & 255);
                   2215:         sbuf[7] = (uschar)((true_study_size) & 255);
                   2216: 
                   2217:         if (fwrite(sbuf, 1, 8, f) < 8 ||
                   2218:             fwrite(re, 1, true_size, f) < true_size)
                   2219:           {
                   2220:           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
                   2221:           }
                   2222:         else
                   2223:           {
                   2224:           fprintf(outfile, "Compiled pattern written to %s\n", to_file);
                   2225: 
                   2226:           /* If there is study data, write it. */
                   2227: 
                   2228:           if (extra != NULL)
                   2229:             {
                   2230:             if (fwrite(extra->study_data, 1, true_study_size, f) <
                   2231:                 true_study_size)
                   2232:               {
                   2233:               fprintf(outfile, "Write error on %s: %s\n", to_file,
                   2234:                 strerror(errno));
                   2235:               }
                   2236:             else fprintf(outfile, "Study data written to %s\n", to_file);
                   2237:             }
                   2238:           }
                   2239:         fclose(f);
                   2240:         }
                   2241: 
                   2242:       new_free(re);
                   2243:       if (extra != NULL) pcre_free_study(extra);
                   2244:       if (locale_set)
                   2245:         {
                   2246:         new_free((void *)tables);
                   2247:         setlocale(LC_CTYPE, "C");
                   2248:         locale_set = 0;
                   2249:         }
                   2250:       continue;  /* With next regex */
                   2251:       }
                   2252:     }        /* End of non-POSIX compile */
                   2253: 
                   2254:   /* Read data lines and test them */
                   2255: 
                   2256:   for (;;)
                   2257:     {
                   2258:     uschar *q;
                   2259:     uschar *bptr;
                   2260:     int *use_offsets = offsets;
                   2261:     int use_size_offsets = size_offsets;
                   2262:     int callout_data = 0;
                   2263:     int callout_data_set = 0;
                   2264:     int count, c;
                   2265:     int copystrings = 0;
                   2266:     int find_match_limit = default_find_match_limit;
                   2267:     int getstrings = 0;
                   2268:     int getlist = 0;
                   2269:     int gmatched = 0;
                   2270:     int start_offset = 0;
                   2271:     int start_offset_sign = 1;
                   2272:     int g_notempty = 0;
                   2273:     int use_dfa = 0;
                   2274: 
                   2275:     options = 0;
                   2276: 
                   2277:     *copynames = 0;
                   2278:     *getnames = 0;
                   2279: 
                   2280:     copynamesptr = copynames;
                   2281:     getnamesptr = getnames;
                   2282: 
                   2283:     pcre_callout = callout;
                   2284:     first_callout = 1;
                   2285:     last_callout_mark = NULL;
                   2286:     callout_extra = 0;
                   2287:     callout_count = 0;
                   2288:     callout_fail_count = 999999;
                   2289:     callout_fail_id = -1;
                   2290:     show_malloc = 0;
                   2291: 
                   2292:     if (extra != NULL) extra->flags &=
                   2293:       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
                   2294: 
                   2295:     len = 0;
                   2296:     for (;;)
                   2297:       {
                   2298:       if (extend_inputline(infile, buffer + len, "data> ") == NULL)
                   2299:         {
                   2300:         if (len > 0)    /* Reached EOF without hitting a newline */
                   2301:           {
                   2302:           fprintf(outfile, "\n");
                   2303:           break;
                   2304:           }
                   2305:         done = 1;
                   2306:         goto CONTINUE;
                   2307:         }
                   2308:       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
                   2309:       len = (int)strlen((char *)buffer);
                   2310:       if (buffer[len-1] == '\n') break;
                   2311:       }
                   2312: 
                   2313:     while (len > 0 && isspace(buffer[len-1])) len--;
                   2314:     buffer[len] = 0;
                   2315:     if (len == 0) break;
                   2316: 
                   2317:     p = buffer;
                   2318:     while (isspace(*p)) p++;
                   2319: 
                   2320:     bptr = q = dbuffer;
                   2321:     while ((c = *p++) != 0)
                   2322:       {
                   2323:       int i = 0;
                   2324:       int n = 0;
                   2325: 
                   2326:       if (c == '\\') switch ((c = *p++))
                   2327:         {
                   2328:         case 'a': c =    7; break;
                   2329:         case 'b': c = '\b'; break;
                   2330:         case 'e': c =   27; break;
                   2331:         case 'f': c = '\f'; break;
                   2332:         case 'n': c = '\n'; break;
                   2333:         case 'r': c = '\r'; break;
                   2334:         case 't': c = '\t'; break;
                   2335:         case 'v': c = '\v'; break;
                   2336: 
                   2337:         case '0': case '1': case '2': case '3':
                   2338:         case '4': case '5': case '6': case '7':
                   2339:         c -= '0';
                   2340:         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
                   2341:           c = c * 8 + *p++ - '0';
                   2342: 
                   2343: #if !defined NOUTF8
                   2344:         if (use_utf8 && c > 255)
                   2345:           {
                   2346:           unsigned char buff8[8];
                   2347:           int ii, utn;
                   2348:           utn = ord2utf8(c, buff8);
                   2349:           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
                   2350:           c = buff8[ii];   /* Last byte */
                   2351:           }
                   2352: #endif
                   2353:         break;
                   2354: 
                   2355:         case 'x':
                   2356: 
                   2357:         /* Handle \x{..} specially - new Perl thing for utf8 */
                   2358: 
                   2359: #if !defined NOUTF8
                   2360:         if (*p == '{')
                   2361:           {
                   2362:           unsigned char *pt = p;
                   2363:           c = 0;
                   2364: 
                   2365:           /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
                   2366:           when isxdigit() is a macro that refers to its argument more than
                   2367:           once. This is banned by the C Standard, but apparently happens in at
                   2368:           least one MacOS environment. */
                   2369: 
                   2370:           for (pt++; isxdigit(*pt); pt++)
                   2371:             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
                   2372:           if (*pt == '}')
                   2373:             {
                   2374:             unsigned char buff8[8];
                   2375:             int ii, utn;
                   2376:             if (use_utf8)
                   2377:               {
                   2378:               utn = ord2utf8(c, buff8);
                   2379:               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
                   2380:               c = buff8[ii];   /* Last byte */
                   2381:               }
                   2382:             else
                   2383:              {
                   2384:              if (c > 255)
                   2385:                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
                   2386:                  "UTF-8 mode is not enabled.\n"
                   2387:                  "** Truncation will probably give the wrong result.\n", c);
                   2388:              }
                   2389:             p = pt + 1;
                   2390:             break;
                   2391:             }
                   2392:           /* Not correct form; fall through */
                   2393:           }
                   2394: #endif
                   2395: 
                   2396:         /* Ordinary \x */
                   2397: 
                   2398:         c = 0;
                   2399:         while (i++ < 2 && isxdigit(*p))
                   2400:           {
                   2401:           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
                   2402:           p++;
                   2403:           }
                   2404:         break;
                   2405: 
                   2406:         case 0:   /* \ followed by EOF allows for an empty line */
                   2407:         p--;
                   2408:         continue;
                   2409: 
                   2410:         case '>':
                   2411:         if (*p == '-')
                   2412:           {
                   2413:           start_offset_sign = -1;
                   2414:           p++;
                   2415:           }
                   2416:         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
                   2417:         start_offset *= start_offset_sign;
                   2418:         continue;
                   2419: 
                   2420:         case 'A':  /* Option setting */
                   2421:         options |= PCRE_ANCHORED;
                   2422:         continue;
                   2423: 
                   2424:         case 'B':
                   2425:         options |= PCRE_NOTBOL;
                   2426:         continue;
                   2427: 
                   2428:         case 'C':
                   2429:         if (isdigit(*p))    /* Set copy string */
                   2430:           {
                   2431:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2432:           copystrings |= 1 << n;
                   2433:           }
                   2434:         else if (isalnum(*p))
                   2435:           {
                   2436:           uschar *npp = copynamesptr;
                   2437:           while (isalnum(*p)) *npp++ = *p++;
                   2438:           *npp++ = 0;
                   2439:           *npp = 0;
                   2440:           n = pcre_get_stringnumber(re, (char *)copynamesptr);
                   2441:           if (n < 0)
                   2442:             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
                   2443:           copynamesptr = npp;
                   2444:           }
                   2445:         else if (*p == '+')
                   2446:           {
                   2447:           callout_extra = 1;
                   2448:           p++;
                   2449:           }
                   2450:         else if (*p == '-')
                   2451:           {
                   2452:           pcre_callout = NULL;
                   2453:           p++;
                   2454:           }
                   2455:         else if (*p == '!')
                   2456:           {
                   2457:           callout_fail_id = 0;
                   2458:           p++;
                   2459:           while(isdigit(*p))
                   2460:             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
                   2461:           callout_fail_count = 0;
                   2462:           if (*p == '!')
                   2463:             {
                   2464:             p++;
                   2465:             while(isdigit(*p))
                   2466:               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
                   2467:             }
                   2468:           }
                   2469:         else if (*p == '*')
                   2470:           {
                   2471:           int sign = 1;
                   2472:           callout_data = 0;
                   2473:           if (*(++p) == '-') { sign = -1; p++; }
                   2474:           while(isdigit(*p))
                   2475:             callout_data = callout_data * 10 + *p++ - '0';
                   2476:           callout_data *= sign;
                   2477:           callout_data_set = 1;
                   2478:           }
                   2479:         continue;
                   2480: 
                   2481: #if !defined NODFA
                   2482:         case 'D':
                   2483: #if !defined NOPOSIX
                   2484:         if (posix || do_posix)
                   2485:           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
                   2486:         else
                   2487: #endif
                   2488:           use_dfa = 1;
                   2489:         continue;
                   2490: #endif
                   2491: 
                   2492: #if !defined NODFA
                   2493:         case 'F':
                   2494:         options |= PCRE_DFA_SHORTEST;
                   2495:         continue;
                   2496: #endif
                   2497: 
                   2498:         case 'G':
                   2499:         if (isdigit(*p))
                   2500:           {
                   2501:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2502:           getstrings |= 1 << n;
                   2503:           }
                   2504:         else if (isalnum(*p))
                   2505:           {
                   2506:           uschar *npp = getnamesptr;
                   2507:           while (isalnum(*p)) *npp++ = *p++;
                   2508:           *npp++ = 0;
                   2509:           *npp = 0;
                   2510:           n = pcre_get_stringnumber(re, (char *)getnamesptr);
                   2511:           if (n < 0)
                   2512:             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
                   2513:           getnamesptr = npp;
                   2514:           }
                   2515:         continue;
                   2516: 
                   2517:         case 'J':
                   2518:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2519:         if (extra != NULL
                   2520:             && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
                   2521:             && extra->executable_jit != NULL)
                   2522:           {
                   2523:          if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
                   2524:          jit_stack = pcre_jit_stack_alloc(1, n * 1024);
                   2525:          pcre_assign_jit_stack(extra, jit_callback, jit_stack);
                   2526:           }
                   2527:         continue;
                   2528: 
                   2529:         case 'L':
                   2530:         getlist = 1;
                   2531:         continue;
                   2532: 
                   2533:         case 'M':
                   2534:         find_match_limit = 1;
                   2535:         continue;
                   2536: 
                   2537:         case 'N':
                   2538:         if ((options & PCRE_NOTEMPTY) != 0)
                   2539:           options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
                   2540:         else
                   2541:           options |= PCRE_NOTEMPTY;
                   2542:         continue;
                   2543: 
                   2544:         case 'O':
                   2545:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2546:         if (n > size_offsets_max)
                   2547:           {
                   2548:           size_offsets_max = n;
                   2549:           free(offsets);
                   2550:           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
                   2551:           if (offsets == NULL)
                   2552:             {
                   2553:             printf("** Failed to get %d bytes of memory for offsets vector\n",
                   2554:               (int)(size_offsets_max * sizeof(int)));
                   2555:             yield = 1;
                   2556:             goto EXIT;
                   2557:             }
                   2558:           }
                   2559:         use_size_offsets = n;
                   2560:         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
                   2561:         continue;
                   2562: 
                   2563:         case 'P':
                   2564:         options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
                   2565:           PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
                   2566:         continue;
                   2567: 
                   2568:         case 'Q':
                   2569:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2570:         if (extra == NULL)
                   2571:           {
                   2572:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   2573:           extra->flags = 0;
                   2574:           }
                   2575:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                   2576:         extra->match_limit_recursion = n;
                   2577:         continue;
                   2578: 
                   2579:         case 'q':
                   2580:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
                   2581:         if (extra == NULL)
                   2582:           {
                   2583:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   2584:           extra->flags = 0;
                   2585:           }
                   2586:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
                   2587:         extra->match_limit = n;
                   2588:         continue;
                   2589: 
                   2590: #if !defined NODFA
                   2591:         case 'R':
                   2592:         options |= PCRE_DFA_RESTART;
                   2593:         continue;
                   2594: #endif
                   2595: 
                   2596:         case 'S':
                   2597:         show_malloc = 1;
                   2598:         continue;
                   2599: 
                   2600:         case 'Y':
                   2601:         options |= PCRE_NO_START_OPTIMIZE;
                   2602:         continue;
                   2603: 
                   2604:         case 'Z':
                   2605:         options |= PCRE_NOTEOL;
                   2606:         continue;
                   2607: 
                   2608:         case '?':
                   2609:         options |= PCRE_NO_UTF8_CHECK;
                   2610:         continue;
                   2611: 
                   2612:         case '<':
                   2613:           {
                   2614:           int x = check_newline(p, outfile);
                   2615:           if (x == 0) goto NEXT_DATA;
                   2616:           options |= x;
                   2617:           while (*p++ != '>');
                   2618:           }
                   2619:         continue;
                   2620:         }
                   2621:       *q++ = c;
                   2622:       }
                   2623:     *q = 0;
                   2624:     len = (int)(q - dbuffer);
                   2625: 
                   2626:     /* Move the data to the end of the buffer so that a read over the end of
                   2627:     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
                   2628:     we are using the POSIX interface, we must include the terminating zero. */
                   2629: 
                   2630: #if !defined NOPOSIX
                   2631:     if (posix || do_posix)
                   2632:       {
                   2633:       memmove(bptr + buffer_size - len - 1, bptr, len + 1);
                   2634:       bptr += buffer_size - len - 1;
                   2635:       }
                   2636:     else
                   2637: #endif
                   2638:       {
                   2639:       memmove(bptr + buffer_size - len, bptr, len);
                   2640:       bptr += buffer_size - len;
                   2641:       }
                   2642: 
                   2643:     if ((all_use_dfa || use_dfa) && find_match_limit)
                   2644:       {
                   2645:       printf("**Match limit not relevant for DFA matching: ignored\n");
                   2646:       find_match_limit = 0;
                   2647:       }
                   2648: 
                   2649:     /* Handle matching via the POSIX interface, which does not
                   2650:     support timing or playing with the match limit or callout data. */
                   2651: 
                   2652: #if !defined NOPOSIX
                   2653:     if (posix || do_posix)
                   2654:       {
                   2655:       int rc;
                   2656:       int eflags = 0;
                   2657:       regmatch_t *pmatch = NULL;
                   2658:       if (use_size_offsets > 0)
                   2659:         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
                   2660:       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
                   2661:       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
                   2662:       if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
                   2663: 
                   2664:       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
                   2665: 
                   2666:       if (rc != 0)
                   2667:         {
                   2668:         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
                   2669:         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
                   2670:         }
                   2671:       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
                   2672:               != 0)
                   2673:         {
                   2674:         fprintf(outfile, "Matched with REG_NOSUB\n");
                   2675:         }
                   2676:       else
                   2677:         {
                   2678:         size_t i;
                   2679:         for (i = 0; i < (size_t)use_size_offsets; i++)
                   2680:           {
                   2681:           if (pmatch[i].rm_so >= 0)
                   2682:             {
                   2683:             fprintf(outfile, "%2d: ", (int)i);
                   2684:             (void)pchars(dbuffer + pmatch[i].rm_so,
                   2685:               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
                   2686:             fprintf(outfile, "\n");
                   2687:             if (do_showcaprest || (i == 0 && do_showrest))
                   2688:               {
                   2689:               fprintf(outfile, "%2d+ ", (int)i);
                   2690:               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
                   2691:                 outfile);
                   2692:               fprintf(outfile, "\n");
                   2693:               }
                   2694:             }
                   2695:           }
                   2696:         }
                   2697:       free(pmatch);
                   2698:       }
                   2699: 
                   2700:     /* Handle matching via the native interface - repeats for /g and /G */
                   2701: 
                   2702:     else
                   2703: #endif  /* !defined NOPOSIX */
                   2704: 
                   2705:     for (;; gmatched++)    /* Loop for /g or /G */
                   2706:       {
                   2707:       markptr = NULL;
                   2708: 
                   2709:       if (timeitm > 0)
                   2710:         {
                   2711:         register int i;
                   2712:         clock_t time_taken;
                   2713:         clock_t start_time = clock();
                   2714: 
                   2715: #if !defined NODFA
                   2716:         if (all_use_dfa || use_dfa)
                   2717:           {
                   2718:           int workspace[1000];
                   2719:           for (i = 0; i < timeitm; i++)
                   2720:             count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
                   2721:               options | g_notempty, use_offsets, use_size_offsets, workspace,
                   2722:               sizeof(workspace)/sizeof(int));
                   2723:           }
                   2724:         else
                   2725: #endif
                   2726: 
                   2727:         for (i = 0; i < timeitm; i++)
                   2728:           count = pcre_exec(re, extra, (char *)bptr, len,
                   2729:             start_offset, options | g_notempty, use_offsets, use_size_offsets);
                   2730: 
                   2731:         time_taken = clock() - start_time;
                   2732:         fprintf(outfile, "Execute time %.4f milliseconds\n",
                   2733:           (((double)time_taken * 1000.0) / (double)timeitm) /
                   2734:             (double)CLOCKS_PER_SEC);
                   2735:         }
                   2736: 
                   2737:       /* If find_match_limit is set, we want to do repeated matches with
                   2738:       varying limits in order to find the minimum value for the match limit and
                   2739:       for the recursion limit. The match limits are relevant only to the normal
                   2740:       running of pcre_exec(), so disable the JIT optimization. This makes it
                   2741:       possible to run the same set of tests with and without JIT externally
                   2742:       requested. */
                   2743: 
                   2744:       if (find_match_limit)
                   2745:         {
                   2746:         if (extra == NULL)
                   2747:           {
                   2748:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   2749:           extra->flags = 0;
                   2750:           }
                   2751:         else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
                   2752: 
                   2753:         (void)check_match_limit(re, extra, bptr, len, start_offset,
                   2754:           options|g_notempty, use_offsets, use_size_offsets,
                   2755:           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
                   2756:           PCRE_ERROR_MATCHLIMIT, "match()");
                   2757: 
                   2758:         count = check_match_limit(re, extra, bptr, len, start_offset,
                   2759:           options|g_notempty, use_offsets, use_size_offsets,
                   2760:           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
                   2761:           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
                   2762:         }
                   2763: 
                   2764:       /* If callout_data is set, use the interface with additional data */
                   2765: 
                   2766:       else if (callout_data_set)
                   2767:         {
                   2768:         if (extra == NULL)
                   2769:           {
                   2770:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
                   2771:           extra->flags = 0;
                   2772:           }
                   2773:         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
                   2774:         extra->callout_data = &callout_data;
                   2775:         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
                   2776:           options | g_notempty, use_offsets, use_size_offsets);
                   2777:         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
                   2778:         }
                   2779: 
                   2780:       /* The normal case is just to do the match once, with the default
                   2781:       value of match_limit. */
                   2782: 
                   2783: #if !defined NODFA
                   2784:       else if (all_use_dfa || use_dfa)
                   2785:         {
                   2786:         int workspace[1000];
                   2787:         count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
                   2788:           options | g_notempty, use_offsets, use_size_offsets, workspace,
                   2789:           sizeof(workspace)/sizeof(int));
                   2790:         if (count == 0)
                   2791:           {
                   2792:           fprintf(outfile, "Matched, but too many subsidiary matches\n");
                   2793:           count = use_size_offsets/2;
                   2794:           }
                   2795:         }
                   2796: #endif
                   2797: 
                   2798:       else
                   2799:         {
                   2800:         count = pcre_exec(re, extra, (char *)bptr, len,
                   2801:           start_offset, options | g_notempty, use_offsets, use_size_offsets);
                   2802:         if (count == 0)
                   2803:           {
                   2804:           fprintf(outfile, "Matched, but too many substrings\n");
                   2805:           count = use_size_offsets/3;
                   2806:           }
                   2807:         }
                   2808: 
                   2809:       /* Matched */
                   2810: 
                   2811:       if (count >= 0)
                   2812:         {
                   2813:         int i, maxcount;
                   2814: 
                   2815: #if !defined NODFA
                   2816:         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
                   2817: #endif
                   2818:           maxcount = use_size_offsets/3;
                   2819: 
                   2820:         /* This is a check against a lunatic return value. */
                   2821: 
                   2822:         if (count > maxcount)
                   2823:           {
                   2824:           fprintf(outfile,
                   2825:             "** PCRE error: returned count %d is too big for offset size %d\n",
                   2826:             count, use_size_offsets);
                   2827:           count = use_size_offsets/3;
                   2828:           if (do_g || do_G)
                   2829:             {
                   2830:             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
                   2831:             do_g = do_G = FALSE;        /* Break g/G loop */
                   2832:             }
                   2833:           }
                   2834: 
                   2835:         /* do_allcaps requests showing of all captures in the pattern, to check
                   2836:         unset ones at the end. */
                   2837: 
                   2838:         if (do_allcaps)
                   2839:           {
                   2840:           new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
                   2841:           count++;   /* Allow for full match */
                   2842:           if (count * 2 > use_size_offsets) count = use_size_offsets/2;
                   2843:           }
                   2844: 
                   2845:         /* Output the captured substrings */
                   2846: 
                   2847:         for (i = 0; i < count * 2; i += 2)
                   2848:           {
                   2849:           if (use_offsets[i] < 0)
                   2850:             {
                   2851:             if (use_offsets[i] != -1)
                   2852:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
                   2853:                 use_offsets[i], i);
                   2854:             if (use_offsets[i+1] != -1)
                   2855:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
                   2856:                 use_offsets[i+1], i+1);
                   2857:             fprintf(outfile, "%2d: <unset>\n", i/2);
                   2858:             }
                   2859:           else
                   2860:             {
                   2861:             fprintf(outfile, "%2d: ", i/2);
                   2862:             (void)pchars(bptr + use_offsets[i],
                   2863:               use_offsets[i+1] - use_offsets[i], outfile);
                   2864:             fprintf(outfile, "\n");
                   2865:             if (do_showcaprest || (i == 0 && do_showrest))
                   2866:               {
                   2867:               fprintf(outfile, "%2d+ ", i/2);
                   2868:               (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
                   2869:                 outfile);
                   2870:               fprintf(outfile, "\n");
                   2871:               }
                   2872:             }
                   2873:           }
                   2874: 
                   2875:         if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
                   2876: 
                   2877:         for (i = 0; i < 32; i++)
                   2878:           {
                   2879:           if ((copystrings & (1 << i)) != 0)
                   2880:             {
                   2881:             char copybuffer[256];
                   2882:             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
                   2883:               i, copybuffer, sizeof(copybuffer));
                   2884:             if (rc < 0)
                   2885:               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
                   2886:             else
                   2887:               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
                   2888:             }
                   2889:           }
                   2890: 
                   2891:         for (copynamesptr = copynames;
                   2892:              *copynamesptr != 0;
                   2893:              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
                   2894:           {
                   2895:           char copybuffer[256];
                   2896:           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
                   2897:             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
                   2898:           if (rc < 0)
                   2899:             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
                   2900:           else
                   2901:             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
                   2902:           }
                   2903: 
                   2904:         for (i = 0; i < 32; i++)
                   2905:           {
                   2906:           if ((getstrings & (1 << i)) != 0)
                   2907:             {
                   2908:             const char *substring;
                   2909:             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
                   2910:               i, &substring);
                   2911:             if (rc < 0)
                   2912:               fprintf(outfile, "get substring %d failed %d\n", i, rc);
                   2913:             else
                   2914:               {
                   2915:               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
                   2916:               pcre_free_substring(substring);
                   2917:               }
                   2918:             }
                   2919:           }
                   2920: 
                   2921:         for (getnamesptr = getnames;
                   2922:              *getnamesptr != 0;
                   2923:              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
                   2924:           {
                   2925:           const char *substring;
                   2926:           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
                   2927:             count, (char *)getnamesptr, &substring);
                   2928:           if (rc < 0)
                   2929:             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
                   2930:           else
                   2931:             {
                   2932:             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
                   2933:             pcre_free_substring(substring);
                   2934:             }
                   2935:           }
                   2936: 
                   2937:         if (getlist)
                   2938:           {
                   2939:           const char **stringlist;
                   2940:           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
                   2941:             &stringlist);
                   2942:           if (rc < 0)
                   2943:             fprintf(outfile, "get substring list failed %d\n", rc);
                   2944:           else
                   2945:             {
                   2946:             for (i = 0; i < count; i++)
                   2947:               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
                   2948:             if (stringlist[i] != NULL)
                   2949:               fprintf(outfile, "string list not terminated by NULL\n");
                   2950:             pcre_free_substring_list(stringlist);
                   2951:             }
                   2952:           }
                   2953:         }
                   2954: 
                   2955:       /* There was a partial match */
                   2956: 
                   2957:       else if (count == PCRE_ERROR_PARTIAL)
                   2958:         {
                   2959:         if (markptr == NULL) fprintf(outfile, "Partial match");
                   2960:           else fprintf(outfile, "Partial match, mark=%s", markptr);
                   2961:         if (use_size_offsets > 1)
                   2962:           {
                   2963:           fprintf(outfile, ": ");
                   2964:           pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
                   2965:             outfile);
                   2966:           }
                   2967:         fprintf(outfile, "\n");
                   2968:         break;  /* Out of the /g loop */
                   2969:         }
                   2970: 
                   2971:       /* Failed to match. If this is a /g or /G loop and we previously set
                   2972:       g_notempty after a null match, this is not necessarily the end. We want
                   2973:       to advance the start offset, and continue. We won't be at the end of the
                   2974:       string - that was checked before setting g_notempty.
                   2975: 
                   2976:       Complication arises in the case when the newline convention is "any",
                   2977:       "crlf", or "anycrlf". If the previous match was at the end of a line
                   2978:       terminated by CRLF, an advance of one character just passes the \r,
                   2979:       whereas we should prefer the longer newline sequence, as does the code in
                   2980:       pcre_exec(). Fudge the offset value to achieve this. We check for a
                   2981:       newline setting in the pattern; if none was set, use pcre_config() to
                   2982:       find the default.
                   2983: 
                   2984:       Otherwise, in the case of UTF-8 matching, the advance must be one
                   2985:       character, not one byte. */
                   2986: 
                   2987:       else
                   2988:         {
                   2989:         if (g_notempty != 0)
                   2990:           {
                   2991:           int onechar = 1;
                   2992:           unsigned int obits = ((real_pcre *)re)->options;
                   2993:           use_offsets[0] = start_offset;
                   2994:           if ((obits & PCRE_NEWLINE_BITS) == 0)
                   2995:             {
                   2996:             int d;
                   2997:             (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
                   2998:             /* Note that these values are always the ASCII ones, even in
                   2999:             EBCDIC environments. CR = 13, NL = 10. */
                   3000:             obits = (d == 13)? PCRE_NEWLINE_CR :
                   3001:                     (d == 10)? PCRE_NEWLINE_LF :
                   3002:                     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
                   3003:                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
                   3004:                     (d == -1)? PCRE_NEWLINE_ANY : 0;
                   3005:             }
                   3006:           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
                   3007:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
                   3008:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
                   3009:               &&
                   3010:               start_offset < len - 1 &&
                   3011:               bptr[start_offset] == '\r' &&
                   3012:               bptr[start_offset+1] == '\n')
                   3013:             onechar++;
                   3014:           else if (use_utf8)
                   3015:             {
                   3016:             while (start_offset + onechar < len)
                   3017:               {
                   3018:               if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
                   3019:               onechar++;
                   3020:               }
                   3021:             }
                   3022:           use_offsets[1] = start_offset + onechar;
                   3023:           }
                   3024:         else
                   3025:           {
                   3026:           switch(count)
                   3027:             {
                   3028:             case PCRE_ERROR_NOMATCH:
                   3029:             if (gmatched == 0)
                   3030:               {
                   3031:               if (markptr == NULL) fprintf(outfile, "No match\n");
                   3032:                 else fprintf(outfile, "No match, mark = %s\n", markptr);
                   3033:               }
                   3034:             break;
                   3035: 
                   3036:             case PCRE_ERROR_BADUTF8:
                   3037:             case PCRE_ERROR_SHORTUTF8:
                   3038:             fprintf(outfile, "Error %d (%s UTF-8 string)", count,
                   3039:               (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
                   3040:             if (use_size_offsets >= 2)
                   3041:               fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
                   3042:                 use_offsets[1]);
                   3043:             fprintf(outfile, "\n");
                   3044:             break;
                   3045: 
                   3046:             default:
                   3047:             if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
                   3048:               fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
                   3049:             else
                   3050:               fprintf(outfile, "Error %d (Unexpected value)\n", count);
                   3051:             break;
                   3052:             }
                   3053: 
                   3054:           break;  /* Out of the /g loop */
                   3055:           }
                   3056:         }
                   3057: 
                   3058:       /* If not /g or /G we are done */
                   3059: 
                   3060:       if (!do_g && !do_G) break;
                   3061: 
                   3062:       /* If we have matched an empty string, first check to see if we are at
                   3063:       the end of the subject. If so, the /g loop is over. Otherwise, mimic what
                   3064:       Perl's /g options does. This turns out to be rather cunning. First we set
                   3065:       PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
                   3066:       same point. If this fails (picked up above) we advance to the next
                   3067:       character. */
                   3068: 
                   3069:       g_notempty = 0;
                   3070: 
                   3071:       if (use_offsets[0] == use_offsets[1])
                   3072:         {
                   3073:         if (use_offsets[0] == len) break;
                   3074:         g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
                   3075:         }
                   3076: 
                   3077:       /* For /g, update the start offset, leaving the rest alone */
                   3078: 
                   3079:       if (do_g) start_offset = use_offsets[1];
                   3080: 
                   3081:       /* For /G, update the pointer and length */
                   3082: 
                   3083:       else
                   3084:         {
                   3085:         bptr += use_offsets[1];
                   3086:         len -= use_offsets[1];
                   3087:         }
                   3088:       }  /* End of loop for /g and /G */
                   3089: 
                   3090:     NEXT_DATA: continue;
                   3091:     }    /* End of loop for data lines */
                   3092: 
                   3093:   CONTINUE:
                   3094: 
                   3095: #if !defined NOPOSIX
                   3096:   if (posix || do_posix) regfree(&preg);
                   3097: #endif
                   3098: 
                   3099:   if (re != NULL) new_free(re);
                   3100:   if (extra != NULL) pcre_free_study(extra);
                   3101:   if (locale_set)
                   3102:     {
                   3103:     new_free((void *)tables);
                   3104:     setlocale(LC_CTYPE, "C");
                   3105:     locale_set = 0;
                   3106:     }
                   3107:   if (jit_stack != NULL)
                   3108:     {
                   3109:     pcre_jit_stack_free(jit_stack);
                   3110:     jit_stack = NULL;
                   3111:     }
                   3112:   }
                   3113: 
                   3114: if (infile == stdin) fprintf(outfile, "\n");
                   3115: 
                   3116: EXIT:
                   3117: 
                   3118: if (infile != NULL && infile != stdin) fclose(infile);
                   3119: if (outfile != NULL && outfile != stdout) fclose(outfile);
                   3120: 
                   3121: free(buffer);
                   3122: free(dbuffer);
                   3123: free(pbuffer);
                   3124: free(offsets);
                   3125: 
                   3126: return yield;
                   3127: }
                   3128: 
                   3129: /* End of pcretest.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>