File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcretest.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:51 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_21, HEAD
pcre

    1: /*************************************************
    2: *             PCRE testing program               *
    3: *************************************************/
    4: 
    5: /* This program was hacked up as a tester for PCRE. I really should have
    6: written it more tidily in the first place. Will I ever learn? It has grown and
    7: been extended and consequently is now rather, er, *very* untidy in places.
    8: 
    9: -----------------------------------------------------------------------------
   10: Redistribution and use in source and binary forms, with or without
   11: modification, are permitted provided that the following conditions are met:
   12: 
   13:     * Redistributions of source code must retain the above copyright notice,
   14:       this list of conditions and the following disclaimer.
   15: 
   16:     * Redistributions in binary form must reproduce the above copyright
   17:       notice, this list of conditions and the following disclaimer in the
   18:       documentation and/or other materials provided with the distribution.
   19: 
   20:     * Neither the name of the University of Cambridge nor the names of its
   21:       contributors may be used to endorse or promote products derived from
   22:       this software without specific prior written permission.
   23: 
   24: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   25: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   28: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   29: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   30: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   31: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   32: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   33: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34: POSSIBILITY OF SUCH DAMAGE.
   35: -----------------------------------------------------------------------------
   36: */
   37: 
   38: 
   39: #ifdef HAVE_CONFIG_H
   40: #include "config.h"
   41: #endif
   42: 
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
   50: 
   51: #ifdef SUPPORT_LIBREADLINE
   52: #ifdef HAVE_UNISTD_H
   53: #include <unistd.h>
   54: #endif
   55: #include <readline/readline.h>
   56: #include <readline/history.h>
   57: #endif
   58: 
   59: 
   60: /* A number of things vary for Windows builds. Originally, pcretest opened its
   61: input and output without "b"; then I was told that "b" was needed in some
   62: environments, so it was added for release 5.0 to both the input and output. (It
   63: makes no difference on Unix-like systems.) Later I was told that it is wrong
   64: for the input on Windows. I've now abstracted the modes into two macros that
   65: are set here, to make it easier to fiddle with them, and removed "b" from the
   66: input mode under Windows. */
   67: 
   68: #if defined(_WIN32) || defined(WIN32)
   69: #include <io.h>                /* For _setmode() */
   70: #include <fcntl.h>             /* For _O_BINARY */
   71: #define INPUT_MODE   "r"
   72: #define OUTPUT_MODE  "wb"
   73: 
   74: #ifndef isatty
   75: #define isatty _isatty         /* This is what Windows calls them, I'm told, */
   76: #endif                         /* though in some environments they seem to   */
   77:                                /* be already defined, hence the #ifndefs.    */
   78: #ifndef fileno
   79: #define fileno _fileno
   80: #endif
   81: 
   82: /* A user sent this fix for Borland Builder 5 under Windows. */
   83: 
   84: #ifdef __BORLANDC__
   85: #define _setmode(handle, mode) setmode(handle, mode)
   86: #endif
   87: 
   88: /* Not Windows */
   89: 
   90: #else
   91: #include <sys/time.h>          /* These two includes are needed */
   92: #include <sys/resource.h>      /* for setrlimit(). */
   93: #define INPUT_MODE   "rb"
   94: #define OUTPUT_MODE  "wb"
   95: #endif
   96: 
   97: 
   98: /* We have to include pcre_internal.h because we need the internal info for
   99: displaying the results of pcre_study() and we also need to know about the
  100: internal macros, structures, and other internal data values; pcretest has
  101: "inside information" compared to a program that strictly follows the PCRE API.
  102: 
  103: Although pcre_internal.h does itself include pcre.h, we explicitly include it
  104: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
  105: appropriately for an application, not for building PCRE. */
  106: 
  107: #include "pcre.h"
  108: #include "pcre_internal.h"
  109: 
  110: /* We need access to some of the data tables that PCRE uses. So as not to have
  111: to keep two copies, we include the source file here, changing the names of the
  112: external symbols to prevent clashes. */
  113: 
  114: #define _pcre_ucp_gentype      ucp_gentype
  115: #define _pcre_ucp_typerange    ucp_typerange
  116: #define _pcre_utf8_table1      utf8_table1
  117: #define _pcre_utf8_table1_size utf8_table1_size
  118: #define _pcre_utf8_table2      utf8_table2
  119: #define _pcre_utf8_table3      utf8_table3
  120: #define _pcre_utf8_table4      utf8_table4
  121: #define _pcre_utf8_char_sizes  utf8_char_sizes
  122: #define _pcre_utt              utt
  123: #define _pcre_utt_size         utt_size
  124: #define _pcre_utt_names        utt_names
  125: #define _pcre_OP_lengths       OP_lengths
  126: 
  127: #include "pcre_tables.c"
  128: 
  129: /* We also need the pcre_printint() function for printing out compiled
  130: patterns. This function is in a separate file so that it can be included in
  131: pcre_compile.c when that module is compiled with debugging enabled. It needs to
  132: know which case is being compiled. */
  133: 
  134: #define COMPILING_PCRETEST
  135: #include "pcre_printint.src"
  136: 
  137: /* The definition of the macro PRINTABLE, which determines whether to print an
  138: output character as-is or as a hex value when showing compiled patterns, is
  139: contained in the printint.src file. We use it here also, in cases when the
  140: locale has not been explicitly changed, so as to get consistent output from
  141: systems that differ in their output from isprint() even in the "C" locale. */
  142: 
  143: #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
  144: 
  145: /* It is possible to compile this test program without including support for
  146: testing the POSIX interface, though this is not available via the standard
  147: Makefile. */
  148: 
  149: #if !defined NOPOSIX
  150: #include "pcreposix.h"
  151: #endif
  152: 
  153: /* It is also possible, for the benefit of the version currently imported into
  154: Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
  155: interface to the DFA matcher (NODFA), and without the doublecheck of the old
  156: "info" function (define NOINFOCHECK). In fact, we automatically cut out the
  157: UTF8 support if PCRE is built without it. */
  158: 
  159: #ifndef SUPPORT_UTF8
  160: #ifndef NOUTF8
  161: #define NOUTF8
  162: #endif
  163: #endif
  164: 
  165: 
  166: /* Other parameters */
  167: 
  168: #ifndef CLOCKS_PER_SEC
  169: #ifdef CLK_TCK
  170: #define CLOCKS_PER_SEC CLK_TCK
  171: #else
  172: #define CLOCKS_PER_SEC 100
  173: #endif
  174: #endif
  175: 
  176: /* This is the default loop count for timing. */
  177: 
  178: #define LOOPREPEAT 500000
  179: 
  180: /* Static variables */
  181: 
  182: static FILE *outfile;
  183: static int log_store = 0;
  184: static int callout_count;
  185: static int callout_extra;
  186: static int callout_fail_count;
  187: static int callout_fail_id;
  188: static int debug_lengths;
  189: static int first_callout;
  190: static int locale_set = 0;
  191: static int show_malloc;
  192: static int use_utf8;
  193: static size_t gotten_store;
  194: static size_t first_gotten_store = 0;
  195: static const unsigned char *last_callout_mark = NULL;
  196: 
  197: /* The buffers grow automatically if very long input lines are encountered. */
  198: 
  199: static int buffer_size = 50000;
  200: static uschar *buffer = NULL;
  201: static uschar *dbuffer = NULL;
  202: static uschar *pbuffer = NULL;
  203: 
  204: /* Textual explanations for runtime error codes */
  205: 
  206: static const char *errtexts[] = {
  207:   NULL,  /* 0 is no error */
  208:   NULL,  /* NOMATCH is handled specially */
  209:   "NULL argument passed",
  210:   "bad option value",
  211:   "magic number missing",
  212:   "unknown opcode - pattern overwritten?",
  213:   "no more memory",
  214:   NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  215:   "match limit exceeded",
  216:   "callout error code",
  217:   NULL,  /* BADUTF8 is handled specially */
  218:   "bad UTF-8 offset",
  219:   NULL,  /* PARTIAL is handled specially */
  220:   "not used - internal error",
  221:   "internal error - pattern overwritten?",
  222:   "bad count value",
  223:   "item unsupported for DFA matching",
  224:   "backreference condition or recursion test not supported for DFA matching",
  225:   "match limit not supported for DFA matching",
  226:   "workspace size exceeded in DFA matching",
  227:   "too much recursion for DFA matching",
  228:   "recursion limit exceeded",
  229:   "not used - internal error",
  230:   "invalid combination of newline options",
  231:   "bad offset value",
  232:   NULL,  /* SHORTUTF8 is handled specially */
  233:   "nested recursion at the same subject position",
  234:   "JIT stack limit reached"
  235: };
  236: 
  237: 
  238: /*************************************************
  239: *         Alternate character tables             *
  240: *************************************************/
  241: 
  242: /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
  243: using the default tables of the library. However, the T option can be used to
  244: select alternate sets of tables, for different kinds of testing. Note also that
  245: the L (locale) option also adjusts the tables. */
  246: 
  247: /* This is the set of tables distributed as default with PCRE. It recognizes
  248: only ASCII characters. */
  249: 
  250: static const unsigned char tables0[] = {
  251: 
  252: /* This table is a lower casing table. */
  253: 
  254:     0,  1,  2,  3,  4,  5,  6,  7,
  255:     8,  9, 10, 11, 12, 13, 14, 15,
  256:    16, 17, 18, 19, 20, 21, 22, 23,
  257:    24, 25, 26, 27, 28, 29, 30, 31,
  258:    32, 33, 34, 35, 36, 37, 38, 39,
  259:    40, 41, 42, 43, 44, 45, 46, 47,
  260:    48, 49, 50, 51, 52, 53, 54, 55,
  261:    56, 57, 58, 59, 60, 61, 62, 63,
  262:    64, 97, 98, 99,100,101,102,103,
  263:   104,105,106,107,108,109,110,111,
  264:   112,113,114,115,116,117,118,119,
  265:   120,121,122, 91, 92, 93, 94, 95,
  266:    96, 97, 98, 99,100,101,102,103,
  267:   104,105,106,107,108,109,110,111,
  268:   112,113,114,115,116,117,118,119,
  269:   120,121,122,123,124,125,126,127,
  270:   128,129,130,131,132,133,134,135,
  271:   136,137,138,139,140,141,142,143,
  272:   144,145,146,147,148,149,150,151,
  273:   152,153,154,155,156,157,158,159,
  274:   160,161,162,163,164,165,166,167,
  275:   168,169,170,171,172,173,174,175,
  276:   176,177,178,179,180,181,182,183,
  277:   184,185,186,187,188,189,190,191,
  278:   192,193,194,195,196,197,198,199,
  279:   200,201,202,203,204,205,206,207,
  280:   208,209,210,211,212,213,214,215,
  281:   216,217,218,219,220,221,222,223,
  282:   224,225,226,227,228,229,230,231,
  283:   232,233,234,235,236,237,238,239,
  284:   240,241,242,243,244,245,246,247,
  285:   248,249,250,251,252,253,254,255,
  286: 
  287: /* This table is a case flipping table. */
  288: 
  289:     0,  1,  2,  3,  4,  5,  6,  7,
  290:     8,  9, 10, 11, 12, 13, 14, 15,
  291:    16, 17, 18, 19, 20, 21, 22, 23,
  292:    24, 25, 26, 27, 28, 29, 30, 31,
  293:    32, 33, 34, 35, 36, 37, 38, 39,
  294:    40, 41, 42, 43, 44, 45, 46, 47,
  295:    48, 49, 50, 51, 52, 53, 54, 55,
  296:    56, 57, 58, 59, 60, 61, 62, 63,
  297:    64, 97, 98, 99,100,101,102,103,
  298:   104,105,106,107,108,109,110,111,
  299:   112,113,114,115,116,117,118,119,
  300:   120,121,122, 91, 92, 93, 94, 95,
  301:    96, 65, 66, 67, 68, 69, 70, 71,
  302:    72, 73, 74, 75, 76, 77, 78, 79,
  303:    80, 81, 82, 83, 84, 85, 86, 87,
  304:    88, 89, 90,123,124,125,126,127,
  305:   128,129,130,131,132,133,134,135,
  306:   136,137,138,139,140,141,142,143,
  307:   144,145,146,147,148,149,150,151,
  308:   152,153,154,155,156,157,158,159,
  309:   160,161,162,163,164,165,166,167,
  310:   168,169,170,171,172,173,174,175,
  311:   176,177,178,179,180,181,182,183,
  312:   184,185,186,187,188,189,190,191,
  313:   192,193,194,195,196,197,198,199,
  314:   200,201,202,203,204,205,206,207,
  315:   208,209,210,211,212,213,214,215,
  316:   216,217,218,219,220,221,222,223,
  317:   224,225,226,227,228,229,230,231,
  318:   232,233,234,235,236,237,238,239,
  319:   240,241,242,243,244,245,246,247,
  320:   248,249,250,251,252,253,254,255,
  321: 
  322: /* This table contains bit maps for various character classes. Each map is 32
  323: bytes long and the bits run from the least significant end of each byte. The
  324: classes that have their own maps are: space, xdigit, digit, upper, lower, word,
  325: graph, print, punct, and cntrl. Other classes are built from combinations. */
  326: 
  327:   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  328:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  329:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  330:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  331: 
  332:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  333:   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  334:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  335:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  336: 
  337:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  338:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  339:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  340:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  341: 
  342:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  343:   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  344:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  345:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  346: 
  347:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  348:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  349:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  350:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  351: 
  352:   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  353:   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  354:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  355:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  356: 
  357:   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  358:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  359:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  360:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  361: 
  362:   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  363:   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  364:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  365:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  366: 
  367:   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  368:   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  369:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  370:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  371: 
  372:   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  373:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  374:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  375:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  376: 
  377: /* This table identifies various classes of character by individual bits:
  378:   0x01   white space character
  379:   0x02   letter
  380:   0x04   decimal digit
  381:   0x08   hexadecimal digit
  382:   0x10   alphanumeric or '_'
  383:   0x80   regular expression metacharacter or binary zero
  384: */
  385: 
  386:   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  387:   0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  388:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  389:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  390:   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  391:   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  392:   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  393:   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  394:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  395:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  396:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  397:   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  398:   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  399:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  400:   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  401:   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  402:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  403:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  404:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  405:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  406:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  407:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  408:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  409:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  410:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  411:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  412:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  413:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  414:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  415:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  416:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  417:   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
  418: 
  419: /* This is a set of tables that came originally from a Windows user. It seems to
  420: be at least an approximation of ISO 8859. In particular, there are characters
  421: greater than 128 that are marked as spaces, letters, etc. */
  422: 
  423: static const unsigned char tables1[] = {
  424: 0,1,2,3,4,5,6,7,
  425: 8,9,10,11,12,13,14,15,
  426: 16,17,18,19,20,21,22,23,
  427: 24,25,26,27,28,29,30,31,
  428: 32,33,34,35,36,37,38,39,
  429: 40,41,42,43,44,45,46,47,
  430: 48,49,50,51,52,53,54,55,
  431: 56,57,58,59,60,61,62,63,
  432: 64,97,98,99,100,101,102,103,
  433: 104,105,106,107,108,109,110,111,
  434: 112,113,114,115,116,117,118,119,
  435: 120,121,122,91,92,93,94,95,
  436: 96,97,98,99,100,101,102,103,
  437: 104,105,106,107,108,109,110,111,
  438: 112,113,114,115,116,117,118,119,
  439: 120,121,122,123,124,125,126,127,
  440: 128,129,130,131,132,133,134,135,
  441: 136,137,138,139,140,141,142,143,
  442: 144,145,146,147,148,149,150,151,
  443: 152,153,154,155,156,157,158,159,
  444: 160,161,162,163,164,165,166,167,
  445: 168,169,170,171,172,173,174,175,
  446: 176,177,178,179,180,181,182,183,
  447: 184,185,186,187,188,189,190,191,
  448: 224,225,226,227,228,229,230,231,
  449: 232,233,234,235,236,237,238,239,
  450: 240,241,242,243,244,245,246,215,
  451: 248,249,250,251,252,253,254,223,
  452: 224,225,226,227,228,229,230,231,
  453: 232,233,234,235,236,237,238,239,
  454: 240,241,242,243,244,245,246,247,
  455: 248,249,250,251,252,253,254,255,
  456: 0,1,2,3,4,5,6,7,
  457: 8,9,10,11,12,13,14,15,
  458: 16,17,18,19,20,21,22,23,
  459: 24,25,26,27,28,29,30,31,
  460: 32,33,34,35,36,37,38,39,
  461: 40,41,42,43,44,45,46,47,
  462: 48,49,50,51,52,53,54,55,
  463: 56,57,58,59,60,61,62,63,
  464: 64,97,98,99,100,101,102,103,
  465: 104,105,106,107,108,109,110,111,
  466: 112,113,114,115,116,117,118,119,
  467: 120,121,122,91,92,93,94,95,
  468: 96,65,66,67,68,69,70,71,
  469: 72,73,74,75,76,77,78,79,
  470: 80,81,82,83,84,85,86,87,
  471: 88,89,90,123,124,125,126,127,
  472: 128,129,130,131,132,133,134,135,
  473: 136,137,138,139,140,141,142,143,
  474: 144,145,146,147,148,149,150,151,
  475: 152,153,154,155,156,157,158,159,
  476: 160,161,162,163,164,165,166,167,
  477: 168,169,170,171,172,173,174,175,
  478: 176,177,178,179,180,181,182,183,
  479: 184,185,186,187,188,189,190,191,
  480: 224,225,226,227,228,229,230,231,
  481: 232,233,234,235,236,237,238,239,
  482: 240,241,242,243,244,245,246,215,
  483: 248,249,250,251,252,253,254,223,
  484: 192,193,194,195,196,197,198,199,
  485: 200,201,202,203,204,205,206,207,
  486: 208,209,210,211,212,213,214,247,
  487: 216,217,218,219,220,221,222,255,
  488: 0,62,0,0,1,0,0,0,
  489: 0,0,0,0,0,0,0,0,
  490: 32,0,0,0,1,0,0,0,
  491: 0,0,0,0,0,0,0,0,
  492: 0,0,0,0,0,0,255,3,
  493: 126,0,0,0,126,0,0,0,
  494: 0,0,0,0,0,0,0,0,
  495: 0,0,0,0,0,0,0,0,
  496: 0,0,0,0,0,0,255,3,
  497: 0,0,0,0,0,0,0,0,
  498: 0,0,0,0,0,0,12,2,
  499: 0,0,0,0,0,0,0,0,
  500: 0,0,0,0,0,0,0,0,
  501: 254,255,255,7,0,0,0,0,
  502: 0,0,0,0,0,0,0,0,
  503: 255,255,127,127,0,0,0,0,
  504: 0,0,0,0,0,0,0,0,
  505: 0,0,0,0,254,255,255,7,
  506: 0,0,0,0,0,4,32,4,
  507: 0,0,0,128,255,255,127,255,
  508: 0,0,0,0,0,0,255,3,
  509: 254,255,255,135,254,255,255,7,
  510: 0,0,0,0,0,4,44,6,
  511: 255,255,127,255,255,255,127,255,
  512: 0,0,0,0,254,255,255,255,
  513: 255,255,255,255,255,255,255,127,
  514: 0,0,0,0,254,255,255,255,
  515: 255,255,255,255,255,255,255,255,
  516: 0,2,0,0,255,255,255,255,
  517: 255,255,255,255,255,255,255,127,
  518: 0,0,0,0,255,255,255,255,
  519: 255,255,255,255,255,255,255,255,
  520: 0,0,0,0,254,255,0,252,
  521: 1,0,0,248,1,0,0,120,
  522: 0,0,0,0,254,255,255,255,
  523: 0,0,128,0,0,0,128,0,
  524: 255,255,255,255,0,0,0,0,
  525: 0,0,0,0,0,0,0,128,
  526: 255,255,255,255,0,0,0,0,
  527: 0,0,0,0,0,0,0,0,
  528: 128,0,0,0,0,0,0,0,
  529: 0,1,1,0,1,1,0,0,
  530: 0,0,0,0,0,0,0,0,
  531: 0,0,0,0,0,0,0,0,
  532: 1,0,0,0,128,0,0,0,
  533: 128,128,128,128,0,0,128,0,
  534: 28,28,28,28,28,28,28,28,
  535: 28,28,0,0,0,0,0,128,
  536: 0,26,26,26,26,26,26,18,
  537: 18,18,18,18,18,18,18,18,
  538: 18,18,18,18,18,18,18,18,
  539: 18,18,18,128,128,0,128,16,
  540: 0,26,26,26,26,26,26,18,
  541: 18,18,18,18,18,18,18,18,
  542: 18,18,18,18,18,18,18,18,
  543: 18,18,18,128,128,0,0,0,
  544: 0,0,0,0,0,1,0,0,
  545: 0,0,0,0,0,0,0,0,
  546: 0,0,0,0,0,0,0,0,
  547: 0,0,0,0,0,0,0,0,
  548: 1,0,0,0,0,0,0,0,
  549: 0,0,18,0,0,0,0,0,
  550: 0,0,20,20,0,18,0,0,
  551: 0,20,18,0,0,0,0,0,
  552: 18,18,18,18,18,18,18,18,
  553: 18,18,18,18,18,18,18,18,
  554: 18,18,18,18,18,18,18,0,
  555: 18,18,18,18,18,18,18,18,
  556: 18,18,18,18,18,18,18,18,
  557: 18,18,18,18,18,18,18,18,
  558: 18,18,18,18,18,18,18,0,
  559: 18,18,18,18,18,18,18,18
  560: };
  561: 
  562: 
  563: 
  564: 
  565: #ifndef HAVE_STRERROR
  566: /*************************************************
  567: *     Provide strerror() for non-ANSI libraries  *
  568: *************************************************/
  569: 
  570: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
  571: in their libraries, but can provide the same facility by this simple
  572: alternative function. */
  573: 
  574: extern int   sys_nerr;
  575: extern char *sys_errlist[];
  576: 
  577: char *
  578: strerror(int n)
  579: {
  580: if (n < 0 || n >= sys_nerr) return "unknown error number";
  581: return sys_errlist[n];
  582: }
  583: #endif /* HAVE_STRERROR */
  584: 
  585: 
  586: /*************************************************
  587: *         JIT memory callback                    *
  588: *************************************************/
  589: 
  590: static pcre_jit_stack* jit_callback(void *arg)
  591: {
  592: return (pcre_jit_stack *)arg;
  593: }
  594: 
  595: 
  596: /*************************************************
  597: *        Read or extend an input line            *
  598: *************************************************/
  599: 
  600: /* Input lines are read into buffer, but both patterns and data lines can be
  601: continued over multiple input lines. In addition, if the buffer fills up, we
  602: want to automatically expand it so as to be able to handle extremely large
  603: lines that are needed for certain stress tests. When the input buffer is
  604: expanded, the other two buffers must also be expanded likewise, and the
  605: contents of pbuffer, which are a copy of the input for callouts, must be
  606: preserved (for when expansion happens for a data line). This is not the most
  607: optimal way of handling this, but hey, this is just a test program!
  608: 
  609: Arguments:
  610:   f            the file to read
  611:   start        where in buffer to start (this *must* be within buffer)
  612:   prompt       for stdin or readline()
  613: 
  614: Returns:       pointer to the start of new data
  615:                could be a copy of start, or could be moved
  616:                NULL if no data read and EOF reached
  617: */
  618: 
  619: static uschar *
  620: extend_inputline(FILE *f, uschar *start, const char *prompt)
  621: {
  622: uschar *here = start;
  623: 
  624: for (;;)
  625:   {
  626:   int rlen = (int)(buffer_size - (here - buffer));
  627: 
  628:   if (rlen > 1000)
  629:     {
  630:     int dlen;
  631: 
  632:     /* If libreadline support is required, use readline() to read a line if the
  633:     input is a terminal. Note that readline() removes the trailing newline, so
  634:     we must put it back again, to be compatible with fgets(). */
  635: 
  636: #ifdef SUPPORT_LIBREADLINE
  637:     if (isatty(fileno(f)))
  638:       {
  639:       size_t len;
  640:       char *s = readline(prompt);
  641:       if (s == NULL) return (here == start)? NULL : start;
  642:       len = strlen(s);
  643:       if (len > 0) add_history(s);
  644:       if (len > rlen - 1) len = rlen - 1;
  645:       memcpy(here, s, len);
  646:       here[len] = '\n';
  647:       here[len+1] = 0;
  648:       free(s);
  649:       }
  650:     else
  651: #endif
  652: 
  653:     /* Read the next line by normal means, prompting if the file is stdin. */
  654: 
  655:       {
  656:       if (f == stdin) printf("%s", prompt);
  657:       if (fgets((char *)here, rlen,  f) == NULL)
  658:         return (here == start)? NULL : start;
  659:       }
  660: 
  661:     dlen = (int)strlen((char *)here);
  662:     if (dlen > 0 && here[dlen - 1] == '\n') return start;
  663:     here += dlen;
  664:     }
  665: 
  666:   else
  667:     {
  668:     int new_buffer_size = 2*buffer_size;
  669:     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
  670:     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
  671:     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
  672: 
  673:     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
  674:       {
  675:       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
  676:       exit(1);
  677:       }
  678: 
  679:     memcpy(new_buffer, buffer, buffer_size);
  680:     memcpy(new_pbuffer, pbuffer, buffer_size);
  681: 
  682:     buffer_size = new_buffer_size;
  683: 
  684:     start = new_buffer + (start - buffer);
  685:     here = new_buffer + (here - buffer);
  686: 
  687:     free(buffer);
  688:     free(dbuffer);
  689:     free(pbuffer);
  690: 
  691:     buffer = new_buffer;
  692:     dbuffer = new_dbuffer;
  693:     pbuffer = new_pbuffer;
  694:     }
  695:   }
  696: 
  697: return NULL;  /* Control never gets here */
  698: }
  699: 
  700: 
  701: 
  702: 
  703: 
  704: 
  705: 
  706: /*************************************************
  707: *          Read number from string               *
  708: *************************************************/
  709: 
  710: /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
  711: around with conditional compilation, just do the job by hand. It is only used
  712: for unpicking arguments, so just keep it simple.
  713: 
  714: Arguments:
  715:   str           string to be converted
  716:   endptr        where to put the end pointer
  717: 
  718: Returns:        the unsigned long
  719: */
  720: 
  721: static int
  722: get_value(unsigned char *str, unsigned char **endptr)
  723: {
  724: int result = 0;
  725: while(*str != 0 && isspace(*str)) str++;
  726: while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
  727: *endptr = str;
  728: return(result);
  729: }
  730: 
  731: 
  732: 
  733: 
  734: /*************************************************
  735: *            Convert UTF-8 string to value       *
  736: *************************************************/
  737: 
  738: /* This function takes one or more bytes that represents a UTF-8 character,
  739: and returns the value of the character.
  740: 
  741: Argument:
  742:   utf8bytes   a pointer to the byte vector
  743:   vptr        a pointer to an int to receive the value
  744: 
  745: Returns:      >  0 => the number of bytes consumed
  746:               -6 to 0 => malformed UTF-8 character at offset = (-return)
  747: */
  748: 
  749: #if !defined NOUTF8
  750: 
  751: static int
  752: utf82ord(unsigned char *utf8bytes, int *vptr)
  753: {
  754: int c = *utf8bytes++;
  755: int d = c;
  756: int i, j, s;
  757: 
  758: for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  759:   {
  760:   if ((d & 0x80) == 0) break;
  761:   d <<= 1;
  762:   }
  763: 
  764: if (i == -1) { *vptr = c; return 1; }  /* ascii character */
  765: if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
  766: 
  767: /* i now has a value in the range 1-5 */
  768: 
  769: s = 6*i;
  770: d = (c & utf8_table3[i]) << s;
  771: 
  772: for (j = 0; j < i; j++)
  773:   {
  774:   c = *utf8bytes++;
  775:   if ((c & 0xc0) != 0x80) return -(j+1);
  776:   s -= 6;
  777:   d |= (c & 0x3f) << s;
  778:   }
  779: 
  780: /* Check that encoding was the correct unique one */
  781: 
  782: for (j = 0; j < utf8_table1_size; j++)
  783:   if (d <= utf8_table1[j]) break;
  784: if (j != i) return -(i+1);
  785: 
  786: /* Valid value */
  787: 
  788: *vptr = d;
  789: return i+1;
  790: }
  791: 
  792: #endif
  793: 
  794: 
  795: 
  796: /*************************************************
  797: *       Convert character value to UTF-8         *
  798: *************************************************/
  799: 
  800: /* This function takes an integer value in the range 0 - 0x7fffffff
  801: and encodes it as a UTF-8 character in 0 to 6 bytes.
  802: 
  803: Arguments:
  804:   cvalue     the character value
  805:   utf8bytes  pointer to buffer for result - at least 6 bytes long
  806: 
  807: Returns:     number of characters placed in the buffer
  808: */
  809: 
  810: #if !defined NOUTF8
  811: 
  812: static int
  813: ord2utf8(int cvalue, uschar *utf8bytes)
  814: {
  815: register int i, j;
  816: for (i = 0; i < utf8_table1_size; i++)
  817:   if (cvalue <= utf8_table1[i]) break;
  818: utf8bytes += i;
  819: for (j = i; j > 0; j--)
  820:  {
  821:  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
  822:  cvalue >>= 6;
  823:  }
  824: *utf8bytes = utf8_table2[i] | cvalue;
  825: return i + 1;
  826: }
  827: 
  828: #endif
  829: 
  830: 
  831: 
  832: /*************************************************
  833: *             Print character string             *
  834: *************************************************/
  835: 
  836: /* Character string printing function. Must handle UTF-8 strings in utf8
  837: mode. Yields number of characters printed. If handed a NULL file, just counts
  838: chars without printing. */
  839: 
  840: static int pchars(unsigned char *p, int length, FILE *f)
  841: {
  842: int c = 0;
  843: int yield = 0;
  844: 
  845: while (length-- > 0)
  846:   {
  847: #if !defined NOUTF8
  848:   if (use_utf8)
  849:     {
  850:     int rc = utf82ord(p, &c);
  851: 
  852:     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
  853:       {
  854:       length -= rc - 1;
  855:       p += rc;
  856:       if (PRINTHEX(c))
  857:         {
  858:         if (f != NULL) fprintf(f, "%c", c);
  859:         yield++;
  860:         }
  861:       else
  862:         {
  863:         int n = 4;
  864:         if (f != NULL) fprintf(f, "\\x{%02x}", c);
  865:         yield += (n <= 0x000000ff)? 2 :
  866:                  (n <= 0x00000fff)? 3 :
  867:                  (n <= 0x0000ffff)? 4 :
  868:                  (n <= 0x000fffff)? 5 : 6;
  869:         }
  870:       continue;
  871:       }
  872:     }
  873: #endif
  874: 
  875:    /* Not UTF-8, or malformed UTF-8  */
  876: 
  877:   c = *p++;
  878:   if (PRINTHEX(c))
  879:     {
  880:     if (f != NULL) fprintf(f, "%c", c);
  881:     yield++;
  882:     }
  883:   else
  884:     {
  885:     if (f != NULL) fprintf(f, "\\x%02x", c);
  886:     yield += 4;
  887:     }
  888:   }
  889: 
  890: return yield;
  891: }
  892: 
  893: 
  894: 
  895: /*************************************************
  896: *              Callout function                  *
  897: *************************************************/
  898: 
  899: /* Called from PCRE as a result of the (?C) item. We print out where we are in
  900: the match. Yield zero unless more callouts than the fail count, or the callout
  901: data is not zero. */
  902: 
  903: static int callout(pcre_callout_block *cb)
  904: {
  905: FILE *f = (first_callout | callout_extra)? outfile : NULL;
  906: int i, pre_start, post_start, subject_length;
  907: 
  908: if (callout_extra)
  909:   {
  910:   fprintf(f, "Callout %d: last capture = %d\n",
  911:     cb->callout_number, cb->capture_last);
  912: 
  913:   for (i = 0; i < cb->capture_top * 2; i += 2)
  914:     {
  915:     if (cb->offset_vector[i] < 0)
  916:       fprintf(f, "%2d: <unset>\n", i/2);
  917:     else
  918:       {
  919:       fprintf(f, "%2d: ", i/2);
  920:       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
  921:         cb->offset_vector[i+1] - cb->offset_vector[i], f);
  922:       fprintf(f, "\n");
  923:       }
  924:     }
  925:   }
  926: 
  927: /* Re-print the subject in canonical form, the first time or if giving full
  928: datails. On subsequent calls in the same match, we use pchars just to find the
  929: printed lengths of the substrings. */
  930: 
  931: if (f != NULL) fprintf(f, "--->");
  932: 
  933: pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
  934: post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
  935:   cb->current_position - cb->start_match, f);
  936: 
  937: subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
  938: 
  939: (void)pchars((unsigned char *)(cb->subject + cb->current_position),
  940:   cb->subject_length - cb->current_position, f);
  941: 
  942: if (f != NULL) fprintf(f, "\n");
  943: 
  944: /* Always print appropriate indicators, with callout number if not already
  945: shown. For automatic callouts, show the pattern offset. */
  946: 
  947: if (cb->callout_number == 255)
  948:   {
  949:   fprintf(outfile, "%+3d ", cb->pattern_position);
  950:   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
  951:   }
  952: else
  953:   {
  954:   if (callout_extra) fprintf(outfile, "    ");
  955:     else fprintf(outfile, "%3d ", cb->callout_number);
  956:   }
  957: 
  958: for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
  959: fprintf(outfile, "^");
  960: 
  961: if (post_start > 0)
  962:   {
  963:   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  964:   fprintf(outfile, "^");
  965:   }
  966: 
  967: for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  968:   fprintf(outfile, " ");
  969: 
  970: fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  971:   pbuffer + cb->pattern_position);
  972: 
  973: fprintf(outfile, "\n");
  974: first_callout = 0;
  975: 
  976: if (cb->mark != last_callout_mark)
  977:   {
  978:   fprintf(outfile, "Latest Mark: %s\n",
  979:     (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
  980:   last_callout_mark = cb->mark;
  981:   }
  982: 
  983: if (cb->callout_data != NULL)
  984:   {
  985:   int callout_data = *((int *)(cb->callout_data));
  986:   if (callout_data != 0)
  987:     {
  988:     fprintf(outfile, "Callout data = %d\n", callout_data);
  989:     return callout_data;
  990:     }
  991:   }
  992: 
  993: return (cb->callout_number != callout_fail_id)? 0 :
  994:        (++callout_count >= callout_fail_count)? 1 : 0;
  995: }
  996: 
  997: 
  998: /*************************************************
  999: *            Local malloc functions              *
 1000: *************************************************/
 1001: 
 1002: /* Alternative malloc function, to test functionality and save the size of a
 1003: compiled re, which is the first store request that pcre_compile() makes. The
 1004: show_malloc variable is set only during matching. */
 1005: 
 1006: static void *new_malloc(size_t size)
 1007: {
 1008: void *block = malloc(size);
 1009: gotten_store = size;
 1010: if (first_gotten_store == 0) first_gotten_store = size;
 1011: if (show_malloc)
 1012:   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
 1013: return block;
 1014: }
 1015: 
 1016: static void new_free(void *block)
 1017: {
 1018: if (show_malloc)
 1019:   fprintf(outfile, "free             %p\n", block);
 1020: free(block);
 1021: }
 1022: 
 1023: /* For recursion malloc/free, to test stacking calls */
 1024: 
 1025: static void *stack_malloc(size_t size)
 1026: {
 1027: void *block = malloc(size);
 1028: if (show_malloc)
 1029:   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
 1030: return block;
 1031: }
 1032: 
 1033: static void stack_free(void *block)
 1034: {
 1035: if (show_malloc)
 1036:   fprintf(outfile, "stack_free       %p\n", block);
 1037: free(block);
 1038: }
 1039: 
 1040: 
 1041: /*************************************************
 1042: *          Call pcre_fullinfo()                  *
 1043: *************************************************/
 1044: 
 1045: /* Get one piece of information from the pcre_fullinfo() function */
 1046: 
 1047: static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
 1048: {
 1049: int rc;
 1050: if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
 1051:   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
 1052: }
 1053: 
 1054: 
 1055: 
 1056: /*************************************************
 1057: *         Byte flipping function                 *
 1058: *************************************************/
 1059: 
 1060: static unsigned long int
 1061: byteflip(unsigned long int value, int n)
 1062: {
 1063: if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
 1064: return ((value & 0x000000ff) << 24) |
 1065:        ((value & 0x0000ff00) <<  8) |
 1066:        ((value & 0x00ff0000) >>  8) |
 1067:        ((value & 0xff000000) >> 24);
 1068: }
 1069: 
 1070: 
 1071: 
 1072: 
 1073: /*************************************************
 1074: *        Check match or recursion limit          *
 1075: *************************************************/
 1076: 
 1077: static int
 1078: check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
 1079:   int start_offset, int options, int *use_offsets, int use_size_offsets,
 1080:   int flag, unsigned long int *limit, int errnumber, const char *msg)
 1081: {
 1082: int count;
 1083: int min = 0;
 1084: int mid = 64;
 1085: int max = -1;
 1086: 
 1087: extra->flags |= flag;
 1088: 
 1089: for (;;)
 1090:   {
 1091:   *limit = mid;
 1092: 
 1093:   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
 1094:     use_offsets, use_size_offsets);
 1095: 
 1096:   if (count == errnumber)
 1097:     {
 1098:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
 1099:     min = mid;
 1100:     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
 1101:     }
 1102: 
 1103:   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
 1104:                          count == PCRE_ERROR_PARTIAL)
 1105:     {
 1106:     if (mid == min + 1)
 1107:       {
 1108:       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
 1109:       break;
 1110:       }
 1111:     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
 1112:     max = mid;
 1113:     mid = (min + mid)/2;
 1114:     }
 1115:   else break;    /* Some other error */
 1116:   }
 1117: 
 1118: extra->flags &= ~flag;
 1119: return count;
 1120: }
 1121: 
 1122: 
 1123: 
 1124: /*************************************************
 1125: *         Case-independent strncmp() function    *
 1126: *************************************************/
 1127: 
 1128: /*
 1129: Arguments:
 1130:   s         first string
 1131:   t         second string
 1132:   n         number of characters to compare
 1133: 
 1134: Returns:    < 0, = 0, or > 0, according to the comparison
 1135: */
 1136: 
 1137: static int
 1138: strncmpic(uschar *s, uschar *t, int n)
 1139: {
 1140: while (n--)
 1141:   {
 1142:   int c = tolower(*s++) - tolower(*t++);
 1143:   if (c) return c;
 1144:   }
 1145: return 0;
 1146: }
 1147: 
 1148: 
 1149: 
 1150: /*************************************************
 1151: *         Check newline indicator                *
 1152: *************************************************/
 1153: 
 1154: /* This is used both at compile and run-time to check for <xxx> escapes. Print
 1155: a message and return 0 if there is no match.
 1156: 
 1157: Arguments:
 1158:   p           points after the leading '<'
 1159:   f           file for error message
 1160: 
 1161: Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
 1162: */
 1163: 
 1164: static int
 1165: check_newline(uschar *p, FILE *f)
 1166: {
 1167: if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
 1168: if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
 1169: if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
 1170: if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
 1171: if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
 1172: if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
 1173: if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
 1174: fprintf(f, "Unknown newline type at: <%s\n", p);
 1175: return 0;
 1176: }
 1177: 
 1178: 
 1179: 
 1180: /*************************************************
 1181: *             Usage function                     *
 1182: *************************************************/
 1183: 
 1184: static void
 1185: usage(void)
 1186: {
 1187: printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
 1188: printf("Input and output default to stdin and stdout.\n");
 1189: #ifdef SUPPORT_LIBREADLINE
 1190: printf("If input is a terminal, readline() is used to read from it.\n");
 1191: #else
 1192: printf("This version of pcretest is not linked with readline().\n");
 1193: #endif
 1194: printf("\nOptions:\n");
 1195: printf("  -b       show compiled code (bytecode)\n");
 1196: printf("  -C       show PCRE compile-time options and exit\n");
 1197: printf("  -d       debug: show compiled code and information (-b and -i)\n");
 1198: #if !defined NODFA
 1199: printf("  -dfa     force DFA matching for all subjects\n");
 1200: #endif
 1201: printf("  -help    show usage information\n");
 1202: printf("  -i       show information about compiled patterns\n"
 1203:        "  -M       find MATCH_LIMIT minimum for each subject\n"
 1204:        "  -m       output memory used information\n"
 1205:        "  -o <n>   set size of offsets vector to <n>\n");
 1206: #if !defined NOPOSIX
 1207: printf("  -p       use POSIX interface\n");
 1208: #endif
 1209: printf("  -q       quiet: do not output PCRE version number at start\n");
 1210: printf("  -S <n>   set stack size to <n> megabytes\n");
 1211: printf("  -s       force each pattern to be studied at basic level\n"
 1212:        "  -s+      force each pattern to be studied, using JIT if available\n"
 1213:        "  -t       time compilation and execution\n");
 1214: printf("  -t <n>   time compilation and execution, repeating <n> times\n");
 1215: printf("  -tm      time execution (matching) only\n");
 1216: printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
 1217: }
 1218: 
 1219: 
 1220: 
 1221: /*************************************************
 1222: *                Main Program                    *
 1223: *************************************************/
 1224: 
 1225: /* Read lines from named file or stdin and write to named file or stdout; lines
 1226: consist of a regular expression, in delimiters and optionally followed by
 1227: options, followed by a set of test data, terminated by an empty line. */
 1228: 
 1229: int main(int argc, char **argv)
 1230: {
 1231: FILE *infile = stdin;
 1232: int options = 0;
 1233: int study_options = 0;
 1234: int default_find_match_limit = FALSE;
 1235: int op = 1;
 1236: int timeit = 0;
 1237: int timeitm = 0;
 1238: int showinfo = 0;
 1239: int showstore = 0;
 1240: int force_study = -1;
 1241: int force_study_options = 0;
 1242: int quiet = 0;
 1243: int size_offsets = 45;
 1244: int size_offsets_max;
 1245: int *offsets = NULL;
 1246: #if !defined NOPOSIX
 1247: int posix = 0;
 1248: #endif
 1249: int debug = 0;
 1250: int done = 0;
 1251: int all_use_dfa = 0;
 1252: int yield = 0;
 1253: int stack_size;
 1254: 
 1255: pcre_jit_stack *jit_stack = NULL;
 1256: 
 1257: 
 1258: /* These vectors store, end-to-end, a list of captured substring names. Assume
 1259: that 1024 is plenty long enough for the few names we'll be testing. */
 1260: 
 1261: uschar copynames[1024];
 1262: uschar getnames[1024];
 1263: 
 1264: uschar *copynamesptr;
 1265: uschar *getnamesptr;
 1266: 
 1267: /* Get buffers from malloc() so that Electric Fence will check their misuse
 1268: when I am debugging. They grow automatically when very long lines are read. */
 1269: 
 1270: buffer = (unsigned char *)malloc(buffer_size);
 1271: dbuffer = (unsigned char *)malloc(buffer_size);
 1272: pbuffer = (unsigned char *)malloc(buffer_size);
 1273: 
 1274: /* The outfile variable is static so that new_malloc can use it. */
 1275: 
 1276: outfile = stdout;
 1277: 
 1278: /* The following  _setmode() stuff is some Windows magic that tells its runtime
 1279: library to translate CRLF into a single LF character. At least, that's what
 1280: I've been told: never having used Windows I take this all on trust. Originally
 1281: it set 0x8000, but then I was advised that _O_BINARY was better. */
 1282: 
 1283: #if defined(_WIN32) || defined(WIN32)
 1284: _setmode( _fileno( stdout ), _O_BINARY );
 1285: #endif
 1286: 
 1287: /* Scan options */
 1288: 
 1289: while (argc > 1 && argv[op][0] == '-')
 1290:   {
 1291:   unsigned char *endptr;
 1292: 
 1293:   if (strcmp(argv[op], "-m") == 0) showstore = 1;
 1294:   else if (strcmp(argv[op], "-s") == 0) force_study = 0;
 1295:   else if (strcmp(argv[op], "-s+") == 0)
 1296:     {
 1297:     force_study = 1;
 1298:     force_study_options = PCRE_STUDY_JIT_COMPILE;
 1299:     }
 1300:   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
 1301:   else if (strcmp(argv[op], "-b") == 0) debug = 1;
 1302:   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
 1303:   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
 1304:   else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
 1305: #if !defined NODFA
 1306:   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
 1307: #endif
 1308:   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
 1309:       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
 1310:         *endptr == 0))
 1311:     {
 1312:     op++;
 1313:     argc--;
 1314:     }
 1315:   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
 1316:     {
 1317:     int both = argv[op][2] == 0;
 1318:     int temp;
 1319:     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
 1320:                      *endptr == 0))
 1321:       {
 1322:       timeitm = temp;
 1323:       op++;
 1324:       argc--;
 1325:       }
 1326:     else timeitm = LOOPREPEAT;
 1327:     if (both) timeit = timeitm;
 1328:     }
 1329:   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
 1330:       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
 1331:         *endptr == 0))
 1332:     {
 1333: #if defined(_WIN32) || defined(WIN32) || defined(__minix)
 1334:     printf("PCRE: -S not supported on this OS\n");
 1335:     exit(1);
 1336: #else
 1337:     int rc;
 1338:     struct rlimit rlim;
 1339:     getrlimit(RLIMIT_STACK, &rlim);
 1340:     rlim.rlim_cur = stack_size * 1024 * 1024;
 1341:     rc = setrlimit(RLIMIT_STACK, &rlim);
 1342:     if (rc != 0)
 1343:       {
 1344:     printf("PCRE: setrlimit() failed with error %d\n", rc);
 1345:     exit(1);
 1346:       }
 1347:     op++;
 1348:     argc--;
 1349: #endif
 1350:     }
 1351: #if !defined NOPOSIX
 1352:   else if (strcmp(argv[op], "-p") == 0) posix = 1;
 1353: #endif
 1354:   else if (strcmp(argv[op], "-C") == 0)
 1355:     {
 1356:     int rc;
 1357:     unsigned long int lrc;
 1358:     printf("PCRE version %s\n", pcre_version());
 1359:     printf("Compiled with\n");
 1360:     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
 1361:     printf("  %sUTF-8 support\n", rc? "" : "No ");
 1362:     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
 1363:     printf("  %sUnicode properties support\n", rc? "" : "No ");
 1364:     (void)pcre_config(PCRE_CONFIG_JIT, &rc);
 1365:     if (rc)
 1366:       printf("  Just-in-time compiler support\n");
 1367:     else
 1368:       printf("  No just-in-time compiler support\n");
 1369:     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
 1370:     /* Note that these values are always the ASCII values, even
 1371:     in EBCDIC environments. CR is 13 and NL is 10. */
 1372:     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
 1373:       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
 1374:       (rc == -2)? "ANYCRLF" :
 1375:       (rc == -1)? "ANY" : "???");
 1376:     (void)pcre_config(PCRE_CONFIG_BSR, &rc);
 1377:     printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
 1378:                                      "all Unicode newlines");
 1379:     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
 1380:     printf("  Internal link size = %d\n", rc);
 1381:     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
 1382:     printf("  POSIX malloc threshold = %d\n", rc);
 1383:     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
 1384:     printf("  Default match limit = %ld\n", lrc);
 1385:     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
 1386:     printf("  Default recursion depth limit = %ld\n", lrc);
 1387:     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
 1388:     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
 1389:     goto EXIT;
 1390:     }
 1391:   else if (strcmp(argv[op], "-help") == 0 ||
 1392:            strcmp(argv[op], "--help") == 0)
 1393:     {
 1394:     usage();
 1395:     goto EXIT;
 1396:     }
 1397:   else
 1398:     {
 1399:     printf("** Unknown or malformed option %s\n", argv[op]);
 1400:     usage();
 1401:     yield = 1;
 1402:     goto EXIT;
 1403:     }
 1404:   op++;
 1405:   argc--;
 1406:   }
 1407: 
 1408: /* Get the store for the offsets vector, and remember what it was */
 1409: 
 1410: size_offsets_max = size_offsets;
 1411: offsets = (int *)malloc(size_offsets_max * sizeof(int));
 1412: if (offsets == NULL)
 1413:   {
 1414:   printf("** Failed to get %d bytes of memory for offsets vector\n",
 1415:     (int)(size_offsets_max * sizeof(int)));
 1416:   yield = 1;
 1417:   goto EXIT;
 1418:   }
 1419: 
 1420: /* Sort out the input and output files */
 1421: 
 1422: if (argc > 1)
 1423:   {
 1424:   infile = fopen(argv[op], INPUT_MODE);
 1425:   if (infile == NULL)
 1426:     {
 1427:     printf("** Failed to open %s\n", argv[op]);
 1428:     yield = 1;
 1429:     goto EXIT;
 1430:     }
 1431:   }
 1432: 
 1433: if (argc > 2)
 1434:   {
 1435:   outfile = fopen(argv[op+1], OUTPUT_MODE);
 1436:   if (outfile == NULL)
 1437:     {
 1438:     printf("** Failed to open %s\n", argv[op+1]);
 1439:     yield = 1;
 1440:     goto EXIT;
 1441:     }
 1442:   }
 1443: 
 1444: /* Set alternative malloc function */
 1445: 
 1446: pcre_malloc = new_malloc;
 1447: pcre_free = new_free;
 1448: pcre_stack_malloc = stack_malloc;
 1449: pcre_stack_free = stack_free;
 1450: 
 1451: /* Heading line unless quiet, then prompt for first regex if stdin */
 1452: 
 1453: if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 1454: 
 1455: /* Main loop */
 1456: 
 1457: while (!done)
 1458:   {
 1459:   pcre *re = NULL;
 1460:   pcre_extra *extra = NULL;
 1461: 
 1462: #if !defined NOPOSIX  /* There are still compilers that require no indent */
 1463:   regex_t preg;
 1464:   int do_posix = 0;
 1465: #endif
 1466: 
 1467:   const char *error;
 1468:   unsigned char *markptr;
 1469:   unsigned char *p, *pp, *ppp;
 1470:   unsigned char *to_file = NULL;
 1471:   const unsigned char *tables = NULL;
 1472:   unsigned long int true_size, true_study_size = 0;
 1473:   size_t size, regex_gotten_store;
 1474:   int do_allcaps = 0;
 1475:   int do_mark = 0;
 1476:   int do_study = 0;
 1477:   int no_force_study = 0;
 1478:   int do_debug = debug;
 1479:   int do_G = 0;
 1480:   int do_g = 0;
 1481:   int do_showinfo = showinfo;
 1482:   int do_showrest = 0;
 1483:   int do_showcaprest = 0;
 1484:   int do_flip = 0;
 1485:   int erroroffset, len, delimiter, poffset;
 1486: 
 1487:   use_utf8 = 0;
 1488:   debug_lengths = 1;
 1489: 
 1490:   if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
 1491:   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
 1492:   fflush(outfile);
 1493: 
 1494:   p = buffer;
 1495:   while (isspace(*p)) p++;
 1496:   if (*p == 0) continue;
 1497: 
 1498:   /* See if the pattern is to be loaded pre-compiled from a file. */
 1499: 
 1500:   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
 1501:     {
 1502:     unsigned long int magic, get_options;
 1503:     uschar sbuf[8];
 1504:     FILE *f;
 1505: 
 1506:     p++;
 1507:     pp = p + (int)strlen((char *)p);
 1508:     while (isspace(pp[-1])) pp--;
 1509:     *pp = 0;
 1510: 
 1511:     f = fopen((char *)p, "rb");
 1512:     if (f == NULL)
 1513:       {
 1514:       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
 1515:       continue;
 1516:       }
 1517: 
 1518:     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
 1519: 
 1520:     true_size =
 1521:       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
 1522:     true_study_size =
 1523:       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
 1524: 
 1525:     re = (real_pcre *)new_malloc(true_size);
 1526:     regex_gotten_store = first_gotten_store;
 1527: 
 1528:     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
 1529: 
 1530:     magic = ((real_pcre *)re)->magic_number;
 1531:     if (magic != MAGIC_NUMBER)
 1532:       {
 1533:       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
 1534:         {
 1535:         do_flip = 1;
 1536:         }
 1537:       else
 1538:         {
 1539:         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
 1540:         fclose(f);
 1541:         continue;
 1542:         }
 1543:       }
 1544: 
 1545:     fprintf(outfile, "Compiled pattern%s loaded from %s\n",
 1546:       do_flip? " (byte-inverted)" : "", p);
 1547: 
 1548:     /* Need to know if UTF-8 for printing data strings */
 1549: 
 1550:     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
 1551:     use_utf8 = (get_options & PCRE_UTF8) != 0;
 1552: 
 1553:     /* Now see if there is any following study data. */
 1554: 
 1555:     if (true_study_size != 0)
 1556:       {
 1557:       pcre_study_data *psd;
 1558: 
 1559:       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
 1560:       extra->flags = PCRE_EXTRA_STUDY_DATA;
 1561: 
 1562:       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
 1563:       extra->study_data = psd;
 1564: 
 1565:       if (fread(psd, 1, true_study_size, f) != true_study_size)
 1566:         {
 1567:         FAIL_READ:
 1568:         fprintf(outfile, "Failed to read data from %s\n", p);
 1569:         if (extra != NULL) pcre_free_study(extra);
 1570:         if (re != NULL) new_free(re);
 1571:         fclose(f);
 1572:         continue;
 1573:         }
 1574:       fprintf(outfile, "Study data loaded from %s\n", p);
 1575:       do_study = 1;     /* To get the data output if requested */
 1576:       }
 1577:     else fprintf(outfile, "No study data\n");
 1578: 
 1579:     fclose(f);
 1580:     goto SHOW_INFO;
 1581:     }
 1582: 
 1583:   /* In-line pattern (the usual case). Get the delimiter and seek the end of
 1584:   the pattern; if is isn't complete, read more. */
 1585: 
 1586:   delimiter = *p++;
 1587: 
 1588:   if (isalnum(delimiter) || delimiter == '\\')
 1589:     {
 1590:     fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
 1591:     goto SKIP_DATA;
 1592:     }
 1593: 
 1594:   pp = p;
 1595:   poffset = (int)(p - buffer);
 1596: 
 1597:   for(;;)
 1598:     {
 1599:     while (*pp != 0)
 1600:       {
 1601:       if (*pp == '\\' && pp[1] != 0) pp++;
 1602:         else if (*pp == delimiter) break;
 1603:       pp++;
 1604:       }
 1605:     if (*pp != 0) break;
 1606:     if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
 1607:       {
 1608:       fprintf(outfile, "** Unexpected EOF\n");
 1609:       done = 1;
 1610:       goto CONTINUE;
 1611:       }
 1612:     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
 1613:     }
 1614: 
 1615:   /* The buffer may have moved while being extended; reset the start of data
 1616:   pointer to the correct relative point in the buffer. */
 1617: 
 1618:   p = buffer + poffset;
 1619: 
 1620:   /* If the first character after the delimiter is backslash, make
 1621:   the pattern end with backslash. This is purely to provide a way
 1622:   of testing for the error message when a pattern ends with backslash. */
 1623: 
 1624:   if (pp[1] == '\\') *pp++ = '\\';
 1625: 
 1626:   /* Terminate the pattern at the delimiter, and save a copy of the pattern
 1627:   for callouts. */
 1628: 
 1629:   *pp++ = 0;
 1630:   strcpy((char *)pbuffer, (char *)p);
 1631: 
 1632:   /* Look for options after final delimiter */
 1633: 
 1634:   options = 0;
 1635:   study_options = 0;
 1636:   log_store = showstore;  /* default from command line */
 1637: 
 1638:   while (*pp != 0)
 1639:     {
 1640:     switch (*pp++)
 1641:       {
 1642:       case 'f': options |= PCRE_FIRSTLINE; break;
 1643:       case 'g': do_g = 1; break;
 1644:       case 'i': options |= PCRE_CASELESS; break;
 1645:       case 'm': options |= PCRE_MULTILINE; break;
 1646:       case 's': options |= PCRE_DOTALL; break;
 1647:       case 'x': options |= PCRE_EXTENDED; break;
 1648: 
 1649:       case '+':
 1650:       if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
 1651:       break;
 1652: 
 1653:       case '=': do_allcaps = 1; break;
 1654:       case 'A': options |= PCRE_ANCHORED; break;
 1655:       case 'B': do_debug = 1; break;
 1656:       case 'C': options |= PCRE_AUTO_CALLOUT; break;
 1657:       case 'D': do_debug = do_showinfo = 1; break;
 1658:       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
 1659:       case 'F': do_flip = 1; break;
 1660:       case 'G': do_G = 1; break;
 1661:       case 'I': do_showinfo = 1; break;
 1662:       case 'J': options |= PCRE_DUPNAMES; break;
 1663:       case 'K': do_mark = 1; break;
 1664:       case 'M': log_store = 1; break;
 1665:       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
 1666: 
 1667: #if !defined NOPOSIX
 1668:       case 'P': do_posix = 1; break;
 1669: #endif
 1670: 
 1671:       case 'S':
 1672:       if (do_study == 0)
 1673:         {
 1674:         do_study = 1;
 1675:         if (*pp == '+')
 1676:           {
 1677:           study_options |= PCRE_STUDY_JIT_COMPILE;
 1678:           pp++;
 1679:           }
 1680:         }
 1681:       else
 1682:         {
 1683:         do_study = 0;
 1684:         no_force_study = 1;
 1685:         }
 1686:       break;
 1687: 
 1688:       case 'U': options |= PCRE_UNGREEDY; break;
 1689:       case 'W': options |= PCRE_UCP; break;
 1690:       case 'X': options |= PCRE_EXTRA; break;
 1691:       case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
 1692:       case 'Z': debug_lengths = 0; break;
 1693:       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
 1694:       case '?': options |= PCRE_NO_UTF8_CHECK; break;
 1695: 
 1696:       case 'T':
 1697:       switch (*pp++)
 1698:         {
 1699:         case '0': tables = tables0; break;
 1700:         case '1': tables = tables1; break;
 1701: 
 1702:         case '\r':
 1703:         case '\n':
 1704:         case ' ':
 1705:         case 0:
 1706:         fprintf(outfile, "** Missing table number after /T\n");
 1707:         goto SKIP_DATA;
 1708: 
 1709:         default:
 1710:         fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
 1711:         goto SKIP_DATA;
 1712:         }
 1713:       break;
 1714: 
 1715:       case 'L':
 1716:       ppp = pp;
 1717:       /* The '\r' test here is so that it works on Windows. */
 1718:       /* The '0' test is just in case this is an unterminated line. */
 1719:       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
 1720:       *ppp = 0;
 1721:       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
 1722:         {
 1723:         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
 1724:         goto SKIP_DATA;
 1725:         }
 1726:       locale_set = 1;
 1727:       tables = pcre_maketables();
 1728:       pp = ppp;
 1729:       break;
 1730: 
 1731:       case '>':
 1732:       to_file = pp;
 1733:       while (*pp != 0) pp++;
 1734:       while (isspace(pp[-1])) pp--;
 1735:       *pp = 0;
 1736:       break;
 1737: 
 1738:       case '<':
 1739:         {
 1740:         if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
 1741:           {
 1742:           options |= PCRE_JAVASCRIPT_COMPAT;
 1743:           pp += 3;
 1744:           }
 1745:         else
 1746:           {
 1747:           int x = check_newline(pp, outfile);
 1748:           if (x == 0) goto SKIP_DATA;
 1749:           options |= x;
 1750:           while (*pp++ != '>');
 1751:           }
 1752:         }
 1753:       break;
 1754: 
 1755:       case '\r':                      /* So that it works in Windows */
 1756:       case '\n':
 1757:       case ' ':
 1758:       break;
 1759: 
 1760:       default:
 1761:       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
 1762:       goto SKIP_DATA;
 1763:       }
 1764:     }
 1765: 
 1766:   /* Handle compiling via the POSIX interface, which doesn't support the
 1767:   timing, showing, or debugging options, nor the ability to pass over
 1768:   local character tables. */
 1769: 
 1770: #if !defined NOPOSIX
 1771:   if (posix || do_posix)
 1772:     {
 1773:     int rc;
 1774:     int cflags = 0;
 1775: 
 1776:     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
 1777:     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
 1778:     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
 1779:     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
 1780:     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
 1781:     if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
 1782:     if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
 1783: 
 1784:     first_gotten_store = 0;
 1785:     rc = regcomp(&preg, (char *)p, cflags);
 1786: 
 1787:     /* Compilation failed; go back for another re, skipping to blank line
 1788:     if non-interactive. */
 1789: 
 1790:     if (rc != 0)
 1791:       {
 1792:       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
 1793:       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
 1794:       goto SKIP_DATA;
 1795:       }
 1796:     }
 1797: 
 1798:   /* Handle compiling via the native interface */
 1799: 
 1800:   else
 1801: #endif  /* !defined NOPOSIX */
 1802: 
 1803:     {
 1804:     unsigned long int get_options;
 1805: 
 1806:     if (timeit > 0)
 1807:       {
 1808:       register int i;
 1809:       clock_t time_taken;
 1810:       clock_t start_time = clock();
 1811:       for (i = 0; i < timeit; i++)
 1812:         {
 1813:         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
 1814:         if (re != NULL) free(re);
 1815:         }
 1816:       time_taken = clock() - start_time;
 1817:       fprintf(outfile, "Compile time %.4f milliseconds\n",
 1818:         (((double)time_taken * 1000.0) / (double)timeit) /
 1819:           (double)CLOCKS_PER_SEC);
 1820:       }
 1821: 
 1822:     first_gotten_store = 0;
 1823:     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
 1824: 
 1825:     /* Compilation failed; go back for another re, skipping to blank line
 1826:     if non-interactive. */
 1827: 
 1828:     if (re == NULL)
 1829:       {
 1830:       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
 1831:       SKIP_DATA:
 1832:       if (infile != stdin)
 1833:         {
 1834:         for (;;)
 1835:           {
 1836:           if (extend_inputline(infile, buffer, NULL) == NULL)
 1837:             {
 1838:             done = 1;
 1839:             goto CONTINUE;
 1840:             }
 1841:           len = (int)strlen((char *)buffer);
 1842:           while (len > 0 && isspace(buffer[len-1])) len--;
 1843:           if (len == 0) break;
 1844:           }
 1845:         fprintf(outfile, "\n");
 1846:         }
 1847:       goto CONTINUE;
 1848:       }
 1849: 
 1850:     /* Compilation succeeded. It is now possible to set the UTF-8 option from
 1851:     within the regex; check for this so that we know how to process the data
 1852:     lines. */
 1853: 
 1854:     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
 1855:     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
 1856: 
 1857:     /* Extract the size for possible writing before possibly flipping it,
 1858:     and remember the store that was got. */
 1859: 
 1860:     true_size = ((real_pcre *)re)->size;
 1861:     regex_gotten_store = first_gotten_store;
 1862: 
 1863:     /* Output code size information if requested */
 1864: 
 1865:     if (log_store)
 1866:       fprintf(outfile, "Memory allocation (code space): %d\n",
 1867:         (int)(first_gotten_store -
 1868:               sizeof(real_pcre) -
 1869:               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
 1870: 
 1871:     /* If -s or /S was present, study the regex to generate additional info to
 1872:     help with the matching, unless the pattern has the SS option, which
 1873:     suppresses the effect of /S (used for a few test patterns where studying is
 1874:     never sensible). */
 1875: 
 1876:     if (do_study || (force_study >= 0 && !no_force_study))
 1877:       {
 1878:       if (timeit > 0)
 1879:         {
 1880:         register int i;
 1881:         clock_t time_taken;
 1882:         clock_t start_time = clock();
 1883:         for (i = 0; i < timeit; i++)
 1884:           extra = pcre_study(re, study_options | force_study_options, &error);
 1885:         time_taken = clock() - start_time;
 1886:         if (extra != NULL) pcre_free_study(extra);
 1887:         fprintf(outfile, "  Study time %.4f milliseconds\n",
 1888:           (((double)time_taken * 1000.0) / (double)timeit) /
 1889:             (double)CLOCKS_PER_SEC);
 1890:         }
 1891:       extra = pcre_study(re, study_options | force_study_options, &error);
 1892:       if (error != NULL)
 1893:         fprintf(outfile, "Failed to study: %s\n", error);
 1894:       else if (extra != NULL)
 1895:         {
 1896:         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
 1897:         if (log_store)
 1898:           {
 1899:           size_t jitsize;
 1900:           new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
 1901:           if (jitsize != 0)
 1902:             fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
 1903:           }
 1904:         }
 1905:       }
 1906: 
 1907:     /* If /K was present, we set up for handling MARK data. */
 1908: 
 1909:     if (do_mark)
 1910:       {
 1911:       if (extra == NULL)
 1912:         {
 1913:         extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 1914:         extra->flags = 0;
 1915:         }
 1916:       extra->mark = &markptr;
 1917:       extra->flags |= PCRE_EXTRA_MARK;
 1918:       }
 1919: 
 1920:     /* If the 'F' option was present, we flip the bytes of all the integer
 1921:     fields in the regex data block and the study block. This is to make it
 1922:     possible to test PCRE's handling of byte-flipped patterns, e.g. those
 1923:     compiled on a different architecture. */
 1924: 
 1925:     if (do_flip)
 1926:       {
 1927:       real_pcre *rre = (real_pcre *)re;
 1928:       rre->magic_number =
 1929:         byteflip(rre->magic_number, sizeof(rre->magic_number));
 1930:       rre->size = byteflip(rre->size, sizeof(rre->size));
 1931:       rre->options = byteflip(rre->options, sizeof(rre->options));
 1932:       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
 1933:       rre->top_bracket =
 1934:         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
 1935:       rre->top_backref =
 1936:         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
 1937:       rre->first_byte =
 1938:         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
 1939:       rre->req_byte =
 1940:         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
 1941:       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
 1942:         sizeof(rre->name_table_offset));
 1943:       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
 1944:         sizeof(rre->name_entry_size));
 1945:       rre->name_count = (pcre_uint16)byteflip(rre->name_count,
 1946:         sizeof(rre->name_count));
 1947: 
 1948:       if (extra != NULL)
 1949:         {
 1950:         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
 1951:         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
 1952:         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
 1953:         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
 1954:         }
 1955:       }
 1956: 
 1957:     /* Extract information from the compiled data if required. There are now
 1958:     two info-returning functions. The old one has a limited interface and
 1959:     returns only limited data. Check that it agrees with the newer one. */
 1960: 
 1961:     SHOW_INFO:
 1962: 
 1963:     if (do_debug)
 1964:       {
 1965:       fprintf(outfile, "------------------------------------------------------------------\n");
 1966:       pcre_printint(re, outfile, debug_lengths);
 1967:       }
 1968: 
 1969:     /* We already have the options in get_options (see above) */
 1970: 
 1971:     if (do_showinfo)
 1972:       {
 1973:       unsigned long int all_options;
 1974: #if !defined NOINFOCHECK
 1975:       int old_first_char, old_options, old_count;
 1976: #endif
 1977:       int count, backrefmax, first_char, need_char, okpartial, jchanged,
 1978:         hascrorlf;
 1979:       int nameentrysize, namecount;
 1980:       const uschar *nametable;
 1981: 
 1982:       new_info(re, NULL, PCRE_INFO_SIZE, &size);
 1983:       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
 1984:       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
 1985:       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
 1986:       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
 1987:       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
 1988:       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
 1989:       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
 1990:       new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
 1991:       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
 1992:       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
 1993: 
 1994: #if !defined NOINFOCHECK
 1995:       old_count = pcre_info(re, &old_options, &old_first_char);
 1996:       if (count < 0) fprintf(outfile,
 1997:         "Error %d from pcre_info()\n", count);
 1998:       else
 1999:         {
 2000:         if (old_count != count) fprintf(outfile,
 2001:           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
 2002:             old_count);
 2003: 
 2004:         if (old_first_char != first_char) fprintf(outfile,
 2005:           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
 2006:             first_char, old_first_char);
 2007: 
 2008:         if (old_options != (int)get_options) fprintf(outfile,
 2009:           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
 2010:             get_options, old_options);
 2011:         }
 2012: #endif
 2013: 
 2014:       if (size != regex_gotten_store) fprintf(outfile,
 2015:         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
 2016:         (int)size, (int)regex_gotten_store);
 2017: 
 2018:       fprintf(outfile, "Capturing subpattern count = %d\n", count);
 2019:       if (backrefmax > 0)
 2020:         fprintf(outfile, "Max back reference = %d\n", backrefmax);
 2021: 
 2022:       if (namecount > 0)
 2023:         {
 2024:         fprintf(outfile, "Named capturing subpatterns:\n");
 2025:         while (namecount-- > 0)
 2026:           {
 2027:           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
 2028:             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
 2029:             GET2(nametable, 0));
 2030:           nametable += nameentrysize;
 2031:           }
 2032:         }
 2033: 
 2034:       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
 2035:       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
 2036: 
 2037:       all_options = ((real_pcre *)re)->options;
 2038:       if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
 2039: 
 2040:       if (get_options == 0) fprintf(outfile, "No options\n");
 2041:         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 2042:           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
 2043:           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
 2044:           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
 2045:           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
 2046:           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
 2047:           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
 2048:           ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
 2049:           ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
 2050:           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
 2051:           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
 2052:           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
 2053:           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
 2054:           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
 2055:           ((get_options & PCRE_UCP) != 0)? " ucp" : "",
 2056:           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
 2057:           ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
 2058:           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
 2059: 
 2060:       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
 2061: 
 2062:       switch (get_options & PCRE_NEWLINE_BITS)
 2063:         {
 2064:         case PCRE_NEWLINE_CR:
 2065:         fprintf(outfile, "Forced newline sequence: CR\n");
 2066:         break;
 2067: 
 2068:         case PCRE_NEWLINE_LF:
 2069:         fprintf(outfile, "Forced newline sequence: LF\n");
 2070:         break;
 2071: 
 2072:         case PCRE_NEWLINE_CRLF:
 2073:         fprintf(outfile, "Forced newline sequence: CRLF\n");
 2074:         break;
 2075: 
 2076:         case PCRE_NEWLINE_ANYCRLF:
 2077:         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
 2078:         break;
 2079: 
 2080:         case PCRE_NEWLINE_ANY:
 2081:         fprintf(outfile, "Forced newline sequence: ANY\n");
 2082:         break;
 2083: 
 2084:         default:
 2085:         break;
 2086:         }
 2087: 
 2088:       if (first_char == -1)
 2089:         {
 2090:         fprintf(outfile, "First char at start or follows newline\n");
 2091:         }
 2092:       else if (first_char < 0)
 2093:         {
 2094:         fprintf(outfile, "No first char\n");
 2095:         }
 2096:       else
 2097:         {
 2098:         int ch = first_char & 255;
 2099:         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
 2100:           "" : " (caseless)";
 2101:         if (PRINTHEX(ch))
 2102:           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
 2103:         else
 2104:           fprintf(outfile, "First char = %d%s\n", ch, caseless);
 2105:         }
 2106: 
 2107:       if (need_char < 0)
 2108:         {
 2109:         fprintf(outfile, "No need char\n");
 2110:         }
 2111:       else
 2112:         {
 2113:         int ch = need_char & 255;
 2114:         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
 2115:           "" : " (caseless)";
 2116:         if (PRINTHEX(ch))
 2117:           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
 2118:         else
 2119:           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
 2120:         }
 2121: 
 2122:       /* Don't output study size; at present it is in any case a fixed
 2123:       value, but it varies, depending on the computer architecture, and
 2124:       so messes up the test suite. (And with the /F option, it might be
 2125:       flipped.) If study was forced by an external -s, don't show this
 2126:       information unless -i or -d was also present. This means that, except
 2127:       when auto-callouts are involved, the output from runs with and without
 2128:       -s should be identical. */
 2129: 
 2130:       if (do_study || (force_study >= 0 && showinfo && !no_force_study))
 2131:         {
 2132:         if (extra == NULL)
 2133:           fprintf(outfile, "Study returned NULL\n");
 2134:         else
 2135:           {
 2136:           uschar *start_bits = NULL;
 2137:           int minlength;
 2138: 
 2139:           new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
 2140:           fprintf(outfile, "Subject length lower bound = %d\n", minlength);
 2141: 
 2142:           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
 2143:           if (start_bits == NULL)
 2144:             fprintf(outfile, "No set of starting bytes\n");
 2145:           else
 2146:             {
 2147:             int i;
 2148:             int c = 24;
 2149:             fprintf(outfile, "Starting byte set: ");
 2150:             for (i = 0; i < 256; i++)
 2151:               {
 2152:               if ((start_bits[i/8] & (1<<(i&7))) != 0)
 2153:                 {
 2154:                 if (c > 75)
 2155:                   {
 2156:                   fprintf(outfile, "\n  ");
 2157:                   c = 2;
 2158:                   }
 2159:                 if (PRINTHEX(i) && i != ' ')
 2160:                   {
 2161:                   fprintf(outfile, "%c ", i);
 2162:                   c += 2;
 2163:                   }
 2164:                 else
 2165:                   {
 2166:                   fprintf(outfile, "\\x%02x ", i);
 2167:                   c += 5;
 2168:                   }
 2169:                 }
 2170:               }
 2171:             fprintf(outfile, "\n");
 2172:             }
 2173:           }
 2174: 
 2175:         /* Show this only if the JIT was set by /S, not by -s. */
 2176: 
 2177:         if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
 2178:           {
 2179:           int jit;
 2180:           new_info(re, extra, PCRE_INFO_JIT, &jit);
 2181:           if (jit)
 2182:             fprintf(outfile, "JIT study was successful\n");
 2183:           else
 2184: #ifdef SUPPORT_JIT
 2185:             fprintf(outfile, "JIT study was not successful\n");
 2186: #else
 2187:             fprintf(outfile, "JIT support is not available in this version of PCRE\n");
 2188: #endif
 2189:           }
 2190:         }
 2191:       }
 2192: 
 2193:     /* If the '>' option was present, we write out the regex to a file, and
 2194:     that is all. The first 8 bytes of the file are the regex length and then
 2195:     the study length, in big-endian order. */
 2196: 
 2197:     if (to_file != NULL)
 2198:       {
 2199:       FILE *f = fopen((char *)to_file, "wb");
 2200:       if (f == NULL)
 2201:         {
 2202:         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
 2203:         }
 2204:       else
 2205:         {
 2206:         uschar sbuf[8];
 2207:         sbuf[0] = (uschar)((true_size >> 24) & 255);
 2208:         sbuf[1] = (uschar)((true_size >> 16) & 255);
 2209:         sbuf[2] = (uschar)((true_size >>  8) & 255);
 2210:         sbuf[3] = (uschar)((true_size) & 255);
 2211: 
 2212:         sbuf[4] = (uschar)((true_study_size >> 24) & 255);
 2213:         sbuf[5] = (uschar)((true_study_size >> 16) & 255);
 2214:         sbuf[6] = (uschar)((true_study_size >>  8) & 255);
 2215:         sbuf[7] = (uschar)((true_study_size) & 255);
 2216: 
 2217:         if (fwrite(sbuf, 1, 8, f) < 8 ||
 2218:             fwrite(re, 1, true_size, f) < true_size)
 2219:           {
 2220:           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
 2221:           }
 2222:         else
 2223:           {
 2224:           fprintf(outfile, "Compiled pattern written to %s\n", to_file);
 2225: 
 2226:           /* If there is study data, write it. */
 2227: 
 2228:           if (extra != NULL)
 2229:             {
 2230:             if (fwrite(extra->study_data, 1, true_study_size, f) <
 2231:                 true_study_size)
 2232:               {
 2233:               fprintf(outfile, "Write error on %s: %s\n", to_file,
 2234:                 strerror(errno));
 2235:               }
 2236:             else fprintf(outfile, "Study data written to %s\n", to_file);
 2237:             }
 2238:           }
 2239:         fclose(f);
 2240:         }
 2241: 
 2242:       new_free(re);
 2243:       if (extra != NULL) pcre_free_study(extra);
 2244:       if (locale_set)
 2245:         {
 2246:         new_free((void *)tables);
 2247:         setlocale(LC_CTYPE, "C");
 2248:         locale_set = 0;
 2249:         }
 2250:       continue;  /* With next regex */
 2251:       }
 2252:     }        /* End of non-POSIX compile */
 2253: 
 2254:   /* Read data lines and test them */
 2255: 
 2256:   for (;;)
 2257:     {
 2258:     uschar *q;
 2259:     uschar *bptr;
 2260:     int *use_offsets = offsets;
 2261:     int use_size_offsets = size_offsets;
 2262:     int callout_data = 0;
 2263:     int callout_data_set = 0;
 2264:     int count, c;
 2265:     int copystrings = 0;
 2266:     int find_match_limit = default_find_match_limit;
 2267:     int getstrings = 0;
 2268:     int getlist = 0;
 2269:     int gmatched = 0;
 2270:     int start_offset = 0;
 2271:     int start_offset_sign = 1;
 2272:     int g_notempty = 0;
 2273:     int use_dfa = 0;
 2274: 
 2275:     options = 0;
 2276: 
 2277:     *copynames = 0;
 2278:     *getnames = 0;
 2279: 
 2280:     copynamesptr = copynames;
 2281:     getnamesptr = getnames;
 2282: 
 2283:     pcre_callout = callout;
 2284:     first_callout = 1;
 2285:     last_callout_mark = NULL;
 2286:     callout_extra = 0;
 2287:     callout_count = 0;
 2288:     callout_fail_count = 999999;
 2289:     callout_fail_id = -1;
 2290:     show_malloc = 0;
 2291: 
 2292:     if (extra != NULL) extra->flags &=
 2293:       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
 2294: 
 2295:     len = 0;
 2296:     for (;;)
 2297:       {
 2298:       if (extend_inputline(infile, buffer + len, "data> ") == NULL)
 2299:         {
 2300:         if (len > 0)    /* Reached EOF without hitting a newline */
 2301:           {
 2302:           fprintf(outfile, "\n");
 2303:           break;
 2304:           }
 2305:         done = 1;
 2306:         goto CONTINUE;
 2307:         }
 2308:       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
 2309:       len = (int)strlen((char *)buffer);
 2310:       if (buffer[len-1] == '\n') break;
 2311:       }
 2312: 
 2313:     while (len > 0 && isspace(buffer[len-1])) len--;
 2314:     buffer[len] = 0;
 2315:     if (len == 0) break;
 2316: 
 2317:     p = buffer;
 2318:     while (isspace(*p)) p++;
 2319: 
 2320:     bptr = q = dbuffer;
 2321:     while ((c = *p++) != 0)
 2322:       {
 2323:       int i = 0;
 2324:       int n = 0;
 2325: 
 2326:       if (c == '\\') switch ((c = *p++))
 2327:         {
 2328:         case 'a': c =    7; break;
 2329:         case 'b': c = '\b'; break;
 2330:         case 'e': c =   27; break;
 2331:         case 'f': c = '\f'; break;
 2332:         case 'n': c = '\n'; break;
 2333:         case 'r': c = '\r'; break;
 2334:         case 't': c = '\t'; break;
 2335:         case 'v': c = '\v'; break;
 2336: 
 2337:         case '0': case '1': case '2': case '3':
 2338:         case '4': case '5': case '6': case '7':
 2339:         c -= '0';
 2340:         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
 2341:           c = c * 8 + *p++ - '0';
 2342: 
 2343: #if !defined NOUTF8
 2344:         if (use_utf8 && c > 255)
 2345:           {
 2346:           unsigned char buff8[8];
 2347:           int ii, utn;
 2348:           utn = ord2utf8(c, buff8);
 2349:           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
 2350:           c = buff8[ii];   /* Last byte */
 2351:           }
 2352: #endif
 2353:         break;
 2354: 
 2355:         case 'x':
 2356: 
 2357:         /* Handle \x{..} specially - new Perl thing for utf8 */
 2358: 
 2359: #if !defined NOUTF8
 2360:         if (*p == '{')
 2361:           {
 2362:           unsigned char *pt = p;
 2363:           c = 0;
 2364: 
 2365:           /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
 2366:           when isxdigit() is a macro that refers to its argument more than
 2367:           once. This is banned by the C Standard, but apparently happens in at
 2368:           least one MacOS environment. */
 2369: 
 2370:           for (pt++; isxdigit(*pt); pt++)
 2371:             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
 2372:           if (*pt == '}')
 2373:             {
 2374:             unsigned char buff8[8];
 2375:             int ii, utn;
 2376:             if (use_utf8)
 2377:               {
 2378:               utn = ord2utf8(c, buff8);
 2379:               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
 2380:               c = buff8[ii];   /* Last byte */
 2381:               }
 2382:             else
 2383:              {
 2384:              if (c > 255)
 2385:                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
 2386:                  "UTF-8 mode is not enabled.\n"
 2387:                  "** Truncation will probably give the wrong result.\n", c);
 2388:              }
 2389:             p = pt + 1;
 2390:             break;
 2391:             }
 2392:           /* Not correct form; fall through */
 2393:           }
 2394: #endif
 2395: 
 2396:         /* Ordinary \x */
 2397: 
 2398:         c = 0;
 2399:         while (i++ < 2 && isxdigit(*p))
 2400:           {
 2401:           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
 2402:           p++;
 2403:           }
 2404:         break;
 2405: 
 2406:         case 0:   /* \ followed by EOF allows for an empty line */
 2407:         p--;
 2408:         continue;
 2409: 
 2410:         case '>':
 2411:         if (*p == '-')
 2412:           {
 2413:           start_offset_sign = -1;
 2414:           p++;
 2415:           }
 2416:         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
 2417:         start_offset *= start_offset_sign;
 2418:         continue;
 2419: 
 2420:         case 'A':  /* Option setting */
 2421:         options |= PCRE_ANCHORED;
 2422:         continue;
 2423: 
 2424:         case 'B':
 2425:         options |= PCRE_NOTBOL;
 2426:         continue;
 2427: 
 2428:         case 'C':
 2429:         if (isdigit(*p))    /* Set copy string */
 2430:           {
 2431:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2432:           copystrings |= 1 << n;
 2433:           }
 2434:         else if (isalnum(*p))
 2435:           {
 2436:           uschar *npp = copynamesptr;
 2437:           while (isalnum(*p)) *npp++ = *p++;
 2438:           *npp++ = 0;
 2439:           *npp = 0;
 2440:           n = pcre_get_stringnumber(re, (char *)copynamesptr);
 2441:           if (n < 0)
 2442:             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
 2443:           copynamesptr = npp;
 2444:           }
 2445:         else if (*p == '+')
 2446:           {
 2447:           callout_extra = 1;
 2448:           p++;
 2449:           }
 2450:         else if (*p == '-')
 2451:           {
 2452:           pcre_callout = NULL;
 2453:           p++;
 2454:           }
 2455:         else if (*p == '!')
 2456:           {
 2457:           callout_fail_id = 0;
 2458:           p++;
 2459:           while(isdigit(*p))
 2460:             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
 2461:           callout_fail_count = 0;
 2462:           if (*p == '!')
 2463:             {
 2464:             p++;
 2465:             while(isdigit(*p))
 2466:               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
 2467:             }
 2468:           }
 2469:         else if (*p == '*')
 2470:           {
 2471:           int sign = 1;
 2472:           callout_data = 0;
 2473:           if (*(++p) == '-') { sign = -1; p++; }
 2474:           while(isdigit(*p))
 2475:             callout_data = callout_data * 10 + *p++ - '0';
 2476:           callout_data *= sign;
 2477:           callout_data_set = 1;
 2478:           }
 2479:         continue;
 2480: 
 2481: #if !defined NODFA
 2482:         case 'D':
 2483: #if !defined NOPOSIX
 2484:         if (posix || do_posix)
 2485:           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
 2486:         else
 2487: #endif
 2488:           use_dfa = 1;
 2489:         continue;
 2490: #endif
 2491: 
 2492: #if !defined NODFA
 2493:         case 'F':
 2494:         options |= PCRE_DFA_SHORTEST;
 2495:         continue;
 2496: #endif
 2497: 
 2498:         case 'G':
 2499:         if (isdigit(*p))
 2500:           {
 2501:           while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2502:           getstrings |= 1 << n;
 2503:           }
 2504:         else if (isalnum(*p))
 2505:           {
 2506:           uschar *npp = getnamesptr;
 2507:           while (isalnum(*p)) *npp++ = *p++;
 2508:           *npp++ = 0;
 2509:           *npp = 0;
 2510:           n = pcre_get_stringnumber(re, (char *)getnamesptr);
 2511:           if (n < 0)
 2512:             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
 2513:           getnamesptr = npp;
 2514:           }
 2515:         continue;
 2516: 
 2517:         case 'J':
 2518:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2519:         if (extra != NULL
 2520:             && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
 2521:             && extra->executable_jit != NULL)
 2522:           {
 2523: 	  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
 2524: 	  jit_stack = pcre_jit_stack_alloc(1, n * 1024);
 2525: 	  pcre_assign_jit_stack(extra, jit_callback, jit_stack);
 2526:           }
 2527:         continue;
 2528: 
 2529:         case 'L':
 2530:         getlist = 1;
 2531:         continue;
 2532: 
 2533:         case 'M':
 2534:         find_match_limit = 1;
 2535:         continue;
 2536: 
 2537:         case 'N':
 2538:         if ((options & PCRE_NOTEMPTY) != 0)
 2539:           options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
 2540:         else
 2541:           options |= PCRE_NOTEMPTY;
 2542:         continue;
 2543: 
 2544:         case 'O':
 2545:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2546:         if (n > size_offsets_max)
 2547:           {
 2548:           size_offsets_max = n;
 2549:           free(offsets);
 2550:           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
 2551:           if (offsets == NULL)
 2552:             {
 2553:             printf("** Failed to get %d bytes of memory for offsets vector\n",
 2554:               (int)(size_offsets_max * sizeof(int)));
 2555:             yield = 1;
 2556:             goto EXIT;
 2557:             }
 2558:           }
 2559:         use_size_offsets = n;
 2560:         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
 2561:         continue;
 2562: 
 2563:         case 'P':
 2564:         options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
 2565:           PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
 2566:         continue;
 2567: 
 2568:         case 'Q':
 2569:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2570:         if (extra == NULL)
 2571:           {
 2572:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2573:           extra->flags = 0;
 2574:           }
 2575:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
 2576:         extra->match_limit_recursion = n;
 2577:         continue;
 2578: 
 2579:         case 'q':
 2580:         while(isdigit(*p)) n = n * 10 + *p++ - '0';
 2581:         if (extra == NULL)
 2582:           {
 2583:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2584:           extra->flags = 0;
 2585:           }
 2586:         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
 2587:         extra->match_limit = n;
 2588:         continue;
 2589: 
 2590: #if !defined NODFA
 2591:         case 'R':
 2592:         options |= PCRE_DFA_RESTART;
 2593:         continue;
 2594: #endif
 2595: 
 2596:         case 'S':
 2597:         show_malloc = 1;
 2598:         continue;
 2599: 
 2600:         case 'Y':
 2601:         options |= PCRE_NO_START_OPTIMIZE;
 2602:         continue;
 2603: 
 2604:         case 'Z':
 2605:         options |= PCRE_NOTEOL;
 2606:         continue;
 2607: 
 2608:         case '?':
 2609:         options |= PCRE_NO_UTF8_CHECK;
 2610:         continue;
 2611: 
 2612:         case '<':
 2613:           {
 2614:           int x = check_newline(p, outfile);
 2615:           if (x == 0) goto NEXT_DATA;
 2616:           options |= x;
 2617:           while (*p++ != '>');
 2618:           }
 2619:         continue;
 2620:         }
 2621:       *q++ = c;
 2622:       }
 2623:     *q = 0;
 2624:     len = (int)(q - dbuffer);
 2625: 
 2626:     /* Move the data to the end of the buffer so that a read over the end of
 2627:     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
 2628:     we are using the POSIX interface, we must include the terminating zero. */
 2629: 
 2630: #if !defined NOPOSIX
 2631:     if (posix || do_posix)
 2632:       {
 2633:       memmove(bptr + buffer_size - len - 1, bptr, len + 1);
 2634:       bptr += buffer_size - len - 1;
 2635:       }
 2636:     else
 2637: #endif
 2638:       {
 2639:       memmove(bptr + buffer_size - len, bptr, len);
 2640:       bptr += buffer_size - len;
 2641:       }
 2642: 
 2643:     if ((all_use_dfa || use_dfa) && find_match_limit)
 2644:       {
 2645:       printf("**Match limit not relevant for DFA matching: ignored\n");
 2646:       find_match_limit = 0;
 2647:       }
 2648: 
 2649:     /* Handle matching via the POSIX interface, which does not
 2650:     support timing or playing with the match limit or callout data. */
 2651: 
 2652: #if !defined NOPOSIX
 2653:     if (posix || do_posix)
 2654:       {
 2655:       int rc;
 2656:       int eflags = 0;
 2657:       regmatch_t *pmatch = NULL;
 2658:       if (use_size_offsets > 0)
 2659:         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
 2660:       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
 2661:       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
 2662:       if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
 2663: 
 2664:       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
 2665: 
 2666:       if (rc != 0)
 2667:         {
 2668:         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
 2669:         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
 2670:         }
 2671:       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
 2672:               != 0)
 2673:         {
 2674:         fprintf(outfile, "Matched with REG_NOSUB\n");
 2675:         }
 2676:       else
 2677:         {
 2678:         size_t i;
 2679:         for (i = 0; i < (size_t)use_size_offsets; i++)
 2680:           {
 2681:           if (pmatch[i].rm_so >= 0)
 2682:             {
 2683:             fprintf(outfile, "%2d: ", (int)i);
 2684:             (void)pchars(dbuffer + pmatch[i].rm_so,
 2685:               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
 2686:             fprintf(outfile, "\n");
 2687:             if (do_showcaprest || (i == 0 && do_showrest))
 2688:               {
 2689:               fprintf(outfile, "%2d+ ", (int)i);
 2690:               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
 2691:                 outfile);
 2692:               fprintf(outfile, "\n");
 2693:               }
 2694:             }
 2695:           }
 2696:         }
 2697:       free(pmatch);
 2698:       }
 2699: 
 2700:     /* Handle matching via the native interface - repeats for /g and /G */
 2701: 
 2702:     else
 2703: #endif  /* !defined NOPOSIX */
 2704: 
 2705:     for (;; gmatched++)    /* Loop for /g or /G */
 2706:       {
 2707:       markptr = NULL;
 2708: 
 2709:       if (timeitm > 0)
 2710:         {
 2711:         register int i;
 2712:         clock_t time_taken;
 2713:         clock_t start_time = clock();
 2714: 
 2715: #if !defined NODFA
 2716:         if (all_use_dfa || use_dfa)
 2717:           {
 2718:           int workspace[1000];
 2719:           for (i = 0; i < timeitm; i++)
 2720:             count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
 2721:               options | g_notempty, use_offsets, use_size_offsets, workspace,
 2722:               sizeof(workspace)/sizeof(int));
 2723:           }
 2724:         else
 2725: #endif
 2726: 
 2727:         for (i = 0; i < timeitm; i++)
 2728:           count = pcre_exec(re, extra, (char *)bptr, len,
 2729:             start_offset, options | g_notempty, use_offsets, use_size_offsets);
 2730: 
 2731:         time_taken = clock() - start_time;
 2732:         fprintf(outfile, "Execute time %.4f milliseconds\n",
 2733:           (((double)time_taken * 1000.0) / (double)timeitm) /
 2734:             (double)CLOCKS_PER_SEC);
 2735:         }
 2736: 
 2737:       /* If find_match_limit is set, we want to do repeated matches with
 2738:       varying limits in order to find the minimum value for the match limit and
 2739:       for the recursion limit. The match limits are relevant only to the normal
 2740:       running of pcre_exec(), so disable the JIT optimization. This makes it
 2741:       possible to run the same set of tests with and without JIT externally
 2742:       requested. */
 2743: 
 2744:       if (find_match_limit)
 2745:         {
 2746:         if (extra == NULL)
 2747:           {
 2748:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2749:           extra->flags = 0;
 2750:           }
 2751:         else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
 2752: 
 2753:         (void)check_match_limit(re, extra, bptr, len, start_offset,
 2754:           options|g_notempty, use_offsets, use_size_offsets,
 2755:           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
 2756:           PCRE_ERROR_MATCHLIMIT, "match()");
 2757: 
 2758:         count = check_match_limit(re, extra, bptr, len, start_offset,
 2759:           options|g_notempty, use_offsets, use_size_offsets,
 2760:           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
 2761:           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
 2762:         }
 2763: 
 2764:       /* If callout_data is set, use the interface with additional data */
 2765: 
 2766:       else if (callout_data_set)
 2767:         {
 2768:         if (extra == NULL)
 2769:           {
 2770:           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
 2771:           extra->flags = 0;
 2772:           }
 2773:         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
 2774:         extra->callout_data = &callout_data;
 2775:         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
 2776:           options | g_notempty, use_offsets, use_size_offsets);
 2777:         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
 2778:         }
 2779: 
 2780:       /* The normal case is just to do the match once, with the default
 2781:       value of match_limit. */
 2782: 
 2783: #if !defined NODFA
 2784:       else if (all_use_dfa || use_dfa)
 2785:         {
 2786:         int workspace[1000];
 2787:         count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
 2788:           options | g_notempty, use_offsets, use_size_offsets, workspace,
 2789:           sizeof(workspace)/sizeof(int));
 2790:         if (count == 0)
 2791:           {
 2792:           fprintf(outfile, "Matched, but too many subsidiary matches\n");
 2793:           count = use_size_offsets/2;
 2794:           }
 2795:         }
 2796: #endif
 2797: 
 2798:       else
 2799:         {
 2800:         count = pcre_exec(re, extra, (char *)bptr, len,
 2801:           start_offset, options | g_notempty, use_offsets, use_size_offsets);
 2802:         if (count == 0)
 2803:           {
 2804:           fprintf(outfile, "Matched, but too many substrings\n");
 2805:           count = use_size_offsets/3;
 2806:           }
 2807:         }
 2808: 
 2809:       /* Matched */
 2810: 
 2811:       if (count >= 0)
 2812:         {
 2813:         int i, maxcount;
 2814: 
 2815: #if !defined NODFA
 2816:         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
 2817: #endif
 2818:           maxcount = use_size_offsets/3;
 2819: 
 2820:         /* This is a check against a lunatic return value. */
 2821: 
 2822:         if (count > maxcount)
 2823:           {
 2824:           fprintf(outfile,
 2825:             "** PCRE error: returned count %d is too big for offset size %d\n",
 2826:             count, use_size_offsets);
 2827:           count = use_size_offsets/3;
 2828:           if (do_g || do_G)
 2829:             {
 2830:             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
 2831:             do_g = do_G = FALSE;        /* Break g/G loop */
 2832:             }
 2833:           }
 2834: 
 2835:         /* do_allcaps requests showing of all captures in the pattern, to check
 2836:         unset ones at the end. */
 2837: 
 2838:         if (do_allcaps)
 2839:           {
 2840:           new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
 2841:           count++;   /* Allow for full match */
 2842:           if (count * 2 > use_size_offsets) count = use_size_offsets/2;
 2843:           }
 2844: 
 2845:         /* Output the captured substrings */
 2846: 
 2847:         for (i = 0; i < count * 2; i += 2)
 2848:           {
 2849:           if (use_offsets[i] < 0)
 2850:             {
 2851:             if (use_offsets[i] != -1)
 2852:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
 2853:                 use_offsets[i], i);
 2854:             if (use_offsets[i+1] != -1)
 2855:               fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
 2856:                 use_offsets[i+1], i+1);
 2857:             fprintf(outfile, "%2d: <unset>\n", i/2);
 2858:             }
 2859:           else
 2860:             {
 2861:             fprintf(outfile, "%2d: ", i/2);
 2862:             (void)pchars(bptr + use_offsets[i],
 2863:               use_offsets[i+1] - use_offsets[i], outfile);
 2864:             fprintf(outfile, "\n");
 2865:             if (do_showcaprest || (i == 0 && do_showrest))
 2866:               {
 2867:               fprintf(outfile, "%2d+ ", i/2);
 2868:               (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
 2869:                 outfile);
 2870:               fprintf(outfile, "\n");
 2871:               }
 2872:             }
 2873:           }
 2874: 
 2875:         if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
 2876: 
 2877:         for (i = 0; i < 32; i++)
 2878:           {
 2879:           if ((copystrings & (1 << i)) != 0)
 2880:             {
 2881:             char copybuffer[256];
 2882:             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
 2883:               i, copybuffer, sizeof(copybuffer));
 2884:             if (rc < 0)
 2885:               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
 2886:             else
 2887:               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
 2888:             }
 2889:           }
 2890: 
 2891:         for (copynamesptr = copynames;
 2892:              *copynamesptr != 0;
 2893:              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
 2894:           {
 2895:           char copybuffer[256];
 2896:           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
 2897:             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
 2898:           if (rc < 0)
 2899:             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
 2900:           else
 2901:             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
 2902:           }
 2903: 
 2904:         for (i = 0; i < 32; i++)
 2905:           {
 2906:           if ((getstrings & (1 << i)) != 0)
 2907:             {
 2908:             const char *substring;
 2909:             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
 2910:               i, &substring);
 2911:             if (rc < 0)
 2912:               fprintf(outfile, "get substring %d failed %d\n", i, rc);
 2913:             else
 2914:               {
 2915:               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
 2916:               pcre_free_substring(substring);
 2917:               }
 2918:             }
 2919:           }
 2920: 
 2921:         for (getnamesptr = getnames;
 2922:              *getnamesptr != 0;
 2923:              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
 2924:           {
 2925:           const char *substring;
 2926:           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
 2927:             count, (char *)getnamesptr, &substring);
 2928:           if (rc < 0)
 2929:             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
 2930:           else
 2931:             {
 2932:             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
 2933:             pcre_free_substring(substring);
 2934:             }
 2935:           }
 2936: 
 2937:         if (getlist)
 2938:           {
 2939:           const char **stringlist;
 2940:           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
 2941:             &stringlist);
 2942:           if (rc < 0)
 2943:             fprintf(outfile, "get substring list failed %d\n", rc);
 2944:           else
 2945:             {
 2946:             for (i = 0; i < count; i++)
 2947:               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
 2948:             if (stringlist[i] != NULL)
 2949:               fprintf(outfile, "string list not terminated by NULL\n");
 2950:             pcre_free_substring_list(stringlist);
 2951:             }
 2952:           }
 2953:         }
 2954: 
 2955:       /* There was a partial match */
 2956: 
 2957:       else if (count == PCRE_ERROR_PARTIAL)
 2958:         {
 2959:         if (markptr == NULL) fprintf(outfile, "Partial match");
 2960:           else fprintf(outfile, "Partial match, mark=%s", markptr);
 2961:         if (use_size_offsets > 1)
 2962:           {
 2963:           fprintf(outfile, ": ");
 2964:           pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
 2965:             outfile);
 2966:           }
 2967:         fprintf(outfile, "\n");
 2968:         break;  /* Out of the /g loop */
 2969:         }
 2970: 
 2971:       /* Failed to match. If this is a /g or /G loop and we previously set
 2972:       g_notempty after a null match, this is not necessarily the end. We want
 2973:       to advance the start offset, and continue. We won't be at the end of the
 2974:       string - that was checked before setting g_notempty.
 2975: 
 2976:       Complication arises in the case when the newline convention is "any",
 2977:       "crlf", or "anycrlf". If the previous match was at the end of a line
 2978:       terminated by CRLF, an advance of one character just passes the \r,
 2979:       whereas we should prefer the longer newline sequence, as does the code in
 2980:       pcre_exec(). Fudge the offset value to achieve this. We check for a
 2981:       newline setting in the pattern; if none was set, use pcre_config() to
 2982:       find the default.
 2983: 
 2984:       Otherwise, in the case of UTF-8 matching, the advance must be one
 2985:       character, not one byte. */
 2986: 
 2987:       else
 2988:         {
 2989:         if (g_notempty != 0)
 2990:           {
 2991:           int onechar = 1;
 2992:           unsigned int obits = ((real_pcre *)re)->options;
 2993:           use_offsets[0] = start_offset;
 2994:           if ((obits & PCRE_NEWLINE_BITS) == 0)
 2995:             {
 2996:             int d;
 2997:             (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
 2998:             /* Note that these values are always the ASCII ones, even in
 2999:             EBCDIC environments. CR = 13, NL = 10. */
 3000:             obits = (d == 13)? PCRE_NEWLINE_CR :
 3001:                     (d == 10)? PCRE_NEWLINE_LF :
 3002:                     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
 3003:                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
 3004:                     (d == -1)? PCRE_NEWLINE_ANY : 0;
 3005:             }
 3006:           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
 3007:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
 3008:                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
 3009:               &&
 3010:               start_offset < len - 1 &&
 3011:               bptr[start_offset] == '\r' &&
 3012:               bptr[start_offset+1] == '\n')
 3013:             onechar++;
 3014:           else if (use_utf8)
 3015:             {
 3016:             while (start_offset + onechar < len)
 3017:               {
 3018:               if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
 3019:               onechar++;
 3020:               }
 3021:             }
 3022:           use_offsets[1] = start_offset + onechar;
 3023:           }
 3024:         else
 3025:           {
 3026:           switch(count)
 3027:             {
 3028:             case PCRE_ERROR_NOMATCH:
 3029:             if (gmatched == 0)
 3030:               {
 3031:               if (markptr == NULL) fprintf(outfile, "No match\n");
 3032:                 else fprintf(outfile, "No match, mark = %s\n", markptr);
 3033:               }
 3034:             break;
 3035: 
 3036:             case PCRE_ERROR_BADUTF8:
 3037:             case PCRE_ERROR_SHORTUTF8:
 3038:             fprintf(outfile, "Error %d (%s UTF-8 string)", count,
 3039:               (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
 3040:             if (use_size_offsets >= 2)
 3041:               fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
 3042:                 use_offsets[1]);
 3043:             fprintf(outfile, "\n");
 3044:             break;
 3045: 
 3046:             default:
 3047:             if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
 3048:               fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
 3049:             else
 3050:               fprintf(outfile, "Error %d (Unexpected value)\n", count);
 3051:             break;
 3052:             }
 3053: 
 3054:           break;  /* Out of the /g loop */
 3055:           }
 3056:         }
 3057: 
 3058:       /* If not /g or /G we are done */
 3059: 
 3060:       if (!do_g && !do_G) break;
 3061: 
 3062:       /* If we have matched an empty string, first check to see if we are at
 3063:       the end of the subject. If so, the /g loop is over. Otherwise, mimic what
 3064:       Perl's /g option does. This turns out to be rather cunning. First we set
 3065:       PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
 3066:       same point. If this fails (picked up above) we advance to the next
 3067:       character. */
 3068: 
 3069:       g_notempty = 0;
 3070: 
 3071:       if (use_offsets[0] == use_offsets[1])
 3072:         {
 3073:         if (use_offsets[0] == len) break;
 3074:         g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
 3075:         }
 3076: 
 3077:       /* For /g, update the start offset, leaving the rest alone */
 3078: 
 3079:       if (do_g) start_offset = use_offsets[1];
 3080: 
 3081:       /* For /G, update the pointer and length */
 3082: 
 3083:       else
 3084:         {
 3085:         bptr += use_offsets[1];
 3086:         len -= use_offsets[1];
 3087:         }
 3088:       }  /* End of loop for /g and /G */
 3089: 
 3090:     NEXT_DATA: continue;
 3091:     }    /* End of loop for data lines */
 3092: 
 3093:   CONTINUE:
 3094: 
 3095: #if !defined NOPOSIX
 3096:   if (posix || do_posix) regfree(&preg);
 3097: #endif
 3098: 
 3099:   if (re != NULL) new_free(re);
 3100:   if (extra != NULL) pcre_free_study(extra);
 3101:   if (locale_set)
 3102:     {
 3103:     new_free((void *)tables);
 3104:     setlocale(LC_CTYPE, "C");
 3105:     locale_set = 0;
 3106:     }
 3107:   if (jit_stack != NULL)
 3108:     {
 3109:     pcre_jit_stack_free(jit_stack);
 3110:     jit_stack = NULL;
 3111:     }
 3112:   }
 3113: 
 3114: if (infile == stdin) fprintf(outfile, "\n");
 3115: 
 3116: EXIT:
 3117: 
 3118: if (infile != NULL && infile != stdin) fclose(infile);
 3119: if (outfile != NULL && outfile != stdout) fclose(outfile);
 3120: 
 3121: free(buffer);
 3122: free(dbuffer);
 3123: free(pbuffer);
 3124: free(offsets);
 3125: 
 3126: return yield;
 3127: }
 3128: 
 3129: /* End of pcretest.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>