Annotation of embedaddon/pcre/pcregrep.c, revision 1.1.1.5

1.1       misho       1: /*************************************************
                      2: *               pcregrep program                 *
                      3: *************************************************/
                      4: 
                      5: /* This is a grep program that uses the PCRE regular expression library to do
1.1.1.5 ! misho       6: its pattern matching. On Unix-like, Windows, and native z/OS systems it can
        !             7: recurse into directories, and in z/OS it can handle PDS files.
1.1       misho       8: 
1.1.1.5 ! misho       9: Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
        !            10: additional header is required. That header is not included in the main PCRE
        !            11: distribution because other apparatus is needed to compile pcregrep for z/OS.
        !            12: The header can be found in the special z/OS distribution, which is available
        !            13: from www.zaconsultants.net or from www.cbttape.org.
        !            14: 
        !            15:            Copyright (c) 1997-2013 University of Cambridge
1.1       misho      16: 
                     17: -----------------------------------------------------------------------------
                     18: Redistribution and use in source and binary forms, with or without
                     19: modification, are permitted provided that the following conditions are met:
                     20: 
                     21:     * Redistributions of source code must retain the above copyright notice,
                     22:       this list of conditions and the following disclaimer.
                     23: 
                     24:     * Redistributions in binary form must reproduce the above copyright
                     25:       notice, this list of conditions and the following disclaimer in the
                     26:       documentation and/or other materials provided with the distribution.
                     27: 
                     28:     * Neither the name of the University of Cambridge nor the names of its
                     29:       contributors may be used to endorse or promote products derived from
                     30:       this software without specific prior written permission.
                     31: 
                     32: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     33: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     34: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     35: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     36: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     37: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     38: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     39: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     40: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     41: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     42: POSSIBILITY OF SUCH DAMAGE.
                     43: -----------------------------------------------------------------------------
                     44: */
                     45: 
                     46: #ifdef HAVE_CONFIG_H
                     47: #include "config.h"
                     48: #endif
                     49: 
                     50: #include <ctype.h>
                     51: #include <locale.h>
                     52: #include <stdio.h>
                     53: #include <string.h>
                     54: #include <stdlib.h>
                     55: #include <errno.h>
                     56: 
                     57: #include <sys/types.h>
                     58: #include <sys/stat.h>
                     59: 
                     60: #ifdef HAVE_UNISTD_H
                     61: #include <unistd.h>
                     62: #endif
                     63: 
                     64: #ifdef SUPPORT_LIBZ
                     65: #include <zlib.h>
                     66: #endif
                     67: 
                     68: #ifdef SUPPORT_LIBBZ2
                     69: #include <bzlib.h>
                     70: #endif
                     71: 
                     72: #include "pcre.h"
                     73: 
                     74: #define FALSE 0
                     75: #define TRUE 1
                     76: 
                     77: typedef int BOOL;
                     78: 
                     79: #define OFFSET_SIZE 99
                     80: 
                     81: #if BUFSIZ > 8192
1.1.1.4   misho      82: #define MAXPATLEN BUFSIZ
1.1       misho      83: #else
1.1.1.4   misho      84: #define MAXPATLEN 8192
1.1       misho      85: #endif
                     86: 
1.1.1.4   misho      87: #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
                     88: 
1.1       misho      89: /* Values for the "filenames" variable, which specifies options for file name
                     90: output. The order is important; it is assumed that a file name is wanted for
                     91: all values greater than FN_DEFAULT. */
                     92: 
                     93: enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
                     94: 
                     95: /* File reading styles */
                     96: 
                     97: enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
                     98: 
                     99: /* Actions for the -d and -D options */
                    100: 
                    101: enum { dee_READ, dee_SKIP, dee_RECURSE };
                    102: enum { DEE_READ, DEE_SKIP };
                    103: 
                    104: /* Actions for special processing options (flag bits) */
                    105: 
                    106: #define PO_WORD_MATCH     0x0001
                    107: #define PO_LINE_MATCH     0x0002
                    108: #define PO_FIXED_STRINGS  0x0004
                    109: 
                    110: /* Line ending types */
                    111: 
                    112: enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
                    113: 
1.1.1.3   misho     114: /* Binary file options */
                    115: 
                    116: enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
                    117: 
1.1       misho     118: /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
                    119: environments), a warning is issued if the value of fwrite() is ignored.
                    120: Unfortunately, casting to (void) does not suppress the warning. To get round
                    121: this, we use a macro that compiles a fudge. Oddly, this does not also seem to
                    122: apply to fprintf(). */
                    123: 
                    124: #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
                    125: 
                    126: 
                    127: 
                    128: /*************************************************
                    129: *               Global variables                 *
                    130: *************************************************/
                    131: 
                    132: /* Jeffrey Friedl has some debugging requirements that are not part of the
                    133: regular code. */
                    134: 
                    135: #ifdef JFRIEDL_DEBUG
                    136: static int S_arg = -1;
                    137: static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
                    138: static unsigned int jfriedl_XT = 0; /* replicate text this many times */
                    139: static const char *jfriedl_prefix = "";
                    140: static const char *jfriedl_postfix = "";
                    141: #endif
                    142: 
                    143: static int  endlinetype;
                    144: 
                    145: static char *colour_string = (char *)"1;31";
                    146: static char *colour_option = NULL;
                    147: static char *dee_option = NULL;
                    148: static char *DEE_option = NULL;
1.1.1.4   misho     149: static char *locale = NULL;
1.1       misho     150: static char *main_buffer = NULL;
                    151: static char *newline = NULL;
1.1.1.4   misho     152: static char *om_separator = (char *)"";
1.1       misho     153: static char *stdin_name = (char *)"(standard input)";
                    154: 
                    155: static const unsigned char *pcretables = NULL;
                    156: 
                    157: static int after_context = 0;
                    158: static int before_context = 0;
1.1.1.3   misho     159: static int binary_files = BIN_BINARY;
1.1       misho     160: static int both_context = 0;
                    161: static int bufthird = PCREGREP_BUFSIZE;
                    162: static int bufsize = 3*PCREGREP_BUFSIZE;
1.1.1.4   misho     163: 
                    164: #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
                    165: static int dee_action = dee_SKIP;
                    166: #else
1.1       misho     167: static int dee_action = dee_READ;
1.1.1.4   misho     168: #endif
                    169: 
1.1       misho     170: static int DEE_action = DEE_READ;
                    171: static int error_count = 0;
                    172: static int filenames = FN_DEFAULT;
1.1.1.4   misho     173: static int pcre_options = 0;
1.1       misho     174: static int process_options = 0;
                    175: 
                    176: #ifdef SUPPORT_PCREGREP_JIT
                    177: static int study_options = PCRE_STUDY_JIT_COMPILE;
                    178: #else
                    179: static int study_options = 0;
                    180: #endif
                    181: 
                    182: static unsigned long int match_limit = 0;
                    183: static unsigned long int match_limit_recursion = 0;
                    184: 
                    185: static BOOL count_only = FALSE;
                    186: static BOOL do_colour = FALSE;
                    187: static BOOL file_offsets = FALSE;
                    188: static BOOL hyphenpending = FALSE;
                    189: static BOOL invert = FALSE;
                    190: static BOOL line_buffered = FALSE;
                    191: static BOOL line_offsets = FALSE;
                    192: static BOOL multiline = FALSE;
                    193: static BOOL number = FALSE;
                    194: static BOOL omit_zero_count = FALSE;
                    195: static BOOL resource_error = FALSE;
                    196: static BOOL quiet = FALSE;
1.1.1.4   misho     197: static BOOL show_only_matching = FALSE;
1.1       misho     198: static BOOL silent = FALSE;
                    199: static BOOL utf8 = FALSE;
                    200: 
1.1.1.4   misho     201: /* Structure for list of --only-matching capturing numbers. */
                    202: 
                    203: typedef struct omstr {
                    204:   struct omstr *next;   /* link to the following item in the list */
                    205:   int groupnum;         /* capturing group number for --only-matching */
                    206: } omstr;
                    207: 
                    208: static omstr *only_matching = NULL;
                    209: static omstr *only_matching_last = NULL;
                    210: 
                    211: /* Structure for holding the two variables that describe a number chain. */
                    212: 
                    213: typedef struct omdatastr {
                    214:   omstr **anchor;       /* address of the variable that holds the first item */
                    215:   omstr **lastptr;      /* address of the variable that holds the last item */
                    216: } omdatastr;
                    217: 
                    218: static omdatastr only_matching_data = { &only_matching, &only_matching_last };
                    219: 
                    220: /* Structure for list of file names (for -f and --{in,ex}clude-from) */
                    221: 
                    222: typedef struct fnstr {
                    223:   struct fnstr *next;   /* link to the following item; NULL ends the chain */
                    224:   char *name;           /* the file name; not released by free_file_chain() */
                    225: } fnstr;
                    226: 
                    227: static fnstr *exclude_from = NULL;
                    228: static fnstr *exclude_from_last = NULL;
                    229: static fnstr *include_from = NULL;
                    230: static fnstr *include_from_last = NULL;
                    231: 
                    232: static fnstr *file_lists = NULL;
                    233: static fnstr *file_lists_last = NULL;
                    234: static fnstr *pattern_files = NULL;
                    235: static fnstr *pattern_files_last = NULL;
                    236: 
                    237: /* Structure for holding the two variables that describe a file name chain. */
                    238: 
                    239: typedef struct fndatastr {
                    240:   fnstr **anchor;       /* address of the variable that holds the first item */
                    241:   fnstr **lastptr;      /* address of the variable that holds the last item */
                    242: } fndatastr;
                    243: 
                    244: static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
                    245: static fndatastr include_from_data = { &include_from, &include_from_last };
                    246: static fndatastr file_lists_data = { &file_lists, &file_lists_last };
                    247: static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
                    248: 
                    249: /* Structure for pattern and its compiled form; used for matching patterns and
                    250: also for include/exclude patterns. */
                    251: 
                    252: typedef struct patstr {
                    253:   struct patstr *next;  /* link to the following item; NULL ends the chain */
                    254:   char *string;         /* pattern text, stored as the pointer given to add_pattern() */
                    255:   pcre *compiled;       /* NULL on creation; released with pcre_free() when not NULL */
                    256:   pcre_extra *hint;     /* NULL on creation; released with pcre_free_study() when not NULL */
                    257: } patstr;
                    258: 
                    259: static patstr *patterns = NULL;
                    260: static patstr *patterns_last = NULL;
                    261: static patstr *include_patterns = NULL;
                    262: static patstr *include_patterns_last = NULL;
                    263: static patstr *exclude_patterns = NULL;
                    264: static patstr *exclude_patterns_last = NULL;
                    265: static patstr *include_dir_patterns = NULL;
                    266: static patstr *include_dir_patterns_last = NULL;
                    267: static patstr *exclude_dir_patterns = NULL;
                    268: static patstr *exclude_dir_patterns_last = NULL;
                    269: 
                    270: /* Structure holding the two variables that describe a pattern chain. A pointer
                    271: to such structures is used for each appropriate option. */
                    272: 
                    273: typedef struct patdatastr {
                    274:   patstr **anchor;      /* address of the variable that holds the first item */
                    275:   patstr **lastptr;     /* address of the variable that holds the last item */
                    276: } patdatastr;
                    277: 
                    278: static patdatastr match_patdata = { &patterns, &patterns_last };
                    279: static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
                    280: static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
                    281: static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
                    282: static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
                    283: 
                    284: static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
                    285:                                  &include_dir_patterns, &exclude_dir_patterns };
                    286: 
                    287: static const char *incexname[4] = { "--include", "--exclude",
                    288:                                     "--include-dir", "--exclude-dir" };
                    289: 
1.1       misho     290: /* Structure for options and list of them */
                    291: 
                    292: enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
1.1.1.4   misho     293:        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
1.1       misho     294: 
                    295: typedef struct option_item {
                    296:   int type;               /* one of the OP_xxx values */
                    297:   int one_char;           /* single-letter form, or a negative N_xxx value if none */
                    298:   void *dataptr;          /* where the option's data goes; NULL for entries with none */
                    299:   const char *long_name;  /* long name, with "=xxx" appended when data is taken */
                    300:   const char *help_text;  /* one-line description of the option */
                    301: } option_item;
                    302: 
                    303: /* Options without a single-letter equivalent get a negative value. This can be
                    304: used to identify them. */
                    305: 
                    306: #define N_COLOUR       (-1)
                    307: #define N_EXCLUDE      (-2)
                    308: #define N_EXCLUDE_DIR  (-3)
                    309: #define N_HELP         (-4)
                    310: #define N_INCLUDE      (-5)
                    311: #define N_INCLUDE_DIR  (-6)
                    312: #define N_LABEL        (-7)
                    313: #define N_LOCALE       (-8)
                    314: #define N_NULL         (-9)
                    315: #define N_LOFFSETS     (-10)
                    316: #define N_FOFFSETS     (-11)
                    317: #define N_LBUFFER      (-12)
                    318: #define N_M_LIMIT      (-13)
                    319: #define N_M_LIMIT_REC  (-14)
                    320: #define N_BUFSIZE      (-15)
                    321: #define N_NOJIT        (-16)
1.1.1.3   misho     322: #define N_FILE_LIST    (-17)
                    323: #define N_BINARY_FILES (-18)
1.1.1.4   misho     324: #define N_EXCLUDE_FROM (-19)
                    325: #define N_INCLUDE_FROM (-20)
                    326: #define N_OM_SEPARATOR (-21)
1.1       misho     327: 
                    328: static option_item optionlist[] = {
1.1.1.3   misho     329:   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
1.1       misho     330:   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
                    331:   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
1.1.1.3   misho     332:   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
1.1       misho     333:   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
1.1.1.3   misho     334:   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
1.1       misho     335:   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
                    336:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
                    337:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
                    338:   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
                    339:   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
                    340:   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
                    341:   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
1.1.1.4   misho     342:   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
1.1       misho     343:   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
1.1.1.4   misho     344:   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
                    345:   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
1.1       misho     346:   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
                    347:   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
                    348:   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
1.1.1.3   misho     349:   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
1.1       misho     350:   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
                    351: #ifdef SUPPORT_PCREGREP_JIT
                    352:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
                    353: #else
                    354:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
                    355: #endif
                    356:   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
                    357:   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
                    358:   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
                    359:   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
                    360:   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
                    361:   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
                    362:   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
                    363:   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
                    364:   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
                    365:   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
                    366:   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
1.1.1.4   misho     367:   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
                    368:   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
1.1       misho     369:   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
                    370:   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
1.1.1.4   misho     371:   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
                    372:   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
                    373:   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
                    374:   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
                    375:   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
                    376:   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
1.1       misho     377: 
                    378:   /* These two were accidentally implemented with underscores instead of
                    379:   hyphens in the option names. As this was not discovered for several releases,
                    380:   the incorrect versions are left in the table for compatibility. However, the
                    381:   --help function misses out any option that has an underscore in its name. */
                    382: 
1.1.1.4   misho     383:   { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
                    384:   { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
1.1       misho     385: 
                    386: #ifdef JFRIEDL_DEBUG
                    387:   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
                    388: #endif
                    389:   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
                    390:   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
                    391:   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
                    392:   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
                    393:   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
                    394:   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
                    395:   { OP_NODATA,    0,        NULL,               NULL,            NULL }
                    396: };
                    397: 
                    398: /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
                    399: options. These set the 1, 2, and 4 bits in process_options, respectively. Note
                    400: that the combination of -w and -x has the same effect as -x on its own, so we
1.1.1.4   misho     401: can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
                    402: prefix+suffix is 10 characters; if anything longer is added, it must be
                    403: adjusted. */
1.1       misho     404: 
                    405: static const char *prefix[] = {
                    406:   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
                    407: 
                    408: static const char *suffix[] = {
                    409:   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
                    410: 
                    411: /* UTF-8 tables - used only when the newline setting is "any". */
                    412: 
                    413: const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
                    414: 
                    415: const char utf8_table4[] = {
                    416:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                    417:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                    418:   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
                    419:   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
                    420: 
                    421: 
                    422: 
                    423: /*************************************************
                    424: *         Exit from the program                  *
                    425: *************************************************/
                    426: 
                    427: /* If there has been a resource error, give a suitable message.
                    428: 
                    429: Argument:  the return code
                    430: Returns:   does not return
                    431: */
                    432: 
                    433: static void
                    434: pcregrep_exit(int rc)
                    435: {
                    436: if (resource_error)
                    437:   {
                    438:   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
                    439:     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
                    440:     PCRE_ERROR_JIT_STACKLIMIT);
                    441:   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
                    442:   }
                    443: exit(rc);
                    444: }
                    445: 
                    446: 
                    447: /*************************************************
1.1.1.4   misho     448: *          Add item to chain of patterns         *
                    449: *************************************************/
                    450: 
                    451: /* Used to add an item onto a chain, or just return an unconnected item if the
                    452: "after" argument is NULL.
                    453: 
                    454: Arguments:
                    455:   s          pattern string to add
                    456:   after      if not NULL points to item to insert after
                    457: 
                    458: Returns:     new pattern block
                    459: */
                    460: 
                    461: static patstr *
                    462: add_pattern(char *s, patstr *after)
                    463: {
                    464: patstr *p = (patstr *)malloc(sizeof(patstr));
                    465: if (p == NULL)
                    466:   {
                    467:   fprintf(stderr, "pcregrep: malloc failed\n");
                    468:   pcregrep_exit(2);
                    469:   }
                    470: if (strlen(s) > MAXPATLEN)
                    471:   {
                    472:   fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
                    473:     MAXPATLEN);
                    474:   return NULL;
                    475:   }
                    476: p->next = NULL;
                    477: p->string = s;
                    478: p->compiled = NULL;
                    479: p->hint = NULL;
                    480: 
                    481: if (after != NULL)
                    482:   {
                    483:   p->next = after->next;
                    484:   after->next = p;
                    485:   }
                    486: return p;
                    487: }
                    488: 
                    489: 
                    490: /*************************************************
                    491: *           Free chain of patterns               *
                    492: *************************************************/
                    493: 
                    494: /* Used for several chains of patterns.
                    495: 
                    496: Argument: pointer to start of chain
                    497: Returns:  nothing
                    498: */
                    499: 
                    500: static void
                    501: free_pattern_chain(patstr *pc)
                    502: {
                    503: while (pc != NULL)
                    504:   {
                    505:   patstr *p = pc;
                    506:   pc = p->next;
                    507:   if (p->hint != NULL) pcre_free_study(p->hint);
                    508:   if (p->compiled != NULL) pcre_free(p->compiled);
                    509:   free(p);
                    510:   }
                    511: }
                    512: 
                    513: 
                    514: /*************************************************
                    515: *           Free chain of file names             *
                    516: *************************************************/
                    517: 
                    518: /*
                    519: Argument: pointer to start of chain
                    520: Returns:  nothing
                    521: */
                    522: 
                    523: static void
                    524: free_file_chain(fnstr *fn)
                    525: {
                    526: while (fn != NULL)
                    527:   {
                    528:   fnstr *f = fn;
                    529:   fn = f->next;
                    530:   free(f);
                    531:   }
                    532: }
                    533: 
                    534: 
                    535: /*************************************************
1.1       misho     536: *            OS-specific functions               *
                    537: *************************************************/
                    538: 
1.1.1.5 ! misho     539: /* These functions are defined so that they can be made system specific.
        !           540: At present there are versions for Unix-style environments, Windows, native
        !           541: z/OS, and "no support". */
1.1       misho     542: 
                    543: 
1.1.1.5 ! misho     544: /************* Directory scanning Unix-style and z/OS ***********/
1.1       misho     545: 
1.1.1.5 ! misho     546: #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
1.1       misho     547: #include <sys/types.h>
                    548: #include <sys/stat.h>
                    549: #include <dirent.h>
                    550: 
1.1.1.5 ! misho     551: #if defined NATIVE_ZOS
        !           552: /************* Directory and PDS/E scanning for z/OS ***********/
        !           553: /************* z/OS looks mostly like Unix with USS ************/
        !           554: /* However, z/OS needs the #include statements in this header */
        !           555: #include "pcrzosfs.h"
        !           556: /* That header is not included in the main PCRE distribution because
        !           557:    other apparatus is needed to compile pcregrep for z/OS. The header
        !           558:    can be found in the special z/OS distribution, which is available
        !           559:    from www.zaconsultants.net or from www.cbttape.org. */
        !           560: #endif
        !           561: 
1.1       misho     562: typedef DIR directory_type;
1.1.1.4   misho     563: #define FILESEP '/'
1.1       misho     564: 
                    static int
                    isdirectory(char *filename)
                    {
                    struct stat info;

                    /* Yields 1 when stat() succeeds and reports a directory, otherwise 0. A
                    failing stat() gives 0 in the expectation that opening the path as a file
                    will fail. */

                    return stat(filename, &info) >= 0 && S_ISDIR(info.st_mode);
                    }
                    573: 
                    574: static directory_type *
                    575: opendirectory(char *filename)
                    576: {
                    577: return opendir(filename);
                    578: }
                    579: 
                    580: static char *
                    581: readdirectory(directory_type *dir)
                    582: {
                    583: for (;;)
                    584:   {
                    585:   struct dirent *dent = readdir(dir);
                    586:   if (dent == NULL) return NULL;
                    587:   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
                    588:     return dent->d_name;
                    589:   }
                    590: /* Control never reaches here */
                    591: }
                    592: 
                    593: static void
                    594: closedirectory(directory_type *dir)
                    595: {
                    596: closedir(dir);
                    597: }
                    598: 
                    599: 
1.1.1.5 ! misho     600: /************* Test for regular file, Unix-style **********/
1.1       misho     601: 
                    static int
                    isregfile(char *filename)
                    {
                    struct stat info;

                    /* A path that cannot be examined is reported as regular (1), in the
                    expectation that opening it as a file will fail. Otherwise the answer is 1
                    for a regular file and 0 for anything else. */

                    if (stat(filename, &info) < 0) return 1;
                    return S_ISREG(info.st_mode) ? 1 : 0;
                    }
                    610: 
                    611: 
1.1.1.5 ! misho     612: #if defined NATIVE_ZOS
        !           613: /************* Test for a terminal in z/OS **********/
        !           614: /* isatty() does not work in a TSO environment, so always give FALSE.*/
        !           615: 
        !           616: static BOOL
        !           617: is_stdout_tty(void)
        !           618: {
        !           619: return FALSE;
        !           620: }
        !           621: 
        !           622: static BOOL
        !           623: is_file_tty(FILE *f)
        !           624: {
        !           625: return FALSE;
        !           626: }
        !           627: 
1.1       misho     628: 
1.1.1.5 ! misho     629: /************* Test for a terminal, Unix-style **********/
        !           630: 
        !           631: #else
1.1       misho     632: static BOOL
                    633: is_stdout_tty(void)
                    634: {
                    635: return isatty(fileno(stdout));
                    636: }
                    637: 
                    638: static BOOL
                    639: is_file_tty(FILE *f)
                    640: {
                    641: return isatty(fileno(f));
                    642: }
1.1.1.5 ! misho     643: #endif
        !           644: 
        !           645: /* End of Unix-style or native z/OS environment functions. */
1.1       misho     646: 
                    647: 
1.1.1.5 ! misho     648: /************* Directory scanning in Windows ***********/
1.1       misho     649: 
                    650: /* I (Philip Hazel) have no means of testing this code. It was contributed by
                    651: Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
                    652: when it did not exist. David Byron added a patch that moved the #include of
                    653: <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
                    654: The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
                    655: undefined when it is indeed undefined. */
                    656: 
                    657: #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
                    658: 
                    659: #ifndef STRICT
                    660: # define STRICT
                    661: #endif
                    662: #ifndef WIN32_LEAN_AND_MEAN
                    663: # define WIN32_LEAN_AND_MEAN
                    664: #endif
                    665: 
                    666: #include <windows.h>
                    667: 
                    668: #ifndef INVALID_FILE_ATTRIBUTES
                    669: #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
                    670: #endif
                    671: 
                    672: typedef struct directory_type
                    673: {
                    674: HANDLE handle;
                    675: BOOL first;
                    676: WIN32_FIND_DATA data;
                    677: } directory_type;
                    678: 
1.1.1.4   misho     679: #define FILESEP '/'
                    680: 
1.1       misho     681: int
                    682: isdirectory(char *filename)
                    683: {
                    684: DWORD attr = GetFileAttributes(filename);
                    685: if (attr == INVALID_FILE_ATTRIBUTES)
                    686:   return 0;
1.1.1.4   misho     687: return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1.1       misho     688: }
                    689: 
                    690: directory_type *
                    691: opendirectory(char *filename)
                    692: {
                    693: size_t len;
                    694: char *pattern;
                    695: directory_type *dir;
                    696: DWORD err;
                    697: len = strlen(filename);
1.1.1.4   misho     698: pattern = (char *)malloc(len + 3);
                    699: dir = (directory_type *)malloc(sizeof(*dir));
1.1       misho     700: if ((pattern == NULL) || (dir == NULL))
                    701:   {
                    702:   fprintf(stderr, "pcregrep: malloc failed\n");
                    703:   pcregrep_exit(2);
                    704:   }
                    705: memcpy(pattern, filename, len);
                    706: memcpy(&(pattern[len]), "\\*", 3);
                    707: dir->handle = FindFirstFile(pattern, &(dir->data));
                    708: if (dir->handle != INVALID_HANDLE_VALUE)
                    709:   {
                    710:   free(pattern);
                    711:   dir->first = TRUE;
                    712:   return dir;
                    713:   }
                    714: err = GetLastError();
                    715: free(pattern);
                    716: free(dir);
                    717: errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
                    718: return NULL;
                    719: }
                    720: 
                    721: char *
                    722: readdirectory(directory_type *dir)
                    723: {
                    724: for (;;)
                    725:   {
                    726:   if (!dir->first)
                    727:     {
                    728:     if (!FindNextFile(dir->handle, &(dir->data)))
                    729:       return NULL;
                    730:     }
                    731:   else
                    732:     {
                    733:     dir->first = FALSE;
                    734:     }
                    735:   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
                    736:     return dir->data.cFileName;
                    737:   }
                    738: #ifndef _MSC_VER
                    739: return NULL;   /* Keep compiler happy; never executed */
                    740: #endif
                    741: }
                    742: 
                    743: void
                    744: closedirectory(directory_type *dir)
                    745: {
                    746: FindClose(dir->handle);
                    747: free(dir);
                    748: }
                    749: 
                    750: 
1.1.1.5 ! misho     751: /************* Test for regular file in Windows **********/
1.1       misho     752: 
                    753: /* I don't know how to do this, or if it can be done; assume all paths are
                    754: regular if they are not directories. */
                    755: 
                    int isregfile(char *filename)
                    {
                    /* Anything that isdirectory() does not report as a directory is taken to
                    be a regular file. */
                    return isdirectory(filename) ? 0 : 1;
                    }
                    760: 
                    761: 
1.1.1.5 ! misho     762: /************* Test for a terminal in Windows **********/
1.1       misho     763: 
                    764: /* I don't know how to do this; assume never */
                    765: 
                    766: static BOOL
                    767: is_stdout_tty(void)
                    768: {
                    769: return FALSE;
                    770: }
                    771: 
                    772: static BOOL
                    773: is_file_tty(FILE *f)
                    774: {
                    775: return FALSE;
                    776: }
                    777: 
1.1.1.5 ! misho     778: /* End of Windows functions */
        !           779: 
1.1       misho     780: 
                    781: /************* Directory scanning when we can't do it ***********/
                    782: 
                    783: /* The type is void, and apart from isdirectory(), the functions do nothing. */
                    784: 
                    785: #else
                    786: 
1.1.1.4   misho     787: #define FILESEP 0
1.1       misho     788: typedef void directory_type;
                    789: 
                    /* No directory support: nothing is ever reported as a directory. */
                    int isdirectory(char *filename)
                    {
                    (void)filename;
                    return 0;
                    }
                    791: directory_type * opendirectory(char *filename) { return (directory_type*)0;}
                    792: char *readdirectory(directory_type *dir) { return (char*)0;}
                    793: void closedirectory(directory_type *dir) {}
                    794: 
                    795: 
1.1.1.5 ! misho     796: /************* Test for regular file when we can't do it **********/
1.1       misho     797: 
                    798: /* Assume all files are regular. */
                    799: 
                    int isregfile(char *filename)
                    {
                    (void)filename;   /* the path is not examined */
                    return 1;
                    }
                    801: 
                    802: 
                    803: /************* Test for a terminal when we can't do it **********/
                    804: 
                    805: static BOOL
                    806: is_stdout_tty(void)
                    807: {
                    808: return FALSE;
                    809: }
                    810: 
                    811: static BOOL
                    812: is_file_tty(FILE *f)
                    813: {
                    814: return FALSE;
                    815: }
                    816: 
1.1.1.5 ! misho     817: #endif  /* End of system-specific functions */
1.1       misho     818: 
                    819: 
                    820: 
                    821: #ifndef HAVE_STRERROR
                    822: /*************************************************
                    823: *     Provide strerror() for non-ANSI libraries  *
                    824: *************************************************/
                    825: 
                    826: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
                    827: in their libraries, but can provide the same facility by this simple
                    828: alternative function. */
                    829: 
                    830: extern int   sys_nerr;
                    831: extern char *sys_errlist[];
                    832: 
                    833: char *
                    834: strerror(int n)
                    835: {
                    836: if (n < 0 || n >= sys_nerr) return "unknown error number";
                    837: return sys_errlist[n];
                    838: }
                    839: #endif /* HAVE_STRERROR */
                    840: 
                    841: 
                    842: 
                    843: /*************************************************
1.1.1.4   misho     844: *                Usage function                  *
                    845: *************************************************/
                    846: 
                    847: static int
                    848: usage(int rc)
                    849: {
                    850: option_item *op;
                    851: fprintf(stderr, "Usage: pcregrep [-");
                    852: for (op = optionlist; op->one_char != 0; op++)
                    853:   {
                    854:   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
                    855:   }
                    856: fprintf(stderr, "] [long options] [pattern] [files]\n");
                    857: fprintf(stderr, "Type `pcregrep --help' for more information and the long "
                    858:   "options.\n");
                    859: return rc;
                    860: }
                    861: 
                    862: 
                    863: 
                    864: /*************************************************
                    865: *                Help function                   *
                    866: *************************************************/
                    867: 
                    868: static void
                    869: help(void)
                    870: {
                    871: option_item *op;
                    872: 
                    873: printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
                    874: printf("Search for PATTERN in each FILE or standard input.\n");
                    875: printf("PATTERN must be present if neither -e nor -f is used.\n");
                    876: printf("\"-\" can be used as a file name to mean STDIN.\n");
                    877: 
                    878: #ifdef SUPPORT_LIBZ
                    879: printf("Files whose names end in .gz are read using zlib.\n");
                    880: #endif
                    881: 
                    882: #ifdef SUPPORT_LIBBZ2
                    883: printf("Files whose names end in .bz2 are read using bzlib2.\n");
                    884: #endif
                    885: 
                    886: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
                    887: printf("Other files and the standard input are read as plain files.\n\n");
                    888: #else
                    889: printf("All files are read as plain files, without any interpretation.\n\n");
                    890: #endif
                    891: 
                    892: printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
                    893: printf("Options:\n");
                    894: 
                    895: for (op = optionlist; op->one_char != 0; op++)
                    896:   {
                    897:   int n;
                    898:   char s[4];
                    899: 
                    900:   /* Two options were accidentally implemented and documented with underscores
                    901:   instead of hyphens in their names, something that was not noticed for quite a
                    902:   few releases. When fixing this, I left the underscored versions in the list
                    903:   in case people were using them. However, we don't want to display them in the
                    904:   help data. There are no other options that contain underscores, and we do not
                    905:   expect ever to implement such options. Therefore, just omit any option that
                    906:   contains an underscore. */
                    907: 
                    908:   if (strchr(op->long_name, '_') != NULL) continue;
                    909: 
                    910:   if (op->one_char > 0 && (op->long_name)[0] == 0)
                    911:     n = 31 - printf("  -%c", op->one_char);
                    912:   else
                    913:     {
                    914:     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
                    915:       else strcpy(s, "   ");
                    916:     n = 31 - printf("  %s --%s", s, op->long_name);
                    917:     }
                    918: 
                    919:   if (n < 1) n = 1;
                    920:   printf("%.*s%s\n", n, "                           ", op->help_text);
                    921:   }
                    922: 
                    923: printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
                    924: printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
                    925: printf("When reading patterns or file names from a file, trailing white\n");
                    926: printf("space is removed and blank lines are ignored.\n");
                    927: printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
                    928: 
                    929: printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
                    930: printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
                    931: }
                    932: 
                    933: 
                    934: 
                    935: /*************************************************
                    936: *            Test exclude/includes               *
                    937: *************************************************/
                    938: 
                    939: /* If any exclude pattern matches, the path is excluded. Otherwise, unless
                    940: there are no includes, the path must match an include pattern.
                    941: 
                    942: Arguments:
                    943:   path      the path to be matched
                    944:   ip        the chain of include patterns
                    945:   ep        the chain of exclude patterns
                    946: 
                    947: Returns:    TRUE if the path is not excluded
                    948: */
                    949: 
                    950: static BOOL
                    951: test_incexc(char *path, patstr *ip, patstr *ep)
                    952: {
                    953: int plen = strlen(path);
                    954: 
                    955: for (; ep != NULL; ep = ep->next)
                    956:   {
                    957:   if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
                    958:     return FALSE;
                    959:   }
                    960: 
                    961: if (ip == NULL) return TRUE;
                    962: 
                    963: for (; ip != NULL; ip = ip->next)
                    964:   {
                    965:   if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
                    966:     return TRUE;
                    967:   }
                    968: 
                    969: return FALSE;
                    970: }
                    971: 
                    972: 
                    973: 
                    974: /*************************************************
                    975: *         Decode integer argument value          *
                    976: *************************************************/
                    977: 
                    978: /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
                    979: because SunOS4 doesn't have it. This is used only for unpicking arguments, so
                    980: just keep it simple.
                    981: 
                    982: Arguments:
                    983:   option_data   the option data string
                    984:   op            the option item (for error messages)
                    985:   longop        TRUE if option given in long form
                    986: 
                    987: Returns:        a long integer
                    988: */
                    989: 
                    990: static long int
                    991: decode_number(char *option_data, option_item *op, BOOL longop)
                    992: {
                    993: unsigned long int n = 0;
                    994: char *endptr = option_data;
                    995: while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
                    996: while (isdigit((unsigned char)(*endptr)))
                    997:   n = n * 10 + (int)(*endptr++ - '0');
                    998: if (toupper(*endptr) == 'K')
                    999:   {
                   1000:   n *= 1024;
                   1001:   endptr++;
                   1002:   }
                   1003: else if (toupper(*endptr) == 'M')
                   1004:   {
                   1005:   n *= 1024*1024;
                   1006:   endptr++;
                   1007:   }
                   1008: 
                   1009: if (*endptr != 0)   /* Error */
                   1010:   {
                   1011:   if (longop)
                   1012:     {
                   1013:     char *equals = strchr(op->long_name, '=');
                   1014:     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
                   1015:       (int)(equals - op->long_name);
                   1016:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
                   1017:       option_data, nlen, op->long_name);
                   1018:     }
                   1019:   else
                   1020:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
                   1021:       option_data, op->one_char);
                   1022:   pcregrep_exit(usage(2));
                   1023:   }
                   1024: 
                   1025: return n;
                   1026: }
                   1027: 
                   1028: 
                   1029: 
                   1030: /*************************************************
                   1031: *       Add item to a chain of numbers           *
                   1032: *************************************************/
                   1033: 
                   1034: /* Used to add an item onto a chain, or just return an unconnected item if the
                   1035: "after" argument is NULL.
                   1036: 
                   1037: Arguments:
                   1038:   n          the number to add
                   1039:   after      if not NULL points to item to insert after
                   1040: 
                   1041: Returns:     new number block
                   1042: */
                   1043: 
                   1044: static omstr *
                   1045: add_number(int n, omstr *after)
                   1046: {
                   1047: omstr *om = (omstr *)malloc(sizeof(omstr));
                   1048: 
                   1049: if (om == NULL)
                   1050:   {
                   1051:   fprintf(stderr, "pcregrep: malloc failed\n");
                   1052:   pcregrep_exit(2);
                   1053:   }
                   1054: om->next = NULL;
                   1055: om->groupnum = n;
                   1056: 
                   1057: if (after != NULL)
                   1058:   {
                   1059:   om->next = after->next;
                   1060:   after->next = om;
                   1061:   }
                   1062: return om;
                   1063: }
                   1064: 
                   1065: 
                   1066: 
                   1067: /*************************************************
1.1       misho    1068: *            Read one line of input              *
                   1069: *************************************************/
                   1070: 
                   1071: /* Normally, input is read using fread() into a large buffer, so many lines may
                   1072: be read at once. However, doing this for tty input means that no output appears
                   1073: until a lot of input has been typed. Instead, tty input is handled line by
                   1074: line. We cannot use fgets() for this, because it does not stop at a binary
                   1075: zero, and therefore there is no way of telling how many characters it has read,
                   1076: because there may be binary zeros embedded in the data.
                   1077: 
                   1078: Arguments:
                   1079:   buffer     the buffer to read into
                   1080:   length     the maximum number of characters to read
                   1081:   f          the file
                   1082: 
                   1083: Returns:     the number of characters read, zero at end of file
                   1084: */
                   1085: 
static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;

/* Copy bytes from f into buffer one at a time, stopping after a newline has
been stored, when length bytes have been stored, or at end of file. Binary
zeros are copied like any other byte, and no terminating zero is added; the
count that is returned is the only indication of how much data is present.

The limit is tested before each byte is fetched, so a length of zero (or less)
stores nothing and consumes nothing from the file, rather than writing one
byte beyond the space the caller offered. */

while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
                   1098: 
                   1099: 
                   1100: 
                   1101: /*************************************************
                   1102: *             Find end of line                   *
                   1103: *************************************************/
                   1104: 
                   1105: /* The length of the endline sequence that is found is set via lenptr. This may
                   1106: be zero at the very end of the file if there is no line-ending sequence there.
                   1107: 
                   1108: Arguments:
                   1109:   p         current position in line
                   1110:   endptr    end of available data
                   1111:   lenptr    where to put the length of the eol sequence
                   1112: 
                   1113: Returns:    pointer after the last byte of the line,
                   1114:             including the newline byte(s)
                   1115: */
                   1116: 
                   1117: static char *
                   1118: end_of_line(char *p, char *endptr, int *lenptr)
                   1119: {
                   1120: switch(endlinetype)
                   1121:   {
                   1122:   default:      /* Just in case */
                   1123:   case EL_LF:
                   1124:   while (p < endptr && *p != '\n') p++;
                   1125:   if (p < endptr)
                   1126:     {
                   1127:     *lenptr = 1;
                   1128:     return p + 1;
                   1129:     }
                   1130:   *lenptr = 0;
                   1131:   return endptr;
                   1132: 
                   1133:   case EL_CR:
                   1134:   while (p < endptr && *p != '\r') p++;
                   1135:   if (p < endptr)
                   1136:     {
                   1137:     *lenptr = 1;
                   1138:     return p + 1;
                   1139:     }
                   1140:   *lenptr = 0;
                   1141:   return endptr;
                   1142: 
                   1143:   case EL_CRLF:
                   1144:   for (;;)
                   1145:     {
                   1146:     while (p < endptr && *p != '\r') p++;
                   1147:     if (++p >= endptr)
                   1148:       {
                   1149:       *lenptr = 0;
                   1150:       return endptr;
                   1151:       }
                   1152:     if (*p == '\n')
                   1153:       {
                   1154:       *lenptr = 2;
                   1155:       return p + 1;
                   1156:       }
                   1157:     }
                   1158:   break;
                   1159: 
                   1160:   case EL_ANYCRLF:
                   1161:   while (p < endptr)
                   1162:     {
                   1163:     int extra = 0;
                   1164:     register int c = *((unsigned char *)p);
                   1165: 
                   1166:     if (utf8 && c >= 0xc0)
                   1167:       {
                   1168:       int gcii, gcss;
                   1169:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1170:       gcss = 6*extra;
                   1171:       c = (c & utf8_table3[extra]) << gcss;
                   1172:       for (gcii = 1; gcii <= extra; gcii++)
                   1173:         {
                   1174:         gcss -= 6;
                   1175:         c |= (p[gcii] & 0x3f) << gcss;
                   1176:         }
                   1177:       }
                   1178: 
                   1179:     p += 1 + extra;
                   1180: 
                   1181:     switch (c)
                   1182:       {
1.1.1.4   misho    1183:       case '\n':
1.1       misho    1184:       *lenptr = 1;
                   1185:       return p;
                   1186: 
1.1.1.4   misho    1187:       case '\r':
                   1188:       if (p < endptr && *p == '\n')
1.1       misho    1189:         {
                   1190:         *lenptr = 2;
                   1191:         p++;
                   1192:         }
                   1193:       else *lenptr = 1;
                   1194:       return p;
                   1195: 
                   1196:       default:
                   1197:       break;
                   1198:       }
                   1199:     }   /* End of loop for ANYCRLF case */
                   1200: 
                   1201:   *lenptr = 0;  /* Must have hit the end */
                   1202:   return endptr;
                   1203: 
                   1204:   case EL_ANY:
                   1205:   while (p < endptr)
                   1206:     {
                   1207:     int extra = 0;
                   1208:     register int c = *((unsigned char *)p);
                   1209: 
                   1210:     if (utf8 && c >= 0xc0)
                   1211:       {
                   1212:       int gcii, gcss;
                   1213:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1214:       gcss = 6*extra;
                   1215:       c = (c & utf8_table3[extra]) << gcss;
                   1216:       for (gcii = 1; gcii <= extra; gcii++)
                   1217:         {
                   1218:         gcss -= 6;
                   1219:         c |= (p[gcii] & 0x3f) << gcss;
                   1220:         }
                   1221:       }
                   1222: 
                   1223:     p += 1 + extra;
                   1224: 
                   1225:     switch (c)
                   1226:       {
1.1.1.4   misho    1227:       case '\n':    /* LF */
                   1228:       case '\v':    /* VT */
                   1229:       case '\f':    /* FF */
1.1       misho    1230:       *lenptr = 1;
                   1231:       return p;
                   1232: 
1.1.1.4   misho    1233:       case '\r':    /* CR */
                   1234:       if (p < endptr && *p == '\n')
1.1       misho    1235:         {
                   1236:         *lenptr = 2;
                   1237:         p++;
                   1238:         }
                   1239:       else *lenptr = 1;
                   1240:       return p;
                   1241: 
1.1.1.4   misho    1242: #ifndef EBCDIC
                   1243:       case 0x85:    /* Unicode NEL */
1.1       misho    1244:       *lenptr = utf8? 2 : 1;
                   1245:       return p;
                   1246: 
1.1.1.4   misho    1247:       case 0x2028:  /* Unicode LS */
                   1248:       case 0x2029:  /* Unicode PS */
1.1       misho    1249:       *lenptr = 3;
                   1250:       return p;
1.1.1.4   misho    1251: #endif  /* Not EBCDIC */
1.1       misho    1252: 
                   1253:       default:
                   1254:       break;
                   1255:       }
                   1256:     }   /* End of loop for ANY case */
                   1257: 
                   1258:   *lenptr = 0;  /* Must have hit the end */
                   1259:   return endptr;
                   1260:   }     /* End of overall switch */
                   1261: }
                   1262: 
                   1263: 
                   1264: 
                   1265: /*************************************************
                   1266: *         Find start of previous line            *
                   1267: *************************************************/
                   1268: 
                   1269: /* This is called when looking back for before lines to print.
                   1270: 
                   1271: Arguments:
                   1272:   p         start of the subsequent line
                   1273:   startptr  start of available data
                   1274: 
                   1275: Returns:    pointer to the start of the previous line
                   1276: */
                   1277: 
                   1278: static char *
                   1279: previous_line(char *p, char *startptr)
                   1280: {
                   1281: switch(endlinetype)
                   1282:   {
                   1283:   default:      /* Just in case */
                   1284:   case EL_LF:
                   1285:   p--;
                   1286:   while (p > startptr && p[-1] != '\n') p--;
                   1287:   return p;
                   1288: 
                   1289:   case EL_CR:
                   1290:   p--;
                   1291:   while (p > startptr && p[-1] != '\n') p--;
                   1292:   return p;
                   1293: 
                   1294:   case EL_CRLF:
                   1295:   for (;;)
                   1296:     {
                   1297:     p -= 2;
                   1298:     while (p > startptr && p[-1] != '\n') p--;
                   1299:     if (p <= startptr + 1 || p[-2] == '\r') return p;
                   1300:     }
                   1301:   return p;   /* But control should never get here */
                   1302: 
                   1303:   case EL_ANY:
                   1304:   case EL_ANYCRLF:
                   1305:   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
                   1306:   if (utf8) while ((*p & 0xc0) == 0x80) p--;
                   1307: 
                   1308:   while (p > startptr)
                   1309:     {
1.1.1.4   misho    1310:     register unsigned int c;
1.1       misho    1311:     char *pp = p - 1;
                   1312: 
                   1313:     if (utf8)
                   1314:       {
                   1315:       int extra = 0;
                   1316:       while ((*pp & 0xc0) == 0x80) pp--;
                   1317:       c = *((unsigned char *)pp);
                   1318:       if (c >= 0xc0)
                   1319:         {
                   1320:         int gcii, gcss;
                   1321:         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1322:         gcss = 6*extra;
                   1323:         c = (c & utf8_table3[extra]) << gcss;
                   1324:         for (gcii = 1; gcii <= extra; gcii++)
                   1325:           {
                   1326:           gcss -= 6;
                   1327:           c |= (pp[gcii] & 0x3f) << gcss;
                   1328:           }
                   1329:         }
                   1330:       }
                   1331:     else c = *((unsigned char *)pp);
                   1332: 
                   1333:     if (endlinetype == EL_ANYCRLF) switch (c)
                   1334:       {
1.1.1.4   misho    1335:       case '\n':    /* LF */
                   1336:       case '\r':    /* CR */
1.1       misho    1337:       return p;
                   1338: 
                   1339:       default:
                   1340:       break;
                   1341:       }
                   1342: 
                   1343:     else switch (c)
                   1344:       {
1.1.1.4   misho    1345:       case '\n':    /* LF */
                   1346:       case '\v':    /* VT */
                   1347:       case '\f':    /* FF */
                   1348:       case '\r':    /* CR */
                   1349: #ifndef EBCDIE
                   1350:       case 0x85:    /* Unicode NEL */
                   1351:       case 0x2028:  /* Unicode LS */
                   1352:       case 0x2029:  /* Unicode PS */
                   1353: #endif  /* Not EBCDIC */
1.1       misho    1354:       return p;
                   1355: 
                   1356:       default:
                   1357:       break;
                   1358:       }
                   1359: 
                   1360:     p = pp;  /* Back one character */
                   1361:     }        /* End of loop for ANY case */
                   1362: 
                   1363:   return startptr;  /* Hit start of data */
                   1364:   }     /* End of overall switch */
                   1365: }
                   1366: 
                   1367: 
                   1368: 
                   1369: 
                   1370: 
                   1371: /*************************************************
                   1372: *       Print the previous "after" lines         *
                   1373: *************************************************/
                   1374: 
                   1375: /* This is called if we are about to lose said lines because of buffer filling,
                   1376: and at the end of the file. The data in the line is written using fwrite() so
                   1377: that a binary zero does not terminate it.
                   1378: 
                   1379: Arguments:
                   1380:   lastmatchnumber   the number of the last matching line, plus one
                   1381:   lastmatchrestart  where we restarted after the last match
                   1382:   endptr            end of available data
                   1383:   printname         filename for printing
                   1384: 
                   1385: Returns:            nothing
                   1386: */
                   1387: 
1.1.1.4   misho    1388: static void
                   1389: do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
                   1390:   char *printname)
1.1       misho    1391: {
                   1392: if (after_context > 0 && lastmatchnumber > 0)
                   1393:   {
                   1394:   int count = 0;
                   1395:   while (lastmatchrestart < endptr && count++ < after_context)
                   1396:     {
                   1397:     int ellength;
                   1398:     char *pp = lastmatchrestart;
                   1399:     if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1400:     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
                   1401:     pp = end_of_line(pp, endptr, &ellength);
                   1402:     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
                   1403:     lastmatchrestart = pp;
                   1404:     }
                   1405:   hyphenpending = TRUE;
                   1406:   }
                   1407: }
                   1408: 
                   1409: 
                   1410: 
                   1411: /*************************************************
                   1412: *   Apply patterns to subject till one matches   *
                   1413: *************************************************/
                   1414: 
                   1415: /* This function is called to run through all patterns, looking for a match. It
                   1416: is used multiple times for the same subject when colouring is enabled, in order
                   1417: to find all possible matches.
                   1418: 
                   1419: Arguments:
                   1420:   matchptr     the start of the subject
                   1421:   length       the length of the subject to match
1.1.1.4   misho    1422:   options      options for pcre_exec
1.1       misho    1423:   startoffset  where to start matching
                   1424:   offsets      the offsets vector to fill in
                   1425:   mrc          address of where to put the result of pcre_exec()
                   1426: 
                   1427: Returns:      TRUE if there was a match
                   1428:               FALSE if there was no match
                   1429:               invert if there was a non-fatal error
                   1430: */
                   1431: 
                   1432: static BOOL
1.1.1.4   misho    1433: match_patterns(char *matchptr, size_t length, unsigned int options,
                   1434:   int startoffset, int *offsets, int *mrc)
1.1       misho    1435: {
                   1436: int i;
                   1437: size_t slen = length;
1.1.1.4   misho    1438: patstr *p = patterns;
1.1       misho    1439: const char *msg = "this text:\n\n";
1.1.1.4   misho    1440: 
1.1       misho    1441: if (slen > 200)
                   1442:   {
                   1443:   slen = 200;
                   1444:   msg = "text that starts:\n\n";
                   1445:   }
1.1.1.4   misho    1446: for (i = 1; p != NULL; p = p->next, i++)
1.1       misho    1447:   {
1.1.1.4   misho    1448:   *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
                   1449:     startoffset, options, offsets, OFFSET_SIZE);
1.1       misho    1450:   if (*mrc >= 0) return TRUE;
                   1451:   if (*mrc == PCRE_ERROR_NOMATCH) continue;
                   1452:   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1.1.1.4   misho    1453:   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1.1       misho    1454:   fprintf(stderr, "%s", msg);
                   1455:   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
                   1456:   fprintf(stderr, "\n\n");
                   1457:   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
                   1458:       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
                   1459:     resource_error = TRUE;
                   1460:   if (error_count++ > 20)
                   1461:     {
                   1462:     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
                   1463:     pcregrep_exit(2);
                   1464:     }
                   1465:   return invert;    /* No more matching; don't show the line again */
                   1466:   }
                   1467: 
                   1468: return FALSE;  /* No match, no errors */
                   1469: }
                   1470: 
                   1471: 
                   1472: 
                   1473: /*************************************************
                   1474: *            Grep an individual file             *
                   1475: *************************************************/
                   1476: 
                   1477: /* This is called from grep_or_recurse() below. It uses a buffer that is three
                   1478: times the value of bufthird. The matching point is never allowed to stray into
                   1479: the top third of the buffer, thus keeping more of the file available for
                   1480: context printing or for multiline scanning. For large files, the pointer will
                   1481: be in the middle third most of the time, so the bottom third is available for
                   1482: "before" context printing.
                   1483: 
                   1484: Arguments:
                   1485:   handle       the fopened FILE stream for a normal file
                   1486:                the gzFile pointer when reading is via libz
                   1487:                the BZFILE pointer when reading is via libbz2
                   1488:   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
                   1489:   filename     the file name or NULL (for errors)
                   1490:   printname    the file name if it is to be printed for each match
                   1491:                or NULL if the file name is not to be printed
                   1492:                it cannot be NULL if filenames[_nomatch]_only is set
                   1493: 
                   1494: Returns:       0 if there was at least one match
                   1495:                1 otherwise (no matches)
                   1496:                2 if an overlong line is encountered
                   1497:                3 if there is a read error on a .bz2 file
                   1498: */
                   1499: 
                   1500: static int
                   1501: pcregrep(void *handle, int frtype, char *filename, char *printname)
                   1502: {
                   1503: int rc = 1;
                   1504: int linenumber = 1;
                   1505: int lastmatchnumber = 0;
                   1506: int count = 0;
                   1507: int filepos = 0;
                   1508: int offsets[OFFSET_SIZE];
                   1509: char *lastmatchrestart = NULL;
                   1510: char *ptr = main_buffer;
                   1511: char *endptr;
                   1512: size_t bufflength;
1.1.1.3   misho    1513: BOOL binary = FALSE;
1.1       misho    1514: BOOL endhyphenpending = FALSE;
                   1515: BOOL input_line_buffered = line_buffered;
                   1516: FILE *in = NULL;                    /* Ensure initialized */
                   1517: 
                   1518: #ifdef SUPPORT_LIBZ
                   1519: gzFile ingz = NULL;
                   1520: #endif
                   1521: 
                   1522: #ifdef SUPPORT_LIBBZ2
                   1523: BZFILE *inbz2 = NULL;
                   1524: #endif
                   1525: 
                   1526: 
                   1527: /* Do the first read into the start of the buffer and set up the pointer to end
                   1528: of what we have. In the case of libz, a non-zipped .gz file will be read as a
                   1529: plain file. However, if a .bz2 file isn't actually bzipped, the first read will
                   1530: fail. */
                   1531: 
1.1.1.4   misho    1532: (void)frtype;
                   1533: 
1.1       misho    1534: #ifdef SUPPORT_LIBZ
                   1535: if (frtype == FR_LIBZ)
                   1536:   {
                   1537:   ingz = (gzFile)handle;
                   1538:   bufflength = gzread (ingz, main_buffer, bufsize);
                   1539:   }
                   1540: else
                   1541: #endif
                   1542: 
                   1543: #ifdef SUPPORT_LIBBZ2
                   1544: if (frtype == FR_LIBBZ2)
                   1545:   {
                   1546:   inbz2 = (BZFILE *)handle;
                   1547:   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
                   1548:   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
                   1549:   }                                    /* without the cast it is unsigned. */
                   1550: else
                   1551: #endif
                   1552: 
                   1553:   {
                   1554:   in = (FILE *)handle;
                   1555:   if (is_file_tty(in)) input_line_buffered = TRUE;
                   1556:   bufflength = input_line_buffered?
                   1557:     read_one_line(main_buffer, bufsize, in) :
                   1558:     fread(main_buffer, 1, bufsize, in);
                   1559:   }
                   1560: 
                   1561: endptr = main_buffer + bufflength;
                   1562: 
1.1.1.3   misho    1563: /* Unless binary-files=text, see if we have a binary file. This uses the same
                   1564: rule as GNU grep, namely, a search for a binary zero byte near the start of the
                   1565: file. */
                   1566: 
                   1567: if (binary_files != BIN_TEXT)
                   1568:   {
                   1569:   binary =
                   1570:     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
                   1571:   if (binary && binary_files == BIN_NOMATCH) return 1;
                   1572:   }
                   1573: 
1.1       misho    1574: /* Loop while the current pointer is not at the end of the file. For large
                   1575: files, endptr will be at the end of the buffer when we are in the middle of the
                   1576: file, but ptr will never get there, because as soon as it gets over 2/3 of the
                   1577: way, the buffer is shifted left and re-filled. */
                   1578: 
                   1579: while (ptr < endptr)
                   1580:   {
                   1581:   int endlinelength;
                   1582:   int mrc = 0;
                   1583:   int startoffset = 0;
1.1.1.4   misho    1584:   unsigned int options = 0;
1.1       misho    1585:   BOOL match;
                   1586:   char *matchptr = ptr;
                   1587:   char *t = ptr;
                   1588:   size_t length, linelength;
                   1589: 
                   1590:   /* At this point, ptr is at the start of a line. We need to find the length
                   1591:   of the subject string to pass to pcre_exec(). In multiline mode, it is the
                   1592:   length remainder of the data in the buffer. Otherwise, it is the length of
                   1593:   the next line, excluding the terminating newline. After matching, we always
                   1594:   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
                   1595:   option is used for compiling, so that any match is constrained to be in the
                   1596:   first line. */
                   1597: 
                   1598:   t = end_of_line(t, endptr, &endlinelength);
                   1599:   linelength = t - ptr - endlinelength;
                   1600:   length = multiline? (size_t)(endptr - ptr) : linelength;
                   1601: 
                   1602:   /* Check to see if the line we are looking at extends right to the very end
                   1603:   of the buffer without a line terminator. This means the line is too long to
                   1604:   handle. */
                   1605: 
                   1606:   if (endlinelength == 0 && t == main_buffer + bufsize)
                   1607:     {
                   1608:     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
                   1609:                     "pcregrep: check the --buffer-size option\n",
                   1610:                     linenumber,
                   1611:                     (filename == NULL)? "" : " of file ",
                   1612:                     (filename == NULL)? "" : filename);
                   1613:     return 2;
                   1614:     }
                   1615: 
                   1616:   /* Extra processing for Jeffrey Friedl's debugging. */
                   1617: 
                   1618: #ifdef JFRIEDL_DEBUG
                   1619:   if (jfriedl_XT || jfriedl_XR)
                   1620:   {
1.1.1.4   misho    1621: #     include <sys/time.h>
                   1622: #     include <time.h>
1.1       misho    1623:       struct timeval start_time, end_time;
                   1624:       struct timezone dummy;
                   1625:       int i;
                   1626: 
                   1627:       if (jfriedl_XT)
                   1628:       {
                   1629:           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
                   1630:           const char *orig = ptr;
                   1631:           ptr = malloc(newlen + 1);
                   1632:           if (!ptr) {
                   1633:                   printf("out of memory");
                   1634:                   pcregrep_exit(2);
                   1635:           }
                   1636:           endptr = ptr;
                   1637:           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
                   1638:           for (i = 0; i < jfriedl_XT; i++) {
                   1639:                   strncpy(endptr, orig,  length);
                   1640:                   endptr += length;
                   1641:           }
                   1642:           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
                   1643:           length = newlen;
                   1644:       }
                   1645: 
                   1646:       if (gettimeofday(&start_time, &dummy) != 0)
                   1647:               perror("bad gettimeofday");
                   1648: 
                   1649: 
                   1650:       for (i = 0; i < jfriedl_XR; i++)
1.1.1.4   misho    1651:           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1.1       misho    1652:               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
                   1653: 
                   1654:       if (gettimeofday(&end_time, &dummy) != 0)
                   1655:               perror("bad gettimeofday");
                   1656: 
                   1657:       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
                   1658:                       -
                   1659:                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
                   1660: 
                   1661:       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
                   1662:       return 0;
                   1663:   }
                   1664: #endif
                   1665: 
1.1.1.4   misho    1666:   /* We come back here after a match when show_only_matching is set, in order
                   1667:   to find any further matches in the same line. This applies to
                   1668:   --only-matching, --file-offsets, and --line-offsets. */
1.1       misho    1669: 
                   1670:   ONLY_MATCHING_RESTART:
                   1671: 
                   1672:   /* Run through all the patterns until one matches or there is an error other
                   1673:   than NOMATCH. This code is in a subroutine so that it can be re-used for
1.1.1.4   misho    1674:   finding subsequent matches when colouring matched lines. After finding one
                   1675:   match, set PCRE_NOTEMPTY to disable any further matches of null strings in
                   1676:   this line. */
1.1       misho    1677: 
1.1.1.4   misho    1678:   match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
                   1679:   options = PCRE_NOTEMPTY;
1.1       misho    1680: 
                   1681:   /* If it's a match or a not-match (as required), do what's wanted. */
                   1682: 
                   1683:   if (match != invert)
                   1684:     {
                   1685:     BOOL hyphenprinted = FALSE;
                   1686: 
                   1687:     /* We've failed if we want a file that doesn't have any matches. */
                   1688: 
                   1689:     if (filenames == FN_NOMATCH_ONLY) return 1;
                   1690: 
                   1691:     /* Just count if just counting is wanted. */
                   1692: 
                   1693:     if (count_only) count++;
                   1694: 
1.1.1.3   misho    1695:     /* When handling a binary file and binary-files==binary, the "binary"
                   1696:     variable will be set true (it's false in all other cases). In this
                   1697:     situation we just want to output the file name. No need to scan further. */
                   1698: 
                   1699:     else if (binary)
                   1700:       {
                   1701:       fprintf(stdout, "Binary file %s matches\n", filename);
                   1702:       return 0;
                   1703:       }
                   1704: 
1.1       misho    1705:     /* If all we want is a file name, there is no need to scan any more lines
                   1706:     in the file. */
                   1707: 
                   1708:     else if (filenames == FN_MATCH_ONLY)
                   1709:       {
                   1710:       fprintf(stdout, "%s\n", printname);
                   1711:       return 0;
                   1712:       }
                   1713: 
                   1714:     /* Likewise, if all we want is a yes/no answer. */
                   1715: 
                   1716:     else if (quiet) return 0;
                   1717: 
1.1.1.4   misho    1718:     /* The --only-matching option prints just the substring that matched,
                   1719:     and/or one or more captured portions of it, as long as these strings are
                   1720:     not empty. The --file-offsets and --line-offsets options output offsets for
                   1721:     the matching substring (all three set show_only_matching). None of these
                   1722:     mutually exclusive options prints any context. Afterwards, adjust the start
                   1723:     and then jump back to look for further matches in the same line. If we are
                   1724:     in invert mode, however, nothing is printed and we do not restart - this
                   1725:     could still be useful because the return code is set. */
1.1       misho    1726: 
1.1.1.4   misho    1727:     else if (show_only_matching)
1.1       misho    1728:       {
                   1729:       if (!invert)
                   1730:         {
                   1731:         if (printname != NULL) fprintf(stdout, "%s:", printname);
                   1732:         if (number) fprintf(stdout, "%d:", linenumber);
1.1.1.4   misho    1733: 
                   1734:         /* Handle --line-offsets */
                   1735: 
1.1       misho    1736:         if (line_offsets)
                   1737:           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
                   1738:             offsets[1] - offsets[0]);
1.1.1.4   misho    1739: 
                   1740:         /* Handle --file-offsets */
                   1741: 
1.1       misho    1742:         else if (file_offsets)
                   1743:           fprintf(stdout, "%d,%d\n",
                   1744:             (int)(filepos + matchptr + offsets[0] - ptr),
                   1745:             offsets[1] - offsets[0]);
1.1.1.4   misho    1746: 
                   1747:         /* Handle --only-matching, which may occur many times */
                   1748: 
                   1749:         else
1.1       misho    1750:           {
1.1.1.4   misho    1751:           BOOL printed = FALSE;
                   1752:           omstr *om;
                   1753: 
                   1754:           for (om = only_matching; om != NULL; om = om->next)
1.1       misho    1755:             {
1.1.1.4   misho    1756:             int n = om->groupnum;
                   1757:             if (n < mrc)
                   1758:               {
                   1759:               int plen = offsets[2*n + 1] - offsets[2*n];
                   1760:               if (plen > 0)
                   1761:                 {
                   1762:                 if (printed) fprintf(stdout, "%s", om_separator);
                   1763:                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
                   1764:                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
                   1765:                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
                   1766:                 printed = TRUE;
                   1767:                 }
                   1768:               }
1.1       misho    1769:             }
1.1.1.4   misho    1770: 
                   1771:           if (printed || printname != NULL || number) fprintf(stdout, "\n");
1.1       misho    1772:           }
1.1.1.4   misho    1773: 
                   1774:         /* Prepare to repeat to find the next match */
                   1775: 
1.1       misho    1776:         match = FALSE;
                   1777:         if (line_buffered) fflush(stdout);
                   1778:         rc = 0;                      /* Had some success */
                   1779:         startoffset = offsets[1];    /* Restart after the match */
                   1780:         goto ONLY_MATCHING_RESTART;
                   1781:         }
                   1782:       }
                   1783: 
                   1784:     /* This is the default case when none of the above options is set. We print
                   1785:     the matching line(s), possibly preceded and/or followed by other lines of
                   1786:     context. */
                   1787: 
                   1788:     else
                   1789:       {
                   1790:       /* See if there is a requirement to print some "after" lines from a
                   1791:       previous match. We never print any overlaps. */
                   1792: 
                   1793:       if (after_context > 0 && lastmatchnumber > 0)
                   1794:         {
                   1795:         int ellength;
                   1796:         int linecount = 0;
                   1797:         char *p = lastmatchrestart;
                   1798: 
                   1799:         while (p < ptr && linecount < after_context)
                   1800:           {
                   1801:           p = end_of_line(p, ptr, &ellength);
                   1802:           linecount++;
                   1803:           }
                   1804: 
                   1805:         /* It is important to advance lastmatchrestart during this printing so
                   1806:         that it interacts correctly with any "before" printing below. Print
                   1807:         each line's data using fwrite() in case there are binary zeroes. */
                   1808: 
                   1809:         while (lastmatchrestart < p)
                   1810:           {
                   1811:           char *pp = lastmatchrestart;
                   1812:           if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1813:           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
                   1814:           pp = end_of_line(pp, endptr, &ellength);
                   1815:           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
                   1816:           lastmatchrestart = pp;
                   1817:           }
                   1818:         if (lastmatchrestart != ptr) hyphenpending = TRUE;
                   1819:         }
                   1820: 
                   1821:       /* If there were non-contiguous lines printed above, insert hyphens. */
                   1822: 
                   1823:       if (hyphenpending)
                   1824:         {
                   1825:         fprintf(stdout, "--\n");
                   1826:         hyphenpending = FALSE;
                   1827:         hyphenprinted = TRUE;
                   1828:         }
                   1829: 
                   1830:       /* See if there is a requirement to print some "before" lines for this
                   1831:       match. Again, don't print overlaps. */
                   1832: 
                   1833:       if (before_context > 0)
                   1834:         {
                   1835:         int linecount = 0;
                   1836:         char *p = ptr;
                   1837: 
                   1838:         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
                   1839:                linecount < before_context)
                   1840:           {
                   1841:           linecount++;
                   1842:           p = previous_line(p, main_buffer);
                   1843:           }
                   1844: 
                   1845:         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
                   1846:           fprintf(stdout, "--\n");
                   1847: 
                   1848:         while (p < ptr)
                   1849:           {
                   1850:           int ellength;
                   1851:           char *pp = p;
                   1852:           if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1853:           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
                   1854:           pp = end_of_line(pp, endptr, &ellength);
                   1855:           FWRITE(p, 1, pp - p, stdout);
                   1856:           p = pp;
                   1857:           }
                   1858:         }
                   1859: 
                   1860:       /* Now print the matching line(s); ensure we set hyphenpending at the end
                   1861:       of the file if any context lines are being output. */
                   1862: 
                   1863:       if (after_context > 0 || before_context > 0)
                   1864:         endhyphenpending = TRUE;
                   1865: 
                   1866:       if (printname != NULL) fprintf(stdout, "%s:", printname);
                   1867:       if (number) fprintf(stdout, "%d:", linenumber);
                   1868: 
                   1869:       /* In multiline mode, we want to print to the end of the line in which
                   1870:       the end of the matched string is found, so we adjust linelength and the
                   1871:       line number appropriately, but only when there actually was a match
                   1872:       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
                   1873:       the match will always be before the first newline sequence. */
                   1874: 
                   1875:       if (multiline & !invert)
                   1876:         {
                   1877:         char *endmatch = ptr + offsets[1];
                   1878:         t = ptr;
1.1.1.5 ! misho    1879:         while (t <= endmatch)
1.1       misho    1880:           {
                   1881:           t = end_of_line(t, endptr, &endlinelength);
                   1882:           if (t < endmatch) linenumber++; else break;
                   1883:           }
                   1884:         linelength = t - ptr - endlinelength;
                   1885:         }
                   1886: 
                   1887:       /*** NOTE: Use only fwrite() to output the data line, so that binary
                   1888:       zeroes are treated as just another data character. */
                   1889: 
                   1890:       /* This extra option, for Jeffrey Friedl's debugging requirements,
                   1891:       replaces the matched string, or a specific captured string if it exists,
                   1892:       with X. When this happens, colouring is ignored. */
                   1893: 
                   1894: #ifdef JFRIEDL_DEBUG
                   1895:       if (S_arg >= 0 && S_arg < mrc)
                   1896:         {
                   1897:         int first = S_arg * 2;
                   1898:         int last  = first + 1;
                   1899:         FWRITE(ptr, 1, offsets[first], stdout);
                   1900:         fprintf(stdout, "X");
                   1901:         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
                   1902:         }
                   1903:       else
                   1904: #endif
                   1905: 
                   1906:       /* We have to split the line(s) up if colouring, and search for further
                   1907:       matches, but not of course if the line is a non-match. */
                   1908: 
                   1909:       if (do_colour && !invert)
                   1910:         {
                   1911:         int plength;
                   1912:         FWRITE(ptr, 1, offsets[0], stdout);
                   1913:         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
                   1914:         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
                   1915:         fprintf(stdout, "%c[00m", 0x1b);
                   1916:         for (;;)
                   1917:           {
                   1918:           startoffset = offsets[1];
                   1919:           if (startoffset >= (int)linelength + endlinelength ||
1.1.1.4   misho    1920:               !match_patterns(matchptr, length, options, startoffset, offsets,
                   1921:                 &mrc))
1.1       misho    1922:             break;
                   1923:           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
                   1924:           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
                   1925:           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
                   1926:           fprintf(stdout, "%c[00m", 0x1b);
                   1927:           }
                   1928: 
                   1929:         /* In multiline mode, we may have already printed the complete line
                   1930:         and its line-ending characters (if they matched the pattern), so there
                   1931:         may be no more to print. */
                   1932: 
                   1933:         plength = (int)((linelength + endlinelength) - startoffset);
                   1934:         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
                   1935:         }
                   1936: 
                   1937:       /* Not colouring; no need to search for further matches */
                   1938: 
                   1939:       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
                   1940:       }
                   1941: 
                   1942:     /* End of doing what has to be done for a match. If --line-buffered was
                   1943:     given, flush the output. */
                   1944: 
                   1945:     if (line_buffered) fflush(stdout);
                   1946:     rc = 0;    /* Had some success */
                   1947: 
                   1948:     /* Remember where the last match happened for after_context. We remember
                   1949:     where we are about to restart, and that line's number. */
                   1950: 
                   1951:     lastmatchrestart = ptr + linelength + endlinelength;
                   1952:     lastmatchnumber = linenumber + 1;
                   1953:     }
                   1954: 
                   1955:   /* For a match in multiline inverted mode (which of course did not cause
                   1956:   anything to be printed), we have to move on to the end of the match before
                   1957:   proceeding. */
                   1958: 
                   1959:   if (multiline && invert && match)
                   1960:     {
                   1961:     int ellength;
                   1962:     char *endmatch = ptr + offsets[1];
                   1963:     t = ptr;
                   1964:     while (t < endmatch)
                   1965:       {
                   1966:       t = end_of_line(t, endptr, &ellength);
                   1967:       if (t <= endmatch) linenumber++; else break;
                   1968:       }
                   1969:     endmatch = end_of_line(endmatch, endptr, &ellength);
                   1970:     linelength = endmatch - ptr - ellength;
                   1971:     }
                   1972: 
                   1973:   /* Advance to after the newline and increment the line number. The file
                   1974:   offset to the current line is maintained in filepos. */
                   1975: 
                   1976:   ptr += linelength + endlinelength;
                   1977:   filepos += (int)(linelength + endlinelength);
                   1978:   linenumber++;
                   1979: 
                   1980:   /* If input is line buffered, and the buffer is not yet full, read another
                   1981:   line and add it into the buffer. */
                   1982: 
                   1983:   if (input_line_buffered && bufflength < (size_t)bufsize)
                   1984:     {
                   1985:     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
                   1986:     bufflength += add;
                   1987:     endptr += add;
                   1988:     }
                   1989: 
                   1990:   /* If we haven't yet reached the end of the file (the buffer is full), and
                   1991:   the current point is in the top 1/3 of the buffer, slide the buffer down by
                   1992:   1/3 and refill it. Before we do this, if some unprinted "after" lines are
                   1993:   about to be lost, print them. */
                   1994: 
                   1995:   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
                   1996:     {
                   1997:     if (after_context > 0 &&
                   1998:         lastmatchnumber > 0 &&
                   1999:         lastmatchrestart < main_buffer + bufthird)
                   2000:       {
                   2001:       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
                   2002:       lastmatchnumber = 0;
                   2003:       }
                   2004: 
                   2005:     /* Now do the shuffle */
                   2006: 
                   2007:     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
                   2008:     ptr -= bufthird;
                   2009: 
                   2010: #ifdef SUPPORT_LIBZ
                   2011:     if (frtype == FR_LIBZ)
                   2012:       bufflength = 2*bufthird +
                   2013:         gzread (ingz, main_buffer + 2*bufthird, bufthird);
                   2014:     else
                   2015: #endif
                   2016: 
                   2017: #ifdef SUPPORT_LIBBZ2
                   2018:     if (frtype == FR_LIBBZ2)
                   2019:       bufflength = 2*bufthird +
                   2020:         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
                   2021:     else
                   2022: #endif
                   2023: 
                   2024:     bufflength = 2*bufthird +
                   2025:       (input_line_buffered?
                   2026:        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
                   2027:        fread(main_buffer + 2*bufthird, 1, bufthird, in));
                   2028:     endptr = main_buffer + bufflength;
                   2029: 
                   2030:     /* Adjust any last match point */
                   2031: 
                   2032:     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
                   2033:     }
                   2034:   }     /* Loop through the whole file */
                   2035: 
                   2036: /* End of file; print final "after" lines if wanted; do_after_lines sets
                   2037: hyphenpending if it prints something. */
                   2038: 
1.1.1.4   misho    2039: if (!show_only_matching && !count_only)
1.1       misho    2040:   {
                   2041:   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
                   2042:   hyphenpending |= endhyphenpending;
                   2043:   }
                   2044: 
                   2045: /* Print the file name if we are looking for those without matches and there
                   2046: were none. If we found a match, we won't have got this far. */
                   2047: 
                   2048: if (filenames == FN_NOMATCH_ONLY)
                   2049:   {
                   2050:   fprintf(stdout, "%s\n", printname);
                   2051:   return 0;
                   2052:   }
                   2053: 
                   2054: /* Print the match count if wanted */
                   2055: 
                   2056: if (count_only)
                   2057:   {
                   2058:   if (count > 0 || !omit_zero_count)
                   2059:     {
                   2060:     if (printname != NULL && filenames != FN_NONE)
                   2061:       fprintf(stdout, "%s:", printname);
                   2062:     fprintf(stdout, "%d\n", count);
                   2063:     }
                   2064:   }
                   2065: 
                   2066: return rc;
                   2067: }
                   2068: 
                   2069: 
                   2070: 
                   2071: /*************************************************
                   2072: *     Grep a file or recurse into a directory    *
                   2073: *************************************************/
                   2074: 
                   2075: /* Given a path name, if it's a directory, scan all the files if we are
                   2076: recursing; if it's a file, grep it.
                   2077: 
                   2078: Arguments:
                   2079:   pathname          the path to investigate
                   2080:   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
                   2081:   only_one_at_top   TRUE if the path is the only one at toplevel
                   2082: 
1.1.1.4   misho    2083: Returns:  -1 the file/directory was skipped
                   2084:            0 if there was at least one match
1.1       misho    2085:            1 if there were no matches
                   2086:            2 there was some kind of error
                   2087: 
                   2088: However, file opening failures are suppressed if "silent" is set.
                   2089: */
                   2090: 
                   2091: static int
                   2092: grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
                   2093: {
                   2094: int rc = 1;
                   2095: int frtype;
                   2096: void *handle;
1.1.1.4   misho    2097: char *lastcomp;
1.1       misho    2098: FILE *in = NULL;           /* Ensure initialized */
                   2099: 
                   2100: #ifdef SUPPORT_LIBZ
                   2101: gzFile ingz = NULL;
                   2102: #endif
                   2103: 
                   2104: #ifdef SUPPORT_LIBBZ2
                   2105: BZFILE *inbz2 = NULL;
                   2106: #endif
                   2107: 
1.1.1.3   misho    2108: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1.1.1.2   misho    2109: int pathlen;
                   2110: #endif
                   2111: 
1.1.1.5 ! misho    2112: #if defined NATIVE_ZOS
        !          2113: int zos_type;
        !          2114: FILE *zos_test_file;
        !          2115: #endif
        !          2116: 
1.1       misho    2117: /* If the file name is "-" we scan stdin */
                   2118: 
                   2119: if (strcmp(pathname, "-") == 0)
                   2120:   {
                   2121:   return pcregrep(stdin, FR_PLAIN, stdin_name,
                   2122:     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
                   2123:       stdin_name : NULL);
                   2124:   }
                   2125: 
1.1.1.4   misho    2126: /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
                   2127: directories, whereas --include and --exclude apply to everything else. The test
                   2128: is against the final component of the path. */
                   2129: 
                   2130: lastcomp = strrchr(pathname, FILESEP);
                   2131: lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
                   2132: 
                   2133: /* If the file is a directory, skip if not recursing or if explicitly excluded.
                   2134: Otherwise, scan the directory and recurse for each path within it. The scanning
                   2135: code is localized so it can be made system-specific. */
                   2136: 
1.1.1.5 ! misho    2137: 
        !          2138: /* For z/OS, determine the file type. */
        !          2139: 
        !          2140: #if defined NATIVE_ZOS
        !          2141: zos_test_file =  fopen(pathname,"rb");
        !          2142: 
        !          2143: if (zos_test_file == NULL)
        !          2144:    {
        !          2145:    if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
        !          2146:      pathname, strerror(errno));
        !          2147:    return -1;
        !          2148:    }
        !          2149: zos_type = identifyzosfiletype (zos_test_file);
        !          2150: fclose (zos_test_file);
        !          2151: 
        !          2152: /* Handle a PDS in separate code */
        !          2153: 
        !          2154: if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
        !          2155:    {
        !          2156:    return travelonpdsdir (pathname, only_one_at_top);
        !          2157:    }
        !          2158: 
        !          2159: /* Deal with regular files in the normal way below. These types are:
        !          2160:    zos_type == __ZOS_PDS_MEMBER
        !          2161:    zos_type == __ZOS_PS
        !          2162:    zos_type == __ZOS_VSAM_KSDS
        !          2163:    zos_type == __ZOS_VSAM_ESDS
        !          2164:    zos_type == __ZOS_VSAM_RRDS
        !          2165: */
        !          2166: 
        !          2167: /* Handle a z/OS directory using common code. */
        !          2168: 
        !          2169: else if (zos_type == __ZOS_HFS)
        !          2170:  {
        !          2171: #endif  /* NATIVE_ZOS */
        !          2172: 
        !          2173: 
        !          2174: /* Handle directories: common code for all OS */
        !          2175: 
1.1.1.4   misho    2176: if (isdirectory(pathname))
                   2177:   {
                   2178:   if (dee_action == dee_SKIP ||
                   2179:       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
                   2180:     return -1;
1.1       misho    2181: 
                   2182:   if (dee_action == dee_RECURSE)
                   2183:     {
                   2184:     char buffer[1024];
                   2185:     char *nextfile;
                   2186:     directory_type *dir = opendirectory(pathname);
                   2187: 
                   2188:     if (dir == NULL)
                   2189:       {
                   2190:       if (!silent)
                   2191:         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
                   2192:           strerror(errno));
                   2193:       return 2;
                   2194:       }
                   2195: 
                   2196:     while ((nextfile = readdirectory(dir)) != NULL)
                   2197:       {
1.1.1.4   misho    2198:       int frc;
                   2199:       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1.1       misho    2200:       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
                   2201:       if (frc > 1) rc = frc;
                   2202:        else if (frc == 0 && rc == 1) rc = 0;
                   2203:       }
                   2204: 
                   2205:     closedirectory(dir);
                   2206:     return rc;
                   2207:     }
                   2208:   }
                   2209: 
1.1.1.5 ! misho    2210: #if defined NATIVE_ZOS
        !          2211:  }
        !          2212: #endif
1.1       misho    2213: 
1.1.1.5 ! misho    2214: /* If the file is not a directory, check for a regular file, and if it is not,
        !          2215: skip it if that's been requested. Otherwise, check for an explicit inclusion or
        !          2216: exclusion. */
        !          2217: 
        !          2218: else if (
        !          2219: #if defined NATIVE_ZOS
        !          2220:         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
        !          2221: #else  /* all other OS */
        !          2222:         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
        !          2223: #endif
        !          2224:         !test_incexc(lastcomp, include_patterns, exclude_patterns))
        !          2225:   return -1;  /* File skipped */
1.1       misho    2226: 
                   2227: /* Control reaches here if we have a regular file, or if we have a directory
                   2228: and recursion or skipping was not requested, or if we have anything else and
                   2229: skipping was not requested. The scan proceeds. If this is the first and only
                   2230: argument at top level, we don't show the file name, unless we are only showing
                   2231: the file name, or the filename was forced (-H). */
                   2232: 
1.1.1.3   misho    2233: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1.1       misho    2234: pathlen = (int)(strlen(pathname));
1.1.1.2   misho    2235: #endif
1.1       misho    2236: 
                   2237: /* Open using zlib if it is supported and the file name ends with .gz. */
                   2238: 
                   2239: #ifdef SUPPORT_LIBZ
                   2240: if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
                   2241:   {
                   2242:   ingz = gzopen(pathname, "rb");
                   2243:   if (ingz == NULL)
                   2244:     {
                   2245:     if (!silent)
                   2246:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
                   2247:         strerror(errno));
                   2248:     return 2;
                   2249:     }
                   2250:   handle = (void *)ingz;
                   2251:   frtype = FR_LIBZ;
                   2252:   }
                   2253: else
                   2254: #endif
                   2255: 
                   2256: /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
                   2257: 
                   2258: #ifdef SUPPORT_LIBBZ2
                   2259: if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
                   2260:   {
                   2261:   inbz2 = BZ2_bzopen(pathname, "rb");
                   2262:   handle = (void *)inbz2;
                   2263:   frtype = FR_LIBBZ2;
                   2264:   }
                   2265: else
                   2266: #endif
                   2267: 
                   2268: /* Otherwise use plain fopen(). The label is so that we can come back here if
                   2269: an attempt to read a .bz2 file indicates that it really is a plain file. */
                   2270: 
                   2271: #ifdef SUPPORT_LIBBZ2
                   2272: PLAIN_FILE:
                   2273: #endif
                   2274:   {
                   2275:   in = fopen(pathname, "rb");
                   2276:   handle = (void *)in;
                   2277:   frtype = FR_PLAIN;
                   2278:   }
                   2279: 
                   2280: /* All the opening methods return errno when they fail. */
                   2281: 
                   2282: if (handle == NULL)
                   2283:   {
                   2284:   if (!silent)
                   2285:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
                   2286:       strerror(errno));
                   2287:   return 2;
                   2288:   }
                   2289: 
                   2290: /* Now grep the file */
                   2291: 
                   2292: rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
                   2293:   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
                   2294: 
                   2295: /* Close in an appropriate manner. */
                   2296: 
                   2297: #ifdef SUPPORT_LIBZ
                   2298: if (frtype == FR_LIBZ)
                   2299:   gzclose(ingz);
                   2300: else
                   2301: #endif
                   2302: 
                   2303: /* If it is a .bz2 file and the result is 3, it means that the first attempt to
                   2304: read failed. If the error indicates that the file isn't in fact bzipped, try
                   2305: again as a normal file. */
                   2306: 
                   2307: #ifdef SUPPORT_LIBBZ2
                   2308: if (frtype == FR_LIBBZ2)
                   2309:   {
                   2310:   if (rc == 3)
                   2311:     {
                   2312:     int errnum;
                   2313:     const char *err = BZ2_bzerror(inbz2, &errnum);
                   2314:     if (errnum == BZ_DATA_ERROR_MAGIC)
                   2315:       {
                   2316:       BZ2_bzclose(inbz2);
                   2317:       goto PLAIN_FILE;
                   2318:       }
                   2319:     else if (!silent)
                   2320:       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
                   2321:         pathname, err);
                   2322:     rc = 2;    /* The normal "something went wrong" code */
                   2323:     }
                   2324:   BZ2_bzclose(inbz2);
                   2325:   }
                   2326: else
                   2327: #endif
                   2328: 
                   2329: /* Normal file close */
                   2330: 
                   2331: fclose(in);
                   2332: 
                   2333: /* Pass back the yield from pcregrep(). */
                   2334: 
                   2335: return rc;
                   2336: }
                   2337: 
                   2338: 
                   2339: 
                   2340: /*************************************************
                   2341: *    Handle a single-letter, no data option      *
                   2342: *************************************************/
                   2343: 
                   2344: static int
                   2345: handle_option(int letter, int options)
                   2346: {
                   2347: switch(letter)
                   2348:   {
                   2349:   case N_FOFFSETS: file_offsets = TRUE; break;
                   2350:   case N_HELP: help(); pcregrep_exit(0);
                   2351:   case N_LBUFFER: line_buffered = TRUE; break;
                   2352:   case N_LOFFSETS: line_offsets = number = TRUE; break;
                   2353:   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1.1.1.3   misho    2354:   case 'a': binary_files = BIN_TEXT; break;
1.1       misho    2355:   case 'c': count_only = TRUE; break;
                   2356:   case 'F': process_options |= PO_FIXED_STRINGS; break;
                   2357:   case 'H': filenames = FN_FORCE; break;
1.1.1.3   misho    2358:   case 'I': binary_files = BIN_NOMATCH; break;
1.1       misho    2359:   case 'h': filenames = FN_NONE; break;
                   2360:   case 'i': options |= PCRE_CASELESS; break;
                   2361:   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
                   2362:   case 'L': filenames = FN_NOMATCH_ONLY; break;
                   2363:   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
                   2364:   case 'n': number = TRUE; break;
1.1.1.4   misho    2365: 
                   2366:   case 'o':
                   2367:   only_matching_last = add_number(0, only_matching_last);
                   2368:   if (only_matching == NULL) only_matching = only_matching_last;
                   2369:   break;
                   2370: 
1.1       misho    2371:   case 'q': quiet = TRUE; break;
                   2372:   case 'r': dee_action = dee_RECURSE; break;
                   2373:   case 's': silent = TRUE; break;
                   2374:   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
                   2375:   case 'v': invert = TRUE; break;
                   2376:   case 'w': process_options |= PO_WORD_MATCH; break;
                   2377:   case 'x': process_options |= PO_LINE_MATCH; break;
                   2378: 
                   2379:   case 'V':
1.1.1.4   misho    2380:   fprintf(stdout, "pcregrep version %s\n", pcre_version());
1.1       misho    2381:   pcregrep_exit(0);
                   2382:   break;
                   2383: 
                   2384:   default:
                   2385:   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
                   2386:   pcregrep_exit(usage(2));
                   2387:   }
                   2388: 
                   2389: return options;
                   2390: }
                   2391: 
                   2392: 
                   2393: 
                   2394: 
                   /*************************************************
                   *          Construct printed ordinal             *
                   *************************************************/

                   /* This turns a number into "1st", "3rd", "11th", etc. The suffix depends on the
                   last two digits: 11, 12, and 13 always take "th"; otherwise a final 1, 2, or 3
                   takes "st", "nd", or "rd", and everything else takes "th".

                   Argument:   the number
                   Returns:    pointer to a static buffer holding the text; it is overwritten
                               by the next call
                   */

                   static char *
                   ordin(int n)
                   {
                   /* Three characters per byte is more than enough for the decimal digits of an
                   int; the extra four are for a minus sign, the two-letter suffix, and the
                   terminating zero. */

                   static char buffer[3 * sizeof(int) + 4];
                   char *p = buffer;
                   int lasttwo = n % 100;

                   if (lasttwo < 0) lasttwo = -lasttwo;   /* Remainder of a negative is negative */

                   p += sprintf(p, "%d", n);              /* Step past the digits just written */

                   if (lasttwo >= 11 && lasttwo <= 13)
                     {
                     strcpy(p, "th");                     /* The "teens" are all irregular */
                     }
                   else switch (lasttwo % 10)
                     {
                     case 1: strcpy(p, "st"); break;
                     case 2: strcpy(p, "nd"); break;
                     case 3: strcpy(p, "rd"); break;
                     default: strcpy(p, "th"); break;
                     }

                   return buffer;
                   }
                   2417: 
                   2418: 
                   2419: 
                   2420: /*************************************************
                   2421: *          Compile a single pattern              *
                   2422: *************************************************/
                   2423: 
1.1.1.4   misho    2424: /* Do nothing if the pattern has already been compiled. This is the case for
                   2425: include/exclude patterns read from a file.
                   2426: 
                   2427: When the -F option has been used, each "pattern" may be a list of strings,
                   2428: separated by line breaks. They will be matched literally. We split such a
                   2429: string and compile the first substring, inserting an additional block into the
                   2430: pattern chain.
1.1       misho    2431: 
                   2432: Arguments:
1.1.1.4   misho    2433:   p              points to the pattern block
1.1       misho    2434:   options        the PCRE options
1.1.1.4   misho    2435:   popts          the processing options
                   2436:   fromfile       TRUE if the pattern was read from a file
                   2437:   fromtext       file name or identifying text (e.g. "include")
1.1       misho    2438:   count          0 if this is the only command line pattern, or
                   2439:                  number of the command line pattern, or
                   2440:                  linenumber for a pattern from a file
                   2441: 
                   2442: Returns:         TRUE on success, FALSE after an error
                   2443: */
                   2444: 
                   2445: static BOOL
1.1.1.4   misho    2446: compile_pattern(patstr *p, int options, int popts, int fromfile,
                   2447:   const char *fromtext, int count)
1.1       misho    2448: {
                   2449: char buffer[PATBUFSIZE];
                   2450: const char *error;
1.1.1.4   misho    2451: char *ps = p->string;
                   2452: int patlen = strlen(ps);
1.1       misho    2453: int errptr;
                   2454: 
1.1.1.4   misho    2455: if (p->compiled != NULL) return TRUE;
1.1       misho    2456: 
1.1.1.4   misho    2457: if ((popts & PO_FIXED_STRINGS) != 0)
1.1       misho    2458:   {
1.1.1.4   misho    2459:   int ellength;
                   2460:   char *eop = ps + patlen;
                   2461:   char *pe = end_of_line(ps, eop, &ellength);
                   2462: 
                   2463:   if (ellength != 0)
                   2464:     {
                   2465:     if (add_pattern(pe, p) == NULL) return FALSE;
                   2466:     patlen = (int)(pe - ps - ellength);
                   2467:     }
1.1       misho    2468:   }
                   2469: 
1.1.1.4   misho    2470: sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
                   2471: p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
                   2472: if (p->compiled != NULL) return TRUE;
                   2473: 
1.1       misho    2474: /* Handle compile errors */
                   2475: 
1.1.1.4   misho    2476: errptr -= (int)strlen(prefix[popts]);
                   2477: if (errptr > patlen) errptr = patlen;
1.1       misho    2478: 
1.1.1.4   misho    2479: if (fromfile)
1.1       misho    2480:   {
1.1.1.4   misho    2481:   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
                   2482:     "at offset %d: %s\n", count, fromtext, errptr, error);
1.1       misho    2483:   }
                   2484: else
                   2485:   {
1.1.1.4   misho    2486:   if (count == 0)
                   2487:     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
                   2488:       fromtext, errptr, error);
                   2489:   else
                   2490:     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
                   2491:       ordin(count), fromtext, errptr, error);
1.1       misho    2492:   }
                   2493: 
                   2494: return FALSE;
                   2495: }
                   2496: 
                   2497: 
                   2498: 
                   2499: /*************************************************
1.1.1.4   misho    2500: *     Read and compile a file of patterns        *
1.1       misho    2501: *************************************************/
                   2502: 
1.1.1.4   misho    2503: /* This is used for --filelist, --include-from, and --exclude-from.
1.1       misho    2504: 
                   2505: Arguments:
1.1.1.4   misho    2506:   name         the name of the file; "-" is stdin
                   2507:   patptr       pointer to the pattern chain anchor
                   2508:   patlastptr   pointer to the last pattern pointer
                   2509:   popts        the process options to pass to pattern_compile()
1.1       misho    2510: 
1.1.1.4   misho    2511: Returns:       TRUE if all went well
1.1       misho    2512: */
                   2513: 
                   2514: static BOOL
1.1.1.4   misho    2515: read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
1.1       misho    2516: {
1.1.1.4   misho    2517: int linenumber = 0;
                   2518: FILE *f;
                   2519: char *filename;
                   2520: char buffer[PATBUFSIZE];
                   2521: 
                   2522: if (strcmp(name, "-") == 0)
1.1       misho    2523:   {
1.1.1.4   misho    2524:   f = stdin;
                   2525:   filename = stdin_name;
                   2526:   }
                   2527: else
                   2528:   {
                   2529:   f = fopen(name, "r");
                   2530:   if (f == NULL)
                   2531:     {
                   2532:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
                   2533:     return FALSE;
                   2534:     }
                   2535:   filename = name;
                   2536:   }
                   2537: 
                   2538: while (fgets(buffer, PATBUFSIZE, f) != NULL)
                   2539:   {
                   2540:   char *s = buffer + (int)strlen(buffer);
                   2541:   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
                   2542:   *s = 0;
                   2543:   linenumber++;
                   2544:   if (buffer[0] == 0) continue;   /* Skip blank lines */
                   2545: 
                   2546:   /* Note: this call to add_pattern() puts a pointer to the local variable
                   2547:   "buffer" into the pattern chain. However, that pointer is used only when
                   2548:   compiling the pattern, which happens immediately below, so we flatten it
                   2549:   afterwards, as a precaution against any later code trying to use it. */
                   2550: 
                   2551:   *patlastptr = add_pattern(buffer, *patlastptr);
                   2552:   if (*patlastptr == NULL) return FALSE;
                   2553:   if (*patptr == NULL) *patptr = *patlastptr;
                   2554: 
                   2555:   /* This loop is needed because compiling a "pattern" when -F is set may add
                   2556:   on additional literal patterns if the original contains a newline. In the
                   2557:   common case, it never will, because fgets() stops at a newline. However,
                   2558:   the -N option can be used to give pcregrep a different newline setting. */
                   2559: 
1.1       misho    2560:   for(;;)
                   2561:     {
1.1.1.4   misho    2562:     if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
                   2563:         linenumber))
1.1       misho    2564:       return FALSE;
1.1.1.4   misho    2565:     (*patlastptr)->string = NULL;            /* Insurance */
                   2566:     if ((*patlastptr)->next == NULL) break;
                   2567:     *patlastptr = (*patlastptr)->next;
1.1       misho    2568:     }
                   2569:   }
1.1.1.4   misho    2570: 
                   2571: if (f != stdin) fclose(f);
                   2572: return TRUE;
1.1       misho    2573: }
                   2574: 
                   2575: 
                   2576: 
                   2577: /*************************************************
                   2578: *                Main program                    *
                   2579: *************************************************/
                   2580: 
                   2581: /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
                   2582: 
                   2583: int
                   2584: main(int argc, char **argv)
                   2585: {
                   2586: int i, j;
                   2587: int rc = 1;
                   2588: BOOL only_one_at_top;
1.1.1.4   misho    2589: patstr *cp;
                   2590: fnstr *fn;
1.1       misho    2591: const char *locale_from = "--locale";
                   2592: const char *error;
                   2593: 
                   2594: #ifdef SUPPORT_PCREGREP_JIT
                   2595: pcre_jit_stack *jit_stack = NULL;
                   2596: #endif
                   2597: 
                   2598: /* Set the default line ending value from the default in the PCRE library;
                   2599: "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
                   2600: Note that the return values from pcre_config(), though derived from the ASCII
                   2601: codes, are the same in EBCDIC environments, so we must use the actual values
                   2602: rather than escapes such as '\r'. */
                   2603: 
                   2604: (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
                   2605: switch(i)
                   2606:   {
                   2607:   default:               newline = (char *)"lf"; break;
                   2608:   case 13:               newline = (char *)"cr"; break;
                   2609:   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
                   2610:   case -1:               newline = (char *)"any"; break;
                   2611:   case -2:               newline = (char *)"anycrlf"; break;
                   2612:   }
                   2613: 
                   2614: /* Process the options */
                   2615: 
                   2616: for (i = 1; i < argc; i++)
                   2617:   {
                   2618:   option_item *op = NULL;
                   2619:   char *option_data = (char *)"";    /* default to keep compiler happy */
                   2620:   BOOL longop;
                   2621:   BOOL longopwasequals = FALSE;
                   2622: 
                   2623:   if (argv[i][0] != '-') break;
                   2624: 
                   2625:   /* If we hit an argument that is just "-", it may be a reference to STDIN,
                   2626:   but only if we have previously had -e or -f to define the patterns. */
                   2627: 
                   2628:   if (argv[i][1] == 0)
                   2629:     {
1.1.1.4   misho    2630:     if (pattern_files != NULL || patterns != NULL) break;
1.1       misho    2631:       else pcregrep_exit(usage(2));
                   2632:     }
                   2633: 
                   2634:   /* Handle a long name option, or -- to terminate the options */
                   2635: 
                   2636:   if (argv[i][1] == '-')
                   2637:     {
                   2638:     char *arg = argv[i] + 2;
                   2639:     char *argequals = strchr(arg, '=');
                   2640: 
                   2641:     if (*arg == 0)    /* -- terminates options */
                   2642:       {
                   2643:       i++;
                   2644:       break;                /* out of the options-handling loop */
                   2645:       }
                   2646: 
                   2647:     longop = TRUE;
                   2648: 
                   2649:     /* Some long options have data that follows after =, for example file=name.
                   2650:     Some options have variations in the long name spelling: specifically, we
                   2651:     allow "regexp" because GNU grep allows it, though I personally go along
                   2652:     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
                   2653:     These options are entered in the table as "regex(p)". Options can be in
                   2654:     both these categories. */
                   2655: 
                   2656:     for (op = optionlist; op->one_char != 0; op++)
                   2657:       {
                   2658:       char *opbra = strchr(op->long_name, '(');
                   2659:       char *equals = strchr(op->long_name, '=');
                   2660: 
                   2661:       /* Handle options with only one spelling of the name */
                   2662: 
                   2663:       if (opbra == NULL)     /* Does not contain '(' */
                   2664:         {
                   2665:         if (equals == NULL)  /* Not thing=data case */
                   2666:           {
                   2667:           if (strcmp(arg, op->long_name) == 0) break;
                   2668:           }
                   2669:         else                 /* Special case xxx=data */
                   2670:           {
                   2671:           int oplen = (int)(equals - op->long_name);
                   2672:           int arglen = (argequals == NULL)?
                   2673:             (int)strlen(arg) : (int)(argequals - arg);
                   2674:           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
                   2675:             {
                   2676:             option_data = arg + arglen;
                   2677:             if (*option_data == '=')
                   2678:               {
                   2679:               option_data++;
                   2680:               longopwasequals = TRUE;
                   2681:               }
                   2682:             break;
                   2683:             }
                   2684:           }
                   2685:         }
                   2686: 
                   2687:       /* Handle options with an alternate spelling of the name */
                   2688: 
                   2689:       else
                   2690:         {
                   2691:         char buff1[24];
                   2692:         char buff2[24];
                   2693: 
                   2694:         int baselen = (int)(opbra - op->long_name);
                   2695:         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
                   2696:         int arglen = (argequals == NULL || equals == NULL)?
                   2697:           (int)strlen(arg) : (int)(argequals - arg);
                   2698: 
                   2699:         sprintf(buff1, "%.*s", baselen, op->long_name);
                   2700:         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
                   2701: 
                   2702:         if (strncmp(arg, buff1, arglen) == 0 ||
                   2703:            strncmp(arg, buff2, arglen) == 0)
                   2704:           {
                   2705:           if (equals != NULL && argequals != NULL)
                   2706:             {
                   2707:             option_data = argequals;
                   2708:             if (*option_data == '=')
                   2709:               {
                   2710:               option_data++;
                   2711:               longopwasequals = TRUE;
                   2712:               }
                   2713:             }
                   2714:           break;
                   2715:           }
                   2716:         }
                   2717:       }
                   2718: 
                   2719:     if (op->one_char == 0)
                   2720:       {
                   2721:       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
                   2722:       pcregrep_exit(usage(2));
                   2723:       }
                   2724:     }
                   2725: 
                   2726:   /* Jeffrey Friedl's debugging harness uses these additional options which
                   2727:   are not in the right form for putting in the option table because they use
                   2728:   only one hyphen, yet are more than one character long. By putting them
                   2729:   separately here, they will not get displayed as part of the help() output,
                   2730:   but I don't think Jeffrey will care about that. */
                   2731: 
                   2732: #ifdef JFRIEDL_DEBUG
                   2733:   else if (strcmp(argv[i], "-pre") == 0) {
                   2734:           jfriedl_prefix = argv[++i];
                   2735:           continue;
                   2736:   } else if (strcmp(argv[i], "-post") == 0) {
                   2737:           jfriedl_postfix = argv[++i];
                   2738:           continue;
                   2739:   } else if (strcmp(argv[i], "-XT") == 0) {
                   2740:           sscanf(argv[++i], "%d", &jfriedl_XT);
                   2741:           continue;
                   2742:   } else if (strcmp(argv[i], "-XR") == 0) {
                   2743:           sscanf(argv[++i], "%d", &jfriedl_XR);
                   2744:           continue;
                   2745:   }
                   2746: #endif
                   2747: 
                   2748: 
                   2749:   /* One-char options; many that have no data may be in a single argument; we
                   2750:   continue till we hit the last one or one that needs data. */
                   2751: 
                   2752:   else
                   2753:     {
                   2754:     char *s = argv[i] + 1;
                   2755:     longop = FALSE;
1.1.1.4   misho    2756: 
1.1       misho    2757:     while (*s != 0)
                   2758:       {
                   2759:       for (op = optionlist; op->one_char != 0; op++)
                   2760:         {
                   2761:         if (*s == op->one_char) break;
                   2762:         }
                   2763:       if (op->one_char == 0)
                   2764:         {
                   2765:         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
                   2766:           *s, argv[i]);
                   2767:         pcregrep_exit(usage(2));
                   2768:         }
                   2769: 
1.1.1.4   misho    2770:       option_data = s+1;
                   2771: 
                   2772:       /* Break out if this is the last character in the string; it's handled
                   2773:       below like a single multi-char option. */
                   2774: 
                   2775:       if (*option_data == 0) break;
                   2776: 
                   2777:       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
                   2778:       are used for ones that either have a numerical number or defaults, i.e.
                   2779:       the data is optional. If a digit follows, there is data; if not, carry on
1.1       misho    2780:       with other single-character options in the same string. */
                   2781: 
1.1.1.4   misho    2782:       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
1.1       misho    2783:         {
                   2784:         if (isdigit((unsigned char)s[1])) break;
                   2785:         }
1.1.1.4   misho    2786:       else   /* Check for an option with data */
1.1       misho    2787:         {
1.1.1.4   misho    2788:         if (op->type != OP_NODATA) break;
1.1       misho    2789:         }
                   2790: 
                   2791:       /* Handle a single-character option with no data, then loop for the
                   2792:       next character in the string. */
                   2793: 
                   2794:       pcre_options = handle_option(*s++, pcre_options);
                   2795:       }
                   2796:     }
                   2797: 
                   2798:   /* At this point we should have op pointing to a matched option. If the type
                   2799:   is OP_NODATA, it means that there is no data, and the option might set
                   2800:   something in the PCRE options. */
                   2801: 
                   2802:   if (op->type == OP_NODATA)
                   2803:     {
                   2804:     pcre_options = handle_option(op->one_char, pcre_options);
                   2805:     continue;
                   2806:     }
                   2807: 
1.1.1.4   misho    2808:   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
1.1       misho    2809:   either has a value or defaults to something. It cannot have data in a
                   2810:   separate item. At the moment, the only such options are "colo(u)r",
                   2811:   "only-matching", and Jeffrey Friedl's special -S debugging option. */
                   2812: 
                   2813:   if (*option_data == 0 &&
1.1.1.4   misho    2814:       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
                   2815:        op->type == OP_OP_NUMBERS))
1.1       misho    2816:     {
                   2817:     switch (op->one_char)
                   2818:       {
                   2819:       case N_COLOUR:
                   2820:       colour_option = (char *)"auto";
                   2821:       break;
                   2822: 
                   2823:       case 'o':
1.1.1.4   misho    2824:       only_matching_last = add_number(0, only_matching_last);
                   2825:       if (only_matching == NULL) only_matching = only_matching_last;
1.1       misho    2826:       break;
                   2827: 
                   2828: #ifdef JFRIEDL_DEBUG
                   2829:       case 'S':
                   2830:       S_arg = 0;
                   2831:       break;
                   2832: #endif
                   2833:       }
                   2834:     continue;
                   2835:     }
                   2836: 
                   2837:   /* Otherwise, find the data string for the option. */
                   2838: 
                   2839:   if (*option_data == 0)
                   2840:     {
                   2841:     if (i >= argc - 1 || longopwasequals)
                   2842:       {
                   2843:       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
                   2844:       pcregrep_exit(usage(2));
                   2845:       }
                   2846:     option_data = argv[++i];
                   2847:     }
                   2848: 
1.1.1.4   misho    2849:   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
                   2850:   added to a chain of numbers. */
                   2851: 
                   2852:   if (op->type == OP_OP_NUMBERS)
                   2853:     {
                   2854:     unsigned long int n = decode_number(option_data, op, longop);
                   2855:     omdatastr *omd = (omdatastr *)op->dataptr;
                   2856:     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
                   2857:     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
                   2858:     }
                   2859: 
                   2860:   /* If the option type is OP_PATLIST, it's the -e option, or one of the
                   2861:   include/exclude options, which can be called multiple times to create lists
                   2862:   of patterns. */
1.1       misho    2863: 
1.1.1.4   misho    2864:   else if (op->type == OP_PATLIST)
1.1       misho    2865:     {
1.1.1.4   misho    2866:     patdatastr *pd = (patdatastr *)op->dataptr;
                   2867:     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
                   2868:     if (*(pd->lastptr) == NULL) goto EXIT2;
                   2869:     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
                   2870:     }
                   2871: 
                   2872:   /* If the option type is OP_FILELIST, it's one of the options that names a
                   2873:   file. */
                   2874: 
                   2875:   else if (op->type == OP_FILELIST)
                   2876:     {
                   2877:     fndatastr *fd = (fndatastr *)op->dataptr;
                   2878:     fn = (fnstr *)malloc(sizeof(fnstr));
                   2879:     if (fn == NULL)
1.1       misho    2880:       {
1.1.1.4   misho    2881:       fprintf(stderr, "pcregrep: malloc failed\n");
                   2882:       goto EXIT2;
1.1       misho    2883:       }
1.1.1.4   misho    2884:     fn->next = NULL;
                   2885:     fn->name = option_data;
                   2886:     if (*(fd->anchor) == NULL)
                   2887:       *(fd->anchor) = fn;
                   2888:     else
                   2889:       (*(fd->lastptr))->next = fn;
                   2890:     *(fd->lastptr) = fn;
1.1       misho    2891:     }
                   2892: 
1.1.1.3   misho    2893:   /* Handle OP_BINFILES */
                   2894: 
                   2895:   else if (op->type == OP_BINFILES)
                   2896:     {
                   2897:     if (strcmp(option_data, "binary") == 0)
                   2898:       binary_files = BIN_BINARY;
                   2899:     else if (strcmp(option_data, "without-match") == 0)
                   2900:       binary_files = BIN_NOMATCH;
                   2901:     else if (strcmp(option_data, "text") == 0)
                   2902:       binary_files = BIN_TEXT;
                   2903:     else
                   2904:       {
                   2905:       fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
                   2906:         option_data);
                   2907:       pcregrep_exit(usage(2));
                   2908:       }
                   2909:     }
                   2910: 
1.1.1.4   misho    2911:   /* Otherwise, deal with a single string or numeric data value. */
1.1       misho    2912: 
                   2913:   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
                   2914:            op->type != OP_OP_NUMBER)
                   2915:     {
                   2916:     *((char **)op->dataptr) = option_data;
                   2917:     }
                   2918:   else
                   2919:     {
1.1.1.4   misho    2920:     unsigned long int n = decode_number(option_data, op, longop);
                   2921:     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
                   2922:       else *((int *)op->dataptr) = n;
1.1       misho    2923:     }
                   2924:   }
                   2925: 
                   2926: /* Options have been decoded. If -C was used, its value is used as a default
                   2927: for -A and -B. */
                   2928: 
                   2929: if (both_context > 0)
                   2930:   {
                   2931:   if (after_context == 0) after_context = both_context;
                   2932:   if (before_context == 0) before_context = both_context;
                   2933:   }
                   2934: 
                   2935: /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1.1.1.4   misho    2936: However, all three set show_only_matching because they display, each in their
                   2937: own way, only the data that has matched. */
1.1       misho    2938: 
1.1.1.4   misho    2939: if ((only_matching != NULL && (file_offsets || line_offsets)) ||
1.1       misho    2940:     (file_offsets && line_offsets))
                   2941:   {
                   2942:   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
                   2943:     "and/or --line-offsets\n");
                   2944:   pcregrep_exit(usage(2));
                   2945:   }
                   2946: 
1.1.1.4   misho    2947: if (only_matching != NULL || file_offsets || line_offsets)
                   2948:   show_only_matching = TRUE;
1.1       misho    2949: 
                   2950: /* If a locale has not been provided as an option, see if the LC_CTYPE or
                   2951: LC_ALL environment variable is set, and if so, use it. */
                   2952: 
                   2953: if (locale == NULL)
                   2954:   {
                   2955:   locale = getenv("LC_ALL");
                   2956:   locale_from = "LCC_ALL";
                   2957:   }
                   2958: 
                   2959: if (locale == NULL)
                   2960:   {
                   2961:   locale = getenv("LC_CTYPE");
                   2962:   locale_from = "LC_CTYPE";
                   2963:   }
                   2964: 
                   2965: /* If a locale has been provided, set it, and generate the tables that PCRE
                   2966: needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
                   2967: 
                   2968: if (locale != NULL)
                   2969:   {
                   2970:   if (setlocale(LC_CTYPE, locale) == NULL)
                   2971:     {
                   2972:     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
                   2973:       locale, locale_from);
                   2974:     return 2;
                   2975:     }
                   2976:   pcretables = pcre_maketables();
                   2977:   }
                   2978: 
                   2979: /* Sort out colouring */
                   2980: 
                   2981: if (colour_option != NULL && strcmp(colour_option, "never") != 0)
                   2982:   {
                   2983:   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
                   2984:   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
                   2985:   else
                   2986:     {
                   2987:     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
                   2988:       colour_option);
                   2989:     return 2;
                   2990:     }
                   2991:   if (do_colour)
                   2992:     {
                   2993:     char *cs = getenv("PCREGREP_COLOUR");
                   2994:     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
                   2995:     if (cs != NULL) colour_string = cs;
                   2996:     }
                   2997:   }
                   2998: 
                   2999: /* Interpret the newline type; the default settings are Unix-like. */
                   3000: 
                   3001: if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
                   3002:   {
                   3003:   pcre_options |= PCRE_NEWLINE_CR;
                   3004:   endlinetype = EL_CR;
                   3005:   }
                   3006: else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
                   3007:   {
                   3008:   pcre_options |= PCRE_NEWLINE_LF;
                   3009:   endlinetype = EL_LF;
                   3010:   }
                   3011: else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
                   3012:   {
                   3013:   pcre_options |= PCRE_NEWLINE_CRLF;
                   3014:   endlinetype = EL_CRLF;
                   3015:   }
                   3016: else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
                   3017:   {
                   3018:   pcre_options |= PCRE_NEWLINE_ANY;
                   3019:   endlinetype = EL_ANY;
                   3020:   }
                   3021: else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
                   3022:   {
                   3023:   pcre_options |= PCRE_NEWLINE_ANYCRLF;
                   3024:   endlinetype = EL_ANYCRLF;
                   3025:   }
                   3026: else
                   3027:   {
                   3028:   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
                   3029:   return 2;
                   3030:   }
                   3031: 
                   3032: /* Interpret the text values for -d and -D */
                   3033: 
                   3034: if (dee_option != NULL)
                   3035:   {
                   3036:   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
                   3037:   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
                   3038:   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
                   3039:   else
                   3040:     {
                   3041:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
                   3042:     return 2;
                   3043:     }
                   3044:   }
                   3045: 
                   3046: if (DEE_option != NULL)
                   3047:   {
                   3048:   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
                   3049:   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
                   3050:   else
                   3051:     {
                   3052:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
                   3053:     return 2;
                   3054:     }
                   3055:   }
                   3056: 
                   3057: /* Check the values for Jeffrey Friedl's debugging options. */
                   3058: 
                   3059: #ifdef JFRIEDL_DEBUG
                   3060: if (S_arg > 9)
                   3061:   {
                   3062:   fprintf(stderr, "pcregrep: bad value for -S option\n");
                   3063:   return 2;
                   3064:   }
                   3065: if (jfriedl_XT != 0 || jfriedl_XR != 0)
                   3066:   {
                   3067:   if (jfriedl_XT == 0) jfriedl_XT = 1;
                   3068:   if (jfriedl_XR == 0) jfriedl_XR = 1;
                   3069:   }
                   3070: #endif
                   3071: 
1.1.1.4   misho    3072: /* Get memory for the main buffer. */
1.1       misho    3073: 
                   3074: bufsize = 3*bufthird;
                   3075: main_buffer = (char *)malloc(bufsize);
                   3076: 
1.1.1.4   misho    3077: if (main_buffer == NULL)
1.1       misho    3078:   {
                   3079:   fprintf(stderr, "pcregrep: malloc failed\n");
                   3080:   goto EXIT2;
                   3081:   }
                   3082: 
1.1.1.4   misho    3083: /* If no patterns were provided by -e, and there are no files provided by -f,
1.1       misho    3084: the first argument is the one and only pattern, and it must exist. */
                   3085: 
1.1.1.4   misho    3086: if (patterns == NULL && pattern_files == NULL)
1.1       misho    3087:   {
                   3088:   if (i >= argc) return usage(2);
1.1.1.4   misho    3089:   patterns = patterns_last = add_pattern(argv[i++], NULL);
                   3090:   if (patterns == NULL) goto EXIT2;
1.1       misho    3091:   }
                   3092: 
                   3093: /* Compile the patterns that were provided on the command line, either by
1.1.1.4   misho    3094: multiple uses of -e or as a single unkeyed pattern. We cannot do this until
                   3095: after all the command-line options are read so that we know which PCRE options
                   3096: to use. When -F is used, compile_pattern() may add another block into the
                   3097: chain, so we must not access the next pointer till after the compile. */
1.1       misho    3098: 
1.1.1.4   misho    3099: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
1.1       misho    3100:   {
1.1.1.4   misho    3101:   if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
                   3102:        (j == 1 && patterns->next == NULL)? 0 : j))
1.1       misho    3103:     goto EXIT2;
                   3104:   }
                   3105: 
1.1.1.4   misho    3106: /* Read and compile the regular expressions that are provided in files. */
1.1       misho    3107: 
1.1.1.4   misho    3108: for (fn = pattern_files; fn != NULL; fn = fn->next)
1.1       misho    3109:   {
1.1.1.4   misho    3110:   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
                   3111:     goto EXIT2;
                   3112:   }
1.1       misho    3113: 
1.1.1.4   misho    3114: /* Study the regular expressions, as we will be running them many times. If an
                   3115: extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
                   3116: returned, even if studying produces no data. */
1.1       misho    3117: 
1.1.1.4   misho    3118: if (match_limit > 0 || match_limit_recursion > 0)
                   3119:   study_options |= PCRE_STUDY_EXTRA_NEEDED;
1.1       misho    3120: 
1.1.1.4   misho    3121: /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
1.1       misho    3122: 
                   3123: #ifdef SUPPORT_PCREGREP_JIT
                   3124: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
                   3125:   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
                   3126: #endif
                   3127: 
1.1.1.4   misho    3128: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
1.1       misho    3129:   {
1.1.1.4   misho    3130:   cp->hint = pcre_study(cp->compiled, study_options, &error);
1.1       misho    3131:   if (error != NULL)
                   3132:     {
                   3133:     char s[16];
1.1.1.4   misho    3134:     if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
1.1       misho    3135:     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
                   3136:     goto EXIT2;
                   3137:     }
                   3138: #ifdef SUPPORT_PCREGREP_JIT
1.1.1.4   misho    3139:   if (jit_stack != NULL && cp->hint != NULL)
                   3140:     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
1.1       misho    3141: #endif
                   3142:   }
                   3143: 
                   3144: /* If --match-limit or --recursion-limit was set, put the value(s) into the
1.1.1.4   misho    3145: pcre_extra block for each pattern. There will always be an extra block because
                   3146: of the use of PCRE_STUDY_EXTRA_NEEDED above. */
1.1       misho    3147: 
1.1.1.4   misho    3148: for (cp = patterns; cp != NULL; cp = cp->next)
1.1       misho    3149:   {
1.1.1.4   misho    3150:   if (match_limit > 0)
1.1       misho    3151:     {
1.1.1.4   misho    3152:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
                   3153:     cp->hint->match_limit = match_limit;
                   3154:     }
                   3155: 
                   3156:   if (match_limit_recursion > 0)
                   3157:     {
                   3158:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                   3159:     cp->hint->match_limit_recursion = match_limit_recursion;
1.1       misho    3160:     }
                   3161:   }
                   3162: 
1.1.1.4   misho    3163: /* If there are include or exclude patterns read from the command line, compile
                   3164: them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
                   3165: 0. */
1.1       misho    3166: 
1.1.1.4   misho    3167: for (j = 0; j < 4; j++)
1.1       misho    3168:   {
1.1.1.4   misho    3169:   int k;
                   3170:   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
1.1       misho    3171:     {
1.1.1.4   misho    3172:     if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
                   3173:          (k == 1 && cp->next == NULL)? 0 : k))
                   3174:       goto EXIT2;
1.1       misho    3175:     }
                   3176:   }
                   3177: 
1.1.1.4   misho    3178: /* Read and compile include/exclude patterns from files. */
                   3179: 
                   3180: for (fn = include_from; fn != NULL; fn = fn->next)
1.1       misho    3181:   {
1.1.1.4   misho    3182:   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
1.1       misho    3183:     goto EXIT2;
                   3184:   }
                   3185: 
1.1.1.4   misho    3186: for (fn = exclude_from; fn != NULL; fn = fn->next)
1.1       misho    3187:   {
1.1.1.4   misho    3188:   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
1.1       misho    3189:     goto EXIT2;
                   3190:   }
                   3191: 
1.1.1.4   misho    3192: /* If there are no files that contain lists of files to search, and there are
                   3193: no file arguments, search stdin, and then exit. */
                   3194: 
                   3195: if (file_lists == NULL && i >= argc)
1.1       misho    3196:   {
1.1.1.4   misho    3197:   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
                   3198:     (filenames > FN_DEFAULT)? stdin_name : NULL);
                   3199:   goto EXIT;
1.1       misho    3200:   }
                   3201: 
1.1.1.4   misho    3202: /* If any files that contain lists of files to search have been specified,
                   3203: read them line by line and search the given files. */
1.1.1.3   misho    3204: 
1.1.1.4   misho    3205: for (fn = file_lists; fn != NULL; fn = fn->next)
1.1.1.3   misho    3206:   {
                   3207:   char buffer[PATBUFSIZE];
                   3208:   FILE *fl;
1.1.1.4   misho    3209:   if (strcmp(fn->name, "-") == 0) fl = stdin; else
1.1.1.3   misho    3210:     {
1.1.1.4   misho    3211:     fl = fopen(fn->name, "rb");
1.1.1.3   misho    3212:     if (fl == NULL)
                   3213:       {
1.1.1.4   misho    3214:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
1.1.1.3   misho    3215:         strerror(errno));
                   3216:       goto EXIT2;
                   3217:       }
                   3218:     }
                   3219:   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
                   3220:     {
                   3221:     int frc;
                   3222:     char *end = buffer + (int)strlen(buffer);
                   3223:     while (end > buffer && isspace(end[-1])) end--;
                   3224:     *end = 0;
                   3225:     if (*buffer != 0)
                   3226:       {
                   3227:       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
                   3228:       if (frc > 1) rc = frc;
                   3229:         else if (frc == 0 && rc == 1) rc = 0;
                   3230:       }
                   3231:     }
1.1.1.4   misho    3232:   if (fl != stdin) fclose(fl);
1.1.1.3   misho    3233:   }
                   3234: 
1.1.1.4   misho    3235: /* After handling file-list, work through remaining arguments. Pass in the fact
                   3236: that there is only one argument at top level - this suppresses the file name if
                   3237: the argument is not a directory and filenames are not otherwise forced. */
1.1       misho    3238: 
1.1.1.4   misho    3239: only_one_at_top = i == argc - 1 && file_lists == NULL;
1.1       misho    3240: 
                   3241: for (; i < argc; i++)
                   3242:   {
                   3243:   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
                   3244:     only_one_at_top);
                   3245:   if (frc > 1) rc = frc;
                   3246:     else if (frc == 0 && rc == 1) rc = 0;
                   3247:   }
                   3248: 
                   3249: EXIT:
                   3250: #ifdef SUPPORT_PCREGREP_JIT
                   3251: if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
                   3252: #endif
1.1.1.4   misho    3253: 
1.1       misho    3254: if (main_buffer != NULL) free(main_buffer);
1.1.1.4   misho    3255: 
                   3256: free_pattern_chain(patterns);
                   3257: free_pattern_chain(include_patterns);
                   3258: free_pattern_chain(include_dir_patterns);
                   3259: free_pattern_chain(exclude_patterns);
                   3260: free_pattern_chain(exclude_dir_patterns);
                   3261: 
                   3262: free_file_chain(exclude_from);
                   3263: free_file_chain(include_from);
                   3264: free_file_chain(pattern_files);
                   3265: free_file_chain(file_lists);
                   3266: 
                   3267: while (only_matching != NULL)
                   3268:   {
                   3269:   omstr *this = only_matching;
                   3270:   only_matching = this->next;
                   3271:   free(this);
1.1       misho    3272:   }
1.1.1.4   misho    3273: 
1.1       misho    3274: pcregrep_exit(rc);
                   3275: 
                   3276: EXIT2:
                   3277: rc = 2;
                   3278: goto EXIT;
                   3279: }
                   3280: 
                   3281: /* End of pcregrep */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>