Annotation of embedaddon/pcre/pcregrep.c, revision 1.1.1.4

1.1       misho       1: /*************************************************
                      2: *               pcregrep program                 *
                      3: *************************************************/
                      4: 
                      5: /* This is a grep program that uses the PCRE regular expression library to do
                      6: its pattern matching. On a Unix or Win32 system it can recurse into
                      7: directories.
                      8: 
1.1.1.2   misho       9:            Copyright (c) 1997-2012 University of Cambridge
1.1       misho      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: #ifdef HAVE_CONFIG_H
                     41: #include "config.h"
                     42: #endif
                     43: 
                     44: #include <ctype.h>
                     45: #include <locale.h>
                     46: #include <stdio.h>
                     47: #include <string.h>
                     48: #include <stdlib.h>
                     49: #include <errno.h>
                     50: 
                     51: #include <sys/types.h>
                     52: #include <sys/stat.h>
                     53: 
                     54: #ifdef HAVE_UNISTD_H
                     55: #include <unistd.h>
                     56: #endif
                     57: 
                     58: #ifdef SUPPORT_LIBZ
                     59: #include <zlib.h>
                     60: #endif
                     61: 
                     62: #ifdef SUPPORT_LIBBZ2
                     63: #include <bzlib.h>
                     64: #endif
                     65: 
                     66: #include "pcre.h"
                     67: 
                     68: #define FALSE 0
                     69: #define TRUE 1
                     70: 
                         /* BOOL is a plain int that holds TRUE or FALSE. */
                     71: typedef int BOOL;
                     72: 
                         /* NOTE(review): OFFSET_SIZE is not referenced in this part of the file;
                         presumably it sizes the offsets vector given to pcre_exec() - confirm at
                         the point of use. */
                     73: #define OFFSET_SIZE 99
                     74: 
                         /* MAXPATLEN is the longest pattern, in bytes, that add_pattern() accepts:
                         BUFSIZ when that exceeds 8192, otherwise 8192. */
                     75: #if BUFSIZ > 8192
1.1.1.4 ! misho      76: #define MAXPATLEN BUFSIZ
1.1       misho      77: #else
1.1.1.4 ! misho      78: #define MAXPATLEN 8192
1.1       misho      79: #endif
                     80: 
1.1.1.4 ! misho      81: #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
        !            82: 
1.1       misho      83: /* Values for the "filenames" variable, which specifies options for file name
                     84: output. The order is important; it is assumed that a file name is wanted for
                     85: all values greater than FN_DEFAULT. */
                     86: 
                     87: enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
                     88: 
                     89: /* File reading styles. NOTE(review): FR_LIBZ and FR_LIBBZ2 presumably go with
                         the optional <zlib.h> (SUPPORT_LIBZ) and <bzlib.h> (SUPPORT_LIBBZ2) includes -
                         confirm where the values are used. */
                     90: 
                     91: enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
                     92: 
                     93: /* Actions for the -d and -D options. The lower-case dee_* values are the ones
                         stored in dee_action and the upper-case DEE_* values the ones stored in
                         DEE_action; the option table describes -d as the directories option and -D
                         as the devices option. */
                     94: 
                     95: enum { dee_READ, dee_SKIP, dee_RECURSE };
                     96: enum { DEE_READ, DEE_SKIP };
                     97: 
                     98: /* Actions for special processing options (flag bits). The combined value is
                         kept in process_options and is used to index the prefix[] and suffix[]
                         tables. */
                     99: 
                    100: #define PO_WORD_MATCH     0x0001
                    101: #define PO_LINE_MATCH     0x0002
                    102: #define PO_FIXED_STRINGS  0x0004
                    103: 
                    104: /* Line ending types */
                    105: 
                    106: enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
                    107: 
1.1.1.3   misho     108: /* Binary file options. The binary_files variable starts out as BIN_BINARY. */
                    109: 
                    110: enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
                    111: 
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result in an empty "if". The test is
wrapped in do { } while (0) so that "FWRITE(...);" is exactly one statement and
can safely be the unbraced body of an if/else; the bare "if" form could not be
followed by "else". Oddly, the warning does not also seem to apply to
fprintf(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
                    119: 
                    120: 
                    121: 
                    122: /*************************************************
                    123: *               Global variables                 *
                    124: *************************************************/
                    125: 
                    126: /* Jeffrey Friedl has some debugging requirements that are not part of the
                    127: regular code. */
                    128: 
                    129: #ifdef JFRIEDL_DEBUG
                    130: static int S_arg = -1;
                    131: static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
                    132: static unsigned int jfriedl_XT = 0; /* replicate text this many times */
                    133: static const char *jfriedl_prefix = "";
                    134: static const char *jfriedl_postfix = "";
                    135: #endif
                    136: 
                         /* NOTE(review): endlinetype presumably holds one of the EL_* values -
                         confirm where it is assigned. */
                    137: static int  endlinetype;
                    138: 
                         /* String-valued settings. Several of these are referenced from optionlist
                         through the dataptr field of their option entries. */
                    139: static char *colour_string = (char *)"1;31";
                    140: static char *colour_option = NULL;
                    141: static char *dee_option = NULL;
                    142: static char *DEE_option = NULL;
1.1.1.4 ! misho     143: static char *locale = NULL;
1.1       misho     144: static char *main_buffer = NULL;
                    145: static char *newline = NULL;
1.1.1.4 ! misho     146: static char *om_separator = (char *)"";
1.1       misho     147: static char *stdin_name = (char *)"(standard input)";
                    148: 
                    149: static const unsigned char *pcretables = NULL;
                    150: 
                    151: static int after_context = 0;
                    152: static int before_context = 0;
1.1.1.3   misho     153: static int binary_files = BIN_BINARY;
1.1       misho     154: static int both_context = 0;
                         /* bufsize starts as three times bufthird. PCREGREP_BUFSIZE is not defined in
                         this part of the file (presumably it comes from config.h). The
                         --buffer-size option entry points at bufthird. */
                    155: static int bufthird = PCREGREP_BUFSIZE;
                    156: static int bufsize = 3*PCREGREP_BUFSIZE;
1.1.1.4 ! misho     157: 
                         /* Default directory action: dee_SKIP when HAVE_WINDOWS_H is defined and
                         non-zero, dee_READ otherwise. */
        !           158: #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
        !           159: static int dee_action = dee_SKIP;
        !           160: #else
1.1       misho     161: static int dee_action = dee_READ;
1.1.1.4 ! misho     162: #endif
        !           163: 
1.1       misho     164: static int DEE_action = DEE_READ;
                    165: static int error_count = 0;
                    166: static int filenames = FN_DEFAULT;
1.1.1.4 ! misho     167: static int pcre_options = 0;
1.1       misho     168: static int process_options = 0;
                    169: 
                         /* PCRE_STUDY_JIT_COMPILE is the initial study option only when
                         SUPPORT_PCREGREP_JIT is defined. */
                    170: #ifdef SUPPORT_PCREGREP_JIT
                    171: static int study_options = PCRE_STUDY_JIT_COMPILE;
                    172: #else
                    173: static int study_options = 0;
                    174: #endif
                    175: 
                    176: static unsigned long int match_limit = 0;
                    177: static unsigned long int match_limit_recursion = 0;
                    178: 
                         /* Boolean flags, all initially FALSE. When resource_error is TRUE,
                         pcregrep_exit() writes a resource-limit explanation before exiting. */
                    179: static BOOL count_only = FALSE;
                    180: static BOOL do_colour = FALSE;
                    181: static BOOL file_offsets = FALSE;
                    182: static BOOL hyphenpending = FALSE;
                    183: static BOOL invert = FALSE;
                    184: static BOOL line_buffered = FALSE;
                    185: static BOOL line_offsets = FALSE;
                    186: static BOOL multiline = FALSE;
                    187: static BOOL number = FALSE;
                    188: static BOOL omit_zero_count = FALSE;
                    189: static BOOL resource_error = FALSE;
                    190: static BOOL quiet = FALSE;
1.1.1.4 ! misho     191: static BOOL show_only_matching = FALSE;
1.1       misho     192: static BOOL silent = FALSE;
                    193: static BOOL utf8 = FALSE;
                    194: 
1.1.1.4 ! misho     195: /* Structure for list of --only-matching capturing numbers. */
        !           196: 
        !           197: typedef struct omstr {
        !           198:   struct omstr *next;    /* next entry in the chain */
        !           199:   int groupnum;          /* capturing number held by this entry */
        !           200: } omstr;
        !           201: 
        !           202: static omstr *only_matching = NULL;
        !           203: static omstr *only_matching_last = NULL;
        !           204: 
        !           205: /* Structure for holding the two variables that describe a number chain. */
        !           206: 
        !           207: typedef struct omdatastr {
        !           208:   omstr **anchor;        /* address of the chain's head pointer */
        !           209:   omstr **lastptr;       /* address of the chain's "last" pointer */
        !           210: } omdatastr;
        !           211: 
                         /* Describes the only_matching chain; the 'o' option entry points here. */
        !           212: static omdatastr only_matching_data = { &only_matching, &only_matching_last };
        !           213: 
        !           214: /* Structure for list of file names (for -f and --{in,ex}clude-from) */
        !           215: 
        !           216: typedef struct fnstr {
        !           217:   struct fnstr *next;    /* next entry in the chain */
        !           218:   char *name;            /* file name; free_file_chain() does not free it */
        !           219: } fnstr;
        !           220: 
        !           221: static fnstr *exclude_from = NULL;
        !           222: static fnstr *exclude_from_last = NULL;
        !           223: static fnstr *include_from = NULL;
        !           224: static fnstr *include_from_last = NULL;
        !           225: 
        !           226: static fnstr *file_lists = NULL;
        !           227: static fnstr *file_lists_last = NULL;
        !           228: static fnstr *pattern_files = NULL;
        !           229: static fnstr *pattern_files_last = NULL;
        !           230: 
        !           231: /* Structure for holding the two variables that describe a file name chain. */
        !           232: 
        !           233: typedef struct fndatastr {
        !           234:   fnstr **anchor;        /* address of the chain's head pointer */
        !           235:   fnstr **lastptr;       /* address of the chain's "last" pointer */
        !           236: } fndatastr;
        !           237: 
                         /* One descriptor per file name chain; the OP_FILELIST option entries point at
                         these. */
        !           238: static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
        !           239: static fndatastr include_from_data = { &include_from, &include_from_last };
        !           240: static fndatastr file_lists_data = { &file_lists, &file_lists_last };
        !           241: static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
        !           242: 
        !           243: /* Structure for pattern and its compiled form; used for matching patterns and
        !           244: also for include/exclude patterns. */
        !           245: 
        !           246: typedef struct patstr {
        !           247:   struct patstr *next;   /* next pattern in the chain, or NULL */
        !           248:   char *string;          /* pattern text; add_pattern() stores the pointer, not a copy */
        !           249:   pcre *compiled;        /* compiled pattern, NULL until set; released with pcre_free() */
        !           250:   pcre_extra *hint;      /* study data, NULL until set; released with pcre_free_study() */
        !           251: } patstr;
        !           252: 
        !           253: static patstr *patterns = NULL;
        !           254: static patstr *patterns_last = NULL;
        !           255: static patstr *include_patterns = NULL;
        !           256: static patstr *include_patterns_last = NULL;
        !           257: static patstr *exclude_patterns = NULL;
        !           258: static patstr *exclude_patterns_last = NULL;
        !           259: static patstr *include_dir_patterns = NULL;
        !           260: static patstr *include_dir_patterns_last = NULL;
        !           261: static patstr *exclude_dir_patterns = NULL;
        !           262: static patstr *exclude_dir_patterns_last = NULL;
        !           263: 
        !           264: /* Structure holding the two variables that describe a pattern chain. A pointer
        !           265: to such structures is used for each appropriate option. */
        !           266: 
        !           267: typedef struct patdatastr {
        !           268:   patstr **anchor;       /* address of the chain's head pointer */
        !           269:   patstr **lastptr;      /* address of the chain's "last" pointer */
        !           270: } patdatastr;
        !           271: 
        !           272: static patdatastr match_patdata = { &patterns, &patterns_last };
        !           273: static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
        !           274: static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
        !           275: static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
        !           276: static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
        !           277: 
                         /* Parallel arrays: incexname[i] is the option name that goes with the chain
                         whose head pointer is addressed by incexlist[i]. */
        !           278: static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
        !           279:                                  &include_dir_patterns, &exclude_dir_patterns };
        !           280: 
        !           281: static const char *incexname[4] = { "--include", "--exclude",
        !           282:                                     "--include-dir", "--exclude-dir" };
        !           283: 
1.1       misho     284: /* Structure for options and list of them */
                    285: 
                         /* The type of an option_item selects what its dataptr points at, as the
                         entries in optionlist show: OP_NUMBER and OP_OP_NUMBER -> int,
                         OP_LONGNUMBER -> unsigned long int, OP_STRING and OP_OP_STRING -> char *,
                         OP_OP_NUMBERS -> omdatastr, OP_PATLIST -> patdatastr, OP_FILELIST ->
                         fndatastr. OP_NODATA and OP_BINFILES entries have a NULL dataptr. */
                    286: enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
1.1.1.4 ! misho     287:        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
1.1       misho     288: 
                    289: typedef struct option_item {
                    290:   int type;                /* one of the OP_* values above */
                    291:   int one_char;            /* option letter, or a negative N_* code */
                    292:   void *dataptr;           /* where the option's value goes, or NULL */
                    293:   const char *long_name;   /* long option name, possibly with "=argument" text */
                    294:   const char *help_text;   /* one-line description of the option */
                    295: } option_item;
                    296: 
                    297: /* Options without a single-letter equivalent get a negative value. This can be
                    298: used to identify them. */
                    299: 
                    300: #define N_COLOUR       (-1)
                    301: #define N_EXCLUDE      (-2)
                    302: #define N_EXCLUDE_DIR  (-3)
                    303: #define N_HELP         (-4)
                    304: #define N_INCLUDE      (-5)
                    305: #define N_INCLUDE_DIR  (-6)
                    306: #define N_LABEL        (-7)
                    307: #define N_LOCALE       (-8)
                    308: #define N_NULL         (-9)
                    309: #define N_LOFFSETS     (-10)
                    310: #define N_FOFFSETS     (-11)
                    311: #define N_LBUFFER      (-12)
                    312: #define N_M_LIMIT      (-13)
                    313: #define N_M_LIMIT_REC  (-14)
                    314: #define N_BUFSIZE      (-15)
                    315: #define N_NOJIT        (-16)
1.1.1.3   misho     316: #define N_FILE_LIST    (-17)
                    317: #define N_BINARY_FILES (-18)
1.1.1.4 ! misho     318: #define N_EXCLUDE_FROM (-19)
        !           319: #define N_INCLUDE_FROM (-20)
        !           320: #define N_OM_SEPARATOR (-21)
1.1       misho     321: 
                    322: static option_item optionlist[] = {
                           /* Each entry is { type, one_char, dataptr, long_name, help_text }. Two
                           entries may share a one_char code (both spellings of the colour option use
                           N_COLOUR). The list ends with the entry whose one_char is 0 and whose
                           long_name is NULL. */
1.1.1.3   misho     323:   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
1.1       misho     324:   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
                    325:   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
1.1.1.3   misho     326:   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
1.1       misho     327:   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
1.1.1.3   misho     328:   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
1.1       misho     329:   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
                    330:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
                    331:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
                    332:   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
                    333:   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
                    334:   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
                    335:   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
1.1.1.4 ! misho     336:   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
1.1       misho     337:   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
1.1.1.4 ! misho     338:   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
        !           339:   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
1.1       misho     340:   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
                    341:   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
                    342:   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
1.1.1.3   misho     343:   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
1.1       misho     344:   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
                    345: #ifdef SUPPORT_PCREGREP_JIT
                    346:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
                    347: #else
                    348:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
                    349: #endif
                    350:   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
                    351:   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
                    352:   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
                    353:   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
                    354:   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
                    355:   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
                    356:   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
                    357:   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
                    358:   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
                    359:   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
                    360:   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
1.1.1.4 ! misho     361:   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
        !           362:   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
1.1       misho     363:   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
                    364:   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
1.1.1.4 ! misho     365:   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
        !           366:   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
        !           367:   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
        !           368:   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
        !           369:   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
        !           370:   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
1.1       misho     371: 
                    372:   /* These two were accidentally implemented with underscores instead of
                    373:   hyphens in the option names. As this was not discovered for several releases,
                    374:   the incorrect versions are left in the table for compatibility. However, the
                    375:   --help function misses out any option that has an underscore in its name. */
                    376: 
1.1.1.4 ! misho     377:   { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
        !           378:   { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
1.1       misho     379: 
                    380: #ifdef JFRIEDL_DEBUG
                    381:   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
                    382: #endif
                    383:   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
                    384:   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
                    385:   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
                    386:   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
                    387:   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
                    388:   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
                    389:   { OP_NODATA,    0,        NULL,               NULL,            NULL }
                    390: };
                    391: 
                    392: /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
                    393: options. These set the 1, 2, and 4 bits in process_options, respectively. Note
                    394: that the combination of -w and -x has the same effect as -x on its own, so we
1.1.1.4 ! misho     395: can treat them as the same. Note that the PATBUFSIZE macro (MAXPATLEN + 10)
        !           396: assumes the longest prefix+suffix is 10 characters; if anything longer is
        !           397: added, it must be adjusted. */
1.1       misho     398: 
                    399: static const char *prefix[] = {
                    400:   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
                    401: 
                    402: static const char *suffix[] = {
                    403:   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
                    404: 
                    405: /* UTF-8 tables - used only when the newline setting is "any". Neither table is
                         declared static, so both have external linkage.
                         NOTE(review): utf8_table4 looks like a count of additional bytes indexed by
                         the low six bits of a lead byte, and utf8_table3 like the matching masks
                         for the lead byte's data bits - confirm at the point of use. */
                    406: 
                    407: const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
                    408: 
                    409: const char utf8_table4[] = {
                    410:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                    411:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                    412:   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
                    413:   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
                    414: 
                    415: 
                    416: 
                    417: /*************************************************
                    418: *         Exit from the program                  *
                    419: *************************************************/
                    420: 
                    421: /* If there has been a resource error, give a suitable message.
                    422: 
                    423: Argument:  the return code
                    424: Returns:   does not return
                    425: */
                    426: 
                    427: static void
                    428: pcregrep_exit(int rc)
                    429: {
                    430: if (resource_error)
                    431:   {
                    432:   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
                    433:     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
                    434:     PCRE_ERROR_JIT_STACKLIMIT);
                    435:   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
                    436:   }
                    437: exit(rc);
                    438: }
                    439: 
                    440: 
                    441: /*************************************************
1.1.1.4 ! misho     442: *          Add item to chain of patterns         *
        !           443: *************************************************/
        !           444: 
        !           445: /* Used to add an item onto a chain, or just return an unconnected item if the
        !           446: "after" argument is NULL.
        !           447: 
        !           448: Arguments:
        !           449:   s          pattern string to add
        !           450:   after      if not NULL points to item to insert after
        !           451: 
        !           452: Returns:     new pattern block
        !           453: */
        !           454: 
        !           455: static patstr *
        !           456: add_pattern(char *s, patstr *after)
        !           457: {
        !           458: patstr *p = (patstr *)malloc(sizeof(patstr));
        !           459: if (p == NULL)
        !           460:   {
        !           461:   fprintf(stderr, "pcregrep: malloc failed\n");
        !           462:   pcregrep_exit(2);
        !           463:   }
        !           464: if (strlen(s) > MAXPATLEN)
        !           465:   {
        !           466:   fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
        !           467:     MAXPATLEN);
        !           468:   return NULL;
        !           469:   }
        !           470: p->next = NULL;
        !           471: p->string = s;
        !           472: p->compiled = NULL;
        !           473: p->hint = NULL;
        !           474: 
        !           475: if (after != NULL)
        !           476:   {
        !           477:   p->next = after->next;
        !           478:   after->next = p;
        !           479:   }
        !           480: return p;
        !           481: }
        !           482: 
        !           483: 
        !           484: /*************************************************
        !           485: *           Free chain of patterns               *
        !           486: *************************************************/
        !           487: 
        !           488: /* Used for several chains of patterns.
        !           489: 
        !           490: Argument: pointer to start of chain
        !           491: Returns:  nothing
        !           492: */
        !           493: 
        !           494: static void
        !           495: free_pattern_chain(patstr *pc)
        !           496: {
        !           497: while (pc != NULL)
        !           498:   {
        !           499:   patstr *p = pc;
        !           500:   pc = p->next;
        !           501:   if (p->hint != NULL) pcre_free_study(p->hint);
        !           502:   if (p->compiled != NULL) pcre_free(p->compiled);
        !           503:   free(p);
        !           504:   }
        !           505: }
        !           506: 
        !           507: 
        !           508: /*************************************************
        !           509: *           Free chain of file names             *
        !           510: *************************************************/
        !           511: 
        !           512: /*
        !           513: Argument: pointer to start of chain
        !           514: Returns:  nothing
        !           515: */
        !           516: 
        !           517: static void
        !           518: free_file_chain(fnstr *fn)
        !           519: {
        !           520: while (fn != NULL)
        !           521:   {
        !           522:   fnstr *f = fn;
        !           523:   fn = f->next;
        !           524:   free(f);
        !           525:   }
        !           526: }
        !           527: 
        !           528: 
        !           529: /*************************************************
1.1       misho     530: *            OS-specific functions               *
                    531: *************************************************/
                    532: 
                    533: /* These functions are defined so that they can be made system specific,
                    534: although at present the only ones are for Unix, Win32, and for "no support". */
                    535: 
                    536: 
                    537: /************* Directory scanning in Unix ***********/
                    538: 
                    539: #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
                    540: #include <sys/types.h>
                    541: #include <sys/stat.h>
                    542: #include <dirent.h>
                    543: 
                    544: typedef DIR directory_type;
1.1.1.4 ! misho     545: #define FILESEP '/'
1.1       misho     546: 
                    547: static int
                    548: isdirectory(char *filename)
                    549: {
                    550: struct stat statbuf;
                    551: if (stat(filename, &statbuf) < 0)
                    552:   return 0;        /* In the expectation that opening as a file will fail */
1.1.1.4 ! misho     553: return (statbuf.st_mode & S_IFMT) == S_IFDIR;
1.1       misho     554: }
                    555: 
                    556: static directory_type *
                    557: opendirectory(char *filename)
                    558: {
                    559: return opendir(filename);
                    560: }
                    561: 
                    562: static char *
                    563: readdirectory(directory_type *dir)
                    564: {
                    565: for (;;)
                    566:   {
                    567:   struct dirent *dent = readdir(dir);
                    568:   if (dent == NULL) return NULL;
                    569:   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
                    570:     return dent->d_name;
                    571:   }
                    572: /* Control never reaches here */
                    573: }
                    574: 
                    575: static void
                    576: closedirectory(directory_type *dir)
                    577: {
                    578: closedir(dir);
                    579: }
                    580: 
                    581: 
                    582: /************* Test for regular file in Unix **********/
                    583: 
                    584: static int
                    585: isregfile(char *filename)
                    586: {
                    587: struct stat statbuf;
                    588: if (stat(filename, &statbuf) < 0)
                    589:   return 1;        /* In the expectation that opening as a file will fail */
                    590: return (statbuf.st_mode & S_IFMT) == S_IFREG;
                    591: }
                    592: 
                    593: 
                    594: /************* Test for a terminal in Unix **********/
                    595: 
                    596: static BOOL
                    597: is_stdout_tty(void)
                    598: {
                    599: return isatty(fileno(stdout));
                    600: }
                    601: 
                    602: static BOOL
                    603: is_file_tty(FILE *f)
                    604: {
                    605: return isatty(fileno(f));
                    606: }
                    607: 
                    608: 
                    609: /************* Directory scanning in Win32 ***********/
                    610: 
                    611: /* I (Philip Hazel) have no means of testing this code. It was contributed by
                    612: Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
                    613: when it did not exist. David Byron added a patch that moved the #include of
                    614: <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
                    615: The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
                    616: undefined when it is indeed undefined. */
                    617: 
                    618: #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
                    619: 
                    620: #ifndef STRICT
                    621: # define STRICT
                    622: #endif
                    623: #ifndef WIN32_LEAN_AND_MEAN
                    624: # define WIN32_LEAN_AND_MEAN
                    625: #endif
                    626: 
                    627: #include <windows.h>
                    628: 
                    629: #ifndef INVALID_FILE_ATTRIBUTES
                    630: #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
                    631: #endif
                    632: 
                    633: typedef struct directory_type   /* State of one Win32 directory scan */
                    634: {
                    635: HANDLE handle;           /* Search handle obtained from FindFirstFile() in opendirectory() */
                    636: BOOL first;              /* Set TRUE by opendirectory(); cleared by the first readdirectory() call */
                    637: WIN32_FIND_DATA data;    /* Entry filled in by FindFirstFile() / FindNextFile() */
                    638: } directory_type;
                    639: 
1.1.1.4 ! misho     640: #define FILESEP '/'
        !           641: 
1.1       misho     642: int
                    643: isdirectory(char *filename)
                    644: {
                    645: DWORD attr = GetFileAttributes(filename);
                    646: if (attr == INVALID_FILE_ATTRIBUTES)
                    647:   return 0;
1.1.1.4 ! misho     648: return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1.1       misho     649: }
                    650: 
                    651: directory_type *
                    652: opendirectory(char *filename)
                    653: {
                    654: size_t len;
                    655: char *pattern;
                    656: directory_type *dir;
                    657: DWORD err;
                    658: len = strlen(filename);
1.1.1.4 ! misho     659: pattern = (char *)malloc(len + 3);
        !           660: dir = (directory_type *)malloc(sizeof(*dir));
1.1       misho     661: if ((pattern == NULL) || (dir == NULL))
                    662:   {
                    663:   fprintf(stderr, "pcregrep: malloc failed\n");
                    664:   pcregrep_exit(2);
                    665:   }
                    666: memcpy(pattern, filename, len);
                    667: memcpy(&(pattern[len]), "\\*", 3);
                    668: dir->handle = FindFirstFile(pattern, &(dir->data));
                    669: if (dir->handle != INVALID_HANDLE_VALUE)
                    670:   {
                    671:   free(pattern);
                    672:   dir->first = TRUE;
                    673:   return dir;
                    674:   }
                    675: err = GetLastError();
                    676: free(pattern);
                    677: free(dir);
                    678: errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
                    679: return NULL;
                    680: }
                    681: 
                    682: char *
                    683: readdirectory(directory_type *dir)
                    684: {
                    685: for (;;)
                    686:   {
                    687:   if (!dir->first)
                    688:     {
                    689:     if (!FindNextFile(dir->handle, &(dir->data)))
                    690:       return NULL;
                    691:     }
                    692:   else
                    693:     {
                    694:     dir->first = FALSE;
                    695:     }
                    696:   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
                    697:     return dir->data.cFileName;
                    698:   }
                    699: #ifndef _MSC_VER
                    700: return NULL;   /* Keep compiler happy; never executed */
                    701: #endif
                    702: }
                    703: 
                    704: void
                    705: closedirectory(directory_type *dir)
                    706: {
                    707: FindClose(dir->handle);
                    708: free(dir);
                    709: }
                    710: 
                    711: 
                    712: /************* Test for regular file in Win32 **********/
                    713: 
                    714: /* I don't know how to do this, or if it can be done; assume all paths are
                    715: regular if they are not directories. */
                    716: 
                    717: int isregfile(char *filename)
                    718: {
                    719: return !isdirectory(filename);
                    720: }
                    721: 
                    722: 
                    723: /************* Test for a terminal in Win32 **********/
                    724: 
                    725: /* I don't know how to do this; assume never */
                    726: 
                    727: static BOOL
                    728: is_stdout_tty(void)
                    729: {
                    730: return FALSE;
                    731: }
                    732: 
                    733: static BOOL
                    734: is_file_tty(FILE *f)
                    735: {
                    736: return FALSE;
                    737: }
                    738: 
                    739: 
                    740: /************* Directory scanning when we can't do it ***********/
                    741: 
                    742: /* The type is void, and apart from isdirectory(), the functions do nothing. */
                    743: 
                    744: #else
                    745: 
1.1.1.4 ! misho     746: #define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always fails. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
                    753: 
                    754: 
                    755: /************* Test for regular when we can't do it **********/
                    756: 
                    757: /* Assume all files are regular. */
                    758: 
                    759: int isregfile(char *filename) { return 1; }
                    760: 
                    761: 
                    762: /************* Test for a terminal when we can't do it **********/
                    763: 
                    764: static BOOL
                    765: is_stdout_tty(void)
                    766: {
                    767: return FALSE;
                    768: }
                    769: 
                    770: static BOOL
                    771: is_file_tty(FILE *f)
                    772: {
                    773: return FALSE;
                    774: }
                    775: 
                    776: #endif
                    777: 
                    778: 
                    779: 
                    780: #ifndef HAVE_STRERROR
                    781: /*************************************************
                    782: *     Provide strerror() for non-ANSI libraries  *
                    783: *************************************************/
                    784: 
                    785: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
                    786: in their libraries, but can provide the same facility by this simple
                    787: alternative function. */
                    788: 
                    789: extern int   sys_nerr;
                    790: extern char *sys_errlist[];
                    791: 
                    792: char *
                    793: strerror(int n)
                    794: {
                    795: if (n < 0 || n >= sys_nerr) return "unknown error number";
                    796: return sys_errlist[n];
                    797: }
                    798: #endif /* HAVE_STRERROR */
                    799: 
                    800: 
                    801: 
                    802: /*************************************************
1.1.1.4 ! misho     803: *                Usage function                  *
        !           804: *************************************************/
        !           805: 
        !           806: static int
        !           807: usage(int rc)
        !           808: {
        !           809: option_item *op;
        !           810: fprintf(stderr, "Usage: pcregrep [-");
        !           811: for (op = optionlist; op->one_char != 0; op++)
        !           812:   {
        !           813:   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
        !           814:   }
        !           815: fprintf(stderr, "] [long options] [pattern] [files]\n");
        !           816: fprintf(stderr, "Type `pcregrep --help' for more information and the long "
        !           817:   "options.\n");
        !           818: return rc;
        !           819: }
        !           820: 
        !           821: 
        !           822: 
        !           823: /*************************************************
        !           824: *                Help function                   *
        !           825: *************************************************/
        !           826: 
        !           827: static void
        !           828: help(void)
        !           829: {
        !           830: option_item *op;
        !           831: 
        !           832: printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
        !           833: printf("Search for PATTERN in each FILE or standard input.\n");
        !           834: printf("PATTERN must be present if neither -e nor -f is used.\n");
        !           835: printf("\"-\" can be used as a file name to mean STDIN.\n");
        !           836: 
        !           837: #ifdef SUPPORT_LIBZ
        !           838: printf("Files whose names end in .gz are read using zlib.\n");
        !           839: #endif
        !           840: 
        !           841: #ifdef SUPPORT_LIBBZ2
        !           842: printf("Files whose names end in .bz2 are read using bzlib2.\n");
        !           843: #endif
        !           844: 
        !           845: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
        !           846: printf("Other files and the standard input are read as plain files.\n\n");
        !           847: #else
        !           848: printf("All files are read as plain files, without any interpretation.\n\n");
        !           849: #endif
        !           850: 
        !           851: printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
        !           852: printf("Options:\n");
        !           853: 
        !           854: for (op = optionlist; op->one_char != 0; op++)
        !           855:   {
        !           856:   int n;
        !           857:   char s[4];
        !           858: 
        !           859:   /* Two options were accidentally implemented and documented with underscores
        !           860:   instead of hyphens in their names, something that was not noticed for quite a
        !           861:   few releases. When fixing this, I left the underscored versions in the list
        !           862:   in case people were using them. However, we don't want to display them in the
        !           863:   help data. There are no other options that contain underscores, and we do not
        !           864:   expect ever to implement such options. Therefore, just omit any option that
        !           865:   contains an underscore. */
        !           866: 
        !           867:   if (strchr(op->long_name, '_') != NULL) continue;
        !           868: 
        !           869:   if (op->one_char > 0 && (op->long_name)[0] == 0)
        !           870:     n = 31 - printf("  -%c", op->one_char);
        !           871:   else
        !           872:     {
        !           873:     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
        !           874:       else strcpy(s, "   ");
        !           875:     n = 31 - printf("  %s --%s", s, op->long_name);
        !           876:     }
        !           877: 
        !           878:   if (n < 1) n = 1;
        !           879:   printf("%.*s%s\n", n, "                           ", op->help_text);
        !           880:   }
        !           881: 
        !           882: printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
        !           883: printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
        !           884: printf("When reading patterns or file names from a file, trailing white\n");
        !           885: printf("space is removed and blank lines are ignored.\n");
        !           886: printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
        !           887: 
        !           888: printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
        !           889: printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
        !           890: }
        !           891: 
        !           892: 
        !           893: 
        !           894: /*************************************************
        !           895: *            Test exclude/includes               *
        !           896: *************************************************/
        !           897: 
        !           898: /* If any exclude pattern matches, the path is excluded. Otherwise, unless
        !           899: there are no includes, the path must match an include pattern.
        !           900: 
        !           901: Arguments:
        !           902:   path      the path to be matched
        !           903:   ip        the chain of include patterns
        !           904:   ep        the chain of exclude patterns
        !           905: 
        !           906: Returns:    TRUE if the path is not excluded
        !           907: */
        !           908: 
        !           909: static BOOL
        !           910: test_incexc(char *path, patstr *ip, patstr *ep)
        !           911: {
        !           912: int plen = strlen(path);
        !           913: 
        !           914: for (; ep != NULL; ep = ep->next)
        !           915:   {
        !           916:   if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
        !           917:     return FALSE;
        !           918:   }
        !           919: 
        !           920: if (ip == NULL) return TRUE;
        !           921: 
        !           922: for (; ip != NULL; ip = ip->next)
        !           923:   {
        !           924:   if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
        !           925:     return TRUE;
        !           926:   }
        !           927: 
        !           928: return FALSE;
        !           929: }
        !           930: 
        !           931: 
        !           932: 
        !           933: /*************************************************
        !           934: *         Decode integer argument value          *
        !           935: *************************************************/
        !           936: 
        !           937: /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
        !           938: because SunOS4 doesn't have it. This is used only for unpicking arguments, so
        !           939: just keep it simple.
        !           940: 
        !           941: Arguments:
        !           942:   option_data   the option data string
        !           943:   op            the option item (for error messages)
        !           944:   longop        TRUE if option given in long form
        !           945: 
        !           946: Returns:        a long integer
        !           947: */
        !           948: 
        !           949: static long int
        !           950: decode_number(char *option_data, option_item *op, BOOL longop)
        !           951: {
        !           952: unsigned long int n = 0;
        !           953: char *endptr = option_data;
        !           954: while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
        !           955: while (isdigit((unsigned char)(*endptr)))
        !           956:   n = n * 10 + (int)(*endptr++ - '0');
        !           957: if (toupper(*endptr) == 'K')
        !           958:   {
        !           959:   n *= 1024;
        !           960:   endptr++;
        !           961:   }
        !           962: else if (toupper(*endptr) == 'M')
        !           963:   {
        !           964:   n *= 1024*1024;
        !           965:   endptr++;
        !           966:   }
        !           967: 
        !           968: if (*endptr != 0)   /* Error */
        !           969:   {
        !           970:   if (longop)
        !           971:     {
        !           972:     char *equals = strchr(op->long_name, '=');
        !           973:     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
        !           974:       (int)(equals - op->long_name);
        !           975:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
        !           976:       option_data, nlen, op->long_name);
        !           977:     }
        !           978:   else
        !           979:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
        !           980:       option_data, op->one_char);
        !           981:   pcregrep_exit(usage(2));
        !           982:   }
        !           983: 
        !           984: return n;
        !           985: }
        !           986: 
        !           987: 
        !           988: 
        !           989: /*************************************************
        !           990: *       Add item to a chain of numbers           *
        !           991: *************************************************/
        !           992: 
        !           993: /* Used to add an item onto a chain, or just return an unconnected item if the
        !           994: "after" argument is NULL.
        !           995: 
        !           996: Arguments:
        !           997:   n          the number to add
        !           998:   after      if not NULL points to item to insert after
        !           999: 
        !          1000: Returns:     new number block
        !          1001: */
        !          1002: 
        !          1003: static omstr *
        !          1004: add_number(int n, omstr *after)
        !          1005: {
        !          1006: omstr *om = (omstr *)malloc(sizeof(omstr));
        !          1007: 
        !          1008: if (om == NULL)
        !          1009:   {
        !          1010:   fprintf(stderr, "pcregrep: malloc failed\n");
        !          1011:   pcregrep_exit(2);
        !          1012:   }
        !          1013: om->next = NULL;
        !          1014: om->groupnum = n;
        !          1015: 
        !          1016: if (after != NULL)
        !          1017:   {
        !          1018:   om->next = after->next;
        !          1019:   after->next = om;
        !          1020:   }
        !          1021: return om;
        !          1022: }
        !          1023: 
        !          1024: 
        !          1025: 
        !          1026: /*************************************************
1.1       misho    1027: *            Read one line of input              *
                   1028: *************************************************/
                   1029: 
                   1030: /* Normally, input is read using fread() into a large buffer, so many lines may
                   1031: be read at once. However, doing this for tty input means that no output appears
                   1032: until a lot of input has been typed. Instead, tty input is handled line by
                   1033: line. We cannot use fgets() for this, because it does not stop at a binary
                   1034: zero, and therefore there is no way of telling how many characters it has read,
                   1035: because there may be binary zeros embedded in the data.
                   1036: 
                   1037: Arguments:
                   1038:   buffer     the buffer to read into
                   1039:   length     the maximum number of characters to read
                   1040:   f          the file
                   1041: 
                   1042: Returns:     the number of characters read, zero at end of file
                   1043: */
                   1044: 
1.1.1.2   misho    1045: static unsigned int
1.1       misho    1046: read_one_line(char *buffer, int length, FILE *f)
                   1047: {
                   1048: int c;
                   1049: int yield = 0;
                   1050: while ((c = fgetc(f)) != EOF)
                   1051:   {
                   1052:   buffer[yield++] = c;
                   1053:   if (c == '\n' || yield >= length) break;
                   1054:   }
                   1055: return yield;
                   1056: }
                   1057: 
                   1058: 
                   1059: 
                   1060: /*************************************************
                   1061: *             Find end of line                   *
                   1062: *************************************************/
                   1063: 
                   1064: /* The length of the endline sequence that is found is set via lenptr. This may
                   1065: be zero at the very end of the file if there is no line-ending sequence there.
                   1066: 
                   1067: Arguments:
                   1068:   p         current position in line
                   1069:   endptr    end of available data
                   1070:   lenptr    where to put the length of the eol sequence
                   1071: 
                   1072: Returns:    pointer after the last byte of the line,
                   1073:             including the newline byte(s)
                   1074: */
                   1075: 
                   1076: static char *
                   1077: end_of_line(char *p, char *endptr, int *lenptr)
                   1078: {
                   1079: switch(endlinetype)
                   1080:   {
                   1081:   default:      /* Just in case */
                   1082:   case EL_LF:
                   1083:   while (p < endptr && *p != '\n') p++;
                   1084:   if (p < endptr)
                   1085:     {
                   1086:     *lenptr = 1;
                   1087:     return p + 1;
                   1088:     }
                   1089:   *lenptr = 0;
                   1090:   return endptr;
                   1091: 
                   1092:   case EL_CR:
                   1093:   while (p < endptr && *p != '\r') p++;
                   1094:   if (p < endptr)
                   1095:     {
                   1096:     *lenptr = 1;
                   1097:     return p + 1;
                   1098:     }
                   1099:   *lenptr = 0;
                   1100:   return endptr;
                   1101: 
                   1102:   case EL_CRLF:
                   1103:   for (;;)
                   1104:     {
                   1105:     while (p < endptr && *p != '\r') p++;
                   1106:     if (++p >= endptr)
                   1107:       {
                   1108:       *lenptr = 0;
                   1109:       return endptr;
                   1110:       }
                   1111:     if (*p == '\n')
                   1112:       {
                   1113:       *lenptr = 2;
                   1114:       return p + 1;
                   1115:       }
                   1116:     }
                   1117:   break;
                   1118: 
                   1119:   case EL_ANYCRLF:
                   1120:   while (p < endptr)
                   1121:     {
                   1122:     int extra = 0;
                   1123:     register int c = *((unsigned char *)p);
                   1124: 
                   1125:     if (utf8 && c >= 0xc0)
                   1126:       {
                   1127:       int gcii, gcss;
                   1128:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1129:       gcss = 6*extra;
                   1130:       c = (c & utf8_table3[extra]) << gcss;
                   1131:       for (gcii = 1; gcii <= extra; gcii++)
                   1132:         {
                   1133:         gcss -= 6;
                   1134:         c |= (p[gcii] & 0x3f) << gcss;
                   1135:         }
                   1136:       }
                   1137: 
                   1138:     p += 1 + extra;
                   1139: 
                   1140:     switch (c)
                   1141:       {
1.1.1.4 ! misho    1142:       case '\n':
1.1       misho    1143:       *lenptr = 1;
                   1144:       return p;
                   1145: 
1.1.1.4 ! misho    1146:       case '\r':
        !          1147:       if (p < endptr && *p == '\n')
1.1       misho    1148:         {
                   1149:         *lenptr = 2;
                   1150:         p++;
                   1151:         }
                   1152:       else *lenptr = 1;
                   1153:       return p;
                   1154: 
                   1155:       default:
                   1156:       break;
                   1157:       }
                   1158:     }   /* End of loop for ANYCRLF case */
                   1159: 
                   1160:   *lenptr = 0;  /* Must have hit the end */
                   1161:   return endptr;
                   1162: 
                   1163:   case EL_ANY:
                   1164:   while (p < endptr)
                   1165:     {
                   1166:     int extra = 0;
                   1167:     register int c = *((unsigned char *)p);
                   1168: 
                   1169:     if (utf8 && c >= 0xc0)
                   1170:       {
                   1171:       int gcii, gcss;
                   1172:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1173:       gcss = 6*extra;
                   1174:       c = (c & utf8_table3[extra]) << gcss;
                   1175:       for (gcii = 1; gcii <= extra; gcii++)
                   1176:         {
                   1177:         gcss -= 6;
                   1178:         c |= (p[gcii] & 0x3f) << gcss;
                   1179:         }
                   1180:       }
                   1181: 
                   1182:     p += 1 + extra;
                   1183: 
                   1184:     switch (c)
                   1185:       {
1.1.1.4 ! misho    1186:       case '\n':    /* LF */
        !          1187:       case '\v':    /* VT */
        !          1188:       case '\f':    /* FF */
1.1       misho    1189:       *lenptr = 1;
                   1190:       return p;
                   1191: 
1.1.1.4 ! misho    1192:       case '\r':    /* CR */
        !          1193:       if (p < endptr && *p == '\n')
1.1       misho    1194:         {
                   1195:         *lenptr = 2;
                   1196:         p++;
                   1197:         }
                   1198:       else *lenptr = 1;
                   1199:       return p;
                   1200: 
1.1.1.4 ! misho    1201: #ifndef EBCDIC
        !          1202:       case 0x85:    /* Unicode NEL */
1.1       misho    1203:       *lenptr = utf8? 2 : 1;
                   1204:       return p;
                   1205: 
1.1.1.4 ! misho    1206:       case 0x2028:  /* Unicode LS */
        !          1207:       case 0x2029:  /* Unicode PS */
1.1       misho    1208:       *lenptr = 3;
                   1209:       return p;
1.1.1.4 ! misho    1210: #endif  /* Not EBCDIC */
1.1       misho    1211: 
                   1212:       default:
                   1213:       break;
                   1214:       }
                   1215:     }   /* End of loop for ANY case */
                   1216: 
                   1217:   *lenptr = 0;  /* Must have hit the end */
                   1218:   return endptr;
                   1219:   }     /* End of overall switch */
                   1220: }
                   1221: 
                   1222: 
                   1223: 
                   1224: /*************************************************
                   1225: *         Find start of previous line            *
                   1226: *************************************************/
                   1227: 
                   1228: /* This is called when looking back for before lines to print.
                   1229: 
                   1230: Arguments:
                   1231:   p         start of the subsequent line
                   1232:   startptr  start of available data
                   1233: 
                   1234: Returns:    pointer to the start of the previous line
                   1235: */
                   1236: 
                   1237: static char *
                   1238: previous_line(char *p, char *startptr)
                   1239: {
                   1240: switch(endlinetype)
                   1241:   {
                   1242:   default:      /* Just in case */
                   1243:   case EL_LF:
                   1244:   p--;
                   1245:   while (p > startptr && p[-1] != '\n') p--;
                   1246:   return p;
                   1247: 
                   1248:   case EL_CR:
                   1249:   p--;
                   1250:   while (p > startptr && p[-1] != '\n') p--;
                   1251:   return p;
                   1252: 
                   1253:   case EL_CRLF:
                   1254:   for (;;)
                   1255:     {
                   1256:     p -= 2;
                   1257:     while (p > startptr && p[-1] != '\n') p--;
                   1258:     if (p <= startptr + 1 || p[-2] == '\r') return p;
                   1259:     }
                   1260:   return p;   /* But control should never get here */
                   1261: 
                   1262:   case EL_ANY:
                   1263:   case EL_ANYCRLF:
                   1264:   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
                   1265:   if (utf8) while ((*p & 0xc0) == 0x80) p--;
                   1266: 
                   1267:   while (p > startptr)
                   1268:     {
1.1.1.4 ! misho    1269:     register unsigned int c;
1.1       misho    1270:     char *pp = p - 1;
                   1271: 
                   1272:     if (utf8)
                   1273:       {
                   1274:       int extra = 0;
                   1275:       while ((*pp & 0xc0) == 0x80) pp--;
                   1276:       c = *((unsigned char *)pp);
                   1277:       if (c >= 0xc0)
                   1278:         {
                   1279:         int gcii, gcss;
                   1280:         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
                   1281:         gcss = 6*extra;
                   1282:         c = (c & utf8_table3[extra]) << gcss;
                   1283:         for (gcii = 1; gcii <= extra; gcii++)
                   1284:           {
                   1285:           gcss -= 6;
                   1286:           c |= (pp[gcii] & 0x3f) << gcss;
                   1287:           }
                   1288:         }
                   1289:       }
                   1290:     else c = *((unsigned char *)pp);
                   1291: 
                   1292:     if (endlinetype == EL_ANYCRLF) switch (c)
                   1293:       {
1.1.1.4 ! misho    1294:       case '\n':    /* LF */
        !          1295:       case '\r':    /* CR */
1.1       misho    1296:       return p;
                   1297: 
                   1298:       default:
                   1299:       break;
                   1300:       }
                   1301: 
                   1302:     else switch (c)
                   1303:       {
1.1.1.4 ! misho    1304:       case '\n':    /* LF */
        !          1305:       case '\v':    /* VT */
        !          1306:       case '\f':    /* FF */
        !          1307:       case '\r':    /* CR */
        !          1308: #ifndef EBCDIE
        !          1309:       case 0x85:    /* Unicode NEL */
        !          1310:       case 0x2028:  /* Unicode LS */
        !          1311:       case 0x2029:  /* Unicode PS */
        !          1312: #endif  /* Not EBCDIC */
1.1       misho    1313:       return p;
                   1314: 
                   1315:       default:
                   1316:       break;
                   1317:       }
                   1318: 
                   1319:     p = pp;  /* Back one character */
                   1320:     }        /* End of loop for ANY case */
                   1321: 
                   1322:   return startptr;  /* Hit start of data */
                   1323:   }     /* End of overall switch */
                   1324: }
                   1325: 
                   1326: 
                   1327: 
                   1328: 
                   1329: 
                   1330: /*************************************************
                   1331: *       Print the previous "after" lines         *
                   1332: *************************************************/
                   1333: 
                   1334: /* This is called if we are about to lose said lines because of buffer filling,
                   1335: and at the end of the file. The data in the line is written using fwrite() so
                   1336: that a binary zero does not terminate it.
                   1337: 
                   1338: Arguments:
                   1339:   lastmatchnumber   the number of the last matching line, plus one
                   1340:   lastmatchrestart  where we restarted after the last match
                   1341:   endptr            end of available data
                   1342:   printname         filename for printing
                   1343: 
                   1344: Returns:            nothing
                   1345: */
                   1346: 
1.1.1.4 ! misho    1347: static void
        !          1348: do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
        !          1349:   char *printname)
1.1       misho    1350: {
                   1351: if (after_context > 0 && lastmatchnumber > 0)
                   1352:   {
                   1353:   int count = 0;
                   1354:   while (lastmatchrestart < endptr && count++ < after_context)
                   1355:     {
                   1356:     int ellength;
                   1357:     char *pp = lastmatchrestart;
                   1358:     if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1359:     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
                   1360:     pp = end_of_line(pp, endptr, &ellength);
                   1361:     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
                   1362:     lastmatchrestart = pp;
                   1363:     }
                   1364:   hyphenpending = TRUE;
                   1365:   }
                   1366: }
                   1367: 
                   1368: 
                   1369: 
                   1370: /*************************************************
                   1371: *   Apply patterns to subject till one matches   *
                   1372: *************************************************/
                   1373: 
                   1374: /* This function is called to run through all patterns, looking for a match. It
                   1375: is used multiple times for the same subject when colouring is enabled, in order
                   1376: to find all possible matches.
                   1377: 
                   1378: Arguments:
                   1379:   matchptr     the start of the subject
                   1380:   length       the length of the subject to match
1.1.1.4 ! misho    1381:   options      options for pcre_exec
1.1       misho    1382:   startoffset  where to start matching
                   1383:   offsets      the offets vector to fill in
                   1384:   mrc          address of where to put the result of pcre_exec()
                   1385: 
                   1386: Returns:      TRUE if there was a match
                   1387:               FALSE if there was no match
                   1388:               invert if there was a non-fatal error
                   1389: */
                   1390: 
                   1391: static BOOL
1.1.1.4 ! misho    1392: match_patterns(char *matchptr, size_t length, unsigned int options,
        !          1393:   int startoffset, int *offsets, int *mrc)
1.1       misho    1394: {
                   1395: int i;
                   1396: size_t slen = length;
1.1.1.4 ! misho    1397: patstr *p = patterns;
1.1       misho    1398: const char *msg = "this text:\n\n";
1.1.1.4 ! misho    1399: 
1.1       misho    1400: if (slen > 200)
                   1401:   {
                   1402:   slen = 200;
                   1403:   msg = "text that starts:\n\n";
                   1404:   }
1.1.1.4 ! misho    1405: for (i = 1; p != NULL; p = p->next, i++)
1.1       misho    1406:   {
1.1.1.4 ! misho    1407:   *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
        !          1408:     startoffset, options, offsets, OFFSET_SIZE);
1.1       misho    1409:   if (*mrc >= 0) return TRUE;
                   1410:   if (*mrc == PCRE_ERROR_NOMATCH) continue;
                   1411:   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1.1.1.4 ! misho    1412:   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1.1       misho    1413:   fprintf(stderr, "%s", msg);
                   1414:   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
                   1415:   fprintf(stderr, "\n\n");
                   1416:   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
                   1417:       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
                   1418:     resource_error = TRUE;
                   1419:   if (error_count++ > 20)
                   1420:     {
                   1421:     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
                   1422:     pcregrep_exit(2);
                   1423:     }
                   1424:   return invert;    /* No more matching; don't show the line again */
                   1425:   }
                   1426: 
                   1427: return FALSE;  /* No match, no errors */
                   1428: }
                   1429: 
                   1430: 
                   1431: 
                   1432: /*************************************************
                   1433: *            Grep an individual file             *
                   1434: *************************************************/
                   1435: 
                   1436: /* This is called from grep_or_recurse() below. It uses a buffer that is three
                   1437: times the value of bufthird. The matching point is never allowed to stray into
                   1438: the top third of the buffer, thus keeping more of the file available for
                   1439: context printing or for multiline scanning. For large files, the pointer will
                   1440: be in the middle third most of the time, so the bottom third is available for
                   1441: "before" context printing.
                   1442: 
                   1443: Arguments:
                   1444:   handle       the fopened FILE stream for a normal file
                   1445:                the gzFile pointer when reading is via libz
                   1446:                the BZFILE pointer when reading is via libbz2
                   1447:   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
                   1448:   filename     the file name or NULL (for errors)
                   1449:   printname    the file name if it is to be printed for each match
                   1450:                or NULL if the file name is not to be printed
                   1451:                it cannot be NULL if filenames[_nomatch]_only is set
                   1452: 
                   1453: Returns:       0 if there was at least one match
                   1454:                1 otherwise (no matches)
                   1455:                2 if an overlong line is encountered
                   1456:                3 if there is a read error on a .bz2 file
                   1457: */
                   1458: 
                   1459: static int
                   1460: pcregrep(void *handle, int frtype, char *filename, char *printname)
                   1461: {
                   1462: int rc = 1;
                   1463: int linenumber = 1;
                   1464: int lastmatchnumber = 0;
                   1465: int count = 0;
                   1466: int filepos = 0;
                   1467: int offsets[OFFSET_SIZE];
                   1468: char *lastmatchrestart = NULL;
                   1469: char *ptr = main_buffer;
                   1470: char *endptr;
                   1471: size_t bufflength;
1.1.1.3   misho    1472: BOOL binary = FALSE;
1.1       misho    1473: BOOL endhyphenpending = FALSE;
                   1474: BOOL input_line_buffered = line_buffered;
                   1475: FILE *in = NULL;                    /* Ensure initialized */
                   1476: 
                   1477: #ifdef SUPPORT_LIBZ
                   1478: gzFile ingz = NULL;
                   1479: #endif
                   1480: 
                   1481: #ifdef SUPPORT_LIBBZ2
                   1482: BZFILE *inbz2 = NULL;
                   1483: #endif
                   1484: 
                   1485: 
                   1486: /* Do the first read into the start of the buffer and set up the pointer to end
                   1487: of what we have. In the case of libz, a non-zipped .gz file will be read as a
                   1488: plain file. However, if a .bz2 file isn't actually bzipped, the first read will
                   1489: fail. */
                   1490: 
1.1.1.4 ! misho    1491: (void)frtype;
        !          1492: 
1.1       misho    1493: #ifdef SUPPORT_LIBZ
                   1494: if (frtype == FR_LIBZ)
                   1495:   {
                   1496:   ingz = (gzFile)handle;
                   1497:   bufflength = gzread (ingz, main_buffer, bufsize);
                   1498:   }
                   1499: else
                   1500: #endif
                   1501: 
                   1502: #ifdef SUPPORT_LIBBZ2
                   1503: if (frtype == FR_LIBBZ2)
                   1504:   {
                   1505:   inbz2 = (BZFILE *)handle;
                   1506:   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
                   1507:   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
                   1508:   }                                    /* without the cast it is unsigned. */
                   1509: else
                   1510: #endif
                   1511: 
                   1512:   {
                   1513:   in = (FILE *)handle;
                   1514:   if (is_file_tty(in)) input_line_buffered = TRUE;
                   1515:   bufflength = input_line_buffered?
                   1516:     read_one_line(main_buffer, bufsize, in) :
                   1517:     fread(main_buffer, 1, bufsize, in);
                   1518:   }
                   1519: 
                   1520: endptr = main_buffer + bufflength;
                   1521: 
1.1.1.3   misho    1522: /* Unless binary-files=text, see if we have a binary file. This uses the same
                   1523: rule as GNU grep, namely, a search for a binary zero byte near the start of the
                   1524: file. */
                   1525: 
                   1526: if (binary_files != BIN_TEXT)
                   1527:   {
                   1528:   binary =
                   1529:     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
                   1530:   if (binary && binary_files == BIN_NOMATCH) return 1;
                   1531:   }
                   1532: 
1.1       misho    1533: /* Loop while the current pointer is not at the end of the file. For large
                   1534: files, endptr will be at the end of the buffer when we are in the middle of the
                   1535: file, but ptr will never get there, because as soon as it gets over 2/3 of the
                   1536: way, the buffer is shifted left and re-filled. */
                   1537: 
                   1538: while (ptr < endptr)
                   1539:   {
                   1540:   int endlinelength;
                   1541:   int mrc = 0;
                   1542:   int startoffset = 0;
1.1.1.4 ! misho    1543:   unsigned int options = 0;
1.1       misho    1544:   BOOL match;
                   1545:   char *matchptr = ptr;
                   1546:   char *t = ptr;
                   1547:   size_t length, linelength;
                   1548: 
                   1549:   /* At this point, ptr is at the start of a line. We need to find the length
                   1550:   of the subject string to pass to pcre_exec(). In multiline mode, it is the
                   1551:   length remainder of the data in the buffer. Otherwise, it is the length of
                   1552:   the next line, excluding the terminating newline. After matching, we always
                   1553:   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
                   1554:   option is used for compiling, so that any match is constrained to be in the
                   1555:   first line. */
                   1556: 
                   1557:   t = end_of_line(t, endptr, &endlinelength);
                   1558:   linelength = t - ptr - endlinelength;
                   1559:   length = multiline? (size_t)(endptr - ptr) : linelength;
                   1560: 
                   1561:   /* Check to see if the line we are looking at extends right to the very end
                   1562:   of the buffer without a line terminator. This means the line is too long to
                   1563:   handle. */
                   1564: 
                   1565:   if (endlinelength == 0 && t == main_buffer + bufsize)
                   1566:     {
                   1567:     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
                   1568:                     "pcregrep: check the --buffer-size option\n",
                   1569:                     linenumber,
                   1570:                     (filename == NULL)? "" : " of file ",
                   1571:                     (filename == NULL)? "" : filename);
                   1572:     return 2;
                   1573:     }
                   1574: 
                   1575:   /* Extra processing for Jeffrey Friedl's debugging. */
                   1576: 
                   1577: #ifdef JFRIEDL_DEBUG
                   1578:   if (jfriedl_XT || jfriedl_XR)
                   1579:   {
1.1.1.4 ! misho    1580: #     include <sys/time.h>
        !          1581: #     include <time.h>
1.1       misho    1582:       struct timeval start_time, end_time;
                   1583:       struct timezone dummy;
                   1584:       int i;
                   1585: 
                   1586:       if (jfriedl_XT)
                   1587:       {
                   1588:           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
                   1589:           const char *orig = ptr;
                   1590:           ptr = malloc(newlen + 1);
                   1591:           if (!ptr) {
                   1592:                   printf("out of memory");
                   1593:                   pcregrep_exit(2);
                   1594:           }
                   1595:           endptr = ptr;
                   1596:           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
                   1597:           for (i = 0; i < jfriedl_XT; i++) {
                   1598:                   strncpy(endptr, orig,  length);
                   1599:                   endptr += length;
                   1600:           }
                   1601:           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
                   1602:           length = newlen;
                   1603:       }
                   1604: 
                   1605:       if (gettimeofday(&start_time, &dummy) != 0)
                   1606:               perror("bad gettimeofday");
                   1607: 
                   1608: 
                   1609:       for (i = 0; i < jfriedl_XR; i++)
1.1.1.4 ! misho    1610:           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1.1       misho    1611:               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
                   1612: 
                   1613:       if (gettimeofday(&end_time, &dummy) != 0)
                   1614:               perror("bad gettimeofday");
                   1615: 
                   1616:       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
                   1617:                       -
                   1618:                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
                   1619: 
                   1620:       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
                   1621:       return 0;
                   1622:   }
                   1623: #endif
                   1624: 
1.1.1.4 ! misho    1625:   /* We come back here after a match when show_only_matching is set, in order
        !          1626:   to find any further matches in the same line. This applies to
        !          1627:   --only-matching, --file-offsets, and --line-offsets. */
1.1       misho    1628: 
                   1629:   ONLY_MATCHING_RESTART:
                   1630: 
                   1631:   /* Run through all the patterns until one matches or there is an error other
                   1632:   than NOMATCH. This code is in a subroutine so that it can be re-used for
1.1.1.4 ! misho    1633:   finding subsequent matches when colouring matched lines. After finding one
        !          1634:   match, set PCRE_NOTEMPTY to disable any further matches of null strings in
        !          1635:   this line. */
1.1       misho    1636: 
1.1.1.4 ! misho    1637:   match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
        !          1638:   options = PCRE_NOTEMPTY;
1.1       misho    1639: 
                   1640:   /* If it's a match or a not-match (as required), do what's wanted. */
                   1641: 
                   1642:   if (match != invert)
                   1643:     {
                   1644:     BOOL hyphenprinted = FALSE;
                   1645: 
                   1646:     /* We've failed if we want a file that doesn't have any matches. */
                   1647: 
                   1648:     if (filenames == FN_NOMATCH_ONLY) return 1;
                   1649: 
                   1650:     /* Just count if just counting is wanted. */
                   1651: 
                   1652:     if (count_only) count++;
                   1653: 
1.1.1.3   misho    1654:     /* When handling a binary file and binary-files==binary, the "binary"
                   1655:     variable will be set true (it's false in all other cases). In this
                   1656:     situation we just want to output the file name. No need to scan further. */
                   1657: 
                   1658:     else if (binary)
                   1659:       {
                   1660:       fprintf(stdout, "Binary file %s matches\n", filename);
                   1661:       return 0;
                   1662:       }
                   1663: 
1.1       misho    1664:     /* If all we want is a file name, there is no need to scan any more lines
                   1665:     in the file. */
                   1666: 
                   1667:     else if (filenames == FN_MATCH_ONLY)
                   1668:       {
                   1669:       fprintf(stdout, "%s\n", printname);
                   1670:       return 0;
                   1671:       }
                   1672: 
                   1673:     /* Likewise, if all we want is a yes/no answer. */
                   1674: 
                   1675:     else if (quiet) return 0;
                   1676: 
1.1.1.4 ! misho    1677:     /* The --only-matching option prints just the substring that matched,
        !          1678:     and/or one or more captured portions of it, as long as these strings are
        !          1679:     not empty. The --file-offsets and --line-offsets options output offsets for
        !          1680:     the matching substring (all three set show_only_matching). None of these
        !          1681:     mutually exclusive options prints any context. Afterwards, adjust the start
        !          1682:     and then jump back to look for further matches in the same line. If we are
        !          1683:     in invert mode, however, nothing is printed and we do not restart - this
        !          1684:     could still be useful because the return code is set. */
1.1       misho    1685: 
1.1.1.4 ! misho    1686:     else if (show_only_matching)
1.1       misho    1687:       {
                   1688:       if (!invert)
                   1689:         {
                   1690:         if (printname != NULL) fprintf(stdout, "%s:", printname);
                   1691:         if (number) fprintf(stdout, "%d:", linenumber);
1.1.1.4 ! misho    1692: 
        !          1693:         /* Handle --line-offsets */
        !          1694: 
1.1       misho    1695:         if (line_offsets)
                   1696:           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
                   1697:             offsets[1] - offsets[0]);
1.1.1.4 ! misho    1698: 
        !          1699:         /* Handle --file-offsets */
        !          1700: 
1.1       misho    1701:         else if (file_offsets)
                   1702:           fprintf(stdout, "%d,%d\n",
                   1703:             (int)(filepos + matchptr + offsets[0] - ptr),
                   1704:             offsets[1] - offsets[0]);
1.1.1.4 ! misho    1705: 
        !          1706:         /* Handle --only-matching, which may occur many times */
        !          1707: 
        !          1708:         else
1.1       misho    1709:           {
1.1.1.4 ! misho    1710:           BOOL printed = FALSE;
        !          1711:           omstr *om;
        !          1712: 
        !          1713:           for (om = only_matching; om != NULL; om = om->next)
1.1       misho    1714:             {
1.1.1.4 ! misho    1715:             int n = om->groupnum;
        !          1716:             if (n < mrc)
        !          1717:               {
        !          1718:               int plen = offsets[2*n + 1] - offsets[2*n];
        !          1719:               if (plen > 0)
        !          1720:                 {
        !          1721:                 if (printed) fprintf(stdout, "%s", om_separator);
        !          1722:                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
        !          1723:                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
        !          1724:                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
        !          1725:                 printed = TRUE;
        !          1726:                 }
        !          1727:               }
1.1       misho    1728:             }
1.1.1.4 ! misho    1729: 
        !          1730:           if (printed || printname != NULL || number) fprintf(stdout, "\n");
1.1       misho    1731:           }
1.1.1.4 ! misho    1732: 
        !          1733:         /* Prepare to repeat to find the next match */
        !          1734: 
1.1       misho    1735:         match = FALSE;
                   1736:         if (line_buffered) fflush(stdout);
                   1737:         rc = 0;                      /* Had some success */
                   1738:         startoffset = offsets[1];    /* Restart after the match */
                   1739:         goto ONLY_MATCHING_RESTART;
                   1740:         }
                   1741:       }
                   1742: 
                   1743:     /* This is the default case when none of the above options is set. We print
                   1744:     the matching lines(s), possibly preceded and/or followed by other lines of
                   1745:     context. */
                   1746: 
                   1747:     else
                   1748:       {
                   1749:       /* See if there is a requirement to print some "after" lines from a
                   1750:       previous match. We never print any overlaps. */
                   1751: 
                   1752:       if (after_context > 0 && lastmatchnumber > 0)
                   1753:         {
                   1754:         int ellength;
                   1755:         int linecount = 0;
                   1756:         char *p = lastmatchrestart;
                   1757: 
                   1758:         while (p < ptr && linecount < after_context)
                   1759:           {
                   1760:           p = end_of_line(p, ptr, &ellength);
                   1761:           linecount++;
                   1762:           }
                   1763: 
                   1764:         /* It is important to advance lastmatchrestart during this printing so
                   1765:         that it interacts correctly with any "before" printing below. Print
                   1766:         each line's data using fwrite() in case there are binary zeroes. */
                   1767: 
                   1768:         while (lastmatchrestart < p)
                   1769:           {
                   1770:           char *pp = lastmatchrestart;
                   1771:           if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1772:           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
                   1773:           pp = end_of_line(pp, endptr, &ellength);
                   1774:           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
                   1775:           lastmatchrestart = pp;
                   1776:           }
                   1777:         if (lastmatchrestart != ptr) hyphenpending = TRUE;
                   1778:         }
                   1779: 
                   1780:       /* If there were non-contiguous lines printed above, insert hyphens. */
                   1781: 
                   1782:       if (hyphenpending)
                   1783:         {
                   1784:         fprintf(stdout, "--\n");
                   1785:         hyphenpending = FALSE;
                   1786:         hyphenprinted = TRUE;
                   1787:         }
                   1788: 
                   1789:       /* See if there is a requirement to print some "before" lines for this
                   1790:       match. Again, don't print overlaps. */
                   1791: 
                   1792:       if (before_context > 0)
                   1793:         {
                   1794:         int linecount = 0;
                   1795:         char *p = ptr;
                   1796: 
                   1797:         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
                   1798:                linecount < before_context)
                   1799:           {
                   1800:           linecount++;
                   1801:           p = previous_line(p, main_buffer);
                   1802:           }
                   1803: 
                   1804:         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
                   1805:           fprintf(stdout, "--\n");
                   1806: 
                   1807:         while (p < ptr)
                   1808:           {
                   1809:           int ellength;
                   1810:           char *pp = p;
                   1811:           if (printname != NULL) fprintf(stdout, "%s-", printname);
                   1812:           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
                   1813:           pp = end_of_line(pp, endptr, &ellength);
                   1814:           FWRITE(p, 1, pp - p, stdout);
                   1815:           p = pp;
                   1816:           }
                   1817:         }
                   1818: 
                   1819:       /* Now print the matching line(s); ensure we set hyphenpending at the end
                   1820:       of the file if any context lines are being output. */
                   1821: 
                   1822:       if (after_context > 0 || before_context > 0)
                   1823:         endhyphenpending = TRUE;
                   1824: 
                   1825:       if (printname != NULL) fprintf(stdout, "%s:", printname);
                   1826:       if (number) fprintf(stdout, "%d:", linenumber);
                   1827: 
                   1828:       /* In multiline mode, we want to print to the end of the line in which
                   1829:       the end of the matched string is found, so we adjust linelength and the
                   1830:       line number appropriately, but only when there actually was a match
                   1831:       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
                   1832:       the match will always be before the first newline sequence. */
                   1833: 
                   1834:       if (multiline & !invert)
                   1835:         {
                   1836:         char *endmatch = ptr + offsets[1];
                   1837:         t = ptr;
                   1838:         while (t < endmatch)
                   1839:           {
                   1840:           t = end_of_line(t, endptr, &endlinelength);
                   1841:           if (t < endmatch) linenumber++; else break;
                   1842:           }
                   1843:         linelength = t - ptr - endlinelength;
                   1844:         }
                   1845: 
                   1846:       /*** NOTE: Use only fwrite() to output the data line, so that binary
                   1847:       zeroes are treated as just another data character. */
                   1848: 
                   1849:       /* This extra option, for Jeffrey Friedl's debugging requirements,
                   1850:       replaces the matched string, or a specific captured string if it exists,
                   1851:       with X. When this happens, colouring is ignored. */
                   1852: 
                   1853: #ifdef JFRIEDL_DEBUG
                   1854:       if (S_arg >= 0 && S_arg < mrc)
                   1855:         {
                   1856:         int first = S_arg * 2;
                   1857:         int last  = first + 1;
                   1858:         FWRITE(ptr, 1, offsets[first], stdout);
                   1859:         fprintf(stdout, "X");
                   1860:         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
                   1861:         }
                   1862:       else
                   1863: #endif
                   1864: 
                   1865:       /* We have to split the line(s) up if colouring, and search for further
                   1866:       matches, but not of course if the line is a non-match. */
                   1867: 
                   1868:       if (do_colour && !invert)
                   1869:         {
                   1870:         int plength;
                   1871:         FWRITE(ptr, 1, offsets[0], stdout);
                   1872:         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
                   1873:         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
                   1874:         fprintf(stdout, "%c[00m", 0x1b);
                   1875:         for (;;)
                   1876:           {
                   1877:           startoffset = offsets[1];
                   1878:           if (startoffset >= (int)linelength + endlinelength ||
1.1.1.4 ! misho    1879:               !match_patterns(matchptr, length, options, startoffset, offsets,
        !          1880:                 &mrc))
1.1       misho    1881:             break;
                   1882:           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
                   1883:           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
                   1884:           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
                   1885:           fprintf(stdout, "%c[00m", 0x1b);
                   1886:           }
                   1887: 
                   1888:         /* In multiline mode, we may have already printed the complete line
                   1889:         and its line-ending characters (if they matched the pattern), so there
                   1890:         may be no more to print. */
                   1891: 
                   1892:         plength = (int)((linelength + endlinelength) - startoffset);
                   1893:         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
                   1894:         }
                   1895: 
                   1896:       /* Not colouring; no need to search for further matches */
                   1897: 
                   1898:       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
                   1899:       }
                   1900: 
                   1901:     /* End of doing what has to be done for a match. If --line-buffered was
                   1902:     given, flush the output. */
                   1903: 
                   1904:     if (line_buffered) fflush(stdout);
                   1905:     rc = 0;    /* Had some success */
                   1906: 
                   1907:     /* Remember where the last match happened for after_context. We remember
                   1908:     where we are about to restart, and that line's number. */
                   1909: 
                   1910:     lastmatchrestart = ptr + linelength + endlinelength;
                   1911:     lastmatchnumber = linenumber + 1;
                   1912:     }
                   1913: 
                   1914:   /* For a match in multiline inverted mode (which of course did not cause
                   1915:   anything to be printed), we have to move on to the end of the match before
                   1916:   proceeding. */
                   1917: 
                   1918:   if (multiline && invert && match)
                   1919:     {
                   1920:     int ellength;
                   1921:     char *endmatch = ptr + offsets[1];
                   1922:     t = ptr;
                   1923:     while (t < endmatch)
                   1924:       {
                   1925:       t = end_of_line(t, endptr, &ellength);
                   1926:       if (t <= endmatch) linenumber++; else break;
                   1927:       }
                   1928:     endmatch = end_of_line(endmatch, endptr, &ellength);
                   1929:     linelength = endmatch - ptr - ellength;
                   1930:     }
                   1931: 
                   1932:   /* Advance to after the newline and increment the line number. The file
                   1933:   offset to the current line is maintained in filepos. */
                   1934: 
                   1935:   ptr += linelength + endlinelength;
                   1936:   filepos += (int)(linelength + endlinelength);
                   1937:   linenumber++;
                   1938: 
                   1939:   /* If input is line buffered, and the buffer is not yet full, read another
                   1940:   line and add it into the buffer. */
                   1941: 
                   1942:   if (input_line_buffered && bufflength < (size_t)bufsize)
                   1943:     {
                   1944:     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
                   1945:     bufflength += add;
                   1946:     endptr += add;
                   1947:     }
                   1948: 
                   1949:   /* If we haven't yet reached the end of the file (the buffer is full), and
                   1950:   the current point is in the top 1/3 of the buffer, slide the buffer down by
                   1951:   1/3 and refill it. Before we do this, if some unprinted "after" lines are
                   1952:   about to be lost, print them. */
                   1953: 
                   1954:   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
                   1955:     {
                   1956:     if (after_context > 0 &&
                   1957:         lastmatchnumber > 0 &&
                   1958:         lastmatchrestart < main_buffer + bufthird)
                   1959:       {
                   1960:       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
                   1961:       lastmatchnumber = 0;
                   1962:       }
                   1963: 
                   1964:     /* Now do the shuffle */
                   1965: 
                   1966:     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
                   1967:     ptr -= bufthird;
                   1968: 
                   1969: #ifdef SUPPORT_LIBZ
                   1970:     if (frtype == FR_LIBZ)
                   1971:       bufflength = 2*bufthird +
                   1972:         gzread (ingz, main_buffer + 2*bufthird, bufthird);
                   1973:     else
                   1974: #endif
                   1975: 
                   1976: #ifdef SUPPORT_LIBBZ2
                   1977:     if (frtype == FR_LIBBZ2)
                   1978:       bufflength = 2*bufthird +
                   1979:         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
                   1980:     else
                   1981: #endif
                   1982: 
                   1983:     bufflength = 2*bufthird +
                   1984:       (input_line_buffered?
                   1985:        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
                   1986:        fread(main_buffer + 2*bufthird, 1, bufthird, in));
                   1987:     endptr = main_buffer + bufflength;
                   1988: 
                   1989:     /* Adjust any last match point */
                   1990: 
                   1991:     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
                   1992:     }
                   1993:   }     /* Loop through the whole file */
                   1994: 
                   1995: /* End of file; print final "after" lines if wanted; do_after_lines sets
                   1996: hyphenpending if it prints something. */
                   1997: 
1.1.1.4 ! misho    1998: if (!show_only_matching && !count_only)
1.1       misho    1999:   {
                   2000:   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
                   2001:   hyphenpending |= endhyphenpending;
                   2002:   }
                   2003: 
                   2004: /* Print the file name if we are looking for those without matches and there
                   2005: were none. If we found a match, we won't have got this far. */
                   2006: 
                   2007: if (filenames == FN_NOMATCH_ONLY)
                   2008:   {
                   2009:   fprintf(stdout, "%s\n", printname);
                   2010:   return 0;
                   2011:   }
                   2012: 
                   2013: /* Print the match count if wanted */
                   2014: 
                   2015: if (count_only)
                   2016:   {
                   2017:   if (count > 0 || !omit_zero_count)
                   2018:     {
                   2019:     if (printname != NULL && filenames != FN_NONE)
                   2020:       fprintf(stdout, "%s:", printname);
                   2021:     fprintf(stdout, "%d\n", count);
                   2022:     }
                   2023:   }
                   2024: 
                   2025: return rc;
                   2026: }
                   2027: 
                   2028: 
                   2029: 
                   2030: /*************************************************
                   2031: *     Grep a file or recurse into a directory    *
                   2032: *************************************************/
                   2033: 
                   2034: /* Given a path name, if it's a directory, scan all the files if we are
                   2035: recursing; if it's a file, grep it.
                   2036: 
                   2037: Arguments:
                   2038:   pathname          the path to investigate
                   2039:   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
                   2040:   only_one_at_top   TRUE if the path is the only one at toplevel
                   2041: 
1.1.1.4 ! misho    2042: Returns:  -1 the file/directory was skipped
        !          2043:            0 if there was at least one match
1.1       misho    2044:            1 if there were no matches
                   2045:            2 there was some kind of error
                   2046: 
                   2047: However, file opening failures are suppressed if "silent" is set.
                   2048: */
                   2049: 
                   2050: static int
                   2051: grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
                   2052: {
                   2053: int rc = 1;
                   2054: int frtype;
                   2055: void *handle;
1.1.1.4 ! misho    2056: char *lastcomp;
1.1       misho    2057: FILE *in = NULL;           /* Ensure initialized */
                   2058: 
                   2059: #ifdef SUPPORT_LIBZ
                   2060: gzFile ingz = NULL;
                   2061: #endif
                   2062: 
                   2063: #ifdef SUPPORT_LIBBZ2
                   2064: BZFILE *inbz2 = NULL;
                   2065: #endif
                   2066: 
1.1.1.3   misho    2067: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1.1.1.2   misho    2068: int pathlen;
                   2069: #endif
                   2070: 
1.1       misho    2071: /* If the file name is "-" we scan stdin */
                   2072: 
                   2073: if (strcmp(pathname, "-") == 0)
                   2074:   {
                   2075:   return pcregrep(stdin, FR_PLAIN, stdin_name,
                   2076:     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
                   2077:       stdin_name : NULL);
                   2078:   }
                   2079: 
1.1.1.4 ! misho    2080: /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
        !          2081: directories, whereas --include and --exclude apply to everything else. The test
        !          2082: is against the final component of the path. */
        !          2083: 
        !          2084: lastcomp = strrchr(pathname, FILESEP);
        !          2085: lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
        !          2086: 
        !          2087: /* If the file is a directory, skip if not recursing or if explicitly excluded.
        !          2088: Otherwise, scan the directory and recurse for each path within it. The scanning
        !          2089: code is localized so it can be made system-specific. */
        !          2090: 
        !          2091: if (isdirectory(pathname))
        !          2092:   {
        !          2093:   if (dee_action == dee_SKIP ||
        !          2094:       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
        !          2095:     return -1;
1.1       misho    2096: 
                   2097:   if (dee_action == dee_RECURSE)
                   2098:     {
                   2099:     char buffer[1024];
                   2100:     char *nextfile;
                   2101:     directory_type *dir = opendirectory(pathname);
                   2102: 
                   2103:     if (dir == NULL)
                   2104:       {
                   2105:       if (!silent)
                   2106:         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
                   2107:           strerror(errno));
                   2108:       return 2;
                   2109:       }
                   2110: 
                   2111:     while ((nextfile = readdirectory(dir)) != NULL)
                   2112:       {
1.1.1.4 ! misho    2113:       int frc;
        !          2114:       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1.1       misho    2115:       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
                   2116:       if (frc > 1) rc = frc;
                   2117:        else if (frc == 0 && rc == 1) rc = 0;
                   2118:       }
                   2119: 
                   2120:     closedirectory(dir);
                   2121:     return rc;
                   2122:     }
                   2123:   }
                   2124: 
                   2125: /* If the file is not a directory and not a regular file, skip it if that's
1.1.1.4 ! misho    2126: been requested. Otherwise, check for explicit include/exclude. */
1.1       misho    2127: 
1.1.1.4 ! misho    2128: else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
        !          2129:           !test_incexc(lastcomp, include_patterns, exclude_patterns))
        !          2130:         return -1;
1.1       misho    2131: 
                   2132: /* Control reaches here if we have a regular file, or if we have a directory
                   2133: and recursion or skipping was not requested, or if we have anything else and
                   2134: skipping was not requested. The scan proceeds. If this is the first and only
                   2135: argument at top level, we don't show the file name, unless we are only showing
                   2136: the file name, or the filename was forced (-H). */
                   2137: 
1.1.1.3   misho    2138: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1.1       misho    2139: pathlen = (int)(strlen(pathname));
1.1.1.2   misho    2140: #endif
1.1       misho    2141: 
                   2142: /* Open using zlib if it is supported and the file name ends with .gz. */
                   2143: 
                   2144: #ifdef SUPPORT_LIBZ
                   2145: if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
                   2146:   {
                   2147:   ingz = gzopen(pathname, "rb");
                   2148:   if (ingz == NULL)
                   2149:     {
                   2150:     if (!silent)
                   2151:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
                   2152:         strerror(errno));
                   2153:     return 2;
                   2154:     }
                   2155:   handle = (void *)ingz;
                   2156:   frtype = FR_LIBZ;
                   2157:   }
                   2158: else
                   2159: #endif
                   2160: 
                   2161: /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
                   2162: 
                   2163: #ifdef SUPPORT_LIBBZ2
                   2164: if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
                   2165:   {
                   2166:   inbz2 = BZ2_bzopen(pathname, "rb");
                   2167:   handle = (void *)inbz2;
                   2168:   frtype = FR_LIBBZ2;
                   2169:   }
                   2170: else
                   2171: #endif
                   2172: 
                   2173: /* Otherwise use plain fopen(). The label is so that we can come back here if
                   2174: an attempt to read a .bz2 file indicates that it really is a plain file. */
                   2175: 
                   2176: #ifdef SUPPORT_LIBBZ2
                   2177: PLAIN_FILE:
                   2178: #endif
                   2179:   {
                   2180:   in = fopen(pathname, "rb");
                   2181:   handle = (void *)in;
                   2182:   frtype = FR_PLAIN;
                   2183:   }
                   2184: 
                   2185: /* All the opening methods return errno when they fail. */
                   2186: 
                   2187: if (handle == NULL)
                   2188:   {
                   2189:   if (!silent)
                   2190:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
                   2191:       strerror(errno));
                   2192:   return 2;
                   2193:   }
                   2194: 
                   2195: /* Now grep the file */
                   2196: 
                   2197: rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
                   2198:   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
                   2199: 
                   2200: /* Close in an appropriate manner. */
                   2201: 
                   2202: #ifdef SUPPORT_LIBZ
                   2203: if (frtype == FR_LIBZ)
                   2204:   gzclose(ingz);
                   2205: else
                   2206: #endif
                   2207: 
                   2208: /* If it is a .bz2 file and the result is 3, it means that the first attempt to
                   2209: read failed. If the error indicates that the file isn't in fact bzipped, try
                   2210: again as a normal file. */
                   2211: 
                   2212: #ifdef SUPPORT_LIBBZ2
                   2213: if (frtype == FR_LIBBZ2)
                   2214:   {
                   2215:   if (rc == 3)
                   2216:     {
                   2217:     int errnum;
                   2218:     const char *err = BZ2_bzerror(inbz2, &errnum);
                   2219:     if (errnum == BZ_DATA_ERROR_MAGIC)
                   2220:       {
                   2221:       BZ2_bzclose(inbz2);
                   2222:       goto PLAIN_FILE;
                   2223:       }
                   2224:     else if (!silent)
                   2225:       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
                   2226:         pathname, err);
                   2227:     rc = 2;    /* The normal "something went wrong" code */
                   2228:     }
                   2229:   BZ2_bzclose(inbz2);
                   2230:   }
                   2231: else
                   2232: #endif
                   2233: 
                   2234: /* Normal file close */
                   2235: 
                   2236: fclose(in);
                   2237: 
                   2238: /* Pass back the yield from pcregrep(). */
                   2239: 
                   2240: return rc;
                   2241: }
                   2242: 
                   2243: 
                   2244: 
                   2245: /*************************************************
                   2246: *    Handle a single-letter, no data option      *
                   2247: *************************************************/
                   2248: 
                   2249: static int
                   2250: handle_option(int letter, int options)
                   2251: {
                   2252: switch(letter)
                   2253:   {
                   2254:   case N_FOFFSETS: file_offsets = TRUE; break;
                   2255:   case N_HELP: help(); pcregrep_exit(0);
                   2256:   case N_LBUFFER: line_buffered = TRUE; break;
                   2257:   case N_LOFFSETS: line_offsets = number = TRUE; break;
                   2258:   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1.1.1.3   misho    2259:   case 'a': binary_files = BIN_TEXT; break;
1.1       misho    2260:   case 'c': count_only = TRUE; break;
                   2261:   case 'F': process_options |= PO_FIXED_STRINGS; break;
                   2262:   case 'H': filenames = FN_FORCE; break;
1.1.1.3   misho    2263:   case 'I': binary_files = BIN_NOMATCH; break;
1.1       misho    2264:   case 'h': filenames = FN_NONE; break;
                   2265:   case 'i': options |= PCRE_CASELESS; break;
                   2266:   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
                   2267:   case 'L': filenames = FN_NOMATCH_ONLY; break;
                   2268:   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
                   2269:   case 'n': number = TRUE; break;
1.1.1.4 ! misho    2270: 
        !          2271:   case 'o':
        !          2272:   only_matching_last = add_number(0, only_matching_last);
        !          2273:   if (only_matching == NULL) only_matching = only_matching_last;
        !          2274:   break;
        !          2275: 
1.1       misho    2276:   case 'q': quiet = TRUE; break;
                   2277:   case 'r': dee_action = dee_RECURSE; break;
                   2278:   case 's': silent = TRUE; break;
                   2279:   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
                   2280:   case 'v': invert = TRUE; break;
                   2281:   case 'w': process_options |= PO_WORD_MATCH; break;
                   2282:   case 'x': process_options |= PO_LINE_MATCH; break;
                   2283: 
                   2284:   case 'V':
1.1.1.4 ! misho    2285:   fprintf(stdout, "pcregrep version %s\n", pcre_version());
1.1       misho    2286:   pcregrep_exit(0);
                   2287:   break;
                   2288: 
                   2289:   default:
                   2290:   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
                   2291:   pcregrep_exit(usage(2));
                   2292:   }
                   2293: 
                   2294: return options;
                   2295: }
                   2296: 
                   2297: 
                   2298: 
                   2299: 
                   2300: /*************************************************
                   2301: *          Construct printed ordinal             *
                   2302: *************************************************/
                   2303: 
                   2304: /* This turns a number into "1st", "3rd", etc. */
                   2305: 
                   2306: static char *
                   2307: ordin(int n)
                   2308: {
                   2309: static char buffer[8];
                   2310: char *p = buffer;
                   2311: sprintf(p, "%d", n);
                   2312: while (*p != 0) p++;
                   2313: switch (n%10)
                   2314:   {
                   2315:   case 1: strcpy(p, "st"); break;
                   2316:   case 2: strcpy(p, "nd"); break;
                   2317:   case 3: strcpy(p, "rd"); break;
                   2318:   default: strcpy(p, "th"); break;
                   2319:   }
                   2320: return buffer;
                   2321: }
                   2322: 
                   2323: 
                   2324: 
                   2325: /*************************************************
                   2326: *          Compile a single pattern              *
                   2327: *************************************************/
                   2328: 
1.1.1.4 ! misho    2329: /* Do nothing if the pattern has already been compiled. This is the case for
        !          2330: include/exclude patterns read from a file.
        !          2331: 
        !          2332: When the -F option has been used, each "pattern" may be a list of strings,
        !          2333: separated by line breaks. They will be matched literally. We split such a
        !          2334: string and compile the first substring, inserting an additional block into the
        !          2335: pattern chain.
1.1       misho    2336: 
                   2337: Arguments:
1.1.1.4 ! misho    2338:   p              points to the pattern block
1.1       misho    2339:   options        the PCRE options
1.1.1.4 ! misho    2340:   popts          the processing options
        !          2341:   fromfile       TRUE if the pattern was read from a file
        !          2342:   fromtext       file name or identifying text (e.g. "include")
1.1       misho    2343:   count          0 if this is the only command line pattern, or
                   2344:                  number of the command line pattern, or
                   2345:                  linenumber for a pattern from a file
                   2346: 
                   2347: Returns:         TRUE on success, FALSE after an error
                   2348: */
                   2349: 
                   2350: static BOOL
1.1.1.4 ! misho    2351: compile_pattern(patstr *p, int options, int popts, int fromfile,
        !          2352:   const char *fromtext, int count)
1.1       misho    2353: {
                   2354: char buffer[PATBUFSIZE];
                   2355: const char *error;
1.1.1.4 ! misho    2356: char *ps = p->string;
        !          2357: int patlen = strlen(ps);
1.1       misho    2358: int errptr;
                   2359: 
1.1.1.4 ! misho    2360: if (p->compiled != NULL) return TRUE;
1.1       misho    2361: 
1.1.1.4 ! misho    2362: if ((popts & PO_FIXED_STRINGS) != 0)
1.1       misho    2363:   {
1.1.1.4 ! misho    2364:   int ellength;
        !          2365:   char *eop = ps + patlen;
        !          2366:   char *pe = end_of_line(ps, eop, &ellength);
        !          2367: 
        !          2368:   if (ellength != 0)
        !          2369:     {
        !          2370:     if (add_pattern(pe, p) == NULL) return FALSE;
        !          2371:     patlen = (int)(pe - ps - ellength);
        !          2372:     }
1.1       misho    2373:   }
                   2374: 
1.1.1.4 ! misho    2375: sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
        !          2376: p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
        !          2377: if (p->compiled != NULL) return TRUE;
        !          2378: 
1.1       misho    2379: /* Handle compile errors */
                   2380: 
1.1.1.4 ! misho    2381: errptr -= (int)strlen(prefix[popts]);
        !          2382: if (errptr > patlen) errptr = patlen;
1.1       misho    2383: 
1.1.1.4 ! misho    2384: if (fromfile)
1.1       misho    2385:   {
1.1.1.4 ! misho    2386:   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
        !          2387:     "at offset %d: %s\n", count, fromtext, errptr, error);
1.1       misho    2388:   }
                   2389: else
                   2390:   {
1.1.1.4 ! misho    2391:   if (count == 0)
        !          2392:     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
        !          2393:       fromtext, errptr, error);
        !          2394:   else
        !          2395:     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
        !          2396:       ordin(count), fromtext, errptr, error);
1.1       misho    2397:   }
                   2398: 
                   2399: return FALSE;
                   2400: }
                   2401: 
                   2402: 
                   2403: 
                   2404: /*************************************************
1.1.1.4 ! misho    2405: *     Read and compile a file of patterns        *
1.1       misho    2406: *************************************************/
                   2407: 
1.1.1.4 ! misho    2408: /* This is used for --filelist, --include-from, and --exclude-from.
1.1       misho    2409: 
                   2410: Arguments:
1.1.1.4 ! misho    2411:   name         the name of the file; "-" is stdin
        !          2412:   patptr       pointer to the pattern chain anchor
        !          2413:   patlastptr   pointer to the last pattern pointer
        !          2414:   popts        the process options to pass to pattern_compile()
1.1       misho    2415: 
1.1.1.4 ! misho    2416: Returns:       TRUE if all went well
1.1       misho    2417: */
                   2418: 
                   2419: static BOOL
1.1.1.4 ! misho    2420: read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
1.1       misho    2421: {
1.1.1.4 ! misho    2422: int linenumber = 0;
        !          2423: FILE *f;
        !          2424: char *filename;
        !          2425: char buffer[PATBUFSIZE];
        !          2426: 
        !          2427: if (strcmp(name, "-") == 0)
1.1       misho    2428:   {
1.1.1.4 ! misho    2429:   f = stdin;
        !          2430:   filename = stdin_name;
        !          2431:   }
        !          2432: else
        !          2433:   {
        !          2434:   f = fopen(name, "r");
        !          2435:   if (f == NULL)
        !          2436:     {
        !          2437:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
        !          2438:     return FALSE;
        !          2439:     }
        !          2440:   filename = name;
        !          2441:   }
        !          2442: 
        !          2443: while (fgets(buffer, PATBUFSIZE, f) != NULL)
        !          2444:   {
        !          2445:   char *s = buffer + (int)strlen(buffer);
        !          2446:   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
        !          2447:   *s = 0;
        !          2448:   linenumber++;
        !          2449:   if (buffer[0] == 0) continue;   /* Skip blank lines */
        !          2450: 
        !          2451:   /* Note: this call to add_pattern() puts a pointer to the local variable
        !          2452:   "buffer" into the pattern chain. However, that pointer is used only when
        !          2453:   compiling the pattern, which happens immediately below, so we flatten it
        !          2454:   afterwards, as a precaution against any later code trying to use it. */
        !          2455: 
        !          2456:   *patlastptr = add_pattern(buffer, *patlastptr);
        !          2457:   if (*patlastptr == NULL) return FALSE;
        !          2458:   if (*patptr == NULL) *patptr = *patlastptr;
        !          2459: 
        !          2460:   /* This loop is needed because compiling a "pattern" when -F is set may add
        !          2461:   on additional literal patterns if the original contains a newline. In the
        !          2462:   common case, it never will, because fgets() stops at a newline. However,
        !          2463:   the -N option can be used to give pcregrep a different newline setting. */
        !          2464: 
1.1       misho    2465:   for(;;)
                   2466:     {
1.1.1.4 ! misho    2467:     if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
        !          2468:         linenumber))
1.1       misho    2469:       return FALSE;
1.1.1.4 ! misho    2470:     (*patlastptr)->string = NULL;            /* Insurance */
        !          2471:     if ((*patlastptr)->next == NULL) break;
        !          2472:     *patlastptr = (*patlastptr)->next;
1.1       misho    2473:     }
                   2474:   }
1.1.1.4 ! misho    2475: 
        !          2476: if (f != stdin) fclose(f);
        !          2477: return TRUE;
1.1       misho    2478: }
                   2479: 
                   2480: 
                   2481: 
                   2482: /*************************************************
                   2483: *                Main program                    *
                   2484: *************************************************/
                   2485: 
                   2486: /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
                   2487: 
                   2488: int
                   2489: main(int argc, char **argv)
                   2490: {
                   2491: int i, j;
                   2492: int rc = 1;
                   2493: BOOL only_one_at_top;
1.1.1.4 ! misho    2494: patstr *cp;
        !          2495: fnstr *fn;
1.1       misho    2496: const char *locale_from = "--locale";
                   2497: const char *error;
                   2498: 
                   2499: #ifdef SUPPORT_PCREGREP_JIT
                   2500: pcre_jit_stack *jit_stack = NULL;
                   2501: #endif
                   2502: 
                   2503: /* Set the default line ending value from the default in the PCRE library;
                   2504: "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
                   2505: Note that the return values from pcre_config(), though derived from the ASCII
                   2506: codes, are the same in EBCDIC environments, so we must use the actual values
                   2507: rather than escapes such as '\r'. */
                   2508: 
                   2509: (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
                   2510: switch(i)
                   2511:   {
                   2512:   default:               newline = (char *)"lf"; break;
                   2513:   case 13:               newline = (char *)"cr"; break;
                   2514:   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
                   2515:   case -1:               newline = (char *)"any"; break;
                   2516:   case -2:               newline = (char *)"anycrlf"; break;
                   2517:   }
                   2518: 
                   2519: /* Process the options */
                   2520: 
                   2521: for (i = 1; i < argc; i++)
                   2522:   {
                   2523:   option_item *op = NULL;
                   2524:   char *option_data = (char *)"";    /* default to keep compiler happy */
                   2525:   BOOL longop;
                   2526:   BOOL longopwasequals = FALSE;
                   2527: 
                   2528:   if (argv[i][0] != '-') break;
                   2529: 
                   2530:   /* If we hit an argument that is just "-", it may be a reference to STDIN,
                   2531:   but only if we have previously had -e or -f to define the patterns. */
                   2532: 
                   2533:   if (argv[i][1] == 0)
                   2534:     {
1.1.1.4 ! misho    2535:     if (pattern_files != NULL || patterns != NULL) break;
1.1       misho    2536:       else pcregrep_exit(usage(2));
                   2537:     }
                   2538: 
                   2539:   /* Handle a long name option, or -- to terminate the options */
                   2540: 
                   2541:   if (argv[i][1] == '-')
                   2542:     {
                   2543:     char *arg = argv[i] + 2;
                   2544:     char *argequals = strchr(arg, '=');
                   2545: 
                   2546:     if (*arg == 0)    /* -- terminates options */
                   2547:       {
                   2548:       i++;
                   2549:       break;                /* out of the options-handling loop */
                   2550:       }
                   2551: 
                   2552:     longop = TRUE;
                   2553: 
                   2554:     /* Some long options have data that follows after =, for example file=name.
                   2555:     Some options have variations in the long name spelling: specifically, we
                   2556:     allow "regexp" because GNU grep allows it, though I personally go along
                   2557:     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
                   2558:     These options are entered in the table as "regex(p)". Options can be in
                   2559:     both these categories. */
                   2560: 
                   2561:     for (op = optionlist; op->one_char != 0; op++)
                   2562:       {
                   2563:       char *opbra = strchr(op->long_name, '(');
                   2564:       char *equals = strchr(op->long_name, '=');
                   2565: 
                   2566:       /* Handle options with only one spelling of the name */
                   2567: 
                   2568:       if (opbra == NULL)     /* Does not contain '(' */
                   2569:         {
                   2570:         if (equals == NULL)  /* Not thing=data case */
                   2571:           {
                   2572:           if (strcmp(arg, op->long_name) == 0) break;
                   2573:           }
                   2574:         else                 /* Special case xxx=data */
                   2575:           {
                   2576:           int oplen = (int)(equals - op->long_name);
                   2577:           int arglen = (argequals == NULL)?
                   2578:             (int)strlen(arg) : (int)(argequals - arg);
                   2579:           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
                   2580:             {
                   2581:             option_data = arg + arglen;
                   2582:             if (*option_data == '=')
                   2583:               {
                   2584:               option_data++;
                   2585:               longopwasequals = TRUE;
                   2586:               }
                   2587:             break;
                   2588:             }
                   2589:           }
                   2590:         }
                   2591: 
                   2592:       /* Handle options with an alternate spelling of the name */
                   2593: 
                   2594:       else
                   2595:         {
                   2596:         char buff1[24];
                   2597:         char buff2[24];
                   2598: 
                   2599:         int baselen = (int)(opbra - op->long_name);
                   2600:         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
                   2601:         int arglen = (argequals == NULL || equals == NULL)?
                   2602:           (int)strlen(arg) : (int)(argequals - arg);
                   2603: 
                   2604:         sprintf(buff1, "%.*s", baselen, op->long_name);
                   2605:         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
                   2606: 
                   2607:         if (strncmp(arg, buff1, arglen) == 0 ||
                   2608:            strncmp(arg, buff2, arglen) == 0)
                   2609:           {
                   2610:           if (equals != NULL && argequals != NULL)
                   2611:             {
                   2612:             option_data = argequals;
                   2613:             if (*option_data == '=')
                   2614:               {
                   2615:               option_data++;
                   2616:               longopwasequals = TRUE;
                   2617:               }
                   2618:             }
                   2619:           break;
                   2620:           }
                   2621:         }
                   2622:       }
                   2623: 
                   2624:     if (op->one_char == 0)
                   2625:       {
                   2626:       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
                   2627:       pcregrep_exit(usage(2));
                   2628:       }
                   2629:     }
                   2630: 
                   2631:   /* Jeffrey Friedl's debugging harness uses these additional options which
                   2632:   are not in the right form for putting in the option table because they use
                   2633:   only one hyphen, yet are more than one character long. By putting them
                   2634:   separately here, they will not get displayed as part of the help() output,
                   2635:   but I don't think Jeffrey will care about that. */
                   2636: 
                   2637: #ifdef JFRIEDL_DEBUG
                   2638:   else if (strcmp(argv[i], "-pre") == 0) {
                   2639:           jfriedl_prefix = argv[++i];
                   2640:           continue;
                   2641:   } else if (strcmp(argv[i], "-post") == 0) {
                   2642:           jfriedl_postfix = argv[++i];
                   2643:           continue;
                   2644:   } else if (strcmp(argv[i], "-XT") == 0) {
                   2645:           sscanf(argv[++i], "%d", &jfriedl_XT);
                   2646:           continue;
                   2647:   } else if (strcmp(argv[i], "-XR") == 0) {
                   2648:           sscanf(argv[++i], "%d", &jfriedl_XR);
                   2649:           continue;
                   2650:   }
                   2651: #endif
                   2652: 
                   2653: 
                   2654:   /* One-char options; many that have no data may be in a single argument; we
                   2655:   continue till we hit the last one or one that needs data. */
                   2656: 
                   2657:   else
                   2658:     {
                   2659:     char *s = argv[i] + 1;
                   2660:     longop = FALSE;
1.1.1.4 ! misho    2661: 
1.1       misho    2662:     while (*s != 0)
                   2663:       {
                   2664:       for (op = optionlist; op->one_char != 0; op++)
                   2665:         {
                   2666:         if (*s == op->one_char) break;
                   2667:         }
                   2668:       if (op->one_char == 0)
                   2669:         {
                   2670:         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
                   2671:           *s, argv[i]);
                   2672:         pcregrep_exit(usage(2));
                   2673:         }
                   2674: 
1.1.1.4 ! misho    2675:       option_data = s+1;
        !          2676: 
        !          2677:       /* Break out if this is the last character in the string; it's handled
        !          2678:       below like a single multi-char option. */
        !          2679: 
        !          2680:       if (*option_data == 0) break;
        !          2681: 
        !          2682:       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
        !          2683:       are used for ones that either have a numerical number or defaults, i.e.
        !          2684:       the data is optional. If a digit follows, there is data; if not, carry on
1.1       misho    2685:       with other single-character options in the same string. */
                   2686: 
1.1.1.4 ! misho    2687:       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
1.1       misho    2688:         {
                   2689:         if (isdigit((unsigned char)s[1])) break;
                   2690:         }
1.1.1.4 ! misho    2691:       else   /* Check for an option with data */
1.1       misho    2692:         {
1.1.1.4 ! misho    2693:         if (op->type != OP_NODATA) break;
1.1       misho    2694:         }
                   2695: 
                   2696:       /* Handle a single-character option with no data, then loop for the
                   2697:       next character in the string. */
                   2698: 
                   2699:       pcre_options = handle_option(*s++, pcre_options);
                   2700:       }
                   2701:     }
                   2702: 
                   2703:   /* At this point we should have op pointing to a matched option. If the type
                   2704:   is NO_DATA, it means that there is no data, and the option might set
                   2705:   something in the PCRE options. */
                   2706: 
                   2707:   if (op->type == OP_NODATA)
                   2708:     {
                   2709:     pcre_options = handle_option(op->one_char, pcre_options);
                   2710:     continue;
                   2711:     }
                   2712: 
1.1.1.4 ! misho    2713:   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
1.1       misho    2714:   either has a value or defaults to something. It cannot have data in a
                   2715:   separate item. At the moment, the only such options are "colo(u)r",
                   2716:   "only-matching", and Jeffrey Friedl's special -S debugging option. */
                   2717: 
                   2718:   if (*option_data == 0 &&
1.1.1.4 ! misho    2719:       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
        !          2720:        op->type == OP_OP_NUMBERS))
1.1       misho    2721:     {
                   2722:     switch (op->one_char)
                   2723:       {
                   2724:       case N_COLOUR:
                   2725:       colour_option = (char *)"auto";
                   2726:       break;
                   2727: 
                   2728:       case 'o':
1.1.1.4 ! misho    2729:       only_matching_last = add_number(0, only_matching_last);
        !          2730:       if (only_matching == NULL) only_matching = only_matching_last;
1.1       misho    2731:       break;
                   2732: 
                   2733: #ifdef JFRIEDL_DEBUG
                   2734:       case 'S':
                   2735:       S_arg = 0;
                   2736:       break;
                   2737: #endif
                   2738:       }
                   2739:     continue;
                   2740:     }
                   2741: 
                   2742:   /* Otherwise, find the data string for the option. */
                   2743: 
                   2744:   if (*option_data == 0)
                   2745:     {
                   2746:     if (i >= argc - 1 || longopwasequals)
                   2747:       {
                   2748:       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
                   2749:       pcregrep_exit(usage(2));
                   2750:       }
                   2751:     option_data = argv[++i];
                   2752:     }
                   2753: 
1.1.1.4 ! misho    2754:   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
        !          2755:   added to a chain of numbers. */
        !          2756: 
        !          2757:   if (op->type == OP_OP_NUMBERS)
        !          2758:     {
        !          2759:     unsigned long int n = decode_number(option_data, op, longop);
        !          2760:     omdatastr *omd = (omdatastr *)op->dataptr;
        !          2761:     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
        !          2762:     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
        !          2763:     }
        !          2764: 
        !          2765:   /* If the option type is OP_PATLIST, it's the -e option, or one of the
        !          2766:   include/exclude options, which can be called multiple times to create lists
        !          2767:   of patterns. */
1.1       misho    2768: 
1.1.1.4 ! misho    2769:   else if (op->type == OP_PATLIST)
1.1       misho    2770:     {
1.1.1.4 ! misho    2771:     patdatastr *pd = (patdatastr *)op->dataptr;
        !          2772:     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
        !          2773:     if (*(pd->lastptr) == NULL) goto EXIT2;
        !          2774:     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
        !          2775:     }
        !          2776: 
        !          2777:   /* If the option type is OP_FILELIST, it's one of the options that names a
        !          2778:   file. */
        !          2779: 
        !          2780:   else if (op->type == OP_FILELIST)
        !          2781:     {
        !          2782:     fndatastr *fd = (fndatastr *)op->dataptr;
        !          2783:     fn = (fnstr *)malloc(sizeof(fnstr));
        !          2784:     if (fn == NULL)
1.1       misho    2785:       {
1.1.1.4 ! misho    2786:       fprintf(stderr, "pcregrep: malloc failed\n");
        !          2787:       goto EXIT2;
1.1       misho    2788:       }
1.1.1.4 ! misho    2789:     fn->next = NULL;
        !          2790:     fn->name = option_data;
        !          2791:     if (*(fd->anchor) == NULL)
        !          2792:       *(fd->anchor) = fn;
        !          2793:     else
        !          2794:       (*(fd->lastptr))->next = fn;
        !          2795:     *(fd->lastptr) = fn;
1.1       misho    2796:     }
                   2797: 
1.1.1.3   misho    2798:   /* Handle OP_BINFILES */
                   2799: 
                   2800:   else if (op->type == OP_BINFILES)
                   2801:     {
                   2802:     if (strcmp(option_data, "binary") == 0)
                   2803:       binary_files = BIN_BINARY;
                   2804:     else if (strcmp(option_data, "without-match") == 0)
                   2805:       binary_files = BIN_NOMATCH;
                   2806:     else if (strcmp(option_data, "text") == 0)
                   2807:       binary_files = BIN_TEXT;
                   2808:     else
                   2809:       {
                   2810:       fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
                   2811:         option_data);
                   2812:       pcregrep_exit(usage(2));
                   2813:       }
                   2814:     }
                   2815: 
1.1.1.4 ! misho    2816:   /* Otherwise, deal with a single string or numeric data value. */
1.1       misho    2817: 
                   2818:   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
                   2819:            op->type != OP_OP_NUMBER)
                   2820:     {
                   2821:     *((char **)op->dataptr) = option_data;
                   2822:     }
                   2823:   else
                   2824:     {
1.1.1.4 ! misho    2825:     unsigned long int n = decode_number(option_data, op, longop);
        !          2826:     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
        !          2827:       else *((int *)op->dataptr) = n;
1.1       misho    2828:     }
                   2829:   }
                   2830: 
                   2831: /* Options have been decoded. If -C was used, its value is used as a default
                   2832: for -A and -B. */
                   2833: 
                   2834: if (both_context > 0)
                   2835:   {
                   2836:   if (after_context == 0) after_context = both_context;
                   2837:   if (before_context == 0) before_context = both_context;
                   2838:   }
                   2839: 
                   2840: /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1.1.1.4 ! misho    2841: However, all three set show_only_matching because they display, each in their
        !          2842: own way, only the data that has matched. */
1.1       misho    2843: 
1.1.1.4 ! misho    2844: if ((only_matching != NULL && (file_offsets || line_offsets)) ||
1.1       misho    2845:     (file_offsets && line_offsets))
                   2846:   {
                   2847:   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
                   2848:     "and/or --line-offsets\n");
                   2849:   pcregrep_exit(usage(2));
                   2850:   }
                   2851: 
1.1.1.4 ! misho    2852: if (only_matching != NULL || file_offsets || line_offsets)
        !          2853:   show_only_matching = TRUE;
1.1       misho    2854: 
                   2855: /* If a locale has not been provided as an option, see if the LC_CTYPE or
                   2856: LC_ALL environment variable is set, and if so, use it. */
                   2857: 
                   2858: if (locale == NULL)
                   2859:   {
                   2860:   locale = getenv("LC_ALL");
                   2861:   locale_from = "LCC_ALL";
                   2862:   }
                   2863: 
                   2864: if (locale == NULL)
                   2865:   {
                   2866:   locale = getenv("LC_CTYPE");
                   2867:   locale_from = "LC_CTYPE";
                   2868:   }
                   2869: 
                   2870: /* If a locale has been provided, set it, and generate the tables the PCRE
                   2871: needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
                   2872: 
                   2873: if (locale != NULL)
                   2874:   {
                   2875:   if (setlocale(LC_CTYPE, locale) == NULL)
                   2876:     {
                   2877:     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
                   2878:       locale, locale_from);
                   2879:     return 2;
                   2880:     }
                   2881:   pcretables = pcre_maketables();
                   2882:   }
                   2883: 
                   2884: /* Sort out colouring */
                   2885: 
                   2886: if (colour_option != NULL && strcmp(colour_option, "never") != 0)
                   2887:   {
                   2888:   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
                   2889:   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
                   2890:   else
                   2891:     {
                   2892:     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
                   2893:       colour_option);
                   2894:     return 2;
                   2895:     }
                   2896:   if (do_colour)
                   2897:     {
                   2898:     char *cs = getenv("PCREGREP_COLOUR");
                   2899:     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
                   2900:     if (cs != NULL) colour_string = cs;
                   2901:     }
                   2902:   }
                   2903: 
                   2904: /* Interpret the newline type; the default settings are Unix-like. */
                   2905: 
                   2906: if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
                   2907:   {
                   2908:   pcre_options |= PCRE_NEWLINE_CR;
                   2909:   endlinetype = EL_CR;
                   2910:   }
                   2911: else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
                   2912:   {
                   2913:   pcre_options |= PCRE_NEWLINE_LF;
                   2914:   endlinetype = EL_LF;
                   2915:   }
                   2916: else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
                   2917:   {
                   2918:   pcre_options |= PCRE_NEWLINE_CRLF;
                   2919:   endlinetype = EL_CRLF;
                   2920:   }
                   2921: else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
                   2922:   {
                   2923:   pcre_options |= PCRE_NEWLINE_ANY;
                   2924:   endlinetype = EL_ANY;
                   2925:   }
                   2926: else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
                   2927:   {
                   2928:   pcre_options |= PCRE_NEWLINE_ANYCRLF;
                   2929:   endlinetype = EL_ANYCRLF;
                   2930:   }
                   2931: else
                   2932:   {
                   2933:   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
                   2934:   return 2;
                   2935:   }
                   2936: 
                   2937: /* Interpret the text values for -d and -D */
                   2938: 
                   2939: if (dee_option != NULL)
                   2940:   {
                   2941:   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
                   2942:   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
                   2943:   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
                   2944:   else
                   2945:     {
                   2946:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
                   2947:     return 2;
                   2948:     }
                   2949:   }
                   2950: 
                   2951: if (DEE_option != NULL)
                   2952:   {
                   2953:   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
                   2954:   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
                   2955:   else
                   2956:     {
                   2957:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
                   2958:     return 2;
                   2959:     }
                   2960:   }
                   2961: 
                   2962: /* Check the values for Jeffrey Friedl's debugging options. */
                   2963: 
                   2964: #ifdef JFRIEDL_DEBUG
                   2965: if (S_arg > 9)
                   2966:   {
                   2967:   fprintf(stderr, "pcregrep: bad value for -S option\n");
                   2968:   return 2;
                   2969:   }
                   2970: if (jfriedl_XT != 0 || jfriedl_XR != 0)
                   2971:   {
                   2972:   if (jfriedl_XT == 0) jfriedl_XT = 1;
                   2973:   if (jfriedl_XR == 0) jfriedl_XR = 1;
                   2974:   }
                   2975: #endif
                   2976: 
1.1.1.4 ! misho    2977: /* Get memory for the main buffer. */
1.1       misho    2978: 
                   2979: bufsize = 3*bufthird;
                   2980: main_buffer = (char *)malloc(bufsize);
                   2981: 
1.1.1.4 ! misho    2982: if (main_buffer == NULL)
1.1       misho    2983:   {
                   2984:   fprintf(stderr, "pcregrep: malloc failed\n");
                   2985:   goto EXIT2;
                   2986:   }
                   2987: 
1.1.1.4 ! misho    2988: /* If no patterns were provided by -e, and there are no files provided by -f,
1.1       misho    2989: the first argument is the one and only pattern, and it must exist. */
                   2990: 
1.1.1.4 ! misho    2991: if (patterns == NULL && pattern_files == NULL)
1.1       misho    2992:   {
                   2993:   if (i >= argc) return usage(2);
1.1.1.4 ! misho    2994:   patterns = patterns_last = add_pattern(argv[i++], NULL);
        !          2995:   if (patterns == NULL) goto EXIT2;
1.1       misho    2996:   }
                   2997: 
                   2998: /* Compile the patterns that were provided on the command line, either by
1.1.1.4 ! misho    2999: multiple uses of -e or as a single unkeyed pattern. We cannot do this until
        !          3000: after all the command-line options are read so that we know which PCRE options
        !          3001: to use. When -F is used, compile_pattern() may add another block into the
        !          3002: chain, so we must not access the next pointer till after the compile. */
1.1       misho    3003: 
1.1.1.4 ! misho    3004: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
1.1       misho    3005:   {
1.1.1.4 ! misho    3006:   if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
        !          3007:        (j == 1 && patterns->next == NULL)? 0 : j))
1.1       misho    3008:     goto EXIT2;
                   3009:   }
                   3010: 
1.1.1.4 ! misho    3011: /* Read and compile the regular expressions that are provided in files. */
1.1       misho    3012: 
1.1.1.4 ! misho    3013: for (fn = pattern_files; fn != NULL; fn = fn->next)
1.1       misho    3014:   {
1.1.1.4 ! misho    3015:   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
        !          3016:     goto EXIT2;
        !          3017:   }
1.1       misho    3018: 
1.1.1.4 ! misho    3019: /* Study the regular expressions, as we will be running them many times. If an
        !          3020: extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
        !          3021: returned, even if studying produces no data. */
1.1       misho    3022: 
1.1.1.4 ! misho    3023: if (match_limit > 0 || match_limit_recursion > 0)
        !          3024:   study_options |= PCRE_STUDY_EXTRA_NEEDED;
1.1       misho    3025: 
1.1.1.4 ! misho    3026: /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
1.1       misho    3027: 
                   3028: #ifdef SUPPORT_PCREGREP_JIT
                   3029: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
                   3030:   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
                   3031: #endif
                   3032: 
1.1.1.4 ! misho    3033: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
1.1       misho    3034:   {
1.1.1.4 ! misho    3035:   cp->hint = pcre_study(cp->compiled, study_options, &error);
1.1       misho    3036:   if (error != NULL)
                   3037:     {
                   3038:     char s[16];
1.1.1.4 ! misho    3039:     if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
1.1       misho    3040:     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
                   3041:     goto EXIT2;
                   3042:     }
                   3043: #ifdef SUPPORT_PCREGREP_JIT
1.1.1.4 ! misho    3044:   if (jit_stack != NULL && cp->hint != NULL)
        !          3045:     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
1.1       misho    3046: #endif
                   3047:   }
                   3048: 
                   3049: /* If --match-limit or --recursion-limit was set, put the value(s) into the
1.1.1.4 ! misho    3050: pcre_extra block for each pattern. There will always be an extra block because
        !          3051: of the use of PCRE_STUDY_EXTRA_NEEDED above. */
1.1       misho    3052: 
1.1.1.4 ! misho    3053: for (cp = patterns; cp != NULL; cp = cp->next)
1.1       misho    3054:   {
1.1.1.4 ! misho    3055:   if (match_limit > 0)
1.1       misho    3056:     {
1.1.1.4 ! misho    3057:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
        !          3058:     cp->hint->match_limit = match_limit;
        !          3059:     }
        !          3060: 
        !          3061:   if (match_limit_recursion > 0)
        !          3062:     {
        !          3063:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        !          3064:     cp->hint->match_limit_recursion = match_limit_recursion;
1.1       misho    3065:     }
                   3066:   }
                   3067: 
1.1.1.4 ! misho    3068: /* If there are include or exclude patterns read from the command line, compile
        !          3069: them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
        !          3070: 0. */
1.1       misho    3071: 
1.1.1.4 ! misho    3072: for (j = 0; j < 4; j++)
1.1       misho    3073:   {
1.1.1.4 ! misho    3074:   int k;
        !          3075:   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
1.1       misho    3076:     {
1.1.1.4 ! misho    3077:     if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
        !          3078:          (k == 1 && cp->next == NULL)? 0 : k))
        !          3079:       goto EXIT2;
1.1       misho    3080:     }
                   3081:   }
                   3082: 
1.1.1.4 ! misho    3083: /* Read and compile include/exclude patterns from files. */
        !          3084: 
        !          3085: for (fn = include_from; fn != NULL; fn = fn->next)
1.1       misho    3086:   {
1.1.1.4 ! misho    3087:   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
1.1       misho    3088:     goto EXIT2;
                   3089:   }
                   3090: 
1.1.1.4 ! misho    3091: for (fn = exclude_from; fn != NULL; fn = fn->next)
1.1       misho    3092:   {
1.1.1.4 ! misho    3093:   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
1.1       misho    3094:     goto EXIT2;
                   3095:   }
                   3096: 
1.1.1.4 ! misho    3097: /* If there are no files that contain lists of files to search, and there are
        !          3098: no file arguments, search stdin, and then exit. */
        !          3099: 
        !          3100: if (file_lists == NULL && i >= argc)
1.1       misho    3101:   {
1.1.1.4 ! misho    3102:   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
        !          3103:     (filenames > FN_DEFAULT)? stdin_name : NULL);
        !          3104:   goto EXIT;
1.1       misho    3105:   }
                   3106: 
1.1.1.4 ! misho    3107: /* If any files that contain lists of files to search have been specified,
        !          3108: read them line by line and search the given files. */
1.1.1.3   misho    3109: 
1.1.1.4 ! misho    3110: for (fn = file_lists; fn != NULL; fn = fn->next)
1.1.1.3   misho    3111:   {
                   3112:   char buffer[PATBUFSIZE];
                   3113:   FILE *fl;
1.1.1.4 ! misho    3114:   if (strcmp(fn->name, "-") == 0) fl = stdin; else
1.1.1.3   misho    3115:     {
1.1.1.4 ! misho    3116:     fl = fopen(fn->name, "rb");
1.1.1.3   misho    3117:     if (fl == NULL)
                   3118:       {
1.1.1.4 ! misho    3119:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
1.1.1.3   misho    3120:         strerror(errno));
                   3121:       goto EXIT2;
                   3122:       }
                   3123:     }
                   3124:   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
                   3125:     {
                   3126:     int frc;
                   3127:     char *end = buffer + (int)strlen(buffer);
                   3128:     while (end > buffer && isspace(end[-1])) end--;
                   3129:     *end = 0;
                   3130:     if (*buffer != 0)
                   3131:       {
                   3132:       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
                   3133:       if (frc > 1) rc = frc;
                   3134:         else if (frc == 0 && rc == 1) rc = 0;
                   3135:       }
                   3136:     }
1.1.1.4 ! misho    3137:   if (fl != stdin) fclose(fl);
1.1.1.3   misho    3138:   }
                   3139: 
1.1.1.4 ! misho    3140: /* After handling file-list, work through remaining arguments. Pass in the fact
        !          3141: that there is only one argument at top level - this suppresses the file name if
        !          3142: the argument is not a directory and filenames are not otherwise forced. */
1.1       misho    3143: 
1.1.1.4 ! misho    3144: only_one_at_top = i == argc - 1 && file_lists == NULL;
1.1       misho    3145: 
                   3146: for (; i < argc; i++)
                   3147:   {
                   3148:   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
                   3149:     only_one_at_top);
                   3150:   if (frc > 1) rc = frc;
                   3151:     else if (frc == 0 && rc == 1) rc = 0;
                   3152:   }
                   3153: 
                   3154: EXIT:
                   3155: #ifdef SUPPORT_PCREGREP_JIT
                   3156: if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
                   3157: #endif
1.1.1.4 ! misho    3158: 
1.1       misho    3159: if (main_buffer != NULL) free(main_buffer);
1.1.1.4 ! misho    3160: 
        !          3161: free_pattern_chain(patterns);
        !          3162: free_pattern_chain(include_patterns);
        !          3163: free_pattern_chain(include_dir_patterns);
        !          3164: free_pattern_chain(exclude_patterns);
        !          3165: free_pattern_chain(exclude_dir_patterns);
        !          3166: 
        !          3167: free_file_chain(exclude_from);
        !          3168: free_file_chain(include_from);
        !          3169: free_file_chain(pattern_files);
        !          3170: free_file_chain(file_lists);
        !          3171: 
        !          3172: while (only_matching != NULL)
        !          3173:   {
        !          3174:   omstr *this = only_matching;
        !          3175:   only_matching = this->next;
        !          3176:   free(this);
1.1       misho    3177:   }
1.1.1.4 ! misho    3178: 
1.1       misho    3179: pcregrep_exit(rc);
                   3180: 
                   3181: EXIT2:
                   3182: rc = 2;
                   3183: goto EXIT;
                   3184: }
                   3185: 
                   3186: /* End of pcregrep */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>