File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcregrep.c
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:03 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /*************************************************
    2: *               pcregrep program                 *
    3: *************************************************/
    4: 
    5: /* This is a grep program that uses the PCRE regular expression library to do
    6: its pattern matching. On Unix-like, Windows, and native z/OS systems it can
    7: recurse into directories, and in z/OS it can handle PDS files.
    8: 
    9: Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
   10: additional header is required. That header is not included in the main PCRE
   11: distribution because other apparatus is needed to compile pcregrep for z/OS.
   12: The header can be found in the special z/OS distribution, which is available
   13: from www.zaconsultants.net or from www.cbttape.org.
   14: 
   15:            Copyright (c) 1997-2013 University of Cambridge
   16: 
   17: -----------------------------------------------------------------------------
   18: Redistribution and use in source and binary forms, with or without
   19: modification, are permitted provided that the following conditions are met:
   20: 
   21:     * Redistributions of source code must retain the above copyright notice,
   22:       this list of conditions and the following disclaimer.
   23: 
   24:     * Redistributions in binary form must reproduce the above copyright
   25:       notice, this list of conditions and the following disclaimer in the
   26:       documentation and/or other materials provided with the distribution.
   27: 
   28:     * Neither the name of the University of Cambridge nor the names of its
   29:       contributors may be used to endorse or promote products derived from
   30:       this software without specific prior written permission.
   31: 
   32: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   33: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   34: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   35: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   36: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   37: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   38: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   39: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   40: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   41: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   42: POSSIBILITY OF SUCH DAMAGE.
   43: -----------------------------------------------------------------------------
   44: */
   45: 
   46: #ifdef HAVE_CONFIG_H
   47: #include "config.h"
   48: #endif
   49: 
   50: #include <ctype.h>
   51: #include <locale.h>
   52: #include <stdio.h>
   53: #include <string.h>
   54: #include <stdlib.h>
   55: #include <errno.h>
   56: 
   57: #include <sys/types.h>
   58: #include <sys/stat.h>
   59: 
   60: #ifdef HAVE_UNISTD_H
   61: #include <unistd.h>
   62: #endif
   63: 
   64: #ifdef SUPPORT_LIBZ
   65: #include <zlib.h>
   66: #endif
   67: 
   68: #ifdef SUPPORT_LIBBZ2
   69: #include <bzlib.h>
   70: #endif
   71: 
   72: #include "pcre.h"
   73: 
   74: #define FALSE 0
   75: #define TRUE 1
   76: 
   77: typedef int BOOL;
   78: 
   79: #define OFFSET_SIZE 99
   80: 
   81: #if BUFSIZ > 8192
   82: #define MAXPATLEN BUFSIZ
   83: #else
   84: #define MAXPATLEN 8192
   85: #endif
   86: 
   87: #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
   88: 
   89: /* Values for the "filenames" variable, which specifies options for file name
   90: output. The order is important; it is assumed that a file name is wanted for
   91: all values greater than FN_DEFAULT. */
   92: 
   93: enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
   94: 
   95: /* File reading styles */
   96: 
   97: enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
   98: 
   99: /* Actions for the -d and -D options */
  100: 
  101: enum { dee_READ, dee_SKIP, dee_RECURSE };
  102: enum { DEE_READ, DEE_SKIP };
  103: 
  104: /* Actions for special processing options (flag bits) */
  105: 
  106: #define PO_WORD_MATCH     0x0001
  107: #define PO_LINE_MATCH     0x0002
  108: #define PO_FIXED_STRINGS  0x0004
  109: 
  110: /* Line ending types */
  111: 
  112: enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
  113: 
  114: /* Binary file options */
  115: 
  116: enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
  117: 
  118: /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
  119: environments), a warning is issued if the value of fwrite() is ignored.
  120: Unfortunately, casting to (void) does not suppress the warning. To get round
  121: this, we use a macro that compiles a fudge. Oddly, this does not also seem to
  122: apply to fprintf(). */
  123: 
  124: #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
  125: 
  126: 
  127: 
  128: /*************************************************
  129: *               Global variables                 *
  130: *************************************************/
  131: 
  132: /* Jeffrey Friedl has some debugging requirements that are not part of the
  133: regular code. */
  134: 
  135: #ifdef JFRIEDL_DEBUG
  136: static int S_arg = -1;
  137: static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
  138: static unsigned int jfriedl_XT = 0; /* replicate text this many times */
  139: static const char *jfriedl_prefix = "";
  140: static const char *jfriedl_postfix = "";
  141: #endif
  142: 
  143: static int  endlinetype;
  144: 
  145: static char *colour_string = (char *)"1;31";
  146: static char *colour_option = NULL;
  147: static char *dee_option = NULL;
  148: static char *DEE_option = NULL;
  149: static char *locale = NULL;
  150: static char *main_buffer = NULL;
  151: static char *newline = NULL;
  152: static char *om_separator = (char *)"";
  153: static char *stdin_name = (char *)"(standard input)";
  154: 
  155: static const unsigned char *pcretables = NULL;
  156: 
  157: static int after_context = 0;
  158: static int before_context = 0;
  159: static int binary_files = BIN_BINARY;
  160: static int both_context = 0;
  161: static int bufthird = PCREGREP_BUFSIZE;
  162: static int bufsize = 3*PCREGREP_BUFSIZE;
  163: 
  164: #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
  165: static int dee_action = dee_SKIP;
  166: #else
  167: static int dee_action = dee_READ;
  168: #endif
  169: 
  170: static int DEE_action = DEE_READ;
  171: static int error_count = 0;
  172: static int filenames = FN_DEFAULT;
  173: static int pcre_options = 0;
  174: static int process_options = 0;
  175: 
  176: #ifdef SUPPORT_PCREGREP_JIT
  177: static int study_options = PCRE_STUDY_JIT_COMPILE;
  178: #else
  179: static int study_options = 0;
  180: #endif
  181: 
  182: static unsigned long int match_limit = 0;
  183: static unsigned long int match_limit_recursion = 0;
  184: 
  185: static BOOL count_only = FALSE;
  186: static BOOL do_colour = FALSE;
  187: static BOOL file_offsets = FALSE;
  188: static BOOL hyphenpending = FALSE;
  189: static BOOL invert = FALSE;
  190: static BOOL line_buffered = FALSE;
  191: static BOOL line_offsets = FALSE;
  192: static BOOL multiline = FALSE;
  193: static BOOL number = FALSE;
  194: static BOOL omit_zero_count = FALSE;
  195: static BOOL resource_error = FALSE;
  196: static BOOL quiet = FALSE;
  197: static BOOL show_only_matching = FALSE;
  198: static BOOL silent = FALSE;
  199: static BOOL utf8 = FALSE;
  200: 
  201: /* Structure for list of --only-matching capturing numbers. */
  202: 
  203: typedef struct omstr {
  204:   struct omstr *next;
  205:   int groupnum;
  206: } omstr;
  207: 
  208: static omstr *only_matching = NULL;
  209: static omstr *only_matching_last = NULL;
  210: 
  211: /* Structure for holding the two variables that describe a number chain. */
  212: 
  213: typedef struct omdatastr {
  214:   omstr **anchor;
  215:   omstr **lastptr;
  216: } omdatastr;
  217: 
  218: static omdatastr only_matching_data = { &only_matching, &only_matching_last };
  219: 
  220: /* Structure for list of file names (for -f and --{in,ex}clude-from) */
  221: 
  222: typedef struct fnstr {
  223:   struct fnstr *next;
  224:   char *name;
  225: } fnstr;
  226: 
  227: static fnstr *exclude_from = NULL;
  228: static fnstr *exclude_from_last = NULL;
  229: static fnstr *include_from = NULL;
  230: static fnstr *include_from_last = NULL;
  231: 
  232: static fnstr *file_lists = NULL;
  233: static fnstr *file_lists_last = NULL;
  234: static fnstr *pattern_files = NULL;
  235: static fnstr *pattern_files_last = NULL;
  236: 
  237: /* Structure for holding the two variables that describe a file name chain. */
  238: 
  239: typedef struct fndatastr {
  240:   fnstr **anchor;
  241:   fnstr **lastptr;
  242: } fndatastr;
  243: 
  244: static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
  245: static fndatastr include_from_data = { &include_from, &include_from_last };
  246: static fndatastr file_lists_data = { &file_lists, &file_lists_last };
  247: static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
  248: 
  249: /* Structure for pattern and its compiled form; used for matching patterns and
  250: also for include/exclude patterns. */
  251: 
  252: typedef struct patstr {
  253:   struct patstr *next;
  254:   char *string;
  255:   pcre *compiled;
  256:   pcre_extra *hint;
  257: } patstr;
  258: 
  259: static patstr *patterns = NULL;
  260: static patstr *patterns_last = NULL;
  261: static patstr *include_patterns = NULL;
  262: static patstr *include_patterns_last = NULL;
  263: static patstr *exclude_patterns = NULL;
  264: static patstr *exclude_patterns_last = NULL;
  265: static patstr *include_dir_patterns = NULL;
  266: static patstr *include_dir_patterns_last = NULL;
  267: static patstr *exclude_dir_patterns = NULL;
  268: static patstr *exclude_dir_patterns_last = NULL;
  269: 
  270: /* Structure holding the two variables that describe a pattern chain. A pointer
  271: to such structures is used for each appropriate option. */
  272: 
  273: typedef struct patdatastr {
  274:   patstr **anchor;
  275:   patstr **lastptr;
  276: } patdatastr;
  277: 
  278: static patdatastr match_patdata = { &patterns, &patterns_last };
  279: static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
  280: static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
  281: static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
  282: static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
  283: 
  284: static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
  285:                                  &include_dir_patterns, &exclude_dir_patterns };
  286: 
  287: static const char *incexname[4] = { "--include", "--exclude",
  288:                                     "--include-dir", "--exclude-dir" };
  289: 
  290: /* Structure for options and list of them */
  291: 
  292: enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
  293:        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
  294: 
  295: typedef struct option_item {
  296:   int type;
  297:   int one_char;
  298:   void *dataptr;
  299:   const char *long_name;
  300:   const char *help_text;
  301: } option_item;
  302: 
  303: /* Options without a single-letter equivalent get a negative value. This can be
  304: used to identify them. */
  305: 
  306: #define N_COLOUR       (-1)
  307: #define N_EXCLUDE      (-2)
  308: #define N_EXCLUDE_DIR  (-3)
  309: #define N_HELP         (-4)
  310: #define N_INCLUDE      (-5)
  311: #define N_INCLUDE_DIR  (-6)
  312: #define N_LABEL        (-7)
  313: #define N_LOCALE       (-8)
  314: #define N_NULL         (-9)
  315: #define N_LOFFSETS     (-10)
  316: #define N_FOFFSETS     (-11)
  317: #define N_LBUFFER      (-12)
  318: #define N_M_LIMIT      (-13)
  319: #define N_M_LIMIT_REC  (-14)
  320: #define N_BUFSIZE      (-15)
  321: #define N_NOJIT        (-16)
  322: #define N_FILE_LIST    (-17)
  323: #define N_BINARY_FILES (-18)
  324: #define N_EXCLUDE_FROM (-19)
  325: #define N_INCLUDE_FROM (-20)
  326: #define N_OM_SEPARATOR (-21)
  327: 
  328: static option_item optionlist[] = {
  329:   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
  330:   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
  331:   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
  332:   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
  333:   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
  334:   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
  335:   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
  336:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
  337:   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
  338:   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
  339:   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
  340:   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
  341:   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
  342:   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
  343:   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
  344:   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
  345:   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
  346:   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
  347:   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
  348:   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
  349:   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
  350:   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
  351: #ifdef SUPPORT_PCREGREP_JIT
  352:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
  353: #else
  354:   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
  355: #endif
  356:   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
  357:   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
  358:   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
  359:   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
  360:   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
  361:   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
  362:   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
  363:   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
  364:   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
  365:   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  366:   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
  367:   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
  368:   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
  369:   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
  370:   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
  371:   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
  372:   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
  373:   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
  374:   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
  375:   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
  376:   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
  377: 
  378:   /* These two were accidentally implemented with underscores instead of
  379:   hyphens in the option names. As this was not discovered for several releases,
  380:   the incorrect versions are left in the table for compatibility. However, the
  381:   --help function misses out any option that has an underscore in its name. */
  382: 
  383:   { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
  384:   { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
  385: 
  386: #ifdef JFRIEDL_DEBUG
  387:   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
  388: #endif
  389:   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
  390:   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
  391:   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
  392:   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
  393:   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  394:   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  395:   { OP_NODATA,    0,        NULL,               NULL,            NULL }
  396: };
  397: 
  398: /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
  399: options. These set the 1, 2, and 4 bits in process_options, respectively. Note
  400: that the combination of -w and -x has the same effect as -x on its own, so we
  401: can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
  402: prefix+suffix is 10 characters; if anything longer is added, it must be
  403: adjusted. */
  404: 
  405: static const char *prefix[] = {
  406:   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
  407: 
  408: static const char *suffix[] = {
  409:   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
  410: 
  411: /* UTF-8 tables - used only when the newline setting is "any". */
  412: 
  413: const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
  414: 
  415: const char utf8_table4[] = {
  416:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  417:   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  418:   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  419:   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
  420: 
  421: 
  422: 
  423: /*************************************************
  424: *         Exit from the program                  *
  425: *************************************************/
  426: 
  427: /* If there has been a resource error, give a suitable message.
  428: 
  429: Argument:  the return code
  430: Returns:   does not return
  431: */
  432: 
  433: static void
  434: pcregrep_exit(int rc)
  435: {
  436: if (resource_error)
  437:   {
  438:   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
  439:     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
  440:     PCRE_ERROR_JIT_STACKLIMIT);
  441:   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
  442:   }
  443: exit(rc);
  444: }
  445: 
  446: 
  447: /*************************************************
  448: *          Add item to chain of patterns         *
  449: *************************************************/
  450: 
  451: /* Used to add an item onto a chain, or just return an unconnected item if the
  452: "after" argument is NULL.
  453: 
  454: Arguments:
  455:   s          pattern string to add
  456:   after      if not NULL points to item to insert after
  457: 
  458: Returns:     new pattern block
  459: */
  460: 
  461: static patstr *
  462: add_pattern(char *s, patstr *after)
  463: {
  464: patstr *p = (patstr *)malloc(sizeof(patstr));
  465: if (p == NULL)
  466:   {
  467:   fprintf(stderr, "pcregrep: malloc failed\n");
  468:   pcregrep_exit(2);
  469:   }
  470: if (strlen(s) > MAXPATLEN)
  471:   {
  472:   fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
  473:     MAXPATLEN);
  474:   return NULL;
  475:   }
  476: p->next = NULL;
  477: p->string = s;
  478: p->compiled = NULL;
  479: p->hint = NULL;
  480: 
  481: if (after != NULL)
  482:   {
  483:   p->next = after->next;
  484:   after->next = p;
  485:   }
  486: return p;
  487: }
  488: 
  489: 
  490: /*************************************************
  491: *           Free chain of patterns               *
  492: *************************************************/
  493: 
  494: /* Used for several chains of patterns.
  495: 
  496: Argument: pointer to start of chain
  497: Returns:  nothing
  498: */
  499: 
  500: static void
  501: free_pattern_chain(patstr *pc)
  502: {
  503: while (pc != NULL)
  504:   {
  505:   patstr *p = pc;
  506:   pc = p->next;
  507:   if (p->hint != NULL) pcre_free_study(p->hint);
  508:   if (p->compiled != NULL) pcre_free(p->compiled);
  509:   free(p);
  510:   }
  511: }
  512: 
  513: 
  514: /*************************************************
  515: *           Free chain of file names             *
  516: *************************************************/
  517: 
  518: /*
  519: Argument: pointer to start of chain
  520: Returns:  nothing
  521: */
  522: 
  523: static void
  524: free_file_chain(fnstr *fn)
  525: {
  526: while (fn != NULL)
  527:   {
  528:   fnstr *f = fn;
  529:   fn = f->next;
  530:   free(f);
  531:   }
  532: }
  533: 
  534: 
  535: /*************************************************
  536: *            OS-specific functions               *
  537: *************************************************/
  538: 
  539: /* These functions are defined so that they can be made system specific.
  540: At present there are versions for Unix-style environments, Windows, native
  541: z/OS, and "no support". */
  542: 
  543: 
  544: /************* Directory scanning Unix-style and z/OS ***********/
  545: 
  546: #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
  547: #include <sys/types.h>
  548: #include <sys/stat.h>
  549: #include <dirent.h>
  550: 
  551: #if defined NATIVE_ZOS
  552: /************* Directory and PDS/E scanning for z/OS ***********/
  553: /************* z/OS looks mostly like Unix with USS ************/
  554: /* However, z/OS needs the #include statements in this header */
  555: #include "pcrzosfs.h"
  556: /* That header is not included in the main PCRE distribution because
  557:    other apparatus is needed to compile pcregrep for z/OS. The header
  558:    can be found in the special z/OS distribution, which is available
  559:    from www.zaconsultants.net or from www.cbttape.org. */
  560: #endif
  561: 
  562: typedef DIR directory_type;
  563: #define FILESEP '/'
  564: 
  565: static int
  566: isdirectory(char *filename)
  567: {
  568: struct stat statbuf;
  569: if (stat(filename, &statbuf) < 0)
  570:   return 0;        /* In the expectation that opening as a file will fail */
  571: return (statbuf.st_mode & S_IFMT) == S_IFDIR;
  572: }
  573: 
  574: static directory_type *
  575: opendirectory(char *filename)
  576: {
  577: return opendir(filename);
  578: }
  579: 
  580: static char *
  581: readdirectory(directory_type *dir)
  582: {
  583: for (;;)
  584:   {
  585:   struct dirent *dent = readdir(dir);
  586:   if (dent == NULL) return NULL;
  587:   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
  588:     return dent->d_name;
  589:   }
  590: /* Control never reaches here */
  591: }
  592: 
  593: static void
  594: closedirectory(directory_type *dir)
  595: {
  596: closedir(dir);
  597: }
  598: 
  599: 
  600: /************* Test for regular file, Unix-style **********/
  601: 
  602: static int
  603: isregfile(char *filename)
  604: {
  605: struct stat statbuf;
  606: if (stat(filename, &statbuf) < 0)
  607:   return 1;        /* In the expectation that opening as a file will fail */
  608: return (statbuf.st_mode & S_IFMT) == S_IFREG;
  609: }
  610: 
  611: 
  612: #if defined NATIVE_ZOS
  613: /************* Test for a terminal in z/OS **********/
  614: /* isatty() does not work in a TSO environment, so always give FALSE.*/
  615: 
  616: static BOOL
  617: is_stdout_tty(void)
  618: {
  619: return FALSE;
  620: }
  621: 
  622: static BOOL
  623: is_file_tty(FILE *f)
  624: {
  625: return FALSE;
  626: }
  627: 
  628: 
  629: /************* Test for a terminal, Unix-style **********/
  630: 
  631: #else
  632: static BOOL
  633: is_stdout_tty(void)
  634: {
  635: return isatty(fileno(stdout));
  636: }
  637: 
  638: static BOOL
  639: is_file_tty(FILE *f)
  640: {
  641: return isatty(fileno(f));
  642: }
  643: #endif
  644: 
  645: /* End of Unix-style or native z/OS environment functions. */
  646: 
  647: 
  648: /************* Directory scanning in Windows ***********/
  649: 
  650: /* I (Philip Hazel) have no means of testing this code. It was contributed by
  651: Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
  652: when it did not exist. David Byron added a patch that moved the #include of
  653: <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
  654: The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
  655: undefined when it is indeed undefined. */
  656: 
  657: #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
  658: 
  659: #ifndef STRICT
  660: # define STRICT
  661: #endif
  662: #ifndef WIN32_LEAN_AND_MEAN
  663: # define WIN32_LEAN_AND_MEAN
  664: #endif
  665: 
  666: #include <windows.h>
  667: 
  668: #ifndef INVALID_FILE_ATTRIBUTES
  669: #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
  670: #endif
  671: 
  672: typedef struct directory_type
  673: {
  674: HANDLE handle;
  675: BOOL first;
  676: WIN32_FIND_DATA data;
  677: } directory_type;
  678: 
  679: #define FILESEP '/'
  680: 
  681: int
  682: isdirectory(char *filename)
  683: {
  684: DWORD attr = GetFileAttributes(filename);
  685: if (attr == INVALID_FILE_ATTRIBUTES)
  686:   return 0;
  687: return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
  688: }
  689: 
  690: directory_type *
  691: opendirectory(char *filename)
  692: {
  693: size_t len;
  694: char *pattern;
  695: directory_type *dir;
  696: DWORD err;
  697: len = strlen(filename);
  698: pattern = (char *)malloc(len + 3);
  699: dir = (directory_type *)malloc(sizeof(*dir));
  700: if ((pattern == NULL) || (dir == NULL))
  701:   {
  702:   fprintf(stderr, "pcregrep: malloc failed\n");
  703:   pcregrep_exit(2);
  704:   }
  705: memcpy(pattern, filename, len);
  706: memcpy(&(pattern[len]), "\\*", 3);
  707: dir->handle = FindFirstFile(pattern, &(dir->data));
  708: if (dir->handle != INVALID_HANDLE_VALUE)
  709:   {
  710:   free(pattern);
  711:   dir->first = TRUE;
  712:   return dir;
  713:   }
  714: err = GetLastError();
  715: free(pattern);
  716: free(dir);
  717: errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
  718: return NULL;
  719: }
  720: 
  721: char *
  722: readdirectory(directory_type *dir)
  723: {
  724: for (;;)
  725:   {
  726:   if (!dir->first)
  727:     {
  728:     if (!FindNextFile(dir->handle, &(dir->data)))
  729:       return NULL;
  730:     }
  731:   else
  732:     {
  733:     dir->first = FALSE;
  734:     }
  735:   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
  736:     return dir->data.cFileName;
  737:   }
  738: #ifndef _MSC_VER
  739: return NULL;   /* Keep compiler happy; never executed */
  740: #endif
  741: }
  742: 
  743: void
  744: closedirectory(directory_type *dir)
  745: {
  746: FindClose(dir->handle);
  747: free(dir);
  748: }
  749: 
  750: 
  751: /************* Test for regular file in Windows **********/
  752: 
  753: /* I don't know how to do this, or if it can be done; assume all paths are
  754: regular if they are not directories. */
  755: 
  756: int isregfile(char *filename)
  757: {
  758: return !isdirectory(filename);
  759: }
  760: 
  761: 
  762: /************* Test for a terminal in Windows **********/
  763: 
  764: /* I don't know how to do this; assume never */
  765: 
  766: static BOOL
  767: is_stdout_tty(void)
  768: {
  769: return FALSE;
  770: }
  771: 
  772: static BOOL
  773: is_file_tty(FILE *f)
  774: {
  775: return FALSE;
  776: }
  777: 
  778: /* End of Windows functions */
  779: 
  780: 
  781: /************* Directory scanning when we can't do it ***********/
  782: 
  783: /* The type is void, and apart from isdirectory(), the functions do nothing. */
  784: 
  785: #else
  786: 
  787: #define FILESEP 0
  788: typedef void directory_type;
  789: 
  790: int isdirectory(char *filename) { return 0; }
  791: directory_type * opendirectory(char *filename) { return (directory_type*)0;}
  792: char *readdirectory(directory_type *dir) { return (char*)0;}
  793: void closedirectory(directory_type *dir) {}
  794: 
  795: 
  796: /************* Test for regular file when we can't do it **********/
  797: 
  798: /* Assume all files are regular. */
  799: 
  800: int isregfile(char *filename) { return 1; }
  801: 
  802: 
  803: /************* Test for a terminal when we can't do it **********/
  804: 
  805: static BOOL
  806: is_stdout_tty(void)
  807: {
  808: return FALSE;
  809: }
  810: 
  811: static BOOL
  812: is_file_tty(FILE *f)
  813: {
  814: return FALSE;
  815: }
  816: 
  817: #endif  /* End of system-specific functions */
  818: 
  819: 
  820: 
  821: #ifndef HAVE_STRERROR
  822: /*************************************************
  823: *     Provide strerror() for non-ANSI libraries  *
  824: *************************************************/
  825: 
  826: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
  827: in their libraries, but can provide the same facility by this simple
  828: alternative function. */
  829: 
  830: extern int   sys_nerr;
  831: extern char *sys_errlist[];
  832: 
  833: char *
  834: strerror(int n)
  835: {
  836: if (n < 0 || n >= sys_nerr) return "unknown error number";
  837: return sys_errlist[n];
  838: }
  839: #endif /* HAVE_STRERROR */
  840: 
  841: 
  842: 
  843: /*************************************************
  844: *                Usage function                  *
  845: *************************************************/
  846: 
  847: static int
  848: usage(int rc)
  849: {
  850: option_item *op;
  851: fprintf(stderr, "Usage: pcregrep [-");
  852: for (op = optionlist; op->one_char != 0; op++)
  853:   {
  854:   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
  855:   }
  856: fprintf(stderr, "] [long options] [pattern] [files]\n");
  857: fprintf(stderr, "Type `pcregrep --help' for more information and the long "
  858:   "options.\n");
  859: return rc;
  860: }
  861: 
  862: 
  863: 
  864: /*************************************************
  865: *                Help function                   *
  866: *************************************************/
  867: 
  868: static void
  869: help(void)
  870: {
  871: option_item *op;
  872: 
  873: printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
  874: printf("Search for PATTERN in each FILE or standard input.\n");
  875: printf("PATTERN must be present if neither -e nor -f is used.\n");
  876: printf("\"-\" can be used as a file name to mean STDIN.\n");
  877: 
  878: #ifdef SUPPORT_LIBZ
  879: printf("Files whose names end in .gz are read using zlib.\n");
  880: #endif
  881: 
  882: #ifdef SUPPORT_LIBBZ2
  883: printf("Files whose names end in .bz2 are read using bzlib2.\n");
  884: #endif
  885: 
  886: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
  887: printf("Other files and the standard input are read as plain files.\n\n");
  888: #else
  889: printf("All files are read as plain files, without any interpretation.\n\n");
  890: #endif
  891: 
  892: printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
  893: printf("Options:\n");
  894: 
  895: for (op = optionlist; op->one_char != 0; op++)
  896:   {
  897:   int n;
  898:   char s[4];
  899: 
  900:   /* Two options were accidentally implemented and documented with underscores
  901:   instead of hyphens in their names, something that was not noticed for quite a
  902:   few releases. When fixing this, I left the underscored versions in the list
  903:   in case people were using them. However, we don't want to display them in the
  904:   help data. There are no other options that contain underscores, and we do not
  905:   expect ever to implement such options. Therefore, just omit any option that
  906:   contains an underscore. */
  907: 
  908:   if (strchr(op->long_name, '_') != NULL) continue;
  909: 
  910:   if (op->one_char > 0 && (op->long_name)[0] == 0)
  911:     n = 31 - printf("  -%c", op->one_char);
  912:   else
  913:     {
  914:     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
  915:       else strcpy(s, "   ");
  916:     n = 31 - printf("  %s --%s", s, op->long_name);
  917:     }
  918: 
  919:   if (n < 1) n = 1;
  920:   printf("%.*s%s\n", n, "                           ", op->help_text);
  921:   }
  922: 
  923: printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
  924: printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
  925: printf("When reading patterns or file names from a file, trailing white\n");
  926: printf("space is removed and blank lines are ignored.\n");
  927: printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
  928: 
  929: printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
  930: printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
  931: }
  932: 
  933: 
  934: 
  935: /*************************************************
  936: *            Test exclude/includes               *
  937: *************************************************/
  938: 
  939: /* If any exclude pattern matches, the path is excluded. Otherwise, unless
  940: there are no includes, the path must match an include pattern.
  941: 
  942: Arguments:
  943:   path      the path to be matched
  944:   ip        the chain of include patterns
  945:   ep        the chain of exclude patterns
  946: 
  947: Returns:    TRUE if the path is not excluded
  948: */
  949: 
  950: static BOOL
  951: test_incexc(char *path, patstr *ip, patstr *ep)
  952: {
  953: int plen = strlen(path);
  954: 
  955: for (; ep != NULL; ep = ep->next)
  956:   {
  957:   if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
  958:     return FALSE;
  959:   }
  960: 
  961: if (ip == NULL) return TRUE;
  962: 
  963: for (; ip != NULL; ip = ip->next)
  964:   {
  965:   if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
  966:     return TRUE;
  967:   }
  968: 
  969: return FALSE;
  970: }
  971: 
  972: 
  973: 
  974: /*************************************************
  975: *         Decode integer argument value          *
  976: *************************************************/
  977: 
  978: /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
  979: because SunOS4 doesn't have it. This is used only for unpicking arguments, so
  980: just keep it simple.
  981: 
  982: Arguments:
  983:   option_data   the option data string
  984:   op            the option item (for error messages)
  985:   longop        TRUE if option given in long form
  986: 
  987: Returns:        a long integer
  988: */
  989: 
  990: static long int
  991: decode_number(char *option_data, option_item *op, BOOL longop)
  992: {
  993: unsigned long int n = 0;
  994: char *endptr = option_data;
  995: while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
  996: while (isdigit((unsigned char)(*endptr)))
  997:   n = n * 10 + (int)(*endptr++ - '0');
  998: if (toupper(*endptr) == 'K')
  999:   {
 1000:   n *= 1024;
 1001:   endptr++;
 1002:   }
 1003: else if (toupper(*endptr) == 'M')
 1004:   {
 1005:   n *= 1024*1024;
 1006:   endptr++;
 1007:   }
 1008: 
 1009: if (*endptr != 0)   /* Error */
 1010:   {
 1011:   if (longop)
 1012:     {
 1013:     char *equals = strchr(op->long_name, '=');
 1014:     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
 1015:       (int)(equals - op->long_name);
 1016:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
 1017:       option_data, nlen, op->long_name);
 1018:     }
 1019:   else
 1020:     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
 1021:       option_data, op->one_char);
 1022:   pcregrep_exit(usage(2));
 1023:   }
 1024: 
 1025: return n;
 1026: }
 1027: 
 1028: 
 1029: 
 1030: /*************************************************
 1031: *       Add item to a chain of numbers           *
 1032: *************************************************/
 1033: 
 1034: /* Used to add an item onto a chain, or just return an unconnected item if the
 1035: "after" argument is NULL.
 1036: 
 1037: Arguments:
 1038:   n          the number to add
 1039:   after      if not NULL points to item to insert after
 1040: 
 1041: Returns:     new number block
 1042: */
 1043: 
 1044: static omstr *
 1045: add_number(int n, omstr *after)
 1046: {
 1047: omstr *om = (omstr *)malloc(sizeof(omstr));
 1048: 
 1049: if (om == NULL)
 1050:   {
 1051:   fprintf(stderr, "pcregrep: malloc failed\n");
 1052:   pcregrep_exit(2);
 1053:   }
 1054: om->next = NULL;
 1055: om->groupnum = n;
 1056: 
 1057: if (after != NULL)
 1058:   {
 1059:   om->next = after->next;
 1060:   after->next = om;
 1061:   }
 1062: return om;
 1063: }
 1064: 
 1065: 
 1066: 
 1067: /*************************************************
 1068: *            Read one line of input              *
 1069: *************************************************/
 1070: 
 1071: /* Normally, input is read using fread() into a large buffer, so many lines may
 1072: be read at once. However, doing this for tty input means that no output appears
 1073: until a lot of input has been typed. Instead, tty input is handled line by
 1074: line. We cannot use fgets() for this, because it does not stop at a binary
 1075: zero, and therefore there is no way of telling how many characters it has read,
 1076: because there may be binary zeros embedded in the data.
 1077: 
 1078: Arguments:
 1079:   buffer     the buffer to read into
 1080:   length     the maximum number of characters to read
 1081:   f          the file
 1082: 
 1083: Returns:     the number of characters read, zero at end of file
 1084: */
 1085: 
 1086: static unsigned int
 1087: read_one_line(char *buffer, int length, FILE *f)
 1088: {
 1089: int c;
 1090: int yield = 0;
 1091: while ((c = fgetc(f)) != EOF)
 1092:   {
 1093:   buffer[yield++] = c;
 1094:   if (c == '\n' || yield >= length) break;
 1095:   }
 1096: return yield;
 1097: }
 1098: 
 1099: 
 1100: 
 1101: /*************************************************
 1102: *             Find end of line                   *
 1103: *************************************************/
 1104: 
 1105: /* The length of the endline sequence that is found is set via lenptr. This may
 1106: be zero at the very end of the file if there is no line-ending sequence there.
 1107: 
 1108: Arguments:
 1109:   p         current position in line
 1110:   endptr    end of available data
 1111:   lenptr    where to put the length of the eol sequence
 1112: 
 1113: Returns:    pointer after the last byte of the line,
 1114:             including the newline byte(s)
 1115: */
 1116: 
 1117: static char *
 1118: end_of_line(char *p, char *endptr, int *lenptr)
 1119: {
 1120: switch(endlinetype)
 1121:   {
 1122:   default:      /* Just in case */
 1123:   case EL_LF:
 1124:   while (p < endptr && *p != '\n') p++;
 1125:   if (p < endptr)
 1126:     {
 1127:     *lenptr = 1;
 1128:     return p + 1;
 1129:     }
 1130:   *lenptr = 0;
 1131:   return endptr;
 1132: 
 1133:   case EL_CR:
 1134:   while (p < endptr && *p != '\r') p++;
 1135:   if (p < endptr)
 1136:     {
 1137:     *lenptr = 1;
 1138:     return p + 1;
 1139:     }
 1140:   *lenptr = 0;
 1141:   return endptr;
 1142: 
 1143:   case EL_CRLF:
 1144:   for (;;)
 1145:     {
 1146:     while (p < endptr && *p != '\r') p++;
 1147:     if (++p >= endptr)
 1148:       {
 1149:       *lenptr = 0;
 1150:       return endptr;
 1151:       }
 1152:     if (*p == '\n')
 1153:       {
 1154:       *lenptr = 2;
 1155:       return p + 1;
 1156:       }
 1157:     }
 1158:   break;
 1159: 
 1160:   case EL_ANYCRLF:
 1161:   while (p < endptr)
 1162:     {
 1163:     int extra = 0;
 1164:     register int c = *((unsigned char *)p);
 1165: 
 1166:     if (utf8 && c >= 0xc0)
 1167:       {
 1168:       int gcii, gcss;
 1169:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1170:       gcss = 6*extra;
 1171:       c = (c & utf8_table3[extra]) << gcss;
 1172:       for (gcii = 1; gcii <= extra; gcii++)
 1173:         {
 1174:         gcss -= 6;
 1175:         c |= (p[gcii] & 0x3f) << gcss;
 1176:         }
 1177:       }
 1178: 
 1179:     p += 1 + extra;
 1180: 
 1181:     switch (c)
 1182:       {
 1183:       case '\n':
 1184:       *lenptr = 1;
 1185:       return p;
 1186: 
 1187:       case '\r':
 1188:       if (p < endptr && *p == '\n')
 1189:         {
 1190:         *lenptr = 2;
 1191:         p++;
 1192:         }
 1193:       else *lenptr = 1;
 1194:       return p;
 1195: 
 1196:       default:
 1197:       break;
 1198:       }
 1199:     }   /* End of loop for ANYCRLF case */
 1200: 
 1201:   *lenptr = 0;  /* Must have hit the end */
 1202:   return endptr;
 1203: 
 1204:   case EL_ANY:
 1205:   while (p < endptr)
 1206:     {
 1207:     int extra = 0;
 1208:     register int c = *((unsigned char *)p);
 1209: 
 1210:     if (utf8 && c >= 0xc0)
 1211:       {
 1212:       int gcii, gcss;
 1213:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1214:       gcss = 6*extra;
 1215:       c = (c & utf8_table3[extra]) << gcss;
 1216:       for (gcii = 1; gcii <= extra; gcii++)
 1217:         {
 1218:         gcss -= 6;
 1219:         c |= (p[gcii] & 0x3f) << gcss;
 1220:         }
 1221:       }
 1222: 
 1223:     p += 1 + extra;
 1224: 
 1225:     switch (c)
 1226:       {
 1227:       case '\n':    /* LF */
 1228:       case '\v':    /* VT */
 1229:       case '\f':    /* FF */
 1230:       *lenptr = 1;
 1231:       return p;
 1232: 
 1233:       case '\r':    /* CR */
 1234:       if (p < endptr && *p == '\n')
 1235:         {
 1236:         *lenptr = 2;
 1237:         p++;
 1238:         }
 1239:       else *lenptr = 1;
 1240:       return p;
 1241: 
 1242: #ifndef EBCDIC
 1243:       case 0x85:    /* Unicode NEL */
 1244:       *lenptr = utf8? 2 : 1;
 1245:       return p;
 1246: 
 1247:       case 0x2028:  /* Unicode LS */
 1248:       case 0x2029:  /* Unicode PS */
 1249:       *lenptr = 3;
 1250:       return p;
 1251: #endif  /* Not EBCDIC */
 1252: 
 1253:       default:
 1254:       break;
 1255:       }
 1256:     }   /* End of loop for ANY case */
 1257: 
 1258:   *lenptr = 0;  /* Must have hit the end */
 1259:   return endptr;
 1260:   }     /* End of overall switch */
 1261: }
 1262: 
 1263: 
 1264: 
 1265: /*************************************************
 1266: *         Find start of previous line            *
 1267: *************************************************/
 1268: 
 1269: /* This is called when looking back for before lines to print.
 1270: 
 1271: Arguments:
 1272:   p         start of the subsequent line
 1273:   startptr  start of available data
 1274: 
 1275: Returns:    pointer to the start of the previous line
 1276: */
 1277: 
 1278: static char *
 1279: previous_line(char *p, char *startptr)
 1280: {
 1281: switch(endlinetype)
 1282:   {
 1283:   default:      /* Just in case */
 1284:   case EL_LF:
 1285:   p--;
 1286:   while (p > startptr && p[-1] != '\n') p--;
 1287:   return p;
 1288: 
 1289:   case EL_CR:
 1290:   p--;
 1291:   while (p > startptr && p[-1] != '\n') p--;
 1292:   return p;
 1293: 
 1294:   case EL_CRLF:
 1295:   for (;;)
 1296:     {
 1297:     p -= 2;
 1298:     while (p > startptr && p[-1] != '\n') p--;
 1299:     if (p <= startptr + 1 || p[-2] == '\r') return p;
 1300:     }
 1301:   return p;   /* But control should never get here */
 1302: 
 1303:   case EL_ANY:
 1304:   case EL_ANYCRLF:
 1305:   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
 1306:   if (utf8) while ((*p & 0xc0) == 0x80) p--;
 1307: 
 1308:   while (p > startptr)
 1309:     {
 1310:     register unsigned int c;
 1311:     char *pp = p - 1;
 1312: 
 1313:     if (utf8)
 1314:       {
 1315:       int extra = 0;
 1316:       while ((*pp & 0xc0) == 0x80) pp--;
 1317:       c = *((unsigned char *)pp);
 1318:       if (c >= 0xc0)
 1319:         {
 1320:         int gcii, gcss;
 1321:         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1322:         gcss = 6*extra;
 1323:         c = (c & utf8_table3[extra]) << gcss;
 1324:         for (gcii = 1; gcii <= extra; gcii++)
 1325:           {
 1326:           gcss -= 6;
 1327:           c |= (pp[gcii] & 0x3f) << gcss;
 1328:           }
 1329:         }
 1330:       }
 1331:     else c = *((unsigned char *)pp);
 1332: 
 1333:     if (endlinetype == EL_ANYCRLF) switch (c)
 1334:       {
 1335:       case '\n':    /* LF */
 1336:       case '\r':    /* CR */
 1337:       return p;
 1338: 
 1339:       default:
 1340:       break;
 1341:       }
 1342: 
 1343:     else switch (c)
 1344:       {
 1345:       case '\n':    /* LF */
 1346:       case '\v':    /* VT */
 1347:       case '\f':    /* FF */
 1348:       case '\r':    /* CR */
 1349: #ifndef EBCDIE
 1350:       case 0x85:    /* Unicode NEL */
 1351:       case 0x2028:  /* Unicode LS */
 1352:       case 0x2029:  /* Unicode PS */
 1353: #endif  /* Not EBCDIC */
 1354:       return p;
 1355: 
 1356:       default:
 1357:       break;
 1358:       }
 1359: 
 1360:     p = pp;  /* Back one character */
 1361:     }        /* End of loop for ANY case */
 1362: 
 1363:   return startptr;  /* Hit start of data */
 1364:   }     /* End of overall switch */
 1365: }
 1366: 
 1367: 
 1368: 
 1369: 
 1370: 
 1371: /*************************************************
 1372: *       Print the previous "after" lines         *
 1373: *************************************************/
 1374: 
 1375: /* This is called if we are about to lose said lines because of buffer filling,
 1376: and at the end of the file. The data in the line is written using fwrite() so
 1377: that a binary zero does not terminate it.
 1378: 
 1379: Arguments:
 1380:   lastmatchnumber   the number of the last matching line, plus one
 1381:   lastmatchrestart  where we restarted after the last match
 1382:   endptr            end of available data
 1383:   printname         filename for printing
 1384: 
 1385: Returns:            nothing
 1386: */
 1387: 
 1388: static void
 1389: do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
 1390:   char *printname)
 1391: {
 1392: if (after_context > 0 && lastmatchnumber > 0)
 1393:   {
 1394:   int count = 0;
 1395:   while (lastmatchrestart < endptr && count++ < after_context)
 1396:     {
 1397:     int ellength;
 1398:     char *pp = lastmatchrestart;
 1399:     if (printname != NULL) fprintf(stdout, "%s-", printname);
 1400:     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
 1401:     pp = end_of_line(pp, endptr, &ellength);
 1402:     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
 1403:     lastmatchrestart = pp;
 1404:     }
 1405:   hyphenpending = TRUE;
 1406:   }
 1407: }
 1408: 
 1409: 
 1410: 
 1411: /*************************************************
 1412: *   Apply patterns to subject till one matches   *
 1413: *************************************************/
 1414: 
 1415: /* This function is called to run through all patterns, looking for a match. It
 1416: is used multiple times for the same subject when colouring is enabled, in order
 1417: to find all possible matches.
 1418: 
 1419: Arguments:
 1420:   matchptr     the start of the subject
 1421:   length       the length of the subject to match
 1422:   options      options for pcre_exec
 1423:   startoffset  where to start matching
  1424:   offsets      the offsets vector to fill in
 1425:   mrc          address of where to put the result of pcre_exec()
 1426: 
 1427: Returns:      TRUE if there was a match
 1428:               FALSE if there was no match
 1429:               invert if there was a non-fatal error
 1430: */
 1431: 
 1432: static BOOL
 1433: match_patterns(char *matchptr, size_t length, unsigned int options,
 1434:   int startoffset, int *offsets, int *mrc)
 1435: {
 1436: int i;
 1437: size_t slen = length;
 1438: patstr *p = patterns;
 1439: const char *msg = "this text:\n\n";
 1440: 
 1441: if (slen > 200)
 1442:   {
 1443:   slen = 200;
 1444:   msg = "text that starts:\n\n";
 1445:   }
 1446: for (i = 1; p != NULL; p = p->next, i++)
 1447:   {
 1448:   *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
 1449:     startoffset, options, offsets, OFFSET_SIZE);
 1450:   if (*mrc >= 0) return TRUE;
 1451:   if (*mrc == PCRE_ERROR_NOMATCH) continue;
 1452:   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
 1453:   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
 1454:   fprintf(stderr, "%s", msg);
 1455:   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
 1456:   fprintf(stderr, "\n\n");
 1457:   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
 1458:       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
 1459:     resource_error = TRUE;
 1460:   if (error_count++ > 20)
 1461:     {
 1462:     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
 1463:     pcregrep_exit(2);
 1464:     }
 1465:   return invert;    /* No more matching; don't show the line again */
 1466:   }
 1467: 
 1468: return FALSE;  /* No match, no errors */
 1469: }
 1470: 
 1471: 
 1472: 
 1473: /*************************************************
 1474: *            Grep an individual file             *
 1475: *************************************************/
 1476: 
 1477: /* This is called from grep_or_recurse() below. It uses a buffer that is three
 1478: times the value of bufthird. The matching point is never allowed to stray into
 1479: the top third of the buffer, thus keeping more of the file available for
 1480: context printing or for multiline scanning. For large files, the pointer will
 1481: be in the middle third most of the time, so the bottom third is available for
 1482: "before" context printing.
 1483: 
 1484: Arguments:
 1485:   handle       the fopened FILE stream for a normal file
 1486:                the gzFile pointer when reading is via libz
 1487:                the BZFILE pointer when reading is via libbz2
 1488:   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
 1489:   filename     the file name or NULL (for errors)
 1490:   printname    the file name if it is to be printed for each match
 1491:                or NULL if the file name is not to be printed
 1492:                it cannot be NULL if filenames[_nomatch]_only is set
 1493: 
 1494: Returns:       0 if there was at least one match
 1495:                1 otherwise (no matches)
 1496:                2 if an overlong line is encountered
 1497:                3 if there is a read error on a .bz2 file
 1498: */
 1499: 
 1500: static int
 1501: pcregrep(void *handle, int frtype, char *filename, char *printname)
 1502: {
 1503: int rc = 1;
 1504: int linenumber = 1;
 1505: int lastmatchnumber = 0;
 1506: int count = 0;
 1507: int filepos = 0;
 1508: int offsets[OFFSET_SIZE];
 1509: char *lastmatchrestart = NULL;
 1510: char *ptr = main_buffer;
 1511: char *endptr;
 1512: size_t bufflength;
 1513: BOOL binary = FALSE;
 1514: BOOL endhyphenpending = FALSE;
 1515: BOOL input_line_buffered = line_buffered;
 1516: FILE *in = NULL;                    /* Ensure initialized */
 1517: 
 1518: #ifdef SUPPORT_LIBZ
 1519: gzFile ingz = NULL;
 1520: #endif
 1521: 
 1522: #ifdef SUPPORT_LIBBZ2
 1523: BZFILE *inbz2 = NULL;
 1524: #endif
 1525: 
 1526: 
 1527: /* Do the first read into the start of the buffer and set up the pointer to end
 1528: of what we have. In the case of libz, a non-zipped .gz file will be read as a
 1529: plain file. However, if a .bz2 file isn't actually bzipped, the first read will
 1530: fail. */
 1531: 
 1532: (void)frtype;
 1533: 
 1534: #ifdef SUPPORT_LIBZ
 1535: if (frtype == FR_LIBZ)
 1536:   {
 1537:   ingz = (gzFile)handle;
 1538:   bufflength = gzread (ingz, main_buffer, bufsize);
 1539:   }
 1540: else
 1541: #endif
 1542: 
 1543: #ifdef SUPPORT_LIBBZ2
 1544: if (frtype == FR_LIBBZ2)
 1545:   {
 1546:   inbz2 = (BZFILE *)handle;
 1547:   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
 1548:   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
 1549:   }                                    /* without the cast it is unsigned. */
 1550: else
 1551: #endif
 1552: 
 1553:   {
 1554:   in = (FILE *)handle;
 1555:   if (is_file_tty(in)) input_line_buffered = TRUE;
 1556:   bufflength = input_line_buffered?
 1557:     read_one_line(main_buffer, bufsize, in) :
 1558:     fread(main_buffer, 1, bufsize, in);
 1559:   }
 1560: 
 1561: endptr = main_buffer + bufflength;
 1562: 
 1563: /* Unless binary-files=text, see if we have a binary file. This uses the same
 1564: rule as GNU grep, namely, a search for a binary zero byte near the start of the
 1565: file. */
 1566: 
 1567: if (binary_files != BIN_TEXT)
 1568:   {
 1569:   binary =
 1570:     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
 1571:   if (binary && binary_files == BIN_NOMATCH) return 1;
 1572:   }
 1573: 
 1574: /* Loop while the current pointer is not at the end of the file. For large
 1575: files, endptr will be at the end of the buffer when we are in the middle of the
 1576: file, but ptr will never get there, because as soon as it gets over 2/3 of the
 1577: way, the buffer is shifted left and re-filled. */
 1578: 
 1579: while (ptr < endptr)
 1580:   {
 1581:   int endlinelength;
 1582:   int mrc = 0;
 1583:   int startoffset = 0;
 1584:   unsigned int options = 0;
 1585:   BOOL match;
 1586:   char *matchptr = ptr;
 1587:   char *t = ptr;
 1588:   size_t length, linelength;
 1589: 
 1590:   /* At this point, ptr is at the start of a line. We need to find the length
 1591:   of the subject string to pass to pcre_exec(). In multiline mode, it is the
 1592:   length remainder of the data in the buffer. Otherwise, it is the length of
 1593:   the next line, excluding the terminating newline. After matching, we always
 1594:   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
 1595:   option is used for compiling, so that any match is constrained to be in the
 1596:   first line. */
 1597: 
 1598:   t = end_of_line(t, endptr, &endlinelength);
 1599:   linelength = t - ptr - endlinelength;
 1600:   length = multiline? (size_t)(endptr - ptr) : linelength;
 1601: 
 1602:   /* Check to see if the line we are looking at extends right to the very end
 1603:   of the buffer without a line terminator. This means the line is too long to
 1604:   handle. */
 1605: 
 1606:   if (endlinelength == 0 && t == main_buffer + bufsize)
 1607:     {
 1608:     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
 1609:                     "pcregrep: check the --buffer-size option\n",
 1610:                     linenumber,
 1611:                     (filename == NULL)? "" : " of file ",
 1612:                     (filename == NULL)? "" : filename);
 1613:     return 2;
 1614:     }
 1615: 
 1616:   /* Extra processing for Jeffrey Friedl's debugging. */
 1617: 
 1618: #ifdef JFRIEDL_DEBUG
 1619:   if (jfriedl_XT || jfriedl_XR)
 1620:   {
 1621: #     include <sys/time.h>
 1622: #     include <time.h>
 1623:       struct timeval start_time, end_time;
 1624:       struct timezone dummy;
 1625:       int i;
 1626: 
 1627:       if (jfriedl_XT)
 1628:       {
 1629:           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
 1630:           const char *orig = ptr;
 1631:           ptr = malloc(newlen + 1);
 1632:           if (!ptr) {
 1633:                   printf("out of memory");
 1634:                   pcregrep_exit(2);
 1635:           }
 1636:           endptr = ptr;
 1637:           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
 1638:           for (i = 0; i < jfriedl_XT; i++) {
 1639:                   strncpy(endptr, orig,  length);
 1640:                   endptr += length;
 1641:           }
 1642:           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
 1643:           length = newlen;
 1644:       }
 1645: 
 1646:       if (gettimeofday(&start_time, &dummy) != 0)
 1647:               perror("bad gettimeofday");
 1648: 
 1649: 
 1650:       for (i = 0; i < jfriedl_XR; i++)
 1651:           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
 1652:               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
 1653: 
 1654:       if (gettimeofday(&end_time, &dummy) != 0)
 1655:               perror("bad gettimeofday");
 1656: 
 1657:       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
 1658:                       -
 1659:                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
 1660: 
 1661:       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
 1662:       return 0;
 1663:   }
 1664: #endif
 1665: 
 1666:   /* We come back here after a match when show_only_matching is set, in order
 1667:   to find any further matches in the same line. This applies to
 1668:   --only-matching, --file-offsets, and --line-offsets. */
 1669: 
 1670:   ONLY_MATCHING_RESTART:
 1671: 
 1672:   /* Run through all the patterns until one matches or there is an error other
 1673:   than NOMATCH. This code is in a subroutine so that it can be re-used for
 1674:   finding subsequent matches when colouring matched lines. After finding one
 1675:   match, set PCRE_NOTEMPTY to disable any further matches of null strings in
 1676:   this line. */
 1677: 
 1678:   match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
 1679:   options = PCRE_NOTEMPTY;
 1680: 
 1681:   /* If it's a match or a not-match (as required), do what's wanted. */
 1682: 
 1683:   if (match != invert)
 1684:     {
 1685:     BOOL hyphenprinted = FALSE;
 1686: 
 1687:     /* We've failed if we want a file that doesn't have any matches. */
 1688: 
 1689:     if (filenames == FN_NOMATCH_ONLY) return 1;
 1690: 
 1691:     /* Just count if just counting is wanted. */
 1692: 
 1693:     if (count_only) count++;
 1694: 
 1695:     /* When handling a binary file and binary-files==binary, the "binary"
 1696:     variable will be set true (it's false in all other cases). In this
 1697:     situation we just want to output the file name. No need to scan further. */
 1698: 
 1699:     else if (binary)
 1700:       {
 1701:       fprintf(stdout, "Binary file %s matches\n", filename);
 1702:       return 0;
 1703:       }
 1704: 
 1705:     /* If all we want is a file name, there is no need to scan any more lines
 1706:     in the file. */
 1707: 
 1708:     else if (filenames == FN_MATCH_ONLY)
 1709:       {
 1710:       fprintf(stdout, "%s\n", printname);
 1711:       return 0;
 1712:       }
 1713: 
 1714:     /* Likewise, if all we want is a yes/no answer. */
 1715: 
 1716:     else if (quiet) return 0;
 1717: 
 1718:     /* The --only-matching option prints just the substring that matched,
 1719:     and/or one or more captured portions of it, as long as these strings are
 1720:     not empty. The --file-offsets and --line-offsets options output offsets for
 1721:     the matching substring (all three set show_only_matching). None of these
 1722:     mutually exclusive options prints any context. Afterwards, adjust the start
 1723:     and then jump back to look for further matches in the same line. If we are
 1724:     in invert mode, however, nothing is printed and we do not restart - this
 1725:     could still be useful because the return code is set. */
 1726: 
 1727:     else if (show_only_matching)
 1728:       {
 1729:       if (!invert)
 1730:         {
 1731:         if (printname != NULL) fprintf(stdout, "%s:", printname);
 1732:         if (number) fprintf(stdout, "%d:", linenumber);
 1733: 
 1734:         /* Handle --line-offsets */
 1735: 
 1736:         if (line_offsets)
 1737:           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
 1738:             offsets[1] - offsets[0]);
 1739: 
 1740:         /* Handle --file-offsets */
 1741: 
 1742:         else if (file_offsets)
 1743:           fprintf(stdout, "%d,%d\n",
 1744:             (int)(filepos + matchptr + offsets[0] - ptr),
 1745:             offsets[1] - offsets[0]);
 1746: 
 1747:         /* Handle --only-matching, which may occur many times */
 1748: 
 1749:         else
 1750:           {
 1751:           BOOL printed = FALSE;
 1752:           omstr *om;
 1753: 
 1754:           for (om = only_matching; om != NULL; om = om->next)
 1755:             {
 1756:             int n = om->groupnum;
 1757:             if (n < mrc)
 1758:               {
 1759:               int plen = offsets[2*n + 1] - offsets[2*n];
 1760:               if (plen > 0)
 1761:                 {
 1762:                 if (printed) fprintf(stdout, "%s", om_separator);
 1763:                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1764:                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
 1765:                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
 1766:                 printed = TRUE;
 1767:                 }
 1768:               }
 1769:             }
 1770: 
 1771:           if (printed || printname != NULL || number) fprintf(stdout, "\n");
 1772:           }
 1773: 
 1774:         /* Prepare to repeat to find the next match */
 1775: 
 1776:         match = FALSE;
 1777:         if (line_buffered) fflush(stdout);
 1778:         rc = 0;                      /* Had some success */
 1779:         startoffset = offsets[1];    /* Restart after the match */
 1780:         goto ONLY_MATCHING_RESTART;
 1781:         }
 1782:       }
 1783: 
 1784:     /* This is the default case when none of the above options is set. We print
 1785:     the matching line(s), possibly preceded and/or followed by other lines of
 1786:     context. */
 1787: 
 1788:     else
 1789:       {
 1790:       /* See if there is a requirement to print some "after" lines from a
 1791:       previous match. We never print any overlaps. */
 1792: 
 1793:       if (after_context > 0 && lastmatchnumber > 0)
 1794:         {
 1795:         int ellength;
 1796:         int linecount = 0;
 1797:         char *p = lastmatchrestart;
 1798: 
 1799:         while (p < ptr && linecount < after_context)
 1800:           {
 1801:           p = end_of_line(p, ptr, &ellength);
 1802:           linecount++;
 1803:           }
 1804: 
 1805:         /* It is important to advance lastmatchrestart during this printing so
 1806:         that it interacts correctly with any "before" printing below. Print
 1807:         each line's data using fwrite() in case there are binary zeroes. */
 1808: 
 1809:         while (lastmatchrestart < p)
 1810:           {
 1811:           char *pp = lastmatchrestart;
 1812:           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1813:           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
 1814:           pp = end_of_line(pp, endptr, &ellength);
 1815:           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
 1816:           lastmatchrestart = pp;
 1817:           }
 1818:         if (lastmatchrestart != ptr) hyphenpending = TRUE;
 1819:         }
 1820: 
 1821:       /* If there were non-contiguous lines printed above, insert hyphens. */
 1822: 
 1823:       if (hyphenpending)
 1824:         {
 1825:         fprintf(stdout, "--\n");
 1826:         hyphenpending = FALSE;
 1827:         hyphenprinted = TRUE;
 1828:         }
 1829: 
 1830:       /* See if there is a requirement to print some "before" lines for this
 1831:       match. Again, don't print overlaps. */
 1832: 
 1833:       if (before_context > 0)
 1834:         {
 1835:         int linecount = 0;
 1836:         char *p = ptr;
 1837: 
 1838:         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
 1839:                linecount < before_context)
 1840:           {
 1841:           linecount++;
 1842:           p = previous_line(p, main_buffer);
 1843:           }
 1844: 
 1845:         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
 1846:           fprintf(stdout, "--\n");
 1847: 
 1848:         while (p < ptr)
 1849:           {
 1850:           int ellength;
 1851:           char *pp = p;
 1852:           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1853:           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
 1854:           pp = end_of_line(pp, endptr, &ellength);
 1855:           FWRITE(p, 1, pp - p, stdout);
 1856:           p = pp;
 1857:           }
 1858:         }
 1859: 
 1860:       /* Now print the matching line(s); ensure we set hyphenpending at the end
 1861:       of the file if any context lines are being output. */
 1862: 
 1863:       if (after_context > 0 || before_context > 0)
 1864:         endhyphenpending = TRUE;
 1865: 
 1866:       if (printname != NULL) fprintf(stdout, "%s:", printname);
 1867:       if (number) fprintf(stdout, "%d:", linenumber);
 1868: 
 1869:       /* In multiline mode, we want to print to the end of the line in which
 1870:       the end of the matched string is found, so we adjust linelength and the
 1871:       line number appropriately, but only when there actually was a match
 1872:       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
 1873:       the match will always be before the first newline sequence. */
 1874: 
 1875:       if (multiline & !invert)
 1876:         {
 1877:         char *endmatch = ptr + offsets[1];
 1878:         t = ptr;
 1879:         while (t <= endmatch)
 1880:           {
 1881:           t = end_of_line(t, endptr, &endlinelength);
 1882:           if (t < endmatch) linenumber++; else break;
 1883:           }
 1884:         linelength = t - ptr - endlinelength;
 1885:         }
 1886: 
 1887:       /*** NOTE: Use only fwrite() to output the data line, so that binary
 1888:       zeroes are treated as just another data character. */
 1889: 
 1890:       /* This extra option, for Jeffrey Friedl's debugging requirements,
 1891:       replaces the matched string, or a specific captured string if it exists,
 1892:       with X. When this happens, colouring is ignored. */
 1893: 
 1894: #ifdef JFRIEDL_DEBUG
 1895:       if (S_arg >= 0 && S_arg < mrc)
 1896:         {
 1897:         int first = S_arg * 2;
 1898:         int last  = first + 1;
 1899:         FWRITE(ptr, 1, offsets[first], stdout);
 1900:         fprintf(stdout, "X");
 1901:         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
 1902:         }
 1903:       else
 1904: #endif
 1905: 
 1906:       /* We have to split the line(s) up if colouring, and search for further
 1907:       matches, but not of course if the line is a non-match. */
 1908: 
 1909:       if (do_colour && !invert)
 1910:         {
 1911:         int plength;
 1912:         FWRITE(ptr, 1, offsets[0], stdout);
 1913:         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1914:         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1915:         fprintf(stdout, "%c[00m", 0x1b);
 1916:         for (;;)
 1917:           {
 1918:           startoffset = offsets[1];
 1919:           if (startoffset >= (int)linelength + endlinelength ||
 1920:               !match_patterns(matchptr, length, options, startoffset, offsets,
 1921:                 &mrc))
 1922:             break;
 1923:           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
 1924:           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1925:           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1926:           fprintf(stdout, "%c[00m", 0x1b);
 1927:           }
 1928: 
 1929:         /* In multiline mode, we may have already printed the complete line
 1930:         and its line-ending characters (if they matched the pattern), so there
 1931:         may be no more to print. */
 1932: 
 1933:         plength = (int)((linelength + endlinelength) - startoffset);
 1934:         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
 1935:         }
 1936: 
 1937:       /* Not colouring; no need to search for further matches */
 1938: 
 1939:       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
 1940:       }
 1941: 
 1942:     /* End of doing what has to be done for a match. If --line-buffered was
 1943:     given, flush the output. */
 1944: 
 1945:     if (line_buffered) fflush(stdout);
 1946:     rc = 0;    /* Had some success */
 1947: 
 1948:     /* Remember where the last match happened for after_context. We remember
 1949:     where we are about to restart, and that line's number. */
 1950: 
 1951:     lastmatchrestart = ptr + linelength + endlinelength;
 1952:     lastmatchnumber = linenumber + 1;
 1953:     }
 1954: 
 1955:   /* For a match in multiline inverted mode (which of course did not cause
 1956:   anything to be printed), we have to move on to the end of the match before
 1957:   proceeding. */
 1958: 
 1959:   if (multiline && invert && match)
 1960:     {
 1961:     int ellength;
 1962:     char *endmatch = ptr + offsets[1];
 1963:     t = ptr;
 1964:     while (t < endmatch)
 1965:       {
 1966:       t = end_of_line(t, endptr, &ellength);
 1967:       if (t <= endmatch) linenumber++; else break;
 1968:       }
 1969:     endmatch = end_of_line(endmatch, endptr, &ellength);
 1970:     linelength = endmatch - ptr - ellength;
 1971:     }
 1972: 
 1973:   /* Advance to after the newline and increment the line number. The file
 1974:   offset to the current line is maintained in filepos. */
 1975: 
 1976:   ptr += linelength + endlinelength;
 1977:   filepos += (int)(linelength + endlinelength);
 1978:   linenumber++;
 1979: 
 1980:   /* If input is line buffered, and the buffer is not yet full, read another
 1981:   line and add it into the buffer. */
 1982: 
 1983:   if (input_line_buffered && bufflength < (size_t)bufsize)
 1984:     {
 1985:     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
 1986:     bufflength += add;
 1987:     endptr += add;
 1988:     }
 1989: 
 1990:   /* If we haven't yet reached the end of the file (the buffer is full), and
 1991:   the current point is in the top 1/3 of the buffer, slide the buffer down by
 1992:   1/3 and refill it. Before we do this, if some unprinted "after" lines are
 1993:   about to be lost, print them. */
 1994: 
 1995:   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
 1996:     {
 1997:     if (after_context > 0 &&
 1998:         lastmatchnumber > 0 &&
 1999:         lastmatchrestart < main_buffer + bufthird)
 2000:       {
 2001:       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 2002:       lastmatchnumber = 0;
 2003:       }
 2004: 
 2005:     /* Now do the shuffle */
 2006: 
 2007:     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
 2008:     ptr -= bufthird;
 2009: 
 2010: #ifdef SUPPORT_LIBZ
 2011:     if (frtype == FR_LIBZ)
 2012:       bufflength = 2*bufthird +
 2013:         gzread (ingz, main_buffer + 2*bufthird, bufthird);
 2014:     else
 2015: #endif
 2016: 
 2017: #ifdef SUPPORT_LIBBZ2
 2018:     if (frtype == FR_LIBBZ2)
 2019:       bufflength = 2*bufthird +
 2020:         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
 2021:     else
 2022: #endif
 2023: 
 2024:     bufflength = 2*bufthird +
 2025:       (input_line_buffered?
 2026:        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
 2027:        fread(main_buffer + 2*bufthird, 1, bufthird, in));
 2028:     endptr = main_buffer + bufflength;
 2029: 
 2030:     /* Adjust any last match point */
 2031: 
 2032:     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
 2033:     }
 2034:   }     /* Loop through the whole file */
 2035: 
 2036: /* End of file; print final "after" lines if wanted; do_after_lines sets
 2037: hyphenpending if it prints something. */
 2038: 
 2039: if (!show_only_matching && !count_only)
 2040:   {
 2041:   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 2042:   hyphenpending |= endhyphenpending;
 2043:   }
 2044: 
 2045: /* Print the file name if we are looking for those without matches and there
 2046: were none. If we found a match, we won't have got this far. */
 2047: 
 2048: if (filenames == FN_NOMATCH_ONLY)
 2049:   {
 2050:   fprintf(stdout, "%s\n", printname);
 2051:   return 0;
 2052:   }
 2053: 
 2054: /* Print the match count if wanted */
 2055: 
 2056: if (count_only)
 2057:   {
 2058:   if (count > 0 || !omit_zero_count)
 2059:     {
 2060:     if (printname != NULL && filenames != FN_NONE)
 2061:       fprintf(stdout, "%s:", printname);
 2062:     fprintf(stdout, "%d\n", count);
 2063:     }
 2064:   }
 2065: 
 2066: return rc;
 2067: }
 2068: 
 2069: 
 2070: 
 2071: /*************************************************
 2072: *     Grep a file or recurse into a directory    *
 2073: *************************************************/
 2074: 
 2075: /* Given a path name, if it's a directory, scan all the files if we are
 2076: recursing; if it's a file, grep it.
 2077: 
 2078: Arguments:
 2079:   pathname          the path to investigate
 2080:   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
 2081:   only_one_at_top   TRUE if the path is the only one at toplevel
 2082: 
 2083: Returns:  -1 the file/directory was skipped
 2084:            0 if there was at least one match
 2085:            1 if there were no matches
 2086:            2 there was some kind of error
 2087: 
 2088: However, file opening failures are suppressed if "silent" is set.
 2089: */
 2090: 
 2091: static int
 2092: grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
 2093: {
 2094: int rc = 1;
 2095: int frtype;
 2096: void *handle;
 2097: char *lastcomp;
 2098: FILE *in = NULL;           /* Ensure initialized */
 2099: 
 2100: #ifdef SUPPORT_LIBZ
 2101: gzFile ingz = NULL;
 2102: #endif
 2103: 
 2104: #ifdef SUPPORT_LIBBZ2
 2105: BZFILE *inbz2 = NULL;
 2106: #endif
 2107: 
 2108: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
 2109: int pathlen;
 2110: #endif
 2111: 
 2112: #if defined NATIVE_ZOS
 2113: int zos_type;
 2114: FILE *zos_test_file;
 2115: #endif
 2116: 
 2117: /* If the file name is "-" we scan stdin */
 2118: 
 2119: if (strcmp(pathname, "-") == 0)
 2120:   {
 2121:   return pcregrep(stdin, FR_PLAIN, stdin_name,
 2122:     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
 2123:       stdin_name : NULL);
 2124:   }
 2125: 
 2126: /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
 2127: directories, whereas --include and --exclude apply to everything else. The test
 2128: is against the final component of the path. */
 2129: 
 2130: lastcomp = strrchr(pathname, FILESEP);
 2131: lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
 2132: 
 2133: /* If the file is a directory, skip if not recursing or if explicitly excluded.
 2134: Otherwise, scan the directory and recurse for each path within it. The scanning
 2135: code is localized so it can be made system-specific. */
 2136: 
 2137: 
 2138: /* For z/OS, determine the file type. */
 2139: 
 2140: #if defined NATIVE_ZOS
 2141: zos_test_file =  fopen(pathname,"rb");
 2142: 
 2143: if (zos_test_file == NULL)
 2144:    {
 2145:    if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
 2146:      pathname, strerror(errno));
 2147:    return -1;
 2148:    }
 2149: zos_type = identifyzosfiletype (zos_test_file);
 2150: fclose (zos_test_file);
 2151: 
 2152: /* Handle a PDS in separate code */
 2153: 
 2154: if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
 2155:    {
 2156:    return travelonpdsdir (pathname, only_one_at_top);
 2157:    }
 2158: 
 2159: /* Deal with regular files in the normal way below. These types are:
 2160:    zos_type == __ZOS_PDS_MEMBER
 2161:    zos_type == __ZOS_PS
 2162:    zos_type == __ZOS_VSAM_KSDS
 2163:    zos_type == __ZOS_VSAM_ESDS
 2164:    zos_type == __ZOS_VSAM_RRDS
 2165: */
 2166: 
 2167: /* Handle a z/OS directory using common code. */
 2168: 
 2169: else if (zos_type == __ZOS_HFS)
 2170:  {
 2171: #endif  /* NATIVE_ZOS */
 2172: 
 2173: 
 2174: /* Handle directories: common code for all OS */
 2175: 
 2176: if (isdirectory(pathname))
 2177:   {
 2178:   if (dee_action == dee_SKIP ||
 2179:       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
 2180:     return -1;
 2181: 
 2182:   if (dee_action == dee_RECURSE)
 2183:     {
 2184:     char buffer[1024];
 2185:     char *nextfile;
 2186:     directory_type *dir = opendirectory(pathname);
 2187: 
 2188:     if (dir == NULL)
 2189:       {
 2190:       if (!silent)
 2191:         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
 2192:           strerror(errno));
 2193:       return 2;
 2194:       }
 2195: 
 2196:     while ((nextfile = readdirectory(dir)) != NULL)
 2197:       {
 2198:       int frc;
 2199:       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
 2200:       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
 2201:       if (frc > 1) rc = frc;
 2202:        else if (frc == 0 && rc == 1) rc = 0;
 2203:       }
 2204: 
 2205:     closedirectory(dir);
 2206:     return rc;
 2207:     }
 2208:   }
 2209: 
 2210: #if defined NATIVE_ZOS
 2211:  }
 2212: #endif
 2213: 
 2214: /* If the file is not a directory, check for a regular file, and if it is not,
 2215: skip it if that's been requested. Otherwise, check for an explicit inclusion or
 2216: exclusion. */
 2217: 
 2218: else if (
 2219: #if defined NATIVE_ZOS
 2220:         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
 2221: #else  /* all other OS */
 2222:         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
 2223: #endif
 2224:         !test_incexc(lastcomp, include_patterns, exclude_patterns))
 2225:   return -1;  /* File skipped */
 2226: 
 2227: /* Control reaches here if we have a regular file, or if we have a directory
 2228: and recursion or skipping was not requested, or if we have anything else and
 2229: skipping was not requested. The scan proceeds. If this is the first and only
 2230: argument at top level, we don't show the file name, unless we are only showing
 2231: the file name, or the filename was forced (-H). */
 2232: 
 2233: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
 2234: pathlen = (int)(strlen(pathname));
 2235: #endif
 2236: 
 2237: /* Open using zlib if it is supported and the file name ends with .gz. */
 2238: 
 2239: #ifdef SUPPORT_LIBZ
 2240: if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
 2241:   {
 2242:   ingz = gzopen(pathname, "rb");
 2243:   if (ingz == NULL)
 2244:     {
 2245:     if (!silent)
 2246:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 2247:         strerror(errno));
 2248:     return 2;
 2249:     }
 2250:   handle = (void *)ingz;
 2251:   frtype = FR_LIBZ;
 2252:   }
 2253: else
 2254: #endif
 2255: 
 2256: /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
 2257: 
 2258: #ifdef SUPPORT_LIBBZ2
 2259: if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
 2260:   {
 2261:   inbz2 = BZ2_bzopen(pathname, "rb");
 2262:   handle = (void *)inbz2;
 2263:   frtype = FR_LIBBZ2;
 2264:   }
 2265: else
 2266: #endif
 2267: 
 2268: /* Otherwise use plain fopen(). The label is so that we can come back here if
 2269: an attempt to read a .bz2 file indicates that it really is a plain file. */
 2270: 
 2271: #ifdef SUPPORT_LIBBZ2
 2272: PLAIN_FILE:
 2273: #endif
 2274:   {
 2275:   in = fopen(pathname, "rb");
 2276:   handle = (void *)in;
 2277:   frtype = FR_PLAIN;
 2278:   }
 2279: 
 2280: /* All the opening methods return errno when they fail. */
 2281: 
 2282: if (handle == NULL)
 2283:   {
 2284:   if (!silent)
 2285:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 2286:       strerror(errno));
 2287:   return 2;
 2288:   }
 2289: 
 2290: /* Now grep the file */
 2291: 
 2292: rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
 2293:   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
 2294: 
 2295: /* Close in an appropriate manner. */
 2296: 
 2297: #ifdef SUPPORT_LIBZ
 2298: if (frtype == FR_LIBZ)
 2299:   gzclose(ingz);
 2300: else
 2301: #endif
 2302: 
 2303: /* If it is a .bz2 file and the result is 3, it means that the first attempt to
 2304: read failed. If the error indicates that the file isn't in fact bzipped, try
 2305: again as a normal file. */
 2306: 
 2307: #ifdef SUPPORT_LIBBZ2
 2308: if (frtype == FR_LIBBZ2)
 2309:   {
 2310:   if (rc == 3)
 2311:     {
 2312:     int errnum;
 2313:     const char *err = BZ2_bzerror(inbz2, &errnum);
 2314:     if (errnum == BZ_DATA_ERROR_MAGIC)
 2315:       {
 2316:       BZ2_bzclose(inbz2);
 2317:       goto PLAIN_FILE;
 2318:       }
 2319:     else if (!silent)
 2320:       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
 2321:         pathname, err);
 2322:     rc = 2;    /* The normal "something went wrong" code */
 2323:     }
 2324:   BZ2_bzclose(inbz2);
 2325:   }
 2326: else
 2327: #endif
 2328: 
 2329: /* Normal file close */
 2330: 
 2331: fclose(in);
 2332: 
 2333: /* Pass back the yield from pcregrep(). */
 2334: 
 2335: return rc;
 2336: }
 2337: 
 2338: 
 2339: 
 2340: /*************************************************
 2341: *    Handle a single-letter, no data option      *
 2342: *************************************************/
 2343: 
 2344: static int
 2345: handle_option(int letter, int options)
 2346: {
 2347: switch(letter)
 2348:   {
 2349:   case N_FOFFSETS: file_offsets = TRUE; break;
 2350:   case N_HELP: help(); pcregrep_exit(0);
 2351:   case N_LBUFFER: line_buffered = TRUE; break;
 2352:   case N_LOFFSETS: line_offsets = number = TRUE; break;
 2353:   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
 2354:   case 'a': binary_files = BIN_TEXT; break;
 2355:   case 'c': count_only = TRUE; break;
 2356:   case 'F': process_options |= PO_FIXED_STRINGS; break;
 2357:   case 'H': filenames = FN_FORCE; break;
 2358:   case 'I': binary_files = BIN_NOMATCH; break;
 2359:   case 'h': filenames = FN_NONE; break;
 2360:   case 'i': options |= PCRE_CASELESS; break;
 2361:   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
 2362:   case 'L': filenames = FN_NOMATCH_ONLY; break;
 2363:   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
 2364:   case 'n': number = TRUE; break;
 2365: 
 2366:   case 'o':
 2367:   only_matching_last = add_number(0, only_matching_last);
 2368:   if (only_matching == NULL) only_matching = only_matching_last;
 2369:   break;
 2370: 
 2371:   case 'q': quiet = TRUE; break;
 2372:   case 'r': dee_action = dee_RECURSE; break;
 2373:   case 's': silent = TRUE; break;
 2374:   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
 2375:   case 'v': invert = TRUE; break;
 2376:   case 'w': process_options |= PO_WORD_MATCH; break;
 2377:   case 'x': process_options |= PO_LINE_MATCH; break;
 2378: 
 2379:   case 'V':
 2380:   fprintf(stdout, "pcregrep version %s\n", pcre_version());
 2381:   pcregrep_exit(0);
 2382:   break;
 2383: 
 2384:   default:
 2385:   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
 2386:   pcregrep_exit(usage(2));
 2387:   }
 2388: 
 2389: return options;
 2390: }
 2391: 
 2392: 
 2393: 
 2394: 
 2395: /*************************************************
 2396: *          Construct printed ordinal             *
 2397: *************************************************/
 2398: 
 2399: /* This turns a number into "1st", "3rd", etc. */
 2400: 
 2401: static char *
 2402: ordin(int n)
 2403: {
 2404: static char buffer[8];
 2405: char *p = buffer;
 2406: sprintf(p, "%d", n);
 2407: while (*p != 0) p++;
 2408: switch (n%10)
 2409:   {
 2410:   case 1: strcpy(p, "st"); break;
 2411:   case 2: strcpy(p, "nd"); break;
 2412:   case 3: strcpy(p, "rd"); break;
 2413:   default: strcpy(p, "th"); break;
 2414:   }
 2415: return buffer;
 2416: }
 2417: 
 2418: 
 2419: 
 2420: /*************************************************
 2421: *          Compile a single pattern              *
 2422: *************************************************/
 2423: 
 2424: /* Do nothing if the pattern has already been compiled. This is the case for
 2425: include/exclude patterns read from a file.
 2426: 
 2427: When the -F option has been used, each "pattern" may be a list of strings,
 2428: separated by line breaks. They will be matched literally. We split such a
 2429: string and compile the first substring, inserting an additional block into the
 2430: pattern chain.
 2431: 
 2432: Arguments:
 2433:   p              points to the pattern block
 2434:   options        the PCRE options
 2435:   popts          the processing options
 2436:   fromfile       TRUE if the pattern was read from a file
 2437:   fromtext       file name or identifying text (e.g. "include")
 2438:   count          0 if this is the only command line pattern, or
 2439:                  number of the command line pattern, or
 2440:                  linenumber for a pattern from a file
 2441: 
 2442: Returns:         TRUE on success, FALSE after an error
 2443: */
 2444: 
 2445: static BOOL
 2446: compile_pattern(patstr *p, int options, int popts, int fromfile,
 2447:   const char *fromtext, int count)
 2448: {
 2449: char buffer[PATBUFSIZE];
 2450: const char *error;
 2451: char *ps = p->string;
 2452: int patlen = strlen(ps);
 2453: int errptr;
 2454: 
 2455: if (p->compiled != NULL) return TRUE;
 2456: 
 2457: if ((popts & PO_FIXED_STRINGS) != 0)
 2458:   {
 2459:   int ellength;
 2460:   char *eop = ps + patlen;
 2461:   char *pe = end_of_line(ps, eop, &ellength);
 2462: 
 2463:   if (ellength != 0)
 2464:     {
 2465:     if (add_pattern(pe, p) == NULL) return FALSE;
 2466:     patlen = (int)(pe - ps - ellength);
 2467:     }
 2468:   }
 2469: 
 2470: sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
 2471: p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
 2472: if (p->compiled != NULL) return TRUE;
 2473: 
 2474: /* Handle compile errors */
 2475: 
 2476: errptr -= (int)strlen(prefix[popts]);
 2477: if (errptr > patlen) errptr = patlen;
 2478: 
 2479: if (fromfile)
 2480:   {
 2481:   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
 2482:     "at offset %d: %s\n", count, fromtext, errptr, error);
 2483:   }
 2484: else
 2485:   {
 2486:   if (count == 0)
 2487:     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
 2488:       fromtext, errptr, error);
 2489:   else
 2490:     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
 2491:       ordin(count), fromtext, errptr, error);
 2492:   }
 2493: 
 2494: return FALSE;
 2495: }
 2496: 
 2497: 
 2498: 
 2499: /*************************************************
 2500: *     Read and compile a file of patterns        *
 2501: *************************************************/
 2502: 
 2503: /* This is used for --filelist, --include-from, and --exclude-from.
 2504: 
 2505: Arguments:
 2506:   name         the name of the file; "-" is stdin
 2507:   patptr       pointer to the pattern chain anchor
 2508:   patlastptr   pointer to the last pattern pointer
 2509:   popts        the process options to pass to pattern_compile()
 2510: 
 2511: Returns:       TRUE if all went well
 2512: */
 2513: 
 2514: static BOOL
 2515: read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
 2516: {
 2517: int linenumber = 0;
 2518: FILE *f;
 2519: char *filename;
 2520: char buffer[PATBUFSIZE];
 2521: 
 2522: if (strcmp(name, "-") == 0)
 2523:   {
 2524:   f = stdin;
 2525:   filename = stdin_name;
 2526:   }
 2527: else
 2528:   {
 2529:   f = fopen(name, "r");
 2530:   if (f == NULL)
 2531:     {
 2532:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
 2533:     return FALSE;
 2534:     }
 2535:   filename = name;
 2536:   }
 2537: 
 2538: while (fgets(buffer, PATBUFSIZE, f) != NULL)
 2539:   {
 2540:   char *s = buffer + (int)strlen(buffer);
 2541:   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
 2542:   *s = 0;
 2543:   linenumber++;
 2544:   if (buffer[0] == 0) continue;   /* Skip blank lines */
 2545: 
 2546:   /* Note: this call to add_pattern() puts a pointer to the local variable
 2547:   "buffer" into the pattern chain. However, that pointer is used only when
 2548:   compiling the pattern, which happens immediately below, so we flatten it
 2549:   afterwards, as a precaution against any later code trying to use it. */
 2550: 
 2551:   *patlastptr = add_pattern(buffer, *patlastptr);
 2552:   if (*patlastptr == NULL) return FALSE;
 2553:   if (*patptr == NULL) *patptr = *patlastptr;
 2554: 
 2555:   /* This loop is needed because compiling a "pattern" when -F is set may add
 2556:   on additional literal patterns if the original contains a newline. In the
 2557:   common case, it never will, because fgets() stops at a newline. However,
 2558:   the -N option can be used to give pcregrep a different newline setting. */
 2559: 
 2560:   for(;;)
 2561:     {
 2562:     if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
 2563:         linenumber))
 2564:       return FALSE;
 2565:     (*patlastptr)->string = NULL;            /* Insurance */
 2566:     if ((*patlastptr)->next == NULL) break;
 2567:     *patlastptr = (*patlastptr)->next;
 2568:     }
 2569:   }
 2570: 
 2571: if (f != stdin) fclose(f);
 2572: return TRUE;
 2573: }
 2574: 
 2575: 
 2576: 
 2577: /*************************************************
 2578: *                Main program                    *
 2579: *************************************************/
 2580: 
 2581: /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
 2582: 
 2583: int
 2584: main(int argc, char **argv)
 2585: {
 2586: int i, j;
 2587: int rc = 1;
 2588: BOOL only_one_at_top;
 2589: patstr *cp;
 2590: fnstr *fn;
 2591: const char *locale_from = "--locale";
 2592: const char *error;
 2593: 
 2594: #ifdef SUPPORT_PCREGREP_JIT
 2595: pcre_jit_stack *jit_stack = NULL;
 2596: #endif
 2597: 
 2598: /* Set the default line ending value from the default in the PCRE library;
 2599: "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
 2600: Note that the return values from pcre_config(), though derived from the ASCII
 2601: codes, are the same in EBCDIC environments, so we must use the actual values
 2602: rather than escapes such as '\r'. */
 2603: 
 2604: (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
 2605: switch(i)
 2606:   {
 2607:   default:               newline = (char *)"lf"; break;
 2608:   case 13:               newline = (char *)"cr"; break;
 2609:   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
 2610:   case -1:               newline = (char *)"any"; break;
 2611:   case -2:               newline = (char *)"anycrlf"; break;
 2612:   }
 2613: 
 2614: /* Process the options */
 2615: 
 2616: for (i = 1; i < argc; i++)
 2617:   {
 2618:   option_item *op = NULL;
 2619:   char *option_data = (char *)"";    /* default to keep compiler happy */
 2620:   BOOL longop;
 2621:   BOOL longopwasequals = FALSE;
 2622: 
 2623:   if (argv[i][0] != '-') break;
 2624: 
 2625:   /* If we hit an argument that is just "-", it may be a reference to STDIN,
 2626:   but only if we have previously had -e or -f to define the patterns. */
 2627: 
 2628:   if (argv[i][1] == 0)
 2629:     {
 2630:     if (pattern_files != NULL || patterns != NULL) break;
 2631:       else pcregrep_exit(usage(2));
 2632:     }
 2633: 
 2634:   /* Handle a long name option, or -- to terminate the options */
 2635: 
 2636:   if (argv[i][1] == '-')
 2637:     {
 2638:     char *arg = argv[i] + 2;
 2639:     char *argequals = strchr(arg, '=');
 2640: 
 2641:     if (*arg == 0)    /* -- terminates options */
 2642:       {
 2643:       i++;
 2644:       break;                /* out of the options-handling loop */
 2645:       }
 2646: 
 2647:     longop = TRUE;
 2648: 
 2649:     /* Some long options have data that follows after =, for example file=name.
 2650:     Some options have variations in the long name spelling: specifically, we
 2651:     allow "regexp" because GNU grep allows it, though I personally go along
 2652:     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
 2653:     These options are entered in the table as "regex(p)". Options can be in
 2654:     both these categories. */
 2655: 
 2656:     for (op = optionlist; op->one_char != 0; op++)
 2657:       {
 2658:       char *opbra = strchr(op->long_name, '(');
 2659:       char *equals = strchr(op->long_name, '=');
 2660: 
 2661:       /* Handle options with only one spelling of the name */
 2662: 
 2663:       if (opbra == NULL)     /* Does not contain '(' */
 2664:         {
 2665:         if (equals == NULL)  /* Not thing=data case */
 2666:           {
 2667:           if (strcmp(arg, op->long_name) == 0) break;
 2668:           }
 2669:         else                 /* Special case xxx=data */
 2670:           {
 2671:           int oplen = (int)(equals - op->long_name);
 2672:           int arglen = (argequals == NULL)?
 2673:             (int)strlen(arg) : (int)(argequals - arg);
 2674:           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
 2675:             {
 2676:             option_data = arg + arglen;
 2677:             if (*option_data == '=')
 2678:               {
 2679:               option_data++;
 2680:               longopwasequals = TRUE;
 2681:               }
 2682:             break;
 2683:             }
 2684:           }
 2685:         }
 2686: 
 2687:       /* Handle options with an alternate spelling of the name */
 2688: 
 2689:       else
 2690:         {
 2691:         char buff1[24];
 2692:         char buff2[24];
 2693: 
 2694:         int baselen = (int)(opbra - op->long_name);
 2695:         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
 2696:         int arglen = (argequals == NULL || equals == NULL)?
 2697:           (int)strlen(arg) : (int)(argequals - arg);
 2698: 
 2699:         sprintf(buff1, "%.*s", baselen, op->long_name);
 2700:         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
 2701: 
 2702:         if (strncmp(arg, buff1, arglen) == 0 ||
 2703:            strncmp(arg, buff2, arglen) == 0)
 2704:           {
 2705:           if (equals != NULL && argequals != NULL)
 2706:             {
 2707:             option_data = argequals;
 2708:             if (*option_data == '=')
 2709:               {
 2710:               option_data++;
 2711:               longopwasequals = TRUE;
 2712:               }
 2713:             }
 2714:           break;
 2715:           }
 2716:         }
 2717:       }
 2718: 
 2719:     if (op->one_char == 0)
 2720:       {
 2721:       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
 2722:       pcregrep_exit(usage(2));
 2723:       }
 2724:     }
 2725: 
 2726:   /* Jeffrey Friedl's debugging harness uses these additional options which
 2727:   are not in the right form for putting in the option table because they use
 2728:   only one hyphen, yet are more than one character long. By putting them
 2729:   separately here, they will not get displayed as part of the help() output,
 2730:   but I don't think Jeffrey will care about that. */
 2731: 
 2732: #ifdef JFRIEDL_DEBUG
 2733:   else if (strcmp(argv[i], "-pre") == 0) {
 2734:           jfriedl_prefix = argv[++i];
 2735:           continue;
 2736:   } else if (strcmp(argv[i], "-post") == 0) {
 2737:           jfriedl_postfix = argv[++i];
 2738:           continue;
 2739:   } else if (strcmp(argv[i], "-XT") == 0) {
 2740:           sscanf(argv[++i], "%d", &jfriedl_XT);
 2741:           continue;
 2742:   } else if (strcmp(argv[i], "-XR") == 0) {
 2743:           sscanf(argv[++i], "%d", &jfriedl_XR);
 2744:           continue;
 2745:   }
 2746: #endif
 2747: 
 2748: 
 2749:   /* One-char options; many that have no data may be in a single argument; we
 2750:   continue till we hit the last one or one that needs data. */
 2751: 
 2752:   else
 2753:     {
 2754:     char *s = argv[i] + 1;
 2755:     longop = FALSE;
 2756: 
 2757:     while (*s != 0)
 2758:       {
 2759:       for (op = optionlist; op->one_char != 0; op++)
 2760:         {
 2761:         if (*s == op->one_char) break;
 2762:         }
 2763:       if (op->one_char == 0)
 2764:         {
 2765:         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
 2766:           *s, argv[i]);
 2767:         pcregrep_exit(usage(2));
 2768:         }
 2769: 
 2770:       option_data = s+1;
 2771: 
 2772:       /* Break out if this is the last character in the string; it's handled
 2773:       below like a single multi-char option. */
 2774: 
 2775:       if (*option_data == 0) break;
 2776: 
 2777:       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
 2778:       are used for ones that either have a numeric value or a default, i.e.
 2779:       the data is optional. If a digit follows, there is data; if not, carry on
 2780:       with other single-character options in the same string. */
 2781: 
 2782:       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
 2783:         {
 2784:         if (isdigit((unsigned char)s[1])) break;
 2785:         }
 2786:       else   /* Check for an option with data */
 2787:         {
 2788:         if (op->type != OP_NODATA) break;
 2789:         }
 2790: 
 2791:       /* Handle a single-character option with no data, then loop for the
 2792:       next character in the string. */
 2793: 
 2794:       pcre_options = handle_option(*s++, pcre_options);
 2795:       }
 2796:     }
 2797: 
 2798:   /* At this point we should have op pointing to a matched option. If the type
 2799:   is OP_NODATA, it means that there is no data, and the option might set
 2800:   something in the PCRE options. */
 2801: 
 2802:   if (op->type == OP_NODATA)
 2803:     {
 2804:     pcre_options = handle_option(op->one_char, pcre_options);
 2805:     continue;
 2806:     }
 2807: 
 2808:   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
 2809:   either has a value or defaults to something. It cannot have data in a
 2810:   separate item. At the moment, the only such options are "colo(u)r",
 2811:   "only-matching", and Jeffrey Friedl's special -S debugging option. */
 2812: 
 2813:   if (*option_data == 0 &&
 2814:       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
 2815:        op->type == OP_OP_NUMBERS))
 2816:     {
 2817:     switch (op->one_char)
 2818:       {
 2819:       case N_COLOUR:
 2820:       colour_option = (char *)"auto";
 2821:       break;
 2822: 
 2823:       case 'o':
 2824:       only_matching_last = add_number(0, only_matching_last);
 2825:       if (only_matching == NULL) only_matching = only_matching_last;
 2826:       break;
 2827: 
 2828: #ifdef JFRIEDL_DEBUG
 2829:       case 'S':
 2830:       S_arg = 0;
 2831:       break;
 2832: #endif
 2833:       }
 2834:     continue;
 2835:     }
 2836: 
 2837:   /* Otherwise, find the data string for the option. */
 2838: 
 2839:   if (*option_data == 0)
 2840:     {
 2841:     if (i >= argc - 1 || longopwasequals)
 2842:       {
 2843:       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
 2844:       pcregrep_exit(usage(2));
 2845:       }
 2846:     option_data = argv[++i];
 2847:     }
 2848: 
 2849:   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
 2850:   added to a chain of numbers. */
 2851: 
 2852:   if (op->type == OP_OP_NUMBERS)
 2853:     {
 2854:     unsigned long int n = decode_number(option_data, op, longop);
 2855:     omdatastr *omd = (omdatastr *)op->dataptr;
 2856:     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
 2857:     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
 2858:     }
 2859: 
 2860:   /* If the option type is OP_PATLIST, it's the -e option, or one of the
 2861:   include/exclude options, which can be called multiple times to create lists
 2862:   of patterns. */
 2863: 
 2864:   else if (op->type == OP_PATLIST)
 2865:     {
 2866:     patdatastr *pd = (patdatastr *)op->dataptr;
 2867:     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
 2868:     if (*(pd->lastptr) == NULL) goto EXIT2;
 2869:     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
 2870:     }
 2871: 
 2872:   /* If the option type is OP_FILELIST, it's one of the options that names a
 2873:   file. */
 2874: 
 2875:   else if (op->type == OP_FILELIST)
 2876:     {
 2877:     fndatastr *fd = (fndatastr *)op->dataptr;
 2878:     fn = (fnstr *)malloc(sizeof(fnstr));
 2879:     if (fn == NULL)
 2880:       {
 2881:       fprintf(stderr, "pcregrep: malloc failed\n");
 2882:       goto EXIT2;
 2883:       }
 2884:     fn->next = NULL;
 2885:     fn->name = option_data;
 2886:     if (*(fd->anchor) == NULL)
 2887:       *(fd->anchor) = fn;
 2888:     else
 2889:       (*(fd->lastptr))->next = fn;
 2890:     *(fd->lastptr) = fn;
 2891:     }
 2892: 
 2893:   /* Handle OP_BINFILES (the binary-files option) */
 2894: 
 2895:   else if (op->type == OP_BINFILES)
 2896:     {
 2897:     if (strcmp(option_data, "binary") == 0)
 2898:       binary_files = BIN_BINARY;
 2899:     else if (strcmp(option_data, "without-match") == 0)
 2900:       binary_files = BIN_NOMATCH;
 2901:     else if (strcmp(option_data, "text") == 0)
 2902:       binary_files = BIN_TEXT;
 2903:     else
 2904:       {
 2905:       fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
 2906:         option_data);
 2907:       pcregrep_exit(usage(2));
 2908:       }
 2909:     }
 2910: 
 2911:   /* Otherwise, deal with a single string or numeric data value. */
 2912: 
 2913:   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
 2914:            op->type != OP_OP_NUMBER)
 2915:     {
 2916:     *((char **)op->dataptr) = option_data;
 2917:     }
 2918:   else
 2919:     {
 2920:     unsigned long int n = decode_number(option_data, op, longop);
 2921:     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
 2922:       else *((int *)op->dataptr) = n;
 2923:     }
 2924:   }
 2925: 
 2926: /* Options have been decoded. If -C was used, its value is used as a default
 2927: for -A and -B. */
 2928: 
 2929: if (both_context > 0)
 2930:   {
 2931:   if (after_context == 0) after_context = both_context;
 2932:   if (before_context == 0) before_context = both_context;
 2933:   }
 2934: 
 2935: /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
 2936: However, all three set show_only_matching because they display, each in their
 2937: own way, only the data that has matched. */
 2938: 
 2939: if ((only_matching != NULL && (file_offsets || line_offsets)) ||
 2940:     (file_offsets && line_offsets))
 2941:   {
 2942:   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
 2943:     "and/or --line-offsets\n");
 2944:   pcregrep_exit(usage(2));
 2945:   }
 2946: 
 2947: if (only_matching != NULL || file_offsets || line_offsets)
 2948:   show_only_matching = TRUE;
 2949: 
 2950: /* If a locale has not been provided as an option, see if the LC_CTYPE or
 2951: LC_ALL environment variable is set, and if so, use it. */
 2952: 
 2953: if (locale == NULL)
 2954:   {
 2955:   locale = getenv("LC_ALL");
 2956:   locale_from = "LCC_ALL";
 2957:   }
 2958: 
 2959: if (locale == NULL)
 2960:   {
 2961:   locale = getenv("LC_CTYPE");
 2962:   locale_from = "LC_CTYPE";
 2963:   }
 2964: 
 2965: /* If a locale has been provided, set it, and generate the tables that PCRE
 2966: needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
 2967: 
 2968: if (locale != NULL)
 2969:   {
 2970:   if (setlocale(LC_CTYPE, locale) == NULL)
 2971:     {
 2972:     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
 2973:       locale, locale_from);
 2974:     return 2;
 2975:     }
 2976:   pcretables = pcre_maketables();
 2977:   }
 2978: 
 2979: /* Sort out colouring */
 2980: 
 2981: if (colour_option != NULL && strcmp(colour_option, "never") != 0)
 2982:   {
 2983:   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
 2984:   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
 2985:   else
 2986:     {
 2987:     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
 2988:       colour_option);
 2989:     return 2;
 2990:     }
 2991:   if (do_colour)
 2992:     {
 2993:     char *cs = getenv("PCREGREP_COLOUR");
 2994:     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
 2995:     if (cs != NULL) colour_string = cs;
 2996:     }
 2997:   }
 2998: 
 2999: /* Interpret the newline type; the default settings are Unix-like. */
 3000: 
 3001: if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
 3002:   {
 3003:   pcre_options |= PCRE_NEWLINE_CR;
 3004:   endlinetype = EL_CR;
 3005:   }
 3006: else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
 3007:   {
 3008:   pcre_options |= PCRE_NEWLINE_LF;
 3009:   endlinetype = EL_LF;
 3010:   }
 3011: else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
 3012:   {
 3013:   pcre_options |= PCRE_NEWLINE_CRLF;
 3014:   endlinetype = EL_CRLF;
 3015:   }
 3016: else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
 3017:   {
 3018:   pcre_options |= PCRE_NEWLINE_ANY;
 3019:   endlinetype = EL_ANY;
 3020:   }
 3021: else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
 3022:   {
 3023:   pcre_options |= PCRE_NEWLINE_ANYCRLF;
 3024:   endlinetype = EL_ANYCRLF;
 3025:   }
 3026: else
 3027:   {
 3028:   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
 3029:   return 2;
 3030:   }
 3031: 
 3032: /* Interpret the text values for -d and -D */
 3033: 
 3034: if (dee_option != NULL)
 3035:   {
 3036:   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
 3037:   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
 3038:   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
 3039:   else
 3040:     {
 3041:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
 3042:     return 2;
 3043:     }
 3044:   }
 3045: 
 3046: if (DEE_option != NULL)
 3047:   {
 3048:   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
 3049:   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
 3050:   else
 3051:     {
 3052:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
 3053:     return 2;
 3054:     }
 3055:   }
 3056: 
 3057: /* Check the values for Jeffrey Friedl's debugging options. */
 3058: 
 3059: #ifdef JFRIEDL_DEBUG
 3060: if (S_arg > 9)
 3061:   {
 3062:   fprintf(stderr, "pcregrep: bad value for -S option\n");
 3063:   return 2;
 3064:   }
 3065: if (jfriedl_XT != 0 || jfriedl_XR != 0)
 3066:   {
 3067:   if (jfriedl_XT == 0) jfriedl_XT = 1;
 3068:   if (jfriedl_XR == 0) jfriedl_XR = 1;
 3069:   }
 3070: #endif
 3071: 
 3072: /* Get memory for the main buffer. */
 3073: 
 3074: bufsize = 3*bufthird;
 3075: main_buffer = (char *)malloc(bufsize);
 3076: 
 3077: if (main_buffer == NULL)
 3078:   {
 3079:   fprintf(stderr, "pcregrep: malloc failed\n");
 3080:   goto EXIT2;
 3081:   }
 3082: 
 3083: /* If no patterns were provided by -e, and there are no files provided by -f,
 3084: the first argument is the one and only pattern, and it must exist. */
 3085: 
 3086: if (patterns == NULL && pattern_files == NULL)
 3087:   {
 3088:   if (i >= argc) return usage(2);
 3089:   patterns = patterns_last = add_pattern(argv[i++], NULL);
 3090:   if (patterns == NULL) goto EXIT2;
 3091:   }
 3092: 
 3093: /* Compile the patterns that were provided on the command line, either by
 3094: multiple uses of -e or as a single unkeyed pattern. We cannot do this until
 3095: after all the command-line options are read so that we know which PCRE options
 3096: to use. When -F is used, compile_pattern() may add another block into the
 3097: chain, so we must not access the next pointer till after the compile. */
 3098: 
 3099: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
 3100:   {
 3101:   if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
 3102:        (j == 1 && patterns->next == NULL)? 0 : j))
 3103:     goto EXIT2;
 3104:   }
 3105: 
 3106: /* Read and compile the regular expressions that are provided in files. */
 3107: 
 3108: for (fn = pattern_files; fn != NULL; fn = fn->next)
 3109:   {
 3110:   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
 3111:     goto EXIT2;
 3112:   }
 3113: 
 3114: /* Study the regular expressions, as we will be running them many times. If an
 3115: extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
 3116: returned, even if studying produces no data. */
 3117: 
 3118: if (match_limit > 0 || match_limit_recursion > 0)
 3119:   study_options |= PCRE_STUDY_EXTRA_NEEDED;
 3120: 
 3121: /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
 3122: 
 3123: #ifdef SUPPORT_PCREGREP_JIT
 3124: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
 3125:   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
 3126: #endif
 3127: 
 3128: for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
 3129:   {
 3130:   cp->hint = pcre_study(cp->compiled, study_options, &error);
 3131:   if (error != NULL)
 3132:     {
 3133:     char s[16];
 3134:     if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
 3135:     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
 3136:     goto EXIT2;
 3137:     }
 3138: #ifdef SUPPORT_PCREGREP_JIT
 3139:   if (jit_stack != NULL && cp->hint != NULL)
 3140:     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
 3141: #endif
 3142:   }
 3143: 
 3144: /* If --match-limit or --recursion-limit was set, put the value(s) into the
 3145: pcre_extra block for each pattern. There will always be an extra block because
 3146: of the use of PCRE_STUDY_EXTRA_NEEDED above. */
 3147: 
 3148: for (cp = patterns; cp != NULL; cp = cp->next)
 3149:   {
 3150:   if (match_limit > 0)
 3151:     {
 3152:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
 3153:     cp->hint->match_limit = match_limit;
 3154:     }
 3155: 
 3156:   if (match_limit_recursion > 0)
 3157:     {
 3158:     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
 3159:     cp->hint->match_limit_recursion = match_limit_recursion;
 3160:     }
 3161:   }
 3162: 
 3163: /* If there are include or exclude patterns read from the command line, compile
 3164: them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
 3165: 0. */
 3166: 
 3167: for (j = 0; j < 4; j++)
 3168:   {
 3169:   int k;
 3170:   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
 3171:     {
 3172:     if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
 3173:          (k == 1 && cp->next == NULL)? 0 : k))
 3174:       goto EXIT2;
 3175:     }
 3176:   }
 3177: 
 3178: /* Read and compile include/exclude patterns from files. */
 3179: 
 3180: for (fn = include_from; fn != NULL; fn = fn->next)
 3181:   {
 3182:   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
 3183:     goto EXIT2;
 3184:   }
 3185: 
 3186: for (fn = exclude_from; fn != NULL; fn = fn->next)
 3187:   {
 3188:   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
 3189:     goto EXIT2;
 3190:   }
 3191: 
 3192: /* If there are no files that contain lists of files to search, and there are
 3193: no file arguments, search stdin, and then exit. */
 3194: 
 3195: if (file_lists == NULL && i >= argc)
 3196:   {
 3197:   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
 3198:     (filenames > FN_DEFAULT)? stdin_name : NULL);
 3199:   goto EXIT;
 3200:   }
 3201: 
 3202: /* If any files that contain lists of files to search have been specified,
 3203: read them line by line and search the given files. */
 3204: 
 3205: for (fn = file_lists; fn != NULL; fn = fn->next)
 3206:   {
 3207:   char buffer[PATBUFSIZE];
 3208:   FILE *fl;
 3209:   if (strcmp(fn->name, "-") == 0) fl = stdin; else
 3210:     {
 3211:     fl = fopen(fn->name, "rb");
 3212:     if (fl == NULL)
 3213:       {
 3214:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
 3215:         strerror(errno));
 3216:       goto EXIT2;
 3217:       }
 3218:     }
 3219:   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
 3220:     {
 3221:     int frc;
 3222:     char *end = buffer + (int)strlen(buffer);
 3223:     while (end > buffer && isspace(end[-1])) end--;
 3224:     *end = 0;
 3225:     if (*buffer != 0)
 3226:       {
 3227:       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
 3228:       if (frc > 1) rc = frc;
 3229:         else if (frc == 0 && rc == 1) rc = 0;
 3230:       }
 3231:     }
 3232:   if (fl != stdin) fclose(fl);
 3233:   }
 3234: 
 3235: /* After handling file-list, work through remaining arguments. Pass in the fact
 3236: that there is only one argument at top level - this suppresses the file name if
 3237: the argument is not a directory and filenames are not otherwise forced. */
 3238: 
 3239: only_one_at_top = i == argc - 1 && file_lists == NULL;
 3240: 
 3241: for (; i < argc; i++)
 3242:   {
 3243:   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
 3244:     only_one_at_top);
 3245:   if (frc > 1) rc = frc;
 3246:     else if (frc == 0 && rc == 1) rc = 0;
 3247:   }
 3248: 
 3249: EXIT:
 3250: #ifdef SUPPORT_PCREGREP_JIT
 3251: if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
 3252: #endif
 3253: 
 3254: if (main_buffer != NULL) free(main_buffer);
 3255: 
 3256: free_pattern_chain(patterns);
 3257: free_pattern_chain(include_patterns);
 3258: free_pattern_chain(include_dir_patterns);
 3259: free_pattern_chain(exclude_patterns);
 3260: free_pattern_chain(exclude_dir_patterns);
 3261: 
 3262: free_file_chain(exclude_from);
 3263: free_file_chain(include_from);
 3264: free_file_chain(pattern_files);
 3265: free_file_chain(file_lists);
 3266: 
 3267: while (only_matching != NULL)
 3268:   {
 3269:   omstr *this = only_matching;
 3270:   only_matching = this->next;
 3271:   free(this);
 3272:   }
 3273: 
 3274: pcregrep_exit(rc);
 3275: 
 3276: EXIT2:
 3277: rc = 2;
 3278: goto EXIT;
 3279: }
 3280: 
 3281: /* End of pcregrep */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>