File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcregrep.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:51 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_21, HEAD
pcre

    1: /*************************************************
    2: *               pcregrep program                 *
    3: *************************************************/
    4: 
    5: /* This is a grep program that uses the PCRE regular expression library to do
    6: its pattern matching. On a Unix or Win32 system it can recurse into
    7: directories.
    8: 
    9:            Copyright (c) 1997-2011 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: #ifdef HAVE_CONFIG_H
   41: #include "config.h"
   42: #endif
   43: 
   44: #include <ctype.h>
   45: #include <locale.h>
   46: #include <stdio.h>
   47: #include <string.h>
   48: #include <stdlib.h>
   49: #include <errno.h>
   50: 
   51: #include <sys/types.h>
   52: #include <sys/stat.h>
   53: 
   54: #ifdef HAVE_UNISTD_H
   55: #include <unistd.h>
   56: #endif
   57: 
   58: #ifdef SUPPORT_LIBZ
   59: #include <zlib.h>
   60: #endif
   61: 
   62: #ifdef SUPPORT_LIBBZ2
   63: #include <bzlib.h>
   64: #endif
   65: 
   66: #include "pcre.h"
   67: 
/* Boolean values, for use with the BOOL type below. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): presumably the maximum number of patterns and the size of the
offsets vector handed to PCRE; neither is referenced in this part of the file,
so confirm against the code that uses them. */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* PATBUFSIZE is BUFSIZ when that is larger than 8192, and 8192 otherwise. */

#if BUFSIZ > 8192
#define PATBUFSIZE BUFSIZ
#else
#define PATBUFSIZE 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options. The lower-case names are the values held
in dee_action (the -d, directories, setting) and the upper-case names are the
values held in DEE_action (the -D, devices, setting). */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). These are distinct bits
so that they can be combined in a single int. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types; the current one is held in "endlinetype". */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
  106: 
  107: /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
  108: environments), a warning is issued if the value of fwrite() is ignored.
  109: Unfortunately, casting to (void) does not suppress the warning. To get round
  110: this, we use a macro that compiles a fudge. Oddly, this does not also seem to
  111: apply to fprintf(). */
  112: 
  113: #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
  114: 
  115: 
  116: 
  117: /*************************************************
  118: *               Global variables                 *
  119: *************************************************/
  120: 
/* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* One of the EL_xxx values; it selects the scanning code used by end_of_line()
and previous_line(). */

static int  endlinetype;

/* String-valued settings. Those that appear as a data pointer in optionlist[]
receive the text given with the corresponding option: colour_option
(--color/--colour), dee_option (-d), DEE_option (-D), newline (-N),
pattern_filename (-f), stdin_name (--label) and locale (--locale). */

static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;
static char *dee_option = NULL;
static char *DEE_option = NULL;
static char *main_buffer = NULL;
static char *newline = NULL;
static char *pattern_filename = NULL;
static char *stdin_name = (char *)"(standard input)";
static char *locale = NULL;

static const unsigned char *pcretables = NULL;

/* NOTE(review): presumably the compiled patterns and their study data, with
pattern_count giving the number in use; the code that fills them is not in this
part of the file. */

static int  pattern_count = 0;
static pcre **pattern_list = NULL;
static pcre_extra **hints_list = NULL;

/* Pattern text given with --include, --exclude, --include-dir and
--exclude-dir (see optionlist[]), followed by pointers for the compiled forms
of the same four patterns. */

static char *include_pattern = NULL;
static char *exclude_pattern = NULL;
static char *include_dir_pattern = NULL;
static char *exclude_dir_pattern = NULL;

static pcre *include_compiled = NULL;
static pcre *exclude_compiled = NULL;
static pcre *include_dir_compiled = NULL;
static pcre *exclude_dir_compiled = NULL;

/* Numeric settings. after_context, before_context and both_context are the
targets of -A, -B and -C; bufthird is the target of --buffer-size and bufsize
starts out as three times its default; only_matching is the target of -o.
NOTE(review): PCREGREP_BUFSIZE is not defined in this file; presumably it comes
from config.h. */

static int after_context = 0;
static int before_context = 0;
static int both_context = 0;
static int bufthird = PCREGREP_BUFSIZE;
static int bufsize = 3*PCREGREP_BUFSIZE;
static int dee_action = dee_READ;
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
static int only_matching = -1;
static int process_options = 0;

/* Options for studying patterns: JIT compilation is requested by default only
when pcregrep is built with JIT support. */

#ifdef SUPPORT_PCREGREP_JIT
static int study_options = PCRE_STUDY_JIT_COMPILE;
#else
static int study_options = 0;
#endif

/* Targets of --match-limit and --recursion-limit. */

static unsigned long int match_limit = 0;
static unsigned long int match_limit_recursion = 0;

/* Flags. In the part of the file visible here: hyphenpending is set by
do_after_lines() once it has output "after" lines; number makes
do_after_lines() prefix each line with its line number; resource_error makes
pcregrep_exit() print an explanatory message; utf8 switches end_of_line() and
previous_line() to decoding multi-byte characters. */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_buffered = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE;
static BOOL quiet = FALSE;
static BOOL silent = FALSE;
static BOOL utf8 = FALSE;
  195: 
/* Structure for options and list of them */

/* The kinds of option. OP_NODATA entries in optionlist[] have no data pointer;
the other kinds are used for options whose long name takes "=value".
NOTE(review): the OP_OP_ kinds are presumably those whose value is optional;
confirm against the option-parsing code. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
       OP_OP_NUMBER, OP_PATLIST };

/* One entry in the option table:
  type       one of the OP_xxx values above
  one_char   the single-letter form of the option, or a negative N_xxx code
  dataptr    the variable associated with the option, or NULL
  long_name  the long form of the option, without the leading hyphens
  help_text  a one-line description of the option
*/

typedef struct option_item {
  int type;
  int one_char;
  void *dataptr;
  const char *long_name;
  const char *help_text;
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
#define N_BUFSIZE      (-15)
#define N_NOJIT        (-16)
  228: 
/* The table of all options. Each entry gives the option type, the
single-letter form (or a negative N_xxx code when there is none), the variable
associated with the option (NULL when there is none), the long name, and the
help text. Alternative spellings of one option (color/colour, and the hyphen
and underscore forms of exclude-dir and include-dir) are separate entries that
share the same code and variable. The table is terminated by an entry whose
one_char is zero and whose long_name is NULL. */

static option_item optionlist[] = {
  { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
  { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
  { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
  { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
  { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
  { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
  { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
  { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
  { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
  { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
  { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
  { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
  { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
  { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
  { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
#ifdef SUPPORT_PCREGREP_JIT
  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
#else
  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
#endif
  { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
  { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
  { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
  { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
  { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
  { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
  { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
  { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
  { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
  { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
  { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
  { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
  { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
  { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
  { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
  { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },

  /* These two were accidentally implemented with underscores instead of
  hyphens in the option names. As this was not discovered for several releases,
  the incorrect versions are left in the table for compatibility. However, the
  --help function misses out any option that has an underscore in its name. */

  { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
  { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },

#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
#endif
  { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
  { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
  { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
  { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA,    0,        NULL,               NULL,            NULL }
};
  291: 
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Each table has eight entries, one for every
combination of the three bits, and entry n of suffix[] closes whatever entry n
of prefix[] opens. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };

/* UTF-8 tables - used by the line-scanning code when the newline setting is
"any" or "anycrlf" and UTF-8 mode is on. utf8_table3 is indexed by the number
of additional bytes in a character and gives the mask that extracts the data
bits from its first byte. utf8_table4 is indexed by the bottom six bits of a
first byte that is 0xc0 or greater and gives the number of additional bytes
that follow it. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
  312: 
  313: 
  314: 
  315: /*************************************************
  316: *         Exit from the program                  *
  317: *************************************************/
  318: 
  319: /* If there has been a resource error, give a suitable message.
  320: 
  321: Argument:  the return code
  322: Returns:   does not return
  323: */
  324: 
  325: static void
  326: pcregrep_exit(int rc)
  327: {
  328: if (resource_error)
  329:   {
  330:   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
  331:     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
  332:     PCRE_ERROR_JIT_STACKLIMIT);
  333:   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
  334:   }
  335: 
  336: exit(rc);
  337: }
  338: 
  339: 
  340: /*************************************************
  341: *            OS-specific functions               *
  342: *************************************************/
  343: 
  344: /* These functions are defined so that they can be made system specific,
  345: although at present the only ones are for Unix, Win32, and for "no support". */
  346: 
  347: 
  348: /************* Directory scanning in Unix ***********/
  349: 
  350: #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
  351: #include <sys/types.h>
  352: #include <sys/stat.h>
  353: #include <dirent.h>
  354: 
  355: typedef DIR directory_type;
  356: 
  357: static int
  358: isdirectory(char *filename)
  359: {
  360: struct stat statbuf;
  361: if (stat(filename, &statbuf) < 0)
  362:   return 0;        /* In the expectation that opening as a file will fail */
  363: return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
  364: }
  365: 
  366: static directory_type *
  367: opendirectory(char *filename)
  368: {
  369: return opendir(filename);
  370: }
  371: 
  372: static char *
  373: readdirectory(directory_type *dir)
  374: {
  375: for (;;)
  376:   {
  377:   struct dirent *dent = readdir(dir);
  378:   if (dent == NULL) return NULL;
  379:   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
  380:     return dent->d_name;
  381:   }
  382: /* Control never reaches here */
  383: }
  384: 
  385: static void
  386: closedirectory(directory_type *dir)
  387: {
  388: closedir(dir);
  389: }
  390: 
  391: 
  392: /************* Test for regular file in Unix **********/
  393: 
  394: static int
  395: isregfile(char *filename)
  396: {
  397: struct stat statbuf;
  398: if (stat(filename, &statbuf) < 0)
  399:   return 1;        /* In the expectation that opening as a file will fail */
  400: return (statbuf.st_mode & S_IFMT) == S_IFREG;
  401: }
  402: 
  403: 
  404: /************* Test for a terminal in Unix **********/
  405: 
/* Report whether standard output is a terminal, by applying isatty() to the
file descriptor behind stdout. */

static BOOL
is_stdout_tty(void)
{
return isatty(fileno(stdout));
}

/* Report whether the stream f is a terminal, by applying isatty() to the file
descriptor behind it. */

static BOOL
is_file_tty(FILE *f)
{
return isatty(fileno(f));
}
  417: 
  418: 
  419: /************* Directory scanning in Win32 ***********/
  420: 
  421: /* I (Philip Hazel) have no means of testing this code. It was contributed by
  422: Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
  423: when it did not exist. David Byron added a patch that moved the #include of
  424: <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
  425: The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
  426: undefined when it is indeed undefined. */
  427: 
  428: #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
  429: 
  430: #ifndef STRICT
  431: # define STRICT
  432: #endif
  433: #ifndef WIN32_LEAN_AND_MEAN
  434: # define WIN32_LEAN_AND_MEAN
  435: #endif
  436: 
  437: #include <windows.h>
  438: 
  439: #ifndef INVALID_FILE_ATTRIBUTES
  440: #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
  441: #endif
  442: 
/* State for one directory scan. opendirectory() fills in handle and data by
calling FindFirstFile() and sets first; readdirectory() clears first when it
hands out that initial entry and calls FindNextFile() for every later one. */

typedef struct directory_type
{
HANDLE handle;          /* search handle from FindFirstFile() */
BOOL first;             /* TRUE until the first entry has been consumed */
WIN32_FIND_DATA data;   /* the most recently found entry */
} directory_type;
  449: 
  450: int
  451: isdirectory(char *filename)
  452: {
  453: DWORD attr = GetFileAttributes(filename);
  454: if (attr == INVALID_FILE_ATTRIBUTES)
  455:   return 0;
  456: return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
  457: }
  458: 
  459: directory_type *
  460: opendirectory(char *filename)
  461: {
  462: size_t len;
  463: char *pattern;
  464: directory_type *dir;
  465: DWORD err;
  466: len = strlen(filename);
  467: pattern = (char *) malloc(len + 3);
  468: dir = (directory_type *) malloc(sizeof(*dir));
  469: if ((pattern == NULL) || (dir == NULL))
  470:   {
  471:   fprintf(stderr, "pcregrep: malloc failed\n");
  472:   pcregrep_exit(2);
  473:   }
  474: memcpy(pattern, filename, len);
  475: memcpy(&(pattern[len]), "\\*", 3);
  476: dir->handle = FindFirstFile(pattern, &(dir->data));
  477: if (dir->handle != INVALID_HANDLE_VALUE)
  478:   {
  479:   free(pattern);
  480:   dir->first = TRUE;
  481:   return dir;
  482:   }
  483: err = GetLastError();
  484: free(pattern);
  485: free(dir);
  486: errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
  487: return NULL;
  488: }
  489: 
  490: char *
  491: readdirectory(directory_type *dir)
  492: {
  493: for (;;)
  494:   {
  495:   if (!dir->first)
  496:     {
  497:     if (!FindNextFile(dir->handle, &(dir->data)))
  498:       return NULL;
  499:     }
  500:   else
  501:     {
  502:     dir->first = FALSE;
  503:     }
  504:   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
  505:     return dir->data.cFileName;
  506:   }
  507: #ifndef _MSC_VER
  508: return NULL;   /* Keep compiler happy; never executed */
  509: #endif
  510: }
  511: 
/* Finish a directory scan: close the search handle and free the scan state
that opendirectory() allocated. */

void
closedirectory(directory_type *dir)
{
FindClose(dir->handle);
free(dir);
}
  518: 
  519: 
  520: /************* Test for regular file in Win32 **********/
  521: 
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories. The result is therefore 1 for anything
for which isdirectory() returns 0, including paths whose attributes cannot be
read, and 0 for directories. */

int isregfile(char *filename)
{
return !isdirectory(filename);
}
  529: 
  530: 
  531: /************* Test for a terminal in Win32 **********/
  532: 
/* I don't know how to do this; assume never. Both functions therefore always
return FALSE. */

static BOOL
is_stdout_tty(void)
{
return FALSE;
}

/* The stream argument is not examined. */

static BOOL
is_file_tty(FILE *f)
{
return FALSE;
}
  546: 
  547: 
  548: /************* Directory scanning when we can't do it ***********/
  549: 
  550: /* The type is void, and apart from isdirectory(), the functions do nothing. */
  551: 
  552: #else
  553: 
  554: typedef void directory_type;
  555: 
  556: int isdirectory(char *filename) { return 0; }
  557: directory_type * opendirectory(char *filename) { return (directory_type*)0;}
  558: char *readdirectory(directory_type *dir) { return (char*)0;}
  559: void closedirectory(directory_type *dir) {}
  560: 
  561: 
  562: /************* Test for regular when we can't do it **********/
  563: 
  564: /* Assume all files are regular. */
  565: 
  566: int isregfile(char *filename) { return 1; }
  567: 
  568: 
  569: /************* Test for a terminal when we can't do it **********/
  570: 
/* With no means of testing, standard output is never treated as a terminal. */

static BOOL
is_stdout_tty(void)
{
return FALSE;
}

/* Likewise for any other stream; the argument is not examined. */

static BOOL
is_file_tty(FILE *f)
{
return FALSE;
}
  582: 
  583: #endif
  584: 
  585: 
  586: 
  587: #ifndef HAVE_STRERROR
  588: /*************************************************
  589: *     Provide strerror() for non-ANSI libraries  *
  590: *************************************************/
  591: 
  592: /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
  593: in their libraries, but can provide the same facility by this simple
  594: alternative function. */
  595: 
  596: extern int   sys_nerr;
  597: extern char *sys_errlist[];
  598: 
  599: char *
  600: strerror(int n)
  601: {
  602: if (n < 0 || n >= sys_nerr) return "unknown error number";
  603: return sys_errlist[n];
  604: }
  605: #endif /* HAVE_STRERROR */
  606: 
  607: 
  608: 
  609: /*************************************************
  610: *            Read one line of input              *
  611: *************************************************/
  612: 
  613: /* Normally, input is read using fread() into a large buffer, so many lines may
  614: be read at once. However, doing this for tty input means that no output appears
  615: until a lot of input has been typed. Instead, tty input is handled line by
  616: line. We cannot use fgets() for this, because it does not stop at a binary
  617: zero, and therefore there is no way of telling how many characters it has read,
  618: because there may be binary zeros embedded in the data.
  619: 
  620: Arguments:
  621:   buffer     the buffer to read into
  622:   length     the maximum number of characters to read
  623:   f          the file
  624: 
  625: Returns:     the number of characters read, zero at end of file
  626: */
  627: 
  628: static int
  629: read_one_line(char *buffer, int length, FILE *f)
  630: {
  631: int c;
  632: int yield = 0;
  633: while ((c = fgetc(f)) != EOF)
  634:   {
  635:   buffer[yield++] = c;
  636:   if (c == '\n' || yield >= length) break;
  637:   }
  638: return yield;
  639: }
  640: 
  641: 
  642: 
  643: /*************************************************
  644: *             Find end of line                   *
  645: *************************************************/
  646: 
  647: /* The length of the endline sequence that is found is set via lenptr. This may
  648: be zero at the very end of the file if there is no line-ending sequence there.
  649: 
  650: Arguments:
  651:   p         current position in line
  652:   endptr    end of available data
  653:   lenptr    where to put the length of the eol sequence
  654: 
  655: Returns:    pointer after the last byte of the line,
  656:             including the newline byte(s)
  657: */
  658: 
  659: static char *
  660: end_of_line(char *p, char *endptr, int *lenptr)
  661: {
  662: switch(endlinetype)
  663:   {
  664:   default:      /* Just in case */
  665:   case EL_LF:
  666:   while (p < endptr && *p != '\n') p++;
  667:   if (p < endptr)
  668:     {
  669:     *lenptr = 1;
  670:     return p + 1;
  671:     }
  672:   *lenptr = 0;
  673:   return endptr;
  674: 
  675:   case EL_CR:
  676:   while (p < endptr && *p != '\r') p++;
  677:   if (p < endptr)
  678:     {
  679:     *lenptr = 1;
  680:     return p + 1;
  681:     }
  682:   *lenptr = 0;
  683:   return endptr;
  684: 
  685:   case EL_CRLF:
  686:   for (;;)
  687:     {
  688:     while (p < endptr && *p != '\r') p++;
  689:     if (++p >= endptr)
  690:       {
  691:       *lenptr = 0;
  692:       return endptr;
  693:       }
  694:     if (*p == '\n')
  695:       {
  696:       *lenptr = 2;
  697:       return p + 1;
  698:       }
  699:     }
  700:   break;
  701: 
  702:   case EL_ANYCRLF:
  703:   while (p < endptr)
  704:     {
  705:     int extra = 0;
  706:     register int c = *((unsigned char *)p);
  707: 
  708:     if (utf8 && c >= 0xc0)
  709:       {
  710:       int gcii, gcss;
  711:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
  712:       gcss = 6*extra;
  713:       c = (c & utf8_table3[extra]) << gcss;
  714:       for (gcii = 1; gcii <= extra; gcii++)
  715:         {
  716:         gcss -= 6;
  717:         c |= (p[gcii] & 0x3f) << gcss;
  718:         }
  719:       }
  720: 
  721:     p += 1 + extra;
  722: 
  723:     switch (c)
  724:       {
  725:       case 0x0a:    /* LF */
  726:       *lenptr = 1;
  727:       return p;
  728: 
  729:       case 0x0d:    /* CR */
  730:       if (p < endptr && *p == 0x0a)
  731:         {
  732:         *lenptr = 2;
  733:         p++;
  734:         }
  735:       else *lenptr = 1;
  736:       return p;
  737: 
  738:       default:
  739:       break;
  740:       }
  741:     }   /* End of loop for ANYCRLF case */
  742: 
  743:   *lenptr = 0;  /* Must have hit the end */
  744:   return endptr;
  745: 
  746:   case EL_ANY:
  747:   while (p < endptr)
  748:     {
  749:     int extra = 0;
  750:     register int c = *((unsigned char *)p);
  751: 
  752:     if (utf8 && c >= 0xc0)
  753:       {
  754:       int gcii, gcss;
  755:       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
  756:       gcss = 6*extra;
  757:       c = (c & utf8_table3[extra]) << gcss;
  758:       for (gcii = 1; gcii <= extra; gcii++)
  759:         {
  760:         gcss -= 6;
  761:         c |= (p[gcii] & 0x3f) << gcss;
  762:         }
  763:       }
  764: 
  765:     p += 1 + extra;
  766: 
  767:     switch (c)
  768:       {
  769:       case 0x0a:    /* LF */
  770:       case 0x0b:    /* VT */
  771:       case 0x0c:    /* FF */
  772:       *lenptr = 1;
  773:       return p;
  774: 
  775:       case 0x0d:    /* CR */
  776:       if (p < endptr && *p == 0x0a)
  777:         {
  778:         *lenptr = 2;
  779:         p++;
  780:         }
  781:       else *lenptr = 1;
  782:       return p;
  783: 
  784:       case 0x85:    /* NEL */
  785:       *lenptr = utf8? 2 : 1;
  786:       return p;
  787: 
  788:       case 0x2028:  /* LS */
  789:       case 0x2029:  /* PS */
  790:       *lenptr = 3;
  791:       return p;
  792: 
  793:       default:
  794:       break;
  795:       }
  796:     }   /* End of loop for ANY case */
  797: 
  798:   *lenptr = 0;  /* Must have hit the end */
  799:   return endptr;
  800:   }     /* End of overall switch */
  801: }
  802: 
  803: 
  804: 
  805: /*************************************************
  806: *         Find start of previous line            *
  807: *************************************************/
  808: 
  809: /* This is called when looking back for before lines to print.
  810: 
  811: Arguments:
  812:   p         start of the subsequent line
  813:   startptr  start of available data
  814: 
  815: Returns:    pointer to the start of the previous line
  816: */
  817: 
  818: static char *
  819: previous_line(char *p, char *startptr)
  820: {
  821: switch(endlinetype)
  822:   {
  823:   default:      /* Just in case */
  824:   case EL_LF:
  825:   p--;
  826:   while (p > startptr && p[-1] != '\n') p--;
  827:   return p;
  828: 
  829:   case EL_CR:
  830:   p--;
  831:   while (p > startptr && p[-1] != '\n') p--;
  832:   return p;
  833: 
  834:   case EL_CRLF:
  835:   for (;;)
  836:     {
  837:     p -= 2;
  838:     while (p > startptr && p[-1] != '\n') p--;
  839:     if (p <= startptr + 1 || p[-2] == '\r') return p;
  840:     }
  841:   return p;   /* But control should never get here */
  842: 
  843:   case EL_ANY:
  844:   case EL_ANYCRLF:
  845:   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
  846:   if (utf8) while ((*p & 0xc0) == 0x80) p--;
  847: 
  848:   while (p > startptr)
  849:     {
  850:     register int c;
  851:     char *pp = p - 1;
  852: 
  853:     if (utf8)
  854:       {
  855:       int extra = 0;
  856:       while ((*pp & 0xc0) == 0x80) pp--;
  857:       c = *((unsigned char *)pp);
  858:       if (c >= 0xc0)
  859:         {
  860:         int gcii, gcss;
  861:         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
  862:         gcss = 6*extra;
  863:         c = (c & utf8_table3[extra]) << gcss;
  864:         for (gcii = 1; gcii <= extra; gcii++)
  865:           {
  866:           gcss -= 6;
  867:           c |= (pp[gcii] & 0x3f) << gcss;
  868:           }
  869:         }
  870:       }
  871:     else c = *((unsigned char *)pp);
  872: 
  873:     if (endlinetype == EL_ANYCRLF) switch (c)
  874:       {
  875:       case 0x0a:    /* LF */
  876:       case 0x0d:    /* CR */
  877:       return p;
  878: 
  879:       default:
  880:       break;
  881:       }
  882: 
  883:     else switch (c)
  884:       {
  885:       case 0x0a:    /* LF */
  886:       case 0x0b:    /* VT */
  887:       case 0x0c:    /* FF */
  888:       case 0x0d:    /* CR */
  889:       case 0x85:    /* NEL */
  890:       case 0x2028:  /* LS */
  891:       case 0x2029:  /* PS */
  892:       return p;
  893: 
  894:       default:
  895:       break;
  896:       }
  897: 
  898:     p = pp;  /* Back one character */
  899:     }        /* End of loop for ANY case */
  900: 
  901:   return startptr;  /* Hit start of data */
  902:   }     /* End of overall switch */
  903: }
  904: 
  905: 
  906: 
  907: 
  908: 
  909: /*************************************************
  910: *       Print the previous "after" lines         *
  911: *************************************************/
  912: 
  913: /* This is called if we are about to lose said lines because of buffer filling,
  914: and at the end of the file. The data in the line is written using fwrite() so
  915: that a binary zero does not terminate it.
  916: 
  917: Arguments:
  918:   lastmatchnumber   the number of the last matching line, plus one
  919:   lastmatchrestart  where we restarted after the last match
  920:   endptr            end of available data
  921:   printname         filename for printing
  922: 
  923: Returns:            nothing
  924: */
  925: 
  926: static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
  927:   char *endptr, char *printname)
  928: {
  929: if (after_context > 0 && lastmatchnumber > 0)
  930:   {
  931:   int count = 0;
  932:   while (lastmatchrestart < endptr && count++ < after_context)
  933:     {
  934:     int ellength;
  935:     char *pp = lastmatchrestart;
  936:     if (printname != NULL) fprintf(stdout, "%s-", printname);
  937:     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
  938:     pp = end_of_line(pp, endptr, &ellength);
  939:     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
  940:     lastmatchrestart = pp;
  941:     }
  942:   hyphenpending = TRUE;
  943:   }
  944: }
  945: 
  946: 
  947: 
  948: /*************************************************
  949: *   Apply patterns to subject till one matches   *
  950: *************************************************/
  951: 
  952: /* This function is called to run through all patterns, looking for a match. It
  953: is used multiple times for the same subject when colouring is enabled, in order
  954: to find all possible matches.
  955: 
  956: Arguments:
  957:   matchptr     the start of the subject
  958:   length       the length of the subject to match
  959:   startoffset  where to start matching
  960:   offsets      the offets vector to fill in
  961:   mrc          address of where to put the result of pcre_exec()
  962: 
  963: Returns:      TRUE if there was a match
  964:               FALSE if there was no match
  965:               invert if there was a non-fatal error
  966: */
  967: 
  968: static BOOL
  969: match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
  970:   int *mrc)
  971: {
  972: int i;
  973: size_t slen = length;
  974: const char *msg = "this text:\n\n";
  975: if (slen > 200)
  976:   {
  977:   slen = 200;
  978:   msg = "text that starts:\n\n";
  979:   }
  980: for (i = 0; i < pattern_count; i++)
  981:   {
  982:   *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
  983:     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
  984:   if (*mrc >= 0) return TRUE;
  985:   if (*mrc == PCRE_ERROR_NOMATCH) continue;
  986:   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
  987:   if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
  988:   fprintf(stderr, "%s", msg);
  989:   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
  990:   fprintf(stderr, "\n\n");
  991:   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
  992:       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
  993:     resource_error = TRUE;
  994:   if (error_count++ > 20)
  995:     {
  996:     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
  997:     pcregrep_exit(2);
  998:     }
  999:   return invert;    /* No more matching; don't show the line again */
 1000:   }
 1001: 
 1002: return FALSE;  /* No match, no errors */
 1003: }
 1004: 
 1005: 
 1006: 
 1007: /*************************************************
 1008: *            Grep an individual file             *
 1009: *************************************************/
 1010: 
 1011: /* This is called from grep_or_recurse() below. It uses a buffer that is three
 1012: times the value of bufthird. The matching point is never allowed to stray into
 1013: the top third of the buffer, thus keeping more of the file available for
 1014: context printing or for multiline scanning. For large files, the pointer will
 1015: be in the middle third most of the time, so the bottom third is available for
 1016: "before" context printing.
 1017: 
 1018: Arguments:
 1019:   handle       the fopened FILE stream for a normal file
 1020:                the gzFile pointer when reading is via libz
 1021:                the BZFILE pointer when reading is via libbz2
 1022:   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
 1023:   filename     the file name or NULL (for errors)
 1024:   printname    the file name if it is to be printed for each match
 1025:                or NULL if the file name is not to be printed
 1026:                it cannot be NULL if filenames[_nomatch]_only is set
 1027: 
 1028: Returns:       0 if there was at least one match
 1029:                1 otherwise (no matches)
 1030:                2 if an overlong line is encountered
 1031:                3 if there is a read error on a .bz2 file
 1032: */
 1033: 
 1034: static int
 1035: pcregrep(void *handle, int frtype, char *filename, char *printname)
 1036: {
 1037: int rc = 1;
 1038: int linenumber = 1;
 1039: int lastmatchnumber = 0;
 1040: int count = 0;
 1041: int filepos = 0;
 1042: int offsets[OFFSET_SIZE];
 1043: char *lastmatchrestart = NULL;
 1044: char *ptr = main_buffer;
 1045: char *endptr;
 1046: size_t bufflength;
 1047: BOOL endhyphenpending = FALSE;
 1048: BOOL input_line_buffered = line_buffered;
 1049: FILE *in = NULL;                    /* Ensure initialized */
 1050: 
 1051: #ifdef SUPPORT_LIBZ
 1052: gzFile ingz = NULL;
 1053: #endif
 1054: 
 1055: #ifdef SUPPORT_LIBBZ2
 1056: BZFILE *inbz2 = NULL;
 1057: #endif
 1058: 
 1059: 
 1060: /* Do the first read into the start of the buffer and set up the pointer to end
 1061: of what we have. In the case of libz, a non-zipped .gz file will be read as a
 1062: plain file. However, if a .bz2 file isn't actually bzipped, the first read will
 1063: fail. */
 1064: 
 1065: #ifdef SUPPORT_LIBZ
 1066: if (frtype == FR_LIBZ)
 1067:   {
 1068:   ingz = (gzFile)handle;
 1069:   bufflength = gzread (ingz, main_buffer, bufsize);
 1070:   }
 1071: else
 1072: #endif
 1073: 
 1074: #ifdef SUPPORT_LIBBZ2
 1075: if (frtype == FR_LIBBZ2)
 1076:   {
 1077:   inbz2 = (BZFILE *)handle;
 1078:   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
 1079:   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
 1080:   }                                    /* without the cast it is unsigned. */
 1081: else
 1082: #endif
 1083: 
 1084:   {
 1085:   in = (FILE *)handle;
 1086:   if (is_file_tty(in)) input_line_buffered = TRUE;
 1087:   bufflength = input_line_buffered?
 1088:     read_one_line(main_buffer, bufsize, in) :
 1089:     fread(main_buffer, 1, bufsize, in);
 1090:   }
 1091: 
 1092: endptr = main_buffer + bufflength;
 1093: 
 1094: /* Loop while the current pointer is not at the end of the file. For large
 1095: files, endptr will be at the end of the buffer when we are in the middle of the
 1096: file, but ptr will never get there, because as soon as it gets over 2/3 of the
 1097: way, the buffer is shifted left and re-filled. */
 1098: 
 1099: while (ptr < endptr)
 1100:   {
 1101:   int endlinelength;
 1102:   int mrc = 0;
 1103:   int startoffset = 0;
 1104:   BOOL match;
 1105:   char *matchptr = ptr;
 1106:   char *t = ptr;
 1107:   size_t length, linelength;
 1108: 
 1109:   /* At this point, ptr is at the start of a line. We need to find the length
 1110:   of the subject string to pass to pcre_exec(). In multiline mode, it is the
 1111:   length remainder of the data in the buffer. Otherwise, it is the length of
 1112:   the next line, excluding the terminating newline. After matching, we always
 1113:   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
 1114:   option is used for compiling, so that any match is constrained to be in the
 1115:   first line. */
 1116: 
 1117:   t = end_of_line(t, endptr, &endlinelength);
 1118:   linelength = t - ptr - endlinelength;
 1119:   length = multiline? (size_t)(endptr - ptr) : linelength;
 1120: 
 1121:   /* Check to see if the line we are looking at extends right to the very end
 1122:   of the buffer without a line terminator. This means the line is too long to
 1123:   handle. */
 1124: 
 1125:   if (endlinelength == 0 && t == main_buffer + bufsize)
 1126:     {
 1127:     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
 1128:                     "pcregrep: check the --buffer-size option\n",
 1129:                     linenumber,
 1130:                     (filename == NULL)? "" : " of file ",
 1131:                     (filename == NULL)? "" : filename);
 1132:     return 2;
 1133:     }
 1134: 
 1135:   /* Extra processing for Jeffrey Friedl's debugging. */
 1136: 
 1137: #ifdef JFRIEDL_DEBUG
 1138:   if (jfriedl_XT || jfriedl_XR)
 1139:   {
 1140:       #include <sys/time.h>
 1141:       #include <time.h>
 1142:       struct timeval start_time, end_time;
 1143:       struct timezone dummy;
 1144:       int i;
 1145: 
 1146:       if (jfriedl_XT)
 1147:       {
 1148:           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
 1149:           const char *orig = ptr;
 1150:           ptr = malloc(newlen + 1);
 1151:           if (!ptr) {
 1152:                   printf("out of memory");
 1153:                   pcregrep_exit(2);
 1154:           }
 1155:           endptr = ptr;
 1156:           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
 1157:           for (i = 0; i < jfriedl_XT; i++) {
 1158:                   strncpy(endptr, orig,  length);
 1159:                   endptr += length;
 1160:           }
 1161:           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
 1162:           length = newlen;
 1163:       }
 1164: 
 1165:       if (gettimeofday(&start_time, &dummy) != 0)
 1166:               perror("bad gettimeofday");
 1167: 
 1168: 
 1169:       for (i = 0; i < jfriedl_XR; i++)
 1170:           match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
 1171:               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
 1172: 
 1173:       if (gettimeofday(&end_time, &dummy) != 0)
 1174:               perror("bad gettimeofday");
 1175: 
 1176:       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
 1177:                       -
 1178:                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
 1179: 
 1180:       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
 1181:       return 0;
 1182:   }
 1183: #endif
 1184: 
 1185:   /* We come back here after a match when the -o option (only_matching) is set,
 1186:   in order to find any further matches in the same line. */
 1187: 
 1188:   ONLY_MATCHING_RESTART:
 1189: 
 1190:   /* Run through all the patterns until one matches or there is an error other
 1191:   than NOMATCH. This code is in a subroutine so that it can be re-used for
 1192:   finding subsequent matches when colouring matched lines. */
 1193: 
 1194:   match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
 1195: 
 1196:   /* If it's a match or a not-match (as required), do what's wanted. */
 1197: 
 1198:   if (match != invert)
 1199:     {
 1200:     BOOL hyphenprinted = FALSE;
 1201: 
 1202:     /* We've failed if we want a file that doesn't have any matches. */
 1203: 
 1204:     if (filenames == FN_NOMATCH_ONLY) return 1;
 1205: 
 1206:     /* Just count if just counting is wanted. */
 1207: 
 1208:     if (count_only) count++;
 1209: 
 1210:     /* If all we want is a file name, there is no need to scan any more lines
 1211:     in the file. */
 1212: 
 1213:     else if (filenames == FN_MATCH_ONLY)
 1214:       {
 1215:       fprintf(stdout, "%s\n", printname);
 1216:       return 0;
 1217:       }
 1218: 
 1219:     /* Likewise, if all we want is a yes/no answer. */
 1220: 
 1221:     else if (quiet) return 0;
 1222: 
 1223:     /* The --only-matching option prints just the substring that matched, or a
 1224:     captured portion of it, as long as this string is not empty, and the
 1225:     --file-offsets and --line-offsets options output offsets for the matching
 1226:     substring (they both force --only-matching = 0). None of these options
 1227:     prints any context. Afterwards, adjust the start and then jump back to look
 1228:     for further matches in the same line. If we are in invert mode, however,
 1229:     nothing is printed and we do not restart - this could still be useful
 1230:     because the return code is set. */
 1231: 
 1232:     else if (only_matching >= 0)
 1233:       {
 1234:       if (!invert)
 1235:         {
 1236:         if (printname != NULL) fprintf(stdout, "%s:", printname);
 1237:         if (number) fprintf(stdout, "%d:", linenumber);
 1238:         if (line_offsets)
 1239:           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
 1240:             offsets[1] - offsets[0]);
 1241:         else if (file_offsets)
 1242:           fprintf(stdout, "%d,%d\n",
 1243:             (int)(filepos + matchptr + offsets[0] - ptr),
 1244:             offsets[1] - offsets[0]);
 1245:         else if (only_matching < mrc)
 1246:           {
 1247:           int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
 1248:           if (plen > 0)
 1249:             {
 1250:             if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1251:             FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
 1252:             if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
 1253:             fprintf(stdout, "\n");
 1254:             }
 1255:           }
 1256:         else if (printname != NULL || number) fprintf(stdout, "\n");
 1257:         match = FALSE;
 1258:         if (line_buffered) fflush(stdout);
 1259:         rc = 0;                      /* Had some success */
 1260:         startoffset = offsets[1];    /* Restart after the match */
 1261:         goto ONLY_MATCHING_RESTART;
 1262:         }
 1263:       }
 1264: 
 1265:     /* This is the default case when none of the above options is set. We print
 1266:     the matching lines(s), possibly preceded and/or followed by other lines of
 1267:     context. */
 1268: 
 1269:     else
 1270:       {
 1271:       /* See if there is a requirement to print some "after" lines from a
 1272:       previous match. We never print any overlaps. */
 1273: 
 1274:       if (after_context > 0 && lastmatchnumber > 0)
 1275:         {
 1276:         int ellength;
 1277:         int linecount = 0;
 1278:         char *p = lastmatchrestart;
 1279: 
 1280:         while (p < ptr && linecount < after_context)
 1281:           {
 1282:           p = end_of_line(p, ptr, &ellength);
 1283:           linecount++;
 1284:           }
 1285: 
 1286:         /* It is important to advance lastmatchrestart during this printing so
 1287:         that it interacts correctly with any "before" printing below. Print
 1288:         each line's data using fwrite() in case there are binary zeroes. */
 1289: 
 1290:         while (lastmatchrestart < p)
 1291:           {
 1292:           char *pp = lastmatchrestart;
 1293:           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1294:           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
 1295:           pp = end_of_line(pp, endptr, &ellength);
 1296:           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
 1297:           lastmatchrestart = pp;
 1298:           }
 1299:         if (lastmatchrestart != ptr) hyphenpending = TRUE;
 1300:         }
 1301: 
 1302:       /* If there were non-contiguous lines printed above, insert hyphens. */
 1303: 
 1304:       if (hyphenpending)
 1305:         {
 1306:         fprintf(stdout, "--\n");
 1307:         hyphenpending = FALSE;
 1308:         hyphenprinted = TRUE;
 1309:         }
 1310: 
 1311:       /* See if there is a requirement to print some "before" lines for this
 1312:       match. Again, don't print overlaps. */
 1313: 
 1314:       if (before_context > 0)
 1315:         {
 1316:         int linecount = 0;
 1317:         char *p = ptr;
 1318: 
 1319:         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
 1320:                linecount < before_context)
 1321:           {
 1322:           linecount++;
 1323:           p = previous_line(p, main_buffer);
 1324:           }
 1325: 
 1326:         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
 1327:           fprintf(stdout, "--\n");
 1328: 
 1329:         while (p < ptr)
 1330:           {
 1331:           int ellength;
 1332:           char *pp = p;
 1333:           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1334:           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
 1335:           pp = end_of_line(pp, endptr, &ellength);
 1336:           FWRITE(p, 1, pp - p, stdout);
 1337:           p = pp;
 1338:           }
 1339:         }
 1340: 
 1341:       /* Now print the matching line(s); ensure we set hyphenpending at the end
 1342:       of the file if any context lines are being output. */
 1343: 
 1344:       if (after_context > 0 || before_context > 0)
 1345:         endhyphenpending = TRUE;
 1346: 
 1347:       if (printname != NULL) fprintf(stdout, "%s:", printname);
 1348:       if (number) fprintf(stdout, "%d:", linenumber);
 1349: 
 1350:       /* In multiline mode, we want to print to the end of the line in which
 1351:       the end of the matched string is found, so we adjust linelength and the
 1352:       line number appropriately, but only when there actually was a match
 1353:       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
 1354:       the match will always be before the first newline sequence. */
 1355: 
 1356:       if (multiline & !invert)
 1357:         {
 1358:         char *endmatch = ptr + offsets[1];
 1359:         t = ptr;
 1360:         while (t < endmatch)
 1361:           {
 1362:           t = end_of_line(t, endptr, &endlinelength);
 1363:           if (t < endmatch) linenumber++; else break;
 1364:           }
 1365:         linelength = t - ptr - endlinelength;
 1366:         }
 1367: 
 1368:       /*** NOTE: Use only fwrite() to output the data line, so that binary
 1369:       zeroes are treated as just another data character. */
 1370: 
 1371:       /* This extra option, for Jeffrey Friedl's debugging requirements,
 1372:       replaces the matched string, or a specific captured string if it exists,
 1373:       with X. When this happens, colouring is ignored. */
 1374: 
 1375: #ifdef JFRIEDL_DEBUG
 1376:       if (S_arg >= 0 && S_arg < mrc)
 1377:         {
 1378:         int first = S_arg * 2;
 1379:         int last  = first + 1;
 1380:         FWRITE(ptr, 1, offsets[first], stdout);
 1381:         fprintf(stdout, "X");
 1382:         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
 1383:         }
 1384:       else
 1385: #endif
 1386: 
 1387:       /* We have to split the line(s) up if colouring, and search for further
 1388:       matches, but not of course if the line is a non-match. */
 1389: 
 1390:       if (do_colour && !invert)
 1391:         {
 1392:         int plength;
 1393:         FWRITE(ptr, 1, offsets[0], stdout);
 1394:         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1395:         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1396:         fprintf(stdout, "%c[00m", 0x1b);
 1397:         for (;;)
 1398:           {
 1399:           startoffset = offsets[1];
 1400:           if (startoffset >= (int)linelength + endlinelength ||
 1401:               !match_patterns(matchptr, length, startoffset, offsets, &mrc))
 1402:             break;
 1403:           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
 1404:           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1405:           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1406:           fprintf(stdout, "%c[00m", 0x1b);
 1407:           }
 1408: 
 1409:         /* In multiline mode, we may have already printed the complete line
 1410:         and its line-ending characters (if they matched the pattern), so there
 1411:         may be no more to print. */
 1412: 
 1413:         plength = (int)((linelength + endlinelength) - startoffset);
 1414:         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
 1415:         }
 1416: 
 1417:       /* Not colouring; no need to search for further matches */
 1418: 
 1419:       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
 1420:       }
 1421: 
 1422:     /* End of doing what has to be done for a match. If --line-buffered was
 1423:     given, flush the output. */
 1424: 
 1425:     if (line_buffered) fflush(stdout);
 1426:     rc = 0;    /* Had some success */
 1427: 
 1428:     /* Remember where the last match happened for after_context. We remember
 1429:     where we are about to restart, and that line's number. */
 1430: 
 1431:     lastmatchrestart = ptr + linelength + endlinelength;
 1432:     lastmatchnumber = linenumber + 1;
 1433:     }
 1434: 
 1435:   /* For a match in multiline inverted mode (which of course did not cause
 1436:   anything to be printed), we have to move on to the end of the match before
 1437:   proceeding. */
 1438: 
 1439:   if (multiline && invert && match)
 1440:     {
 1441:     int ellength;
 1442:     char *endmatch = ptr + offsets[1];
 1443:     t = ptr;
 1444:     while (t < endmatch)
 1445:       {
 1446:       t = end_of_line(t, endptr, &ellength);
 1447:       if (t <= endmatch) linenumber++; else break;
 1448:       }
 1449:     endmatch = end_of_line(endmatch, endptr, &ellength);
 1450:     linelength = endmatch - ptr - ellength;
 1451:     }
 1452: 
 1453:   /* Advance to after the newline and increment the line number. The file
 1454:   offset to the current line is maintained in filepos. */
 1455: 
 1456:   ptr += linelength + endlinelength;
 1457:   filepos += (int)(linelength + endlinelength);
 1458:   linenumber++;
 1459: 
 1460:   /* If input is line buffered, and the buffer is not yet full, read another
 1461:   line and add it into the buffer. */
 1462: 
 1463:   if (input_line_buffered && bufflength < (size_t)bufsize)
 1464:     {
 1465:     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
 1466:     bufflength += add;
 1467:     endptr += add;
 1468:     }
 1469: 
 1470:   /* If we haven't yet reached the end of the file (the buffer is full), and
 1471:   the current point is in the top 1/3 of the buffer, slide the buffer down by
 1472:   1/3 and refill it. Before we do this, if some unprinted "after" lines are
 1473:   about to be lost, print them. */
 1474: 
 1475:   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
 1476:     {
 1477:     if (after_context > 0 &&
 1478:         lastmatchnumber > 0 &&
 1479:         lastmatchrestart < main_buffer + bufthird)
 1480:       {
 1481:       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 1482:       lastmatchnumber = 0;
 1483:       }
 1484: 
 1485:     /* Now do the shuffle */
 1486: 
 1487:     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
 1488:     ptr -= bufthird;
 1489: 
 1490: #ifdef SUPPORT_LIBZ
 1491:     if (frtype == FR_LIBZ)
 1492:       bufflength = 2*bufthird +
 1493:         gzread (ingz, main_buffer + 2*bufthird, bufthird);
 1494:     else
 1495: #endif
 1496: 
 1497: #ifdef SUPPORT_LIBBZ2
 1498:     if (frtype == FR_LIBBZ2)
 1499:       bufflength = 2*bufthird +
 1500:         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
 1501:     else
 1502: #endif
 1503: 
 1504:     bufflength = 2*bufthird +
 1505:       (input_line_buffered?
 1506:        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
 1507:        fread(main_buffer + 2*bufthird, 1, bufthird, in));
 1508:     endptr = main_buffer + bufflength;
 1509: 
 1510:     /* Adjust any last match point */
 1511: 
 1512:     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
 1513:     }
 1514:   }     /* Loop through the whole file */
 1515: 
 1516: /* End of file; print final "after" lines if wanted; do_after_lines sets
 1517: hyphenpending if it prints something. */
 1518: 
 1519: if (only_matching < 0 && !count_only)
 1520:   {
 1521:   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 1522:   hyphenpending |= endhyphenpending;
 1523:   }
 1524: 
 1525: /* Print the file name if we are looking for those without matches and there
 1526: were none. If we found a match, we won't have got this far. */
 1527: 
 1528: if (filenames == FN_NOMATCH_ONLY)
 1529:   {
 1530:   fprintf(stdout, "%s\n", printname);
 1531:   return 0;
 1532:   }
 1533: 
 1534: /* Print the match count if wanted */
 1535: 
 1536: if (count_only)
 1537:   {
 1538:   if (count > 0 || !omit_zero_count)
 1539:     {
 1540:     if (printname != NULL && filenames != FN_NONE)
 1541:       fprintf(stdout, "%s:", printname);
 1542:     fprintf(stdout, "%d\n", count);
 1543:     }
 1544:   }
 1545: 
 1546: return rc;
 1547: }
 1548: 
 1549: 
 1550: 
 1551: /*************************************************
 1552: *     Grep a file or recurse into a directory    *
 1553: *************************************************/
 1554: 
 1555: /* Given a path name, if it's a directory, scan all the files if we are
 1556: recursing; if it's a file, grep it.
 1557: 
 1558: Arguments:
 1559:   pathname          the path to investigate
 1560:   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
 1561:   only_one_at_top   TRUE if the path is the only one at toplevel
 1562: 
 1563: Returns:   0 if there was at least one match
 1564:            1 if there were no matches
 1565:            2 there was some kind of error
 1566: 
 1567: However, file opening failures are suppressed if "silent" is set.
 1568: */
 1569: 
 1570: static int
 1571: grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
 1572: {
 1573: int rc = 1;
 1574: int sep;
 1575: int frtype;
 1576: int pathlen;
 1577: void *handle;
 1578: FILE *in = NULL;           /* Ensure initialized */
 1579: 
 1580: #ifdef SUPPORT_LIBZ
 1581: gzFile ingz = NULL;
 1582: #endif
 1583: 
 1584: #ifdef SUPPORT_LIBBZ2
 1585: BZFILE *inbz2 = NULL;
 1586: #endif
 1587: 
 1588: /* If the file name is "-" we scan stdin */
 1589: 
 1590: if (strcmp(pathname, "-") == 0)
 1591:   {
 1592:   return pcregrep(stdin, FR_PLAIN, stdin_name,
 1593:     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
 1594:       stdin_name : NULL);
 1595:   }
 1596: 
 1597: /* If the file is a directory, skip if skipping or if we are recursing, scan
 1598: each file and directory within it, subject to any include or exclude patterns
 1599: that were set. The scanning code is localized so it can be made
 1600: system-specific. */
 1601: 
 1602: if ((sep = isdirectory(pathname)) != 0)
 1603:   {
 1604:   if (dee_action == dee_SKIP) return 1;
 1605:   if (dee_action == dee_RECURSE)
 1606:     {
 1607:     char buffer[1024];
 1608:     char *nextfile;
 1609:     directory_type *dir = opendirectory(pathname);
 1610: 
 1611:     if (dir == NULL)
 1612:       {
 1613:       if (!silent)
 1614:         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
 1615:           strerror(errno));
 1616:       return 2;
 1617:       }
 1618: 
 1619:     while ((nextfile = readdirectory(dir)) != NULL)
 1620:       {
 1621:       int frc, nflen;
 1622:       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
 1623:       nflen = (int)(strlen(nextfile));
 1624: 
 1625:       if (isdirectory(buffer))
 1626:         {
 1627:         if (exclude_dir_compiled != NULL &&
 1628:             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
 1629:           continue;
 1630: 
 1631:         if (include_dir_compiled != NULL &&
 1632:             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
 1633:           continue;
 1634:         }
 1635:       else
 1636:         {
 1637:         if (exclude_compiled != NULL &&
 1638:             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
 1639:           continue;
 1640: 
 1641:         if (include_compiled != NULL &&
 1642:             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
 1643:           continue;
 1644:         }
 1645: 
 1646:       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
 1647:       if (frc > 1) rc = frc;
 1648:        else if (frc == 0 && rc == 1) rc = 0;
 1649:       }
 1650: 
 1651:     closedirectory(dir);
 1652:     return rc;
 1653:     }
 1654:   }
 1655: 
 1656: /* If the file is not a directory and not a regular file, skip it if that's
 1657: been requested. */
 1658: 
 1659: else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
 1660: 
 1661: /* Control reaches here if we have a regular file, or if we have a directory
 1662: and recursion or skipping was not requested, or if we have anything else and
 1663: skipping was not requested. The scan proceeds. If this is the first and only
 1664: argument at top level, we don't show the file name, unless we are only showing
 1665: the file name, or the filename was forced (-H). */
 1666: 
 1667: pathlen = (int)(strlen(pathname));
 1668: 
 1669: /* Open using zlib if it is supported and the file name ends with .gz. */
 1670: 
 1671: #ifdef SUPPORT_LIBZ
 1672: if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
 1673:   {
 1674:   ingz = gzopen(pathname, "rb");
 1675:   if (ingz == NULL)
 1676:     {
 1677:     if (!silent)
 1678:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 1679:         strerror(errno));
 1680:     return 2;
 1681:     }
 1682:   handle = (void *)ingz;
 1683:   frtype = FR_LIBZ;
 1684:   }
 1685: else
 1686: #endif
 1687: 
 1688: /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
 1689: 
 1690: #ifdef SUPPORT_LIBBZ2
 1691: if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
 1692:   {
 1693:   inbz2 = BZ2_bzopen(pathname, "rb");
 1694:   handle = (void *)inbz2;
 1695:   frtype = FR_LIBBZ2;
 1696:   }
 1697: else
 1698: #endif
 1699: 
 1700: /* Otherwise use plain fopen(). The label is so that we can come back here if
 1701: an attempt to read a .bz2 file indicates that it really is a plain file. */
 1702: 
 1703: #ifdef SUPPORT_LIBBZ2
 1704: PLAIN_FILE:
 1705: #endif
 1706:   {
 1707:   in = fopen(pathname, "rb");
 1708:   handle = (void *)in;
 1709:   frtype = FR_PLAIN;
 1710:   }
 1711: 
 1712: /* All the opening methods return errno when they fail. */
 1713: 
 1714: if (handle == NULL)
 1715:   {
 1716:   if (!silent)
 1717:     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 1718:       strerror(errno));
 1719:   return 2;
 1720:   }
 1721: 
 1722: /* Now grep the file */
 1723: 
 1724: rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
 1725:   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
 1726: 
 1727: /* Close in an appropriate manner. */
 1728: 
 1729: #ifdef SUPPORT_LIBZ
 1730: if (frtype == FR_LIBZ)
 1731:   gzclose(ingz);
 1732: else
 1733: #endif
 1734: 
 1735: /* If it is a .bz2 file and the result is 3, it means that the first attempt to
 1736: read failed. If the error indicates that the file isn't in fact bzipped, try
 1737: again as a normal file. */
 1738: 
 1739: #ifdef SUPPORT_LIBBZ2
 1740: if (frtype == FR_LIBBZ2)
 1741:   {
 1742:   if (rc == 3)
 1743:     {
 1744:     int errnum;
 1745:     const char *err = BZ2_bzerror(inbz2, &errnum);
 1746:     if (errnum == BZ_DATA_ERROR_MAGIC)
 1747:       {
 1748:       BZ2_bzclose(inbz2);
 1749:       goto PLAIN_FILE;
 1750:       }
 1751:     else if (!silent)
 1752:       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
 1753:         pathname, err);
 1754:     rc = 2;    /* The normal "something went wrong" code */
 1755:     }
 1756:   BZ2_bzclose(inbz2);
 1757:   }
 1758: else
 1759: #endif
 1760: 
 1761: /* Normal file close */
 1762: 
 1763: fclose(in);
 1764: 
 1765: /* Pass back the yield from pcregrep(). */
 1766: 
 1767: return rc;
 1768: }
 1769: 
 1770: 
 1771: 
 1772: 
 1773: /*************************************************
 1774: *                Usage function                  *
 1775: *************************************************/
 1776: 
 1777: static int
 1778: usage(int rc)
 1779: {
 1780: option_item *op;
 1781: fprintf(stderr, "Usage: pcregrep [-");
 1782: for (op = optionlist; op->one_char != 0; op++)
 1783:   {
 1784:   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
 1785:   }
 1786: fprintf(stderr, "] [long options] [pattern] [files]\n");
 1787: fprintf(stderr, "Type `pcregrep --help' for more information and the long "
 1788:   "options.\n");
 1789: return rc;
 1790: }
 1791: 
 1792: 
 1793: 
 1794: 
 1795: /*************************************************
 1796: *                Help function                   *
 1797: *************************************************/
 1798: 
 1799: static void
 1800: help(void)
 1801: {
 1802: option_item *op;
 1803: 
 1804: printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
 1805: printf("Search for PATTERN in each FILE or standard input.\n");
 1806: printf("PATTERN must be present if neither -e nor -f is used.\n");
 1807: printf("\"-\" can be used as a file name to mean STDIN.\n");
 1808: 
 1809: #ifdef SUPPORT_LIBZ
 1810: printf("Files whose names end in .gz are read using zlib.\n");
 1811: #endif
 1812: 
 1813: #ifdef SUPPORT_LIBBZ2
 1814: printf("Files whose names end in .bz2 are read using bzlib2.\n");
 1815: #endif
 1816: 
 1817: #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
 1818: printf("Other files and the standard input are read as plain files.\n\n");
 1819: #else
 1820: printf("All files are read as plain files, without any interpretation.\n\n");
 1821: #endif
 1822: 
 1823: printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
 1824: printf("Options:\n");
 1825: 
 1826: for (op = optionlist; op->one_char != 0; op++)
 1827:   {
 1828:   int n;
 1829:   char s[4];
 1830: 
 1831:   /* Two options were accidentally implemented and documented with underscores
 1832:   instead of hyphens in their names, something that was not noticed for quite a
 1833:   few releases. When fixing this, I left the underscored versions in the list
 1834:   in case people were using them. However, we don't want to display them in the
 1835:   help data. There are no other options that contain underscores, and we do not
 1836:   expect ever to implement such options. Therefore, just omit any option that
 1837:   contains an underscore. */
 1838: 
 1839:   if (strchr(op->long_name, '_') != NULL) continue;
 1840: 
 1841:   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
 1842:   n = 31 - printf("  %s --%s", s, op->long_name);
 1843:   if (n < 1) n = 1;
 1844:   printf("%.*s%s\n", n, "                     ", op->help_text);
 1845:   }
 1846: 
 1847: printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
 1848: printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
 1849: printf("When reading patterns from a file instead of using a command line option,\n");
 1850: printf("trailing white space is removed and blank lines are ignored.\n");
 1851: printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
 1852:   MAX_PATTERN_COUNT, PATBUFSIZE);
 1853: 
 1854: printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
 1855: printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
 1856: }
 1857: 
 1858: 
 1859: 
 1860: 
 1861: /*************************************************
 1862: *    Handle a single-letter, no data option      *
 1863: *************************************************/
 1864: 
 1865: static int
 1866: handle_option(int letter, int options)
 1867: {
 1868: switch(letter)
 1869:   {
 1870:   case N_FOFFSETS: file_offsets = TRUE; break;
 1871:   case N_HELP: help(); pcregrep_exit(0);
 1872:   case N_LBUFFER: line_buffered = TRUE; break;
 1873:   case N_LOFFSETS: line_offsets = number = TRUE; break;
 1874:   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
 1875:   case 'c': count_only = TRUE; break;
 1876:   case 'F': process_options |= PO_FIXED_STRINGS; break;
 1877:   case 'H': filenames = FN_FORCE; break;
 1878:   case 'h': filenames = FN_NONE; break;
 1879:   case 'i': options |= PCRE_CASELESS; break;
 1880:   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
 1881:   case 'L': filenames = FN_NOMATCH_ONLY; break;
 1882:   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
 1883:   case 'n': number = TRUE; break;
 1884:   case 'o': only_matching = 0; break;
 1885:   case 'q': quiet = TRUE; break;
 1886:   case 'r': dee_action = dee_RECURSE; break;
 1887:   case 's': silent = TRUE; break;
 1888:   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
 1889:   case 'v': invert = TRUE; break;
 1890:   case 'w': process_options |= PO_WORD_MATCH; break;
 1891:   case 'x': process_options |= PO_LINE_MATCH; break;
 1892: 
 1893:   case 'V':
 1894:   fprintf(stderr, "pcregrep version %s\n", pcre_version());
 1895:   pcregrep_exit(0);
 1896:   break;
 1897: 
 1898:   default:
 1899:   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
 1900:   pcregrep_exit(usage(2));
 1901:   }
 1902: 
 1903: return options;
 1904: }
 1905: 
 1906: 
 1907: 
 1908: 
 1909: /*************************************************
 1910: *          Construct printed ordinal             *
 1911: *************************************************/
 1912: 
 1913: /* This turns a number into "1st", "3rd", etc. */
 1914: 
 1915: static char *
 1916: ordin(int n)
 1917: {
 1918: static char buffer[8];
 1919: char *p = buffer;
 1920: sprintf(p, "%d", n);
 1921: while (*p != 0) p++;
 1922: switch (n%10)
 1923:   {
 1924:   case 1: strcpy(p, "st"); break;
 1925:   case 2: strcpy(p, "nd"); break;
 1926:   case 3: strcpy(p, "rd"); break;
 1927:   default: strcpy(p, "th"); break;
 1928:   }
 1929: return buffer;
 1930: }
 1931: 
 1932: 
 1933: 
 1934: /*************************************************
 1935: *          Compile a single pattern              *
 1936: *************************************************/
 1937: 
 1938: /* When the -F option has been used, this is called for each substring.
 1939: Otherwise it's called for each supplied pattern.
 1940: 
 1941: Arguments:
 1942:   pattern        the pattern string
 1943:   options        the PCRE options
 1944:   filename       the file name, or NULL for a command-line pattern
 1945:   count          0 if this is the only command line pattern, or
 1946:                  number of the command line pattern, or
 1947:                  linenumber for a pattern from a file
 1948: 
 1949: Returns:         TRUE on success, FALSE after an error
 1950: */
 1951: 
 1952: static BOOL
 1953: compile_single_pattern(char *pattern, int options, char *filename, int count)
 1954: {
 1955: char buffer[PATBUFSIZE];
 1956: const char *error;
 1957: int errptr;
 1958: 
 1959: if (pattern_count >= MAX_PATTERN_COUNT)
 1960:   {
 1961:   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
 1962:     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
 1963:   return FALSE;
 1964:   }
 1965: 
 1966: sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
 1967:   suffix[process_options]);
 1968: pattern_list[pattern_count] =
 1969:   pcre_compile(buffer, options, &error, &errptr, pcretables);
 1970: if (pattern_list[pattern_count] != NULL)
 1971:   {
 1972:   pattern_count++;
 1973:   return TRUE;
 1974:   }
 1975: 
 1976: /* Handle compile errors */
 1977: 
 1978: errptr -= (int)strlen(prefix[process_options]);
 1979: if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
 1980: 
 1981: if (filename == NULL)
 1982:   {
 1983:   if (count == 0)
 1984:     fprintf(stderr, "pcregrep: Error in command-line regex "
 1985:       "at offset %d: %s\n", errptr, error);
 1986:   else
 1987:     fprintf(stderr, "pcregrep: Error in %s command-line regex "
 1988:       "at offset %d: %s\n", ordin(count), errptr, error);
 1989:   }
 1990: else
 1991:   {
 1992:   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
 1993:     "at offset %d: %s\n", count, filename, errptr, error);
 1994:   }
 1995: 
 1996: return FALSE;
 1997: }
 1998: 
 1999: 
 2000: 
 2001: /*************************************************
 2002: *           Compile one supplied pattern         *
 2003: *************************************************/
 2004: 
 2005: /* When the -F option has been used, each string may be a list of strings,
 2006: separated by line breaks. They will be matched literally.
 2007: 
 2008: Arguments:
 2009:   pattern        the pattern string
 2010:   options        the PCRE options
 2011:   filename       the file name, or NULL for a command-line pattern
 2012:   count          0 if this is the only command line pattern, or
 2013:                  number of the command line pattern, or
 2014:                  linenumber for a pattern from a file
 2015: 
 2016: Returns:         TRUE on success, FALSE after an error
 2017: */
 2018: 
 2019: static BOOL
 2020: compile_pattern(char *pattern, int options, char *filename, int count)
 2021: {
 2022: if ((process_options & PO_FIXED_STRINGS) != 0)
 2023:   {
 2024:   char *eop = pattern + strlen(pattern);
 2025:   char buffer[PATBUFSIZE];
 2026:   for(;;)
 2027:     {
 2028:     int ellength;
 2029:     char *p = end_of_line(pattern, eop, &ellength);
 2030:     if (ellength == 0)
 2031:       return compile_single_pattern(pattern, options, filename, count);
 2032:     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
 2033:     pattern = p;
 2034:     if (!compile_single_pattern(buffer, options, filename, count))
 2035:       return FALSE;
 2036:     }
 2037:   }
 2038: else return compile_single_pattern(pattern, options, filename, count);
 2039: }
 2040: 
 2041: 
 2042: 
 2043: /*************************************************
 2044: *                Main program                    *
 2045: *************************************************/
 2046: 
 2047: /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
 2048: 
 2049: int
 2050: main(int argc, char **argv)
 2051: {
 2052: int i, j;
 2053: int rc = 1;
 2054: int pcre_options = 0;
 2055: int cmd_pattern_count = 0;
 2056: int hint_count = 0;
 2057: int errptr;
 2058: BOOL only_one_at_top;
 2059: char *patterns[MAX_PATTERN_COUNT];
 2060: const char *locale_from = "--locale";
 2061: const char *error;
 2062: 
 2063: #ifdef SUPPORT_PCREGREP_JIT
 2064: pcre_jit_stack *jit_stack = NULL;
 2065: #endif
 2066: 
 2067: /* Set the default line ending value from the default in the PCRE library;
 2068: "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
 2069: Note that the return values from pcre_config(), though derived from the ASCII
 2070: codes, are the same in EBCDIC environments, so we must use the actual values
 2071: rather than escapes such as '\r'. */
 2072: 
 2073: (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
 2074: switch(i)
 2075:   {
 2076:   default:               newline = (char *)"lf"; break;
 2077:   case 13:               newline = (char *)"cr"; break;
 2078:   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
 2079:   case -1:               newline = (char *)"any"; break;
 2080:   case -2:               newline = (char *)"anycrlf"; break;
 2081:   }
 2082: 
 2083: /* Process the options */
 2084: 
 2085: for (i = 1; i < argc; i++)
 2086:   {
 2087:   option_item *op = NULL;
 2088:   char *option_data = (char *)"";    /* default to keep compiler happy */
 2089:   BOOL longop;
 2090:   BOOL longopwasequals = FALSE;
 2091: 
 2092:   if (argv[i][0] != '-') break;
 2093: 
 2094:   /* If we hit an argument that is just "-", it may be a reference to STDIN,
 2095:   but only if we have previously had -e or -f to define the patterns. */
 2096: 
 2097:   if (argv[i][1] == 0)
 2098:     {
 2099:     if (pattern_filename != NULL || pattern_count > 0) break;
 2100:       else pcregrep_exit(usage(2));
 2101:     }
 2102: 
 2103:   /* Handle a long name option, or -- to terminate the options */
 2104: 
 2105:   if (argv[i][1] == '-')
 2106:     {
 2107:     char *arg = argv[i] + 2;
 2108:     char *argequals = strchr(arg, '=');
 2109: 
 2110:     if (*arg == 0)    /* -- terminates options */
 2111:       {
 2112:       i++;
 2113:       break;                /* out of the options-handling loop */
 2114:       }
 2115: 
 2116:     longop = TRUE;
 2117: 
 2118:     /* Some long options have data that follows after =, for example file=name.
 2119:     Some options have variations in the long name spelling: specifically, we
 2120:     allow "regexp" because GNU grep allows it, though I personally go along
 2121:     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
 2122:     These options are entered in the table as "regex(p)". Options can be in
 2123:     both these categories. */
 2124: 
 2125:     for (op = optionlist; op->one_char != 0; op++)
 2126:       {
 2127:       char *opbra = strchr(op->long_name, '(');
 2128:       char *equals = strchr(op->long_name, '=');
 2129: 
 2130:       /* Handle options with only one spelling of the name */
 2131: 
 2132:       if (opbra == NULL)     /* Does not contain '(' */
 2133:         {
 2134:         if (equals == NULL)  /* Not thing=data case */
 2135:           {
 2136:           if (strcmp(arg, op->long_name) == 0) break;
 2137:           }
 2138:         else                 /* Special case xxx=data */
 2139:           {
 2140:           int oplen = (int)(equals - op->long_name);
 2141:           int arglen = (argequals == NULL)?
 2142:             (int)strlen(arg) : (int)(argequals - arg);
 2143:           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
 2144:             {
 2145:             option_data = arg + arglen;
 2146:             if (*option_data == '=')
 2147:               {
 2148:               option_data++;
 2149:               longopwasequals = TRUE;
 2150:               }
 2151:             break;
 2152:             }
 2153:           }
 2154:         }
 2155: 
 2156:       /* Handle options with an alternate spelling of the name */
 2157: 
 2158:       else
 2159:         {
 2160:         char buff1[24];
 2161:         char buff2[24];
 2162: 
 2163:         int baselen = (int)(opbra - op->long_name);
 2164:         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
 2165:         int arglen = (argequals == NULL || equals == NULL)?
 2166:           (int)strlen(arg) : (int)(argequals - arg);
 2167: 
 2168:         sprintf(buff1, "%.*s", baselen, op->long_name);
 2169:         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
 2170: 
 2171:         if (strncmp(arg, buff1, arglen) == 0 ||
 2172:            strncmp(arg, buff2, arglen) == 0)
 2173:           {
 2174:           if (equals != NULL && argequals != NULL)
 2175:             {
 2176:             option_data = argequals;
 2177:             if (*option_data == '=')
 2178:               {
 2179:               option_data++;
 2180:               longopwasequals = TRUE;
 2181:               }
 2182:             }
 2183:           break;
 2184:           }
 2185:         }
 2186:       }
 2187: 
 2188:     if (op->one_char == 0)
 2189:       {
 2190:       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
 2191:       pcregrep_exit(usage(2));
 2192:       }
 2193:     }
 2194: 
 2195:   /* Jeffrey Friedl's debugging harness uses these additional options which
 2196:   are not in the right form for putting in the option table because they use
 2197:   only one hyphen, yet are more than one character long. By putting them
 2198:   separately here, they will not get displayed as part of the help() output,
 2199:   but I don't think Jeffrey will care about that. */
 2200: 
 2201: #ifdef JFRIEDL_DEBUG
 2202:   else if (strcmp(argv[i], "-pre") == 0) {
 2203:           jfriedl_prefix = argv[++i];
 2204:           continue;
 2205:   } else if (strcmp(argv[i], "-post") == 0) {
 2206:           jfriedl_postfix = argv[++i];
 2207:           continue;
 2208:   } else if (strcmp(argv[i], "-XT") == 0) {
 2209:           sscanf(argv[++i], "%d", &jfriedl_XT);
 2210:           continue;
 2211:   } else if (strcmp(argv[i], "-XR") == 0) {
 2212:           sscanf(argv[++i], "%d", &jfriedl_XR);
 2213:           continue;
 2214:   }
 2215: #endif
 2216: 
 2217: 
 2218:   /* One-char options; many that have no data may be in a single argument; we
 2219:   continue till we hit the last one or one that needs data. */
 2220: 
 2221:   else
 2222:     {
 2223:     char *s = argv[i] + 1;
 2224:     longop = FALSE;
 2225:     while (*s != 0)
 2226:       {
 2227:       for (op = optionlist; op->one_char != 0; op++)
 2228:         {
 2229:         if (*s == op->one_char) break;
 2230:         }
 2231:       if (op->one_char == 0)
 2232:         {
 2233:         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
 2234:           *s, argv[i]);
 2235:         pcregrep_exit(usage(2));
 2236:         }
 2237: 
 2238:       /* Check for a single-character option that has data: OP_OP_NUMBER
 2239:       is used for one that either has a numerical number or defaults, i.e. the
 2240:       data is optional. If a digit follows, there is data; if not, carry on
 2241:       with other single-character options in the same string. */
 2242: 
 2243:       option_data = s+1;
 2244:       if (op->type == OP_OP_NUMBER)
 2245:         {
 2246:         if (isdigit((unsigned char)s[1])) break;
 2247:         }
 2248:       else   /* Check for end or a dataless option */
 2249:         {
 2250:         if (op->type != OP_NODATA || s[1] == 0) break;
 2251:         }
 2252: 
 2253:       /* Handle a single-character option with no data, then loop for the
 2254:       next character in the string. */
 2255: 
 2256:       pcre_options = handle_option(*s++, pcre_options);
 2257:       }
 2258:     }
 2259: 
 2260:   /* At this point we should have op pointing to a matched option. If the type
 2261:   is NO_DATA, it means that there is no data, and the option might set
 2262:   something in the PCRE options. */
 2263: 
 2264:   if (op->type == OP_NODATA)
 2265:     {
 2266:     pcre_options = handle_option(op->one_char, pcre_options);
 2267:     continue;
 2268:     }
 2269: 
 2270:   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
 2271:   either has a value or defaults to something. It cannot have data in a
 2272:   separate item. At the moment, the only such options are "colo(u)r",
 2273:   "only-matching", and Jeffrey Friedl's special -S debugging option. */
 2274: 
 2275:   if (*option_data == 0 &&
 2276:       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
 2277:     {
 2278:     switch (op->one_char)
 2279:       {
 2280:       case N_COLOUR:
 2281:       colour_option = (char *)"auto";
 2282:       break;
 2283: 
 2284:       case 'o':
 2285:       only_matching = 0;
 2286:       break;
 2287: 
 2288: #ifdef JFRIEDL_DEBUG
 2289:       case 'S':
 2290:       S_arg = 0;
 2291:       break;
 2292: #endif
 2293:       }
 2294:     continue;
 2295:     }
 2296: 
 2297:   /* Otherwise, find the data string for the option. */
 2298: 
 2299:   if (*option_data == 0)
 2300:     {
 2301:     if (i >= argc - 1 || longopwasequals)
 2302:       {
 2303:       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
 2304:       pcregrep_exit(usage(2));
 2305:       }
 2306:     option_data = argv[++i];
 2307:     }
 2308: 
 2309:   /* If the option type is OP_PATLIST, it's the -e option, which can be called
 2310:   multiple times to create a list of patterns. */
 2311: 
 2312:   if (op->type == OP_PATLIST)
 2313:     {
 2314:     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
 2315:       {
 2316:       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
 2317:         MAX_PATTERN_COUNT);
 2318:       return 2;
 2319:       }
 2320:     patterns[cmd_pattern_count++] = option_data;
 2321:     }
 2322: 
 2323:   /* Otherwise, deal with single string or numeric data values. */
 2324: 
 2325:   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
 2326:            op->type != OP_OP_NUMBER)
 2327:     {
 2328:     *((char **)op->dataptr) = option_data;
 2329:     }
 2330: 
 2331:   /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
 2332:   only for unpicking arguments, so just keep it simple. */
 2333: 
 2334:   else
 2335:     {
 2336:     unsigned long int n = 0;
 2337:     char *endptr = option_data;
 2338:     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
 2339:     while (isdigit((unsigned char)(*endptr)))
 2340:       n = n * 10 + (int)(*endptr++ - '0');
 2341:     if (toupper(*endptr) == 'K')
 2342:       {
 2343:       n *= 1024;
 2344:       endptr++;
 2345:       }
 2346:     else if (toupper(*endptr) == 'M')
 2347:       {
 2348:       n *= 1024*1024;
 2349:       endptr++;
 2350:       }
 2351:     if (*endptr != 0)
 2352:       {
 2353:       if (longop)
 2354:         {
 2355:         char *equals = strchr(op->long_name, '=');
 2356:         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
 2357:           (int)(equals - op->long_name);
 2358:         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
 2359:           option_data, nlen, op->long_name);
 2360:         }
 2361:       else
 2362:         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
 2363:           option_data, op->one_char);
 2364:       pcregrep_exit(usage(2));
 2365:       }
 2366:     if (op->type == OP_LONGNUMBER)
 2367:         *((unsigned long int *)op->dataptr) = n;
 2368:     else
 2369:         *((int *)op->dataptr) = n;
 2370:     }
 2371:   }
 2372: 
 2373: /* Options have been decoded. If -C was used, its value is used as a default
 2374: for -A and -B. */
 2375: 
 2376: if (both_context > 0)
 2377:   {
 2378:   if (after_context == 0) after_context = both_context;
 2379:   if (before_context == 0) before_context = both_context;
 2380:   }
 2381: 
 2382: /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
 2383: However, the latter two set only_matching. */
 2384: 
 2385: if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
 2386:     (file_offsets && line_offsets))
 2387:   {
 2388:   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
 2389:     "and/or --line-offsets\n");
 2390:   pcregrep_exit(usage(2));
 2391:   }
 2392: 
 2393: if (file_offsets || line_offsets) only_matching = 0;
 2394: 
 2395: /* If a locale has not been provided as an option, see if the LC_CTYPE or
 2396: LC_ALL environment variable is set, and if so, use it. */
 2397: 
 2398: if (locale == NULL)
 2399:   {
 2400:   locale = getenv("LC_ALL");
 2401:   locale_from = "LCC_ALL";
 2402:   }
 2403: 
 2404: if (locale == NULL)
 2405:   {
 2406:   locale = getenv("LC_CTYPE");
 2407:   locale_from = "LC_CTYPE";
 2408:   }
 2409: 
 2410: /* If a locale has been provided, set it, and generate the tables the PCRE
 2411: needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
 2412: 
 2413: if (locale != NULL)
 2414:   {
 2415:   if (setlocale(LC_CTYPE, locale) == NULL)
 2416:     {
 2417:     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
 2418:       locale, locale_from);
 2419:     return 2;
 2420:     }
 2421:   pcretables = pcre_maketables();
 2422:   }
 2423: 
 2424: /* Sort out colouring */
 2425: 
 2426: if (colour_option != NULL && strcmp(colour_option, "never") != 0)
 2427:   {
 2428:   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
 2429:   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
 2430:   else
 2431:     {
 2432:     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
 2433:       colour_option);
 2434:     return 2;
 2435:     }
 2436:   if (do_colour)
 2437:     {
 2438:     char *cs = getenv("PCREGREP_COLOUR");
 2439:     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
 2440:     if (cs != NULL) colour_string = cs;
 2441:     }
 2442:   }
 2443: 
 2444: /* Interpret the newline type; the default settings are Unix-like. */
 2445: 
 2446: if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
 2447:   {
 2448:   pcre_options |= PCRE_NEWLINE_CR;
 2449:   endlinetype = EL_CR;
 2450:   }
 2451: else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
 2452:   {
 2453:   pcre_options |= PCRE_NEWLINE_LF;
 2454:   endlinetype = EL_LF;
 2455:   }
 2456: else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
 2457:   {
 2458:   pcre_options |= PCRE_NEWLINE_CRLF;
 2459:   endlinetype = EL_CRLF;
 2460:   }
 2461: else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
 2462:   {
 2463:   pcre_options |= PCRE_NEWLINE_ANY;
 2464:   endlinetype = EL_ANY;
 2465:   }
 2466: else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
 2467:   {
 2468:   pcre_options |= PCRE_NEWLINE_ANYCRLF;
 2469:   endlinetype = EL_ANYCRLF;
 2470:   }
 2471: else
 2472:   {
 2473:   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
 2474:   return 2;
 2475:   }
 2476: 
 2477: /* Interpret the text values for -d and -D */
 2478: 
 2479: if (dee_option != NULL)
 2480:   {
 2481:   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
 2482:   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
 2483:   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
 2484:   else
 2485:     {
 2486:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
 2487:     return 2;
 2488:     }
 2489:   }
 2490: 
 2491: if (DEE_option != NULL)
 2492:   {
 2493:   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
 2494:   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
 2495:   else
 2496:     {
 2497:     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
 2498:     return 2;
 2499:     }
 2500:   }
 2501: 
 2502: /* Check the values for Jeffrey Friedl's debugging options. */
 2503: 
 2504: #ifdef JFRIEDL_DEBUG
 2505: if (S_arg > 9)
 2506:   {
 2507:   fprintf(stderr, "pcregrep: bad value for -S option\n");
 2508:   return 2;
 2509:   }
 2510: if (jfriedl_XT != 0 || jfriedl_XR != 0)
 2511:   {
 2512:   if (jfriedl_XT == 0) jfriedl_XT = 1;
 2513:   if (jfriedl_XR == 0) jfriedl_XR = 1;
 2514:   }
 2515: #endif
 2516: 
 2517: /* Get memory for the main buffer, and to store the pattern and hints lists. */
 2518: 
 2519: bufsize = 3*bufthird;
 2520: main_buffer = (char *)malloc(bufsize);
 2521: pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
 2522: hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
 2523: 
 2524: if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
 2525:   {
 2526:   fprintf(stderr, "pcregrep: malloc failed\n");
 2527:   goto EXIT2;
 2528:   }
 2529: 
 2530: /* If no patterns were provided by -e, and there is no file provided by -f,
 2531: the first argument is the one and only pattern, and it must exist. */
 2532: 
 2533: if (cmd_pattern_count == 0 && pattern_filename == NULL)
 2534:   {
 2535:   if (i >= argc) return usage(2);
 2536:   patterns[cmd_pattern_count++] = argv[i++];
 2537:   }
 2538: 
 2539: /* Compile the patterns that were provided on the command line, either by
 2540: multiple uses of -e or as a single unkeyed pattern. */
 2541: 
 2542: for (j = 0; j < cmd_pattern_count; j++)
 2543:   {
 2544:   if (!compile_pattern(patterns[j], pcre_options, NULL,
 2545:        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
 2546:     goto EXIT2;
 2547:   }
 2548: 
 2549: /* Compile the regular expressions that are provided in a file. */
 2550: 
 2551: if (pattern_filename != NULL)
 2552:   {
 2553:   int linenumber = 0;
 2554:   FILE *f;
 2555:   char *filename;
 2556:   char buffer[PATBUFSIZE];
 2557: 
 2558:   if (strcmp(pattern_filename, "-") == 0)
 2559:     {
 2560:     f = stdin;
 2561:     filename = stdin_name;
 2562:     }
 2563:   else
 2564:     {
 2565:     f = fopen(pattern_filename, "r");
 2566:     if (f == NULL)
 2567:       {
 2568:       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
 2569:         strerror(errno));
 2570:       goto EXIT2;
 2571:       }
 2572:     filename = pattern_filename;
 2573:     }
 2574: 
 2575:   while (fgets(buffer, PATBUFSIZE, f) != NULL)
 2576:     {
 2577:     char *s = buffer + (int)strlen(buffer);
 2578:     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
 2579:     *s = 0;
 2580:     linenumber++;
 2581:     if (buffer[0] == 0) continue;   /* Skip blank lines */
 2582:     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
 2583:       goto EXIT2;
 2584:     }
 2585: 
 2586:   if (f != stdin) fclose(f);
 2587:   }
 2588: 
 2589: /* Study the regular expressions, as we will be running them many times. Unless
 2590: JIT has been explicitly disabled, arrange a stack for it to use. */
 2591: 
 2592: #ifdef SUPPORT_PCREGREP_JIT
 2593: if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
 2594:   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
 2595: #endif
 2596: 
 2597: for (j = 0; j < pattern_count; j++)
 2598:   {
 2599:   hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
 2600:   if (error != NULL)
 2601:     {
 2602:     char s[16];
 2603:     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
 2604:     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
 2605:     goto EXIT2;
 2606:     }
 2607:   hint_count++;
 2608: #ifdef SUPPORT_PCREGREP_JIT
 2609:   if (jit_stack != NULL && hints_list[j] != NULL)
 2610:     pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
 2611: #endif
 2612:   }
 2613: 
 2614: /* If --match-limit or --recursion-limit was set, put the value(s) into the
 2615: pcre_extra block for each pattern. */
 2616: 
 2617: if (match_limit > 0 || match_limit_recursion > 0)
 2618:   {
 2619:   for (j = 0; j < pattern_count; j++)
 2620:     {
 2621:     if (hints_list[j] == NULL)
 2622:       {
 2623:       hints_list[j] = malloc(sizeof(pcre_extra));
 2624:       if (hints_list[j] == NULL)
 2625:         {
 2626:         fprintf(stderr, "pcregrep: malloc failed\n");
 2627:         pcregrep_exit(2);
 2628:         }
 2629:       }
 2630:     if (match_limit > 0)
 2631:       {
 2632:       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
 2633:       hints_list[j]->match_limit = match_limit;
 2634:       }
 2635:     if (match_limit_recursion > 0)
 2636:       {
 2637:       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
 2638:       hints_list[j]->match_limit_recursion = match_limit_recursion;
 2639:       }
 2640:     }
 2641:   }
 2642: 
 2643: /* If there are include or exclude patterns, compile them. */
 2644: 
 2645: if (exclude_pattern != NULL)
 2646:   {
 2647:   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
 2648:     pcretables);
 2649:   if (exclude_compiled == NULL)
 2650:     {
 2651:     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
 2652:       errptr, error);
 2653:     goto EXIT2;
 2654:     }
 2655:   }
 2656: 
 2657: if (include_pattern != NULL)
 2658:   {
 2659:   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
 2660:     pcretables);
 2661:   if (include_compiled == NULL)
 2662:     {
 2663:     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
 2664:       errptr, error);
 2665:     goto EXIT2;
 2666:     }
 2667:   }
 2668: 
 2669: if (exclude_dir_pattern != NULL)
 2670:   {
 2671:   exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
 2672:     pcretables);
 2673:   if (exclude_dir_compiled == NULL)
 2674:     {
 2675:     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
 2676:       errptr, error);
 2677:     goto EXIT2;
 2678:     }
 2679:   }
 2680: 
 2681: if (include_dir_pattern != NULL)
 2682:   {
 2683:   include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
 2684:     pcretables);
 2685:   if (include_dir_compiled == NULL)
 2686:     {
 2687:     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
 2688:       errptr, error);
 2689:     goto EXIT2;
 2690:     }
 2691:   }
 2692: 
 2693: /* If there are no further arguments, do the business on stdin and exit. */
 2694: 
 2695: if (i >= argc)
 2696:   {
 2697:   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
 2698:     (filenames > FN_DEFAULT)? stdin_name : NULL);
 2699:   goto EXIT;
 2700:   }
 2701: 
 2702: /* Otherwise, work through the remaining arguments as files or directories.
 2703: Pass in the fact that there is only one argument at top level - this suppresses
 2704: the file name if the argument is not a directory and filenames are not
 2705: otherwise forced. */
 2706: 
 2707: only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 2708: 
 2709: for (; i < argc; i++)
 2710:   {
 2711:   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
 2712:     only_one_at_top);
 2713:   if (frc > 1) rc = frc;
 2714:     else if (frc == 0 && rc == 1) rc = 0;
 2715:   }
 2716: 
 2717: EXIT:
 2718: #ifdef SUPPORT_PCREGREP_JIT
 2719: if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
 2720: #endif
 2721: if (main_buffer != NULL) free(main_buffer);
 2722: if (pattern_list != NULL)
 2723:   {
 2724:   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
 2725:   free(pattern_list);
 2726:   }
 2727: if (hints_list != NULL)
 2728:   {
 2729:   for (i = 0; i < hint_count; i++)
 2730:     {
 2731:     if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
 2732:     }
 2733:   free(hints_list);
 2734:   }
 2735: pcregrep_exit(rc);
 2736: 
 2737: EXIT2:
 2738: rc = 2;
 2739: goto EXIT;
 2740: }
 2741: 
 2742: /* End of pcregrep */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>