Package: grep;
Reported by: Paolo Bonzini <bonzini <at> gnu.org>
Date: Tue, 1 Apr 2014 09:19:01 UTC
Severity: normal
Tags: patch
Done: Jim Meyering <jim <at> meyering.net>
Bug is archived. No further changes may be made.
View this message in rfc822 format
From: Paolo Bonzini <bonzini <at> gnu.org>
To: 17156 <at> debbugs.gnu.org
Cc: meyering <at> fb.com
Subject: bug#17156: [PATCH 2/5] Revert conversion to shell scripts
Date: Tue, 1 Apr 2014 11:18:43 +0200
This reverts the following commits: - e4994366166059c7320fbb97a930100b1cc79e61. - ddea6c8ea3529f3364257e0e239775c63bf1b95d. - b639643840ef506594b6c46e5b24d9980a33e78e. The portability requirements of grep mandate higher standards than this. Even though egrep and fgrep have been deprecated, most users will be using them instead of grep -E and -F, on MS Windows systems too where the native command shell is used. There was a typo in the matcher-name comparisons; "-P" is "perl", not "pcre". Fix it. --- gnulib | 2 +- po/POTFILES.in | 3 + src/Makefile.am | 28 +- src/dfasearch.c | 12 + src/egrep.c | 12 + src/egrep.sh | 10 - src/fgrep.c | 12 + src/grep.c | 2496 +------------------------------------------------------ src/grep.h | 17 + src/main.c | 2478 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/search.h | 2 + 11 files changed, 2560 insertions(+), 2512 deletions(-) create mode 100644 src/egrep.c delete mode 100644 src/egrep.sh create mode 100644 src/fgrep.c create mode 100644 src/main.c diff --git a/gnulib b/gnulib index 24379a9..497f4cd 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 24379a9217fa4bd62685795aaaa010fd90ced9e3 +Subproject commit 497f4cdf8d4ef8c73059e68030c824949ae32649 diff --git a/po/POTFILES.in b/po/POTFILES.in index 399bd2f..c6385d2 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -27,6 +27,9 @@ lib/version-etc.c lib/xalloc-die.c lib/xstrtol-error.c src/dfa.c +src/egrep.c +src/fgrep.c src/grep.c src/kwset.c +src/main.c src/pcresearch.c diff --git a/src/Makefile.am b/src/Makefile.am index 3487848..9283d1a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -21,20 +21,23 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) # Tell the linker to omit references to unused shared libraries. 
AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS) -bin_PROGRAMS = grep -bin_SCRIPTS = egrep fgrep -grep_SOURCES = grep.c searchutils.c \ - dfa.c dfasearch.c \ - kwset.c kwsearch.c \ - pcresearch.c +bin_PROGRAMS = grep egrep fgrep +grep_SOURCES = grep.c +egrep_SOURCES = egrep.c +fgrep_SOURCES = fgrep.c noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h mbsupport.h +noinst_LIBRARIES = libgrep.a +libgrep_a_SOURCES = kwset.c dfa.c searchutils.c dfasearch.c kwsearch.c \ + pcresearch.c main.c + # Sometimes, the expansion of $(LIBINTL) includes -lc which may # include modules defining variables like 'optind', so libgreputils.a # must precede $(LIBINTL) in order to ensure we use GNU getopt. # But libgreputils.a must also follow $(LIBINTL), since libintl uses # replacement functions defined in libgreputils.a. LDADD = \ + libgrep.a \ ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \ $(LIBTHREAD) @@ -42,15 +45,4 @@ grep_LDADD = $(LDADD) $(LIB_PCRE) localedir = $(datadir)/locale AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib -EXTRA_DIST = dosbuf.c egrep.sh - -egrep fgrep: egrep.sh Makefile - $(AM_V_GEN)grep=`echo grep | sed -e '$(transform)'` && \ - case $@ in egrep) option=-E;; fgrep) option=-F;; esac && \ - sed -e 's|[@]SHELL@|$(SHELL)|g' \ - -e "s|[@]grep@|$$grep|g" \ - -e "s|[@]option@|$$option|g" <$(srcdir)/egrep.sh >$@-t - $(AM_V_at)chmod +x $@-t - $(AM_V_at)mv $@-t $@ - -CLEANFILES = egrep fgrep *-t +EXTRA_DIST = dosbuf.c diff --git a/src/dfasearch.c b/src/dfasearch.c index d098a9b..d3a8af3 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -406,3 +406,15 @@ EGexecute (char const *buf, size_t size, size_t *match_size, *match_size = len; return off; } + +void +Gcompile (char const *pattern, size_t size) +{ + GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES); +} + +void +Ecompile (char const *pattern, size_t size) +{ + GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES); +} diff --git a/src/egrep.c 
b/src/egrep.c new file mode 100644 index 0000000..4c34694 --- /dev/null +++ b/src/egrep.c @@ -0,0 +1,12 @@ +#include <config.h> +#include "search.h" + +struct matcher const matchers[] = { + { "egrep", Ecompile, EGexecute }, + { NULL, NULL, NULL }, +}; + +const char before_options[] = +N_("PATTERN is an extended regular expression (ERE).\n"); +const char after_options[] = +N_("Invocation as 'egrep' is deprecated; use 'grep -E' instead.\n"); diff --git a/src/egrep.sh b/src/egrep.sh deleted file mode 100644 index f1b4146..0000000 --- a/src/egrep.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!@SHELL@ -grep=grep -case $0 in - */*) - if test -x "${0%/*}/@grep@"; then - PATH=${0%/*}:$PATH - grep=@grep@ - fi;; -esac -exec $grep @option@ "$@" diff --git a/src/fgrep.c b/src/fgrep.c new file mode 100644 index 0000000..a0940cc --- /dev/null +++ b/src/fgrep.c @@ -0,0 +1,12 @@ +#include <config.h> +#include "search.h" + +struct matcher const matchers[] = { + { "fgrep", Fcompile, Fexecute }, + { NULL, NULL, NULL }, +}; + +const char before_options[] = +N_("PATTERN is a set of newline-separated fixed strings.\n"); +const char after_options[] = +N_("Invocation as 'fgrep' is deprecated; use 'grep -F' instead.\n"); diff --git a/src/grep.c b/src/grep.c index a1bccdb..3572017 100644 --- a/src/grep.c +++ b/src/grep.c @@ -1,1665 +1,5 @@ -/* grep.c - main driver file for grep. - Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ - -/* Written July 1992 by Mike Haertel. */ - #include <config.h> -#include <sys/types.h> -#include <sys/stat.h> -#include "mbsupport.h" -#include <wchar.h> -#include <wctype.h> -#include <fcntl.h> -#include <inttypes.h> -#include <stdio.h> -#include "system.h" - -#include "argmatch.h" -#include "c-ctype.h" -#include "closeout.h" -#include "colorize.h" -#include "dfa.h" -#include "error.h" -#include "exclude.h" -#include "exitfail.h" -#include "fcntl-safer.h" -#include "fts_.h" -#include "getopt.h" -#include "grep.h" -#include "intprops.h" -#include "progname.h" -#include "propername.h" -#include "quote.h" -#include "safe-read.h" #include "search.h" -#include "version-etc.h" -#include "xalloc.h" -#include "xstrtol.h" - -#define SEP_CHAR_SELECTED ':' -#define SEP_CHAR_REJECTED '-' -#define SEP_STR_GROUP "--" - -#define AUTHORS \ - proper_name ("Mike Haertel"), \ - _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>") - -/* When stdout is connected to a regular file, save its stat - information here, so that we can automatically skip it, thus - avoiding a potential (racy) infinite loop. */ -static struct stat out_stat; - -/* if non-zero, display usage information and exit */ -static int show_help; - -/* If non-zero, print the version on standard output and exit. */ -static int show_version; - -/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */ -static int suppress_errors; - -/* If nonzero, use color markers. */ -static int color_option; - -/* If nonzero, show only the part of a line matching the expression. */ -static int only_matching; - -/* If nonzero, make sure first content char in a line is on a tab stop. */ -static int align_tabs; - -/* The group separator used when context is requested. 
*/ -static const char *group_separator = SEP_STR_GROUP; - -/* The context and logic for choosing default --color screen attributes - (foreground and background colors, etc.) are the following. - -- There are eight basic colors available, each with its own - nominal luminosity to the human eye and foreground/background - codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], - magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], - yellow [89 %, 33/43], and white [100 %, 37/47]). - -- Sometimes, white as a background is actually implemented using - a shade of light gray, so that a foreground white can be visible - on top of it (but most often not). - -- Sometimes, black as a foreground is actually implemented using - a shade of dark gray, so that it can be visible on top of a - background black (but most often not). - -- Sometimes, more colors are available, as extensions. - -- Other attributes can be selected/deselected (bold [1/22], - underline [4/24], standout/inverse [7/27], blink [5/25], and - invisible/hidden [8/28]). They are sometimes implemented by - using colors instead of what their names imply; e.g., bold is - often achieved by using brighter colors. In practice, only bold - is really available to us, underline sometimes being mapped by - the terminal to some strange color choice, and standout best - being left for use by downstream programs such as less(1). - -- We cannot assume that any of the extensions or special features - are available for the purpose of choosing defaults for everyone. - -- The most prevalent default terminal backgrounds are pure black - and pure white, and are not necessarily the same shades of - those as if they were selected explicitly with SGR sequences. 
- Some terminals use dark or light pictures as default background, - but those are covered over by an explicit selection of background - color with an SGR sequence; their users will appreciate their - background pictures not be covered like this, if possible. - -- Some uses of colors attributes is to make some output items - more understated (e.g., context lines); this cannot be achieved - by changing the background color. - -- For these reasons, the grep color defaults should strive not - to change the background color from its default, unless it's - for a short item that should be highlighted, not understated. - -- The grep foreground color defaults (without an explicitly set - background) should provide enough contrast to be readable on any - terminal with either a black (dark) or white (light) background. - This only leaves red, magenta, green, and cyan (and their bold - counterparts) and possibly bold blue. */ -/* The color strings used for matched text. - The user can overwrite them using the deprecated - environment variable GREP_COLOR or the new GREP_COLORS. */ -static const char *selected_match_color = "01;31"; /* bold red */ -static const char *context_match_color = "01;31"; /* bold red */ - -/* Other colors. Defaults look damn good. */ -static const char *filename_color = "35"; /* magenta */ -static const char *line_num_color = "32"; /* green */ -static const char *byte_num_color = "32"; /* green */ -static const char *sep_color = "36"; /* cyan */ -static const char *selected_line_color = ""; /* default color pair */ -static const char *context_line_color = ""; /* default color pair */ - -/* Select Graphic Rendition (SGR, "\33[...m") strings. */ -/* Also Erase in Line (EL) to Right ("\33[K") by default. */ -/* Why have EL to Right after SGR? 
- -- The behavior of line-wrapping when at the bottom of the - terminal screen and at the end of the current line is often - such that a new line is introduced, entirely cleared with - the current background color which may be different from the - default one (see the boolean back_color_erase terminfo(5) - capability), thus scrolling the display by one line. - The end of this new line will stay in this background color - even after reverting to the default background color with - "\33[m', unless it is explicitly cleared again with "\33[K" - (which is the behavior the user would instinctively expect - from the whole thing). There may be some unavoidable - background-color flicker at the end of this new line because - of this (when timing with the monitor's redraw is just right). - -- The behavior of HT (tab, "\t") is usually the same as that of - Cursor Forward Tabulation (CHT) with a default parameter - of 1 ("\33[I"), i.e., it performs pure movement to the next - tab stop, without any clearing of either content or screen - attributes (including background color); try - printf 'asdfqwerzxcv\rASDF\tZXCV\n' - in a bash(1) shell to demonstrate this. This is not what the - user would instinctively expect of HT (but is ok for CHT). - The instinctive behavior would include clearing the terminal - cells that are skipped over by HT with blank cells in the - current screen attributes, including background color; - the boolean dest_tabs_magic_smso terminfo(5) capability - indicates this saner behavior for HT, but only some rare - terminals have it (although it also indicates a special - glitch with standout mode in the Teleray terminal for which - it was initially introduced). 
The remedy is to add "\33K" - after each SGR sequence, be it START (to fix the behavior - of any HT after that before another SGR) or END (to fix the - behavior of an HT in default background color that would - follow a line-wrapping at the bottom of the screen in another - background color, and to complement doing it after START). - Piping grep's output through a pager such as less(1) avoids - any HT problems since the pager performs tab expansion. - - Generic disadvantages of this remedy are: - -- Some very rare terminals might support SGR but not EL (nobody - will use "grep --color" on a terminal that does not support - SGR in the first place). - -- Having these extra control sequences might somewhat complicate - the task of any program trying to parse "grep --color" - output in order to extract structuring information from it. - A specific disadvantage to doing it after SGR START is: - -- Even more possible background color flicker (when timing - with the monitor's redraw is just right), even when not at the - bottom of the screen. - There are no additional disadvantages specific to doing it after - SGR END. - - It would be impractical for GNU grep to become a full-fledged - terminal program linked against ncurses or the like, so it will - not detect terminfo(5) capabilities. */ -static const char *sgr_start = "\33[%sm\33[K"; -static const char *sgr_end = "\33[m\33[K"; - -/* SGR utility functions. */ -static void -pr_sgr_start (char const *s) -{ - if (*s) - print_start_colorize (sgr_start, s); -} -static void -pr_sgr_end (char const *s) -{ - if (*s) - print_end_colorize (sgr_end); -} -static void -pr_sgr_start_if (char const *s) -{ - if (color_option) - pr_sgr_start (s); -} -static void -pr_sgr_end_if (char const *s) -{ - if (color_option) - pr_sgr_end (s); -} - -struct color_cap - { - const char *name; - const char **var; - void (*fct) (void); - }; - -static void -color_cap_mt_fct (void) -{ - /* Our caller just set selected_match_color. 
*/ - context_match_color = selected_match_color; -} - -static void -color_cap_rv_fct (void) -{ - /* By this point, it was 1 (or already -1). */ - color_option = -1; /* That's still != 0. */ -} - -static void -color_cap_ne_fct (void) -{ - sgr_start = "\33[%sm"; - sgr_end = "\33[m"; -} - -/* For GREP_COLORS. */ -static const struct color_cap color_dict[] = - { - { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */ - { "ms", &selected_match_color, NULL }, /* selected matched text */ - { "mc", &context_match_color, NULL }, /* context matched text */ - { "fn", &filename_color, NULL }, /* filename */ - { "ln", &line_num_color, NULL }, /* line number */ - { "bn", &byte_num_color, NULL }, /* byte (sic) offset */ - { "se", &sep_color, NULL }, /* separator */ - { "sl", &selected_line_color, NULL }, /* selected lines */ - { "cx", &context_line_color, NULL }, /* context lines */ - { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */ - { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */ - { NULL, NULL, NULL } - }; - -static struct exclude *excluded_patterns; -static struct exclude *excluded_directory_patterns; -/* Short options. */ -static char const short_options[] = -"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; - -/* Non-boolean long options that have no corresponding short equivalents. */ -enum -{ - BINARY_FILES_OPTION = CHAR_MAX + 1, - COLOR_OPTION, - INCLUDE_OPTION, - EXCLUDE_OPTION, - EXCLUDE_FROM_OPTION, - LINE_BUFFERED_OPTION, - LABEL_OPTION, - EXCLUDE_DIRECTORY_OPTION, - GROUP_SEPARATOR_OPTION -}; - -/* Long options equivalences. 
*/ -static struct option const long_options[] = -{ - {"basic-regexp", no_argument, NULL, 'G'}, - {"extended-regexp", no_argument, NULL, 'E'}, - {"fixed-regexp", no_argument, NULL, 'F'}, - {"fixed-strings", no_argument, NULL, 'F'}, - {"perl-regexp", no_argument, NULL, 'P'}, - {"after-context", required_argument, NULL, 'A'}, - {"before-context", required_argument, NULL, 'B'}, - {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, - {"byte-offset", no_argument, NULL, 'b'}, - {"context", required_argument, NULL, 'C'}, - {"color", optional_argument, NULL, COLOR_OPTION}, - {"colour", optional_argument, NULL, COLOR_OPTION}, - {"count", no_argument, NULL, 'c'}, - {"devices", required_argument, NULL, 'D'}, - {"directories", required_argument, NULL, 'd'}, - {"exclude", required_argument, NULL, EXCLUDE_OPTION}, - {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, - {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION}, - {"file", required_argument, NULL, 'f'}, - {"files-with-matches", no_argument, NULL, 'l'}, - {"files-without-match", no_argument, NULL, 'L'}, - {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, - {"help", no_argument, &show_help, 1}, - {"include", required_argument, NULL, INCLUDE_OPTION}, - {"ignore-case", no_argument, NULL, 'i'}, - {"initial-tab", no_argument, NULL, 'T'}, - {"label", required_argument, NULL, LABEL_OPTION}, - {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, - {"line-number", no_argument, NULL, 'n'}, - {"line-regexp", no_argument, NULL, 'x'}, - {"max-count", required_argument, NULL, 'm'}, - - {"no-filename", no_argument, NULL, 'h'}, - {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION}, - {"no-messages", no_argument, NULL, 's'}, - {"null", no_argument, NULL, 'Z'}, - {"null-data", no_argument, NULL, 'z'}, - {"only-matching", no_argument, NULL, 'o'}, - {"quiet", no_argument, NULL, 'q'}, - {"recursive", no_argument, NULL, 'r'}, - {"dereference-recursive", 
no_argument, NULL, 'R'}, - {"regexp", required_argument, NULL, 'e'}, - {"invert-match", no_argument, NULL, 'v'}, - {"silent", no_argument, NULL, 'q'}, - {"text", no_argument, NULL, 'a'}, - {"binary", no_argument, NULL, 'U'}, - {"unix-byte-offsets", no_argument, NULL, 'u'}, - {"version", no_argument, NULL, 'V'}, - {"with-filename", no_argument, NULL, 'H'}, - {"word-regexp", no_argument, NULL, 'w'}, - {0, 0, 0, 0} -}; - -/* Define flags declared in grep.h. */ -int match_icase; -int match_words; -int match_lines; -unsigned char eolbyte; - -static char const *matcher; - -/* For error messages. */ -/* The input file name, or (if standard input) "-" or a --label argument. */ -static char const *filename; -static size_t filename_prefix_len; -static int errseen; -static int write_error_seen; - -enum directories_type - { - READ_DIRECTORIES = 2, - RECURSE_DIRECTORIES, - SKIP_DIRECTORIES - }; - -/* How to handle directories. */ -static char const *const directories_args[] = -{ - "read", "recurse", "skip", NULL -}; -static enum directories_type const directories_types[] = -{ - READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES -}; -ARGMATCH_VERIFY (directories_args, directories_types); - -static enum directories_type directories = READ_DIRECTORIES; - -enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK }; -static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL; - -/* How to handle devices. */ -static enum - { - READ_COMMAND_LINE_DEVICES, - READ_DEVICES, - SKIP_DEVICES - } devices = READ_COMMAND_LINE_DEVICES; - -static int grepfile (int, char const *, int, int); -static int grepdesc (int, int); -#if defined HAVE_DOS_FILE_CONTENTS -static int undossify_input (char *, size_t); -#endif - -static int -is_device_mode (mode_t m) -{ - return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); -} - -/* Return nonzero if ST->st_size is defined. Assume the file is not a - symbolic link. 
*/ -static int -usable_st_size (struct stat const *st) -{ - return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); -} - -/* Functions we'll use to search. */ -typedef void (*compile_fp_t) (char const *, size_t); -typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); -static compile_fp_t compile; -static execute_fp_t execute; - -/* Like error, but suppress the diagnostic if requested. */ -static void -suppressible_error (char const *mesg, int errnum) -{ - if (! suppress_errors) - error (0, errnum, "%s", mesg); - errseen = 1; -} - -/* If there has already been a write error, don't bother closing - standard output, as that might elicit a duplicate diagnostic. */ -static void -clean_up_stdout (void) -{ - if (! write_error_seen) - close_stdout (); -} - -/* Return 1 if a file is known to be binary for the purpose of 'grep'. - BUF, of size BUFSIZE, is the initial buffer read from the file with - descriptor FD and status ST. */ -static int -file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) -{ - #ifndef SEEK_HOLE - enum { SEEK_HOLE = SEEK_END }; - #endif - - /* If -z, test only whether the initial buffer contains '\200'; - knowing about holes won't help. */ - if (! eolbyte) - return memchr (buf, '\200', bufsize) != 0; - - /* If the initial buffer contains a null byte, guess that the file - is binary. */ - if (memchr (buf, '\0', bufsize)) - return 1; - - /* If the file has holes, it must contain a null byte somewhere. */ - if (SEEK_HOLE != SEEK_END && usable_st_size (st)) - { - off_t cur = bufsize; - if (O_BINARY || fd == STDIN_FILENO) - { - cur = lseek (fd, 0, SEEK_CUR); - if (cur < 0) - return 0; - } - - /* Look for a hole after the current location. 
*/ - off_t hole_start = lseek (fd, cur, SEEK_HOLE); - if (0 <= hole_start) - { - if (lseek (fd, cur, SEEK_SET) < 0) - suppressible_error (filename, errno); - if (hole_start < st->st_size) - return 1; - } - } - - /* Guess that the file does not contain binary data. */ - return 0; -} - -/* Convert STR to a nonnegative integer, storing the result in *OUT. - STR must be a valid context length argument; report an error if it - isn't. Silently ceiling *OUT at the maximum value, as that is - practically equivalent to infinity for grep's purposes. */ -static void -context_length_arg (char const *str, intmax_t *out) -{ - switch (xstrtoimax (str, 0, 10, out, "")) - { - case LONGINT_OK: - case LONGINT_OVERFLOW: - if (0 <= *out) - break; - /* Fall through. */ - default: - error (EXIT_TROUBLE, 0, "%s: %s", str, - _("invalid context length argument")); - } -} - -/* Return nonzero if the file with NAME should be skipped. - If COMMAND_LINE is nonzero, it is a command-line argument. - If IS_DIR is nonzero, it is a directory. */ -static int -skipped_file (char const *name, int command_line, int is_dir) -{ - return (is_dir - ? (directories == SKIP_DIRECTORIES - || (! (command_line && filename_prefix_len != 0) - && excluded_directory_patterns - && excluded_file_name (excluded_directory_patterns, name))) - : (excluded_patterns - && excluded_file_name (excluded_patterns, name))); -} - -/* Hairy buffering mechanism for grep. The intent is to keep - all reads aligned on a page boundary and multiples of the - page size, unless a read yields a partial page. */ - -static char *buffer; /* Base of buffer. */ -static size_t bufalloc; /* Allocated buffer size, counting slop. */ -#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ -static int bufdesc; /* File descriptor. */ -static char *bufbeg; /* Beginning of user-visible stuff. */ -static char *buflim; /* Limit of user-visible stuff. 
*/ -static size_t pagesize; /* alignment of memory pages */ -static off_t bufoffset; /* Read offset; defined on regular files. */ -static off_t after_last_match; /* Pointer after last matching line that - would have been output if we were - outputting characters. */ - -/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be - an integer or a pointer. Both args must be free of side effects. */ -#define ALIGN_TO(val, alignment) \ - ((size_t) (val) % (alignment) == 0 \ - ? (val) \ - : (val) + ((alignment) - (size_t) (val) % (alignment))) - -/* Reset the buffer for a new file, returning zero if we should skip it. - Initialize on the first time through. */ -static int -reset (int fd, struct stat const *st) -{ - if (! pagesize) - { - pagesize = getpagesize (); - if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) - abort (); - bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; - buffer = xmalloc (bufalloc); - } - - bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); - bufbeg[-1] = eolbyte; - bufdesc = fd; - - if (S_ISREG (st->st_mode)) - { - if (fd != STDIN_FILENO) - bufoffset = 0; - else - { - bufoffset = lseek (fd, 0, SEEK_CUR); - if (bufoffset < 0) - { - suppressible_error (_("lseek failed"), errno); - return 0; - } - } - } - return 1; -} - -/* Read new stuff into the buffer, saving the specified - amount of old stuff. When we're done, 'bufbeg' points - to the beginning of the buffer contents, and 'buflim' - points just after the end. Return zero if there's an error. */ -static int -fillbuf (size_t save, struct stat const *st) -{ - ssize_t fillsize; - int cc = 1; - char *readbuf; - size_t readsize; - - /* Offset from start of buffer to start of old stuff - that we want to save. 
*/ - size_t saved_offset = buflim - save - buffer; - - if (pagesize <= buffer + bufalloc - buflim) - { - readbuf = buflim; - bufbeg = buflim - save; - } - else - { - size_t minsize = save + pagesize; - size_t newsize; - size_t newalloc; - char *newbuf; - - /* Grow newsize until it is at least as great as minsize. */ - for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2) - if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2) - xalloc_die (); - - /* Try not to allocate more memory than the file size indicates, - as that might cause unnecessary memory exhaustion if the file - is large. However, do not use the original file size as a - heuristic if we've already read past the file end, as most - likely the file is growing. */ - if (usable_st_size (st)) - { - off_t to_be_read = st->st_size - bufoffset; - off_t maxsize_off = save + to_be_read; - if (0 <= to_be_read && to_be_read <= maxsize_off - && maxsize_off == (size_t) maxsize_off - && minsize <= (size_t) maxsize_off - && (size_t) maxsize_off < newsize) - newsize = maxsize_off; - } - - /* Add enough room so that the buffer is aligned and has room - for byte sentinels fore and aft. */ - newalloc = newsize + pagesize + 1; - - newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; - readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); - bufbeg = readbuf - save; - memmove (bufbeg, buffer + saved_offset, save); - bufbeg[-1] = eolbyte; - if (newbuf != buffer) - { - free (buffer); - buffer = newbuf; - } - } - - readsize = buffer + bufalloc - readbuf; - readsize -= readsize % pagesize; - - fillsize = safe_read (bufdesc, readbuf, readsize); - if (fillsize < 0) - fillsize = cc = 0; - bufoffset += fillsize; -#if defined HAVE_DOS_FILE_CONTENTS - if (fillsize) - fillsize = undossify_input (readbuf, fillsize); -#endif - buflim = readbuf + fillsize; - return cc; -} - -/* Flags controlling the style of output. 
*/ -static enum -{ - BINARY_BINARY_FILES, - TEXT_BINARY_FILES, - WITHOUT_MATCH_BINARY_FILES -} binary_files; /* How to handle binary files. */ - -static int filename_mask; /* If zero, output nulls after filenames. */ -static int out_quiet; /* Suppress all normal output. */ -static int out_invert; /* Print nonmatching stuff. */ -static int out_file; /* Print filenames. */ -static int out_line; /* Print line numbers. */ -static int out_byte; /* Print byte offsets. */ -static intmax_t out_before; /* Lines of leading context. */ -static intmax_t out_after; /* Lines of trailing context. */ -static int count_matches; /* Count matching lines. */ -static int list_files; /* List matching files. */ -static int no_filenames; /* Suppress file names. */ -static intmax_t max_count; /* Stop after outputting this many - lines from an input file. */ -static int line_buffered; /* If nonzero, use line buffering, i.e. - fflush everyline out. */ -static char *label = NULL; /* Fake filename for stdin */ - - -/* Internal variables to keep track of byte count, context, etc. */ -static uintmax_t totalcc; /* Total character count before bufbeg. */ -static char const *lastnl; /* Pointer after last newline counted. */ -static char const *lastout; /* Pointer after last character output; - NULL if no character has been output - or if it's conceptually before bufbeg. */ -static uintmax_t totalnl; /* Total newline count before lastnl. */ -static intmax_t outleft; /* Maximum number of lines to be output. */ -static intmax_t pending; /* Pending lines of output. - Always kept 0 if out_quiet is true. */ -static int done_on_match; /* Stop scanning file on first match. */ -static int exit_on_match; /* Exit on first match. */ - -#if defined HAVE_DOS_FILE_CONTENTS -# include "dosbuf.c" -#endif - -/* Add two numbers that count input bytes or lines, and report an - error if the addition overflows. 
*/ -static uintmax_t -add_count (uintmax_t a, uintmax_t b) -{ - uintmax_t sum = a + b; - if (sum < a) - error (EXIT_TROUBLE, 0, _("input is too large to count")); - return sum; -} - -static void -nlscan (char const *lim) -{ - size_t newlines = 0; - char const *beg; - for (beg = lastnl; beg < lim; beg++) - { - beg = memchr (beg, eolbyte, lim - beg); - if (!beg) - break; - newlines++; - } - totalnl = add_count (totalnl, newlines); - lastnl = lim; -} - -/* Print the current filename. */ -static void -print_filename (void) -{ - pr_sgr_start_if (filename_color); - fputs (filename, stdout); - pr_sgr_end_if (filename_color); -} - -/* Print a character separator. */ -static void -print_sep (char sep) -{ - pr_sgr_start_if (sep_color); - fputc (sep, stdout); - pr_sgr_end_if (sep_color); -} - -/* Print a line number or a byte offset. */ -static void -print_offset (uintmax_t pos, int min_width, const char *color) -{ - /* Do not rely on printf to print pos, since uintmax_t may be longer - than long, and long long is not portable. */ - - char buf[sizeof pos * CHAR_BIT]; - char *p = buf + sizeof buf; - - do - { - *--p = '0' + pos % 10; - --min_width; - } - while ((pos /= 10) != 0); - - /* Do this to maximize the probability of alignment across lines. */ - if (align_tabs) - while (--min_width >= 0) - *--p = ' '; - - pr_sgr_start_if (color); - fwrite (p, 1, buf + sizeof buf - p, stdout); - pr_sgr_end_if (color); -} - -/* Print a whole line head (filename, line, byte). 
*/ -static void -print_line_head (char const *beg, char const *lim, int sep) -{ - int pending_sep = 0; - - if (out_file) - { - print_filename (); - if (filename_mask) - pending_sep = 1; - else - fputc (0, stdout); - } - - if (out_line) - { - if (lastnl < lim) - { - nlscan (beg); - totalnl = add_count (totalnl, 1); - lastnl = lim; - } - if (pending_sep) - print_sep (sep); - print_offset (totalnl, 4, line_num_color); - pending_sep = 1; - } - - if (out_byte) - { - uintmax_t pos = add_count (totalcc, beg - bufbeg); -#if defined HAVE_DOS_FILE_CONTENTS - pos = dossified_pos (pos); -#endif - if (pending_sep) - print_sep (sep); - print_offset (pos, 6, byte_num_color); - pending_sep = 1; - } - - if (pending_sep) - { - /* This assumes sep is one column wide. - Try doing this any other way with Unicode - (and its combining and wide characters) - filenames and you're wasting your efforts. */ - if (align_tabs) - fputs ("\t\b", stdout); - - print_sep (sep); - } -} - -static const char * -print_line_middle (const char *beg, const char *lim, - const char *line_color, const char *match_color) -{ - size_t match_size; - size_t match_offset; - const char *cur = beg; - const char *mid = NULL; - - while (cur < lim - && ((match_offset = execute (beg, lim - beg, &match_size, - beg + (cur - beg))) != (size_t) -1)) - { - char const *b = beg + match_offset; - - /* Avoid matching the empty line at the end of the buffer. */ - if (b == lim) - break; - - /* Avoid hanging on grep --color "" foo */ - if (match_size == 0) - { - /* Make minimal progress; there may be further non-empty matches. */ - /* XXX - Could really advance by one whole multi-octet character. */ - match_size = 1; - if (!mid) - mid = cur; - } - else - { - /* This function is called on a matching line only, - but is it selected or rejected/context? */ - if (only_matching) - print_line_head (b, lim, (out_invert ? 
SEP_CHAR_REJECTED - : SEP_CHAR_SELECTED)); - else - { - pr_sgr_start (line_color); - if (mid) - { - cur = mid; - mid = NULL; - } - fwrite (cur, sizeof (char), b - cur, stdout); - } - - pr_sgr_start_if (match_color); - fwrite (b, sizeof (char), match_size, stdout); - pr_sgr_end_if (match_color); - if (only_matching) - fputs ("\n", stdout); - } - cur = b + match_size; - } - - if (only_matching) - cur = lim; - else if (mid) - cur = mid; - - return cur; -} - -static const char * -print_line_tail (const char *beg, const char *lim, const char *line_color) -{ - size_t eol_size; - size_t tail_size; - - eol_size = (lim > beg && lim[-1] == eolbyte); - eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); - tail_size = lim - eol_size - beg; - - if (tail_size > 0) - { - pr_sgr_start (line_color); - fwrite (beg, 1, tail_size, stdout); - beg += tail_size; - pr_sgr_end (line_color); - } - - return beg; -} - -static void -prline (char const *beg, char const *lim, int sep) -{ - int matching; - const char *line_color; - const char *match_color; - - if (!only_matching) - print_line_head (beg, lim, sep); - - matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert; - - if (color_option) - { - line_color = (((sep == SEP_CHAR_SELECTED) - ^ (out_invert && (color_option < 0))) - ? selected_line_color : context_line_color); - match_color = (sep == SEP_CHAR_SELECTED - ? selected_match_color : context_match_color); - } - else - line_color = match_color = NULL; /* Shouldn't be used. */ - - if ((only_matching && matching) - || (color_option && (*line_color || *match_color))) - { - /* We already know that non-matching lines have no match (to colorize). 
*/ - if (matching && (only_matching || *match_color)) - beg = print_line_middle (beg, lim, line_color, match_color); - - if (!only_matching && *line_color) - { - /* This code is exercised at least when grep is invoked like this: - echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */ - beg = print_line_tail (beg, lim, line_color); - } - } - - if (!only_matching && lim > beg) - fwrite (beg, 1, lim - beg, stdout); - - if (ferror (stdout)) - { - write_error_seen = 1; - error (EXIT_TROUBLE, 0, _("write error")); - } - - lastout = lim; - - if (line_buffered) - fflush (stdout); -} - -/* Print pending lines of trailing context prior to LIM. Trailing context ends - at the next matching line when OUTLEFT is 0. */ -static void -prpending (char const *lim) -{ - if (!lastout) - lastout = bufbeg; - while (pending > 0 && lastout < lim) - { - char const *nl = memchr (lastout, eolbyte, lim - lastout); - size_t match_size; - --pending; - if (outleft - || ((execute (lastout, nl + 1 - lastout, - &match_size, NULL) == (size_t) -1) - == !out_invert)) - prline (lastout, nl + 1, SEP_CHAR_REJECTED); - else - pending = 0; - } -} - -/* Print the lines between BEG and LIM. Deal with context crap. - If NLINESP is non-null, store a count of lines between BEG and LIM. */ -static void -prtext (char const *beg, char const *lim, intmax_t *nlinesp) -{ - static int used; /* avoid printing SEP_STR_GROUP before any output */ - char const *bp, *p; - char eol = eolbyte; - intmax_t i, n; - - if (!out_quiet && pending > 0) - prpending (beg); - - p = beg; - - if (!out_quiet) - { - /* Deal with leading context crap. */ - - bp = lastout ? lastout : bufbeg; - for (i = 0; i < out_before; ++i) - if (p > bp) - do - --p; - while (p[-1] != eol); - - /* We print the SEP_STR_GROUP separator only if our output is - discontiguous from the last output in the file. 
*/ - if ((out_before || out_after) && used && p != lastout && group_separator) - { - pr_sgr_start_if (sep_color); - fputs (group_separator, stdout); - pr_sgr_end_if (sep_color); - fputc ('\n', stdout); - } - - while (p < beg) - { - char const *nl = memchr (p, eol, beg - p); - nl++; - prline (p, nl, SEP_CHAR_REJECTED); - p = nl; - } - } - - if (nlinesp) - { - /* Caller wants a line count. */ - for (n = 0; p < lim && n < outleft; n++) - { - char const *nl = memchr (p, eol, lim - p); - nl++; - if (!out_quiet) - prline (p, nl, SEP_CHAR_SELECTED); - p = nl; - } - *nlinesp = n; - - /* relying on it that this function is never called when outleft = 0. */ - after_last_match = bufoffset - (buflim - p); - } - else if (!out_quiet) - prline (beg, lim, SEP_CHAR_SELECTED); - - pending = out_quiet ? 0 : out_after; - used = 1; -} - -/* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there - is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the - length of the match and return the offset of the start of the match. */ -static size_t -do_execute (char const *buf, size_t size, size_t *match_size, - char const *start_ptr) -{ - size_t result; - const char *line_next; - - /* With the current implementation, using --ignore-case with a multi-byte - character set is very inefficient when applied to a large buffer - containing many matches. We can avoid much of the wasted effort - by matching line-by-line. - - FIXME: this is just an ugly workaround, and it doesn't really - belong here. Also, PCRE is always using this same per-line - matching algorithm. Either we fix -i, or we should refactor - this code---for example, we could add another function pointer - to struct matcher to split the buffer passed to execute. It would - perform the memchr if line-by-line matching is necessary, or just - return buf + size otherwise. */ - if (! 
(execute == Fexecute || execute == Pexecute) - || MB_CUR_MAX == 1 || !match_icase) - return execute (buf, size, match_size, start_ptr); - - for (line_next = buf; line_next < buf + size; ) - { - const char *line_buf = line_next; - const char *line_end = memchr (line_buf, eolbyte, - (buf + size) - line_buf); - if (line_end == NULL) - line_next = line_end = buf + size; - else - line_next = line_end + 1; - - if (start_ptr && start_ptr >= line_end) - continue; - - result = execute (line_buf, line_next - line_buf, match_size, start_ptr); - if (result != (size_t) -1) - return (line_buf - buf) + result; - } - - return (size_t) -1; -} - -/* Scan the specified portion of the buffer, matching lines (or - between matching lines if OUT_INVERT is true). Return a count of - lines printed. */ -static intmax_t -grepbuf (char const *beg, char const *lim) -{ - intmax_t nlines, n; - char const *p; - size_t match_offset; - size_t match_size; - - nlines = 0; - p = beg; - while ((match_offset = do_execute (p, lim - p, &match_size, - NULL)) != (size_t) -1) - { - char const *b = p + match_offset; - char const *endp = b + match_size; - /* Avoid matching the empty line at the end of the buffer. */ - if (b == lim) - break; - if (!out_invert) - { - prtext (b, endp, NULL); - nlines++; - outleft--; - if (!outleft || done_on_match) - { - if (exit_on_match) - exit (EXIT_SUCCESS); - after_last_match = bufoffset - (buflim - endp); - return nlines; - } - } - else if (p < b) - { - prtext (p, b, &n); - nlines += n; - outleft -= n; - if (!outleft) - return nlines; - } - p = endp; - } - if (out_invert && p < lim) - { - prtext (p, lim, &n); - nlines += n; - outleft -= n; - } - return nlines; -} - -/* Search a given file. Normally, return a count of lines printed; - but if the file is a directory and we search it recursively, then - return -2 if there was a match, and -1 otherwise. 
*/ -static intmax_t -grep (int fd, struct stat const *st) -{ - intmax_t nlines, i; - int not_text; - size_t residue, save; - char oldc; - char *beg; - char *lim; - char eol = eolbyte; - - if (! reset (fd, st)) - return 0; - - totalcc = 0; - lastout = 0; - totalnl = 0; - outleft = max_count; - after_last_match = 0; - pending = 0; - - nlines = 0; - residue = 0; - save = 0; - - if (! fillbuf (save, st)) - { - suppressible_error (filename, errno); - return 0; - } - - not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet) - || binary_files == WITHOUT_MATCH_BINARY_FILES) - && file_is_binary (bufbeg, buflim - bufbeg, fd, st)); - if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES) - return 0; - done_on_match += not_text; - out_quiet += not_text; - - for (;;) - { - lastnl = bufbeg; - if (lastout) - lastout = bufbeg; - - beg = bufbeg + save; - - /* no more data to scan (eof) except for maybe a residue -> break */ - if (beg == buflim) - break; - - /* Determine new residue (the length of an incomplete line at the end of - the buffer, 0 means there is no incomplete last line). */ - oldc = beg[-1]; - beg[-1] = eol; - /* FIXME: use rawmemrchr if/when it exists, since we have ensured - that this use of memrchr is guaranteed never to return NULL. */ - lim = memrchr (beg - 1, eol, buflim - beg + 1); - ++lim; - beg[-1] = oldc; - if (lim == beg) - lim = beg - residue; - beg -= residue; - residue = buflim - lim; - - if (beg < lim) - { - if (outleft) - nlines += grepbuf (beg, lim); - if (pending) - prpending (lim); - if ((!outleft && !pending) - || (nlines && done_on_match && !out_invert)) - goto finish_grep; - } - - /* The last OUT_BEFORE lines at the end of the buffer will be needed as - leading context if there is a matching line at the begin of the - next data. Make beg point to their begin. 
*/ - i = 0; - beg = lim; - while (i < out_before && beg > bufbeg && beg != lastout) - { - ++i; - do - --beg; - while (beg[-1] != eol); - } - - /* detect if leading context is discontinuous from last printed line. */ - if (beg != lastout) - lastout = 0; - - /* Handle some details and read more data to scan. */ - save = residue + lim - beg; - if (out_byte) - totalcc = add_count (totalcc, buflim - bufbeg - save); - if (out_line) - nlscan (beg); - if (! fillbuf (save, st)) - { - suppressible_error (filename, errno); - goto finish_grep; - } - } - if (residue) - { - *buflim++ = eol; - if (outleft) - nlines += grepbuf (bufbeg + save - residue, buflim); - if (pending) - prpending (buflim); - } - - finish_grep: - done_on_match -= not_text; - out_quiet -= not_text; - if ((not_text & ~out_quiet) && nlines != 0) - printf (_("Binary file %s matches\n"), filename); - return nlines; -} - -static int -grepdirent (FTS *fts, FTSENT *ent, int command_line) -{ - int follow, dirdesc; - struct stat *st = ent->fts_statp; - command_line &= ent->fts_level == FTS_ROOTLEVEL; - - if (ent->fts_info == FTS_DP) - { - if (directories == RECURSE_DIRECTORIES && command_line) - out_file &= ~ (2 * !no_filenames); - return 1; - } - - if (skipped_file (ent->fts_name, command_line, - (ent->fts_info == FTS_D || ent->fts_info == FTS_DC - || ent->fts_info == FTS_DNR))) - { - fts_set (fts, ent, FTS_SKIP); - return 1; - } - - filename = ent->fts_path + filename_prefix_len; - follow = (fts->fts_options & FTS_LOGICAL - || (fts->fts_options & FTS_COMFOLLOW && command_line)); - - switch (ent->fts_info) - { - case FTS_D: - if (directories == RECURSE_DIRECTORIES) - { - out_file |= 2 * !no_filenames; - return 1; - } - fts_set (fts, ent, FTS_SKIP); - break; - - case FTS_DC: - if (!suppress_errors) - error (0, 0, _("warning: %s: %s"), filename, - _("recursive directory loop")); - return 1; - - case FTS_DNR: - case FTS_ERR: - case FTS_NS: - suppressible_error (filename, ent->fts_errno); - return 1; - - case 
FTS_DEFAULT: - case FTS_NSOK: - if (devices == SKIP_DEVICES - || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) - { - struct stat st1; - if (! st->st_mode) - { - /* The file type is not already known. Get the file status - before opening, since opening might have side effects - on a device. */ - int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW; - if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) - { - suppressible_error (filename, errno); - return 1; - } - st = &st1; - } - if (is_device_mode (st->st_mode)) - return 1; - } - break; - - case FTS_F: - case FTS_SLNONE: - break; - - case FTS_SL: - case FTS_W: - return 1; - - default: - abort (); - } - - dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD - ? fts->fts_cwd_fd - : AT_FDCWD); - return grepfile (dirdesc, ent->fts_accpath, follow, command_line); -} - -static int -grepfile (int dirdesc, char const *name, int follow, int command_line) -{ - int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW)); - if (desc < 0) - { - if (follow || (errno != ELOOP && errno != EMLINK)) - suppressible_error (filename, errno); - return 1; - } - return grepdesc (desc, command_line); -} - -static int -grepdesc (int desc, int command_line) -{ - intmax_t count; - int status = 1; - struct stat st; - - /* Get the file status, possibly for the second time. This catches - a race condition if the directory entry changes after the - directory entry is read and before the file is opened. For - example, normally DESC is a directory only at the top level, but - there is an exception if some other process substitutes a - directory for a non-directory while 'grep' is running. 
*/ - if (fstat (desc, &st) != 0) - { - suppressible_error (filename, errno); - goto closeout; - } - - if (desc != STDIN_FILENO && command_line - && skipped_file (filename, 1, S_ISDIR (st.st_mode))) - goto closeout; - - if (desc != STDIN_FILENO - && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) - { - /* Traverse the directory starting with its full name, because - unfortunately fts provides no way to traverse the directory - starting from its file descriptor. */ - - FTS *fts; - FTSENT *ent; - int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW); - char *fts_arg[2]; - - /* Close DESC now, to conserve file descriptors if the race - condition occurs many times in a deep recursion. */ - if (close (desc) != 0) - suppressible_error (filename, errno); - - fts_arg[0] = (char *) filename; - fts_arg[1] = NULL; - fts = fts_open (fts_arg, opts, NULL); - - if (!fts) - xalloc_die (); - while ((ent = fts_read (fts))) - status &= grepdirent (fts, ent, command_line); - if (errno) - suppressible_error (filename, errno); - if (fts_close (fts) != 0) - suppressible_error (filename, errno); - return status; - } - if (desc != STDIN_FILENO - && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) - || ((devices == SKIP_DEVICES - || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) - && is_device_mode (st.st_mode)))) - goto closeout; - - /* If there is a regular file on stdout and the current file refers - to the same i-node, we have to report the problem and skip it. - Otherwise when matching lines from some other input reach the - disk before we open this file, we can end up reading and matching - those lines and appending them to the file from which we're reading. - Then we'd have what appears to be an infinite loop that'd terminate - only upon filling the output file system or reaching a quota. - However, there is no risk of an infinite loop if grep is generating - no output, i.e., with --silent, --quiet, -q. 
- Similarly, with any of these: - --max-count=N (-m) (for N >= 2) - --files-with-matches (-l) - --files-without-match (-L) - there is no risk of trouble. - For --max-count=1, grep stops after printing the first match, - so there is no risk of malfunction. But even --max-count=2, with - input==output, while there is no risk of infloop, there is a race - condition that could result in "alternate" output. */ - if (!out_quiet && list_files == 0 && 1 < max_count - && S_ISREG (out_stat.st_mode) && out_stat.st_ino - && SAME_INODE (st, out_stat)) - { - if (! suppress_errors) - error (0, 0, _("input file %s is also the output"), quote (filename)); - errseen = 1; - goto closeout; - } - -#if defined SET_BINARY - /* Set input to binary mode. Pipes are simulated with files - on DOS, so this includes the case of "foo | grep bar". */ - if (!isatty (desc)) - SET_BINARY (desc); -#endif - - count = grep (desc, &st); - if (count < 0) - status = count + 2; - else - { - if (count_matches) - { - if (out_file) - { - print_filename (); - if (filename_mask) - print_sep (SEP_CHAR_SELECTED); - else - fputc (0, stdout); - } - printf ("%" PRIdMAX "\n", count); - } - - status = !count; - if (list_files == 1 - 2 * status) - { - print_filename (); - fputc ('\n' & filename_mask, stdout); - } - - if (desc == STDIN_FILENO) - { - off_t required_offset = outleft ? bufoffset : after_last_match; - if (required_offset != bufoffset - && lseek (desc, required_offset, SEEK_SET) < 0 - && S_ISREG (st.st_mode)) - suppressible_error (filename, errno); - } - } - - closeout: - if (desc != STDIN_FILENO && close (desc) != 0) - suppressible_error (filename, errno); - return status; -} - -static int -grep_command_line_arg (char const *arg) -{ - if (STREQ (arg, "-")) - { - filename = label ? 
label : _("(standard input)"); - return grepdesc (STDIN_FILENO, 1); - } - else - { - filename = arg; - return grepfile (AT_FDCWD, arg, 1, 1); - } -} - -_Noreturn void usage (int); -void -usage (int status) -{ - if (status != 0) - { - fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), - program_name); - fprintf (stderr, _("Try '%s --help' for more information.\n"), - program_name); - } - else - { - printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name); - printf (_("Search for PATTERN in each FILE or standard input.\n")); - printf (_("PATTERN is, by default, a basic regular expression (BRE).\n")); - printf (_("\ -Example: %s -i 'hello world' menu.h main.c\n\ -\n\ -Regexp selection and interpretation:\n"), program_name); - printf (_("\ - -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ - -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\ - -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ - -P, --perl-regexp PATTERN is a Perl regular expression\n")); - /* -X is undocumented on purpose. 
*/ - printf (_("\ - -e, --regexp=PATTERN use PATTERN for matching\n\ - -f, --file=FILE obtain PATTERN from FILE\n\ - -i, --ignore-case ignore case distinctions\n\ - -w, --word-regexp force PATTERN to match only whole words\n\ - -x, --line-regexp force PATTERN to match only whole lines\n\ - -z, --null-data a data line ends in 0 byte, not newline\n")); - printf (_("\ -\n\ -Miscellaneous:\n\ - -s, --no-messages suppress error messages\n\ - -v, --invert-match select non-matching lines\n\ - -V, --version display version information and exit\n\ - --help display this help text and exit\n")); - printf (_("\ -\n\ -Output control:\n\ - -m, --max-count=NUM stop after NUM matches\n\ - -b, --byte-offset print the byte offset with output lines\n\ - -n, --line-number print line number with output lines\n\ - --line-buffered flush output on every line\n\ - -H, --with-filename print the file name for each match\n\ - -h, --no-filename suppress the file name prefix on output\n\ - --label=LABEL use LABEL as the standard input file name prefix\n\ -")); - printf (_("\ - -o, --only-matching show only the part of a line matching PATTERN\n\ - -q, --quiet, --silent suppress all normal output\n\ - --binary-files=TYPE assume that binary files are TYPE;\n\ - TYPE is 'binary', 'text', or 'without-match'\n\ - -a, --text equivalent to --binary-files=text\n\ -")); - printf (_("\ - -I equivalent to --binary-files=without-match\n\ - -d, --directories=ACTION how to handle directories;\n\ - ACTION is 'read', 'recurse', or 'skip'\n\ - -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ - ACTION is 'read' or 'skip'\n\ - -r, --recursive like --directories=recurse\n\ - -R, --dereference-recursive likewise, but follow all symlinks\n\ -")); - printf (_("\ - --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ - --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\ - --exclude-from=FILE skip files matching any file pattern from FILE\n\ - --exclude-dir=PATTERN 
directories that match PATTERN will be skipped.\n\ -")); - printf (_("\ - -L, --files-without-match print only names of FILEs containing no match\n\ - -l, --files-with-matches print only names of FILEs containing matches\n\ - -c, --count print only a count of matching lines per FILE\n\ - -T, --initial-tab make tabs line up (if needed)\n\ - -Z, --null print 0 byte after FILE name\n")); - printf (_("\ -\n\ -Context control:\n\ - -B, --before-context=NUM print NUM lines of leading context\n\ - -A, --after-context=NUM print NUM lines of trailing context\n\ - -C, --context=NUM print NUM lines of output context\n\ -")); - printf (_("\ - -NUM same as --context=NUM\n\ - --color[=WHEN],\n\ - --colour[=WHEN] use markers to highlight the matching strings;\n\ - WHEN is 'always', 'never', or 'auto'\n\ - -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ - -u, --unix-byte-offsets report offsets as if CRs were not there\n\ - (MSDOS/Windows)\n\ -\n")); - printf (_("\ -'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ -Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n")); - printf (_("\ -When FILE is -, read standard input. With no FILE, read . if a command-line\n\ --r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\ -Exit status is 0 if any line is selected, 1 otherwise;\n\ -if any error occurs and -q is not given, the exit status is 2.\n")); - printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT); - printf (_("GNU Grep home page: <%s>\n"), - "http://www.gnu.org/software/grep/"); - fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"), - stdout); - - } - exit (status); -} - -/* Pattern compilers and matchers. 
*/ - -static void -Gcompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES); -} - -static void -Ecompile (char const *pattern, size_t size) -{ - GEAcompile (pattern, size, RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES); -} static void Acompile (char const *pattern, size_t size) @@ -1679,829 +19,19 @@ PAcompile (char const *pattern, size_t size) GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK); } -struct matcher -{ - char const name[16]; - compile_fp_t compile; - execute_fp_t execute; -}; -static struct matcher const matchers[] = { - { "grep", Gcompile, EGexecute }, - { "egrep", Ecompile, EGexecute }, - { "fgrep", Fcompile, Fexecute }, - { "awk", Acompile, EGexecute }, - { "gawk", GAcompile, EGexecute }, +struct matcher const matchers[] = { + { "grep", Gcompile, EGexecute }, + { "egrep", Ecompile, EGexecute }, + { "awk", Acompile, EGexecute }, + { "gawk", GAcompile, EGexecute }, { "posixawk", PAcompile, EGexecute }, - { "perl", Pcompile, Pexecute }, - { "", NULL, NULL }, + { "fgrep", Fcompile, Fexecute }, + { "perl", Pcompile, Pexecute }, + { NULL, NULL, NULL }, }; -/* Set the matcher to M if available. Exit in case of conflicts or if - M is not available. */ -static void -setmatcher (char const *m) -{ - struct matcher const *p; - - if (matcher && !STREQ (matcher, m)) - error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); - - for (p = matchers; p->compile; p++) - if (STREQ (m, p->name)) - { - matcher = p->name; - compile = p->compile; - execute = p->execute; - return; - } - - error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); -} - -/* Find the white-space-separated options specified by OPTIONS, and - using BUF to store copies of these options, set ARGV[0], ARGV[1], - etc. to the option copies. Return the number N of options found. - Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] - etc. Backslash can be used to escape whitespace (and backslashes). 
*/ -static size_t -prepend_args (char const *options, char *buf, char **argv) -{ - char const *o = options; - char *b = buf; - size_t n = 0; - - for (;;) - { - while (c_isspace (to_uchar (*o))) - o++; - if (!*o) - return n; - if (argv) - argv[n] = b; - n++; - - do - if ((*b++ = *o++) == '\\' && *o) - b[-1] = *o++; - while (*o && ! c_isspace (to_uchar (*o))); - - *b++ = '\0'; - } -} - -/* Prepend the whitespace-separated options in OPTIONS to the argument - vector of a main program with argument count *PARGC and argument - vector *PARGV. Return the number of options prepended. */ -static int -prepend_default_options (char const *options, int *pargc, char ***pargv) -{ - if (options && *options) - { - char *buf = xmalloc (strlen (options) + 1); - size_t prepended = prepend_args (options, buf, NULL); - int argc = *pargc; - char *const *argv = *pargv; - char **pp; - enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; - if (MAX_ARGS - argc < prepended) - xalloc_die (); - pp = xmalloc ((prepended + argc + 1) * sizeof *pp); - *pargc = prepended + argc; - *pargv = pp; - *pp++ = *argv++; - pp += prepend_args (options, buf, pp); - while ((*pp++ = *argv++)) - continue; - return prepended; - } - - return 0; -} - -/* Get the next non-digit option from ARGC and ARGV. - Return -1 if there are no more options. - Process any digit options that were encountered on the way, - and store the resulting integer into *DEFAULT_CONTEXT. */ -static int -get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) -{ - static int prev_digit_optind = -1; - int this_digit_optind, was_digit; - char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; - char *p = buf; - int opt; - - was_digit = 0; - this_digit_optind = optind; - while (1) - { - opt = getopt_long (argc, (char **) argv, short_options, - long_options, NULL); - if ( ! ('0' <= opt && opt <= '9')) - break; - - if (prev_digit_optind != this_digit_optind || !was_digit) - { - /* Reset to start another context length argument. 
*/ - p = buf; - } - else - { - /* Suppress trivial leading zeros, to avoid incorrect - diagnostic on strings like 00000000000. */ - p -= buf[0] == '0'; - } - - if (p == buf + sizeof buf - 4) - { - /* Too many digits. Append "..." to make context_length_arg - complain about "X...", where X contains the digits seen - so far. */ - strcpy (p, "..."); - p += 3; - break; - } - *p++ = opt; - - was_digit = 1; - prev_digit_optind = this_digit_optind; - this_digit_optind = optind; - } - if (p != buf) - { - *p = '\0'; - context_length_arg (buf, default_context); - } - - return opt; -} - -/* Parse GREP_COLORS. The default would look like: - GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' - with boolean capabilities (ne and rv) unset (i.e., omitted). - No character escaping is needed or supported. */ -static void -parse_grep_colors (void) -{ - const char *p; - char *q; - char *name; - char *val; - - p = getenv ("GREP_COLORS"); /* Plural! */ - if (p == NULL || *p == '\0') - return; - - /* Work off a writable copy. */ - q = xstrdup (p); - - name = q; - val = NULL; - /* From now on, be well-formed or you're gone. */ - for (;;) - if (*q == ':' || *q == '\0') - { - char c = *q; - struct color_cap const *cap; - - *q++ = '\0'; /* Terminate name or val. */ - /* Empty name without val (empty cap) - * won't match and will be ignored. */ - for (cap = color_dict; cap->name; cap++) - if (STREQ (cap->name, name)) - break; - /* If name unknown, go on for forward compatibility. */ - if (cap->var && val) - *(cap->var) = val; - if (cap->fct) - cap->fct (); - if (c == '\0') - return; - name = q; - val = NULL; - } - else if (*q == '=') - { - if (q == name || val) - return; - *q++ = '\0'; /* Terminate name. */ - val = q; /* Can be the empty string. */ - } - else if (val == NULL) - q++; /* Accumulate name. */ - else if (*q == ';' || (*q >= '0' && *q <= '9')) - q++; /* Accumulate val. Protect the terminal from being sent crap. 
*/ - else - return; -} - -#define MBRTOWC(pwc, s, n, ps) \ - (MB_CUR_MAX == 1 \ - ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \ - : mbrtowc (pwc, s, n, ps)) -#define WCRTOMB(s, wc, ps) \ - (MB_CUR_MAX == 1 \ - ? (*(s) = wctob ((wint_t) (wc)), 1) \ - : wcrtomb (s, wc, ps)) - -/* Change a pattern for fgrep into grep. */ -static void -fgrep_to_grep_pattern (size_t len, char const *keys, - size_t *new_len, char **new_keys) -{ - char *p = *new_keys = xnmalloc (len + 1, 2); - mbstate_t mb_state = { 0 }; - size_t n; - - for (; len; keys += n, len -= n) - { - wchar_t wc; - n = MBRTOWC (&wc, keys, len, &mb_state); - switch (n) - { - case (size_t) -2: - n = len; - /* Fall through. */ - default: - p = mempcpy (p, keys, n); - break; - - case (size_t) -1: - memset (&mb_state, 0, sizeof mb_state); - /* Fall through. */ - case 1: - *p = '\\'; - p += strchr ("$*.[\\^", *keys) != NULL; - /* Fall through. */ - case 0: - *p++ = *keys; - n = 1; - break; - } - } - - *new_len = p - *new_keys; -} - -/* If the newline-separated regular expressions, KEYS (with length, LEN - and no trailing NUL byte), are amenable to transformation into - otherwise equivalent case-ignoring ones, perform the transformation, - put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN, - and return true. Otherwise, return false. */ - -static bool -trivial_case_ignore (size_t len, char const *keys, - size_t *new_len, char **new_keys) -{ - /* FIXME: consider removing the following restriction: - Reject if KEYS contain ASCII '\\' or '['. */ - if (memchr (keys, '\\', len) || memchr (keys, '[', len)) - return false; - - /* Worst case is that each byte B of KEYS is ASCII alphabetic and - CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each - occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which - requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is - bounded above by the constant expression CASE_FOLDED_BUFSIZE * - MB_LEN_MAX + 3. 
*/ - *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3); - char *p = *new_keys; - - mbstate_t mb_state = { 0 }; - while (len) - { - bool initial_state = mbsinit (&mb_state) != 0; - wchar_t wc; - size_t n = MBRTOWC (&wc, keys, len, &mb_state); - - /* For an invalid, incomplete or L'\0', skip this optimization. */ - if ((size_t) -2 <= n) - { - skip_case_ignore_optimization: - free (*new_keys); - return false; - } - - char const *orig = keys; - keys += n; - len -= n; - - wchar_t folded[CASE_FOLDED_BUFSIZE]; - int nfolded = case_folded_counterparts (wc, folded); - if (nfolded <= 0) - { - memcpy (p, orig, n); - p += n; - } - else if (! initial_state) - goto skip_case_ignore_optimization; - else - { - *p++ = '['; - memcpy (p, orig, n); - p += n; - - int i = 0; - do - { - size_t nbytes = WCRTOMB (p, folded[i], &mb_state); - if (nbytes == (size_t) -1) - goto skip_case_ignore_optimization; - p += nbytes; - } - while (++i < nfolded); - - if (! mbsinit (&mb_state)) - goto skip_case_ignore_optimization; - - *p++ = ']'; - } - } - - *new_len = p - *new_keys; - - return true; -} - -int -main (int argc, char **argv) -{ - char *keys; - size_t keycc, oldcc, keyalloc; - int with_filenames; - size_t cc; - int opt, status, prepended; - int prev_optind, last_recursive; - int fread_errno; - intmax_t default_context; - FILE *fp; - exit_failure = EXIT_TROUBLE; - initialize_main (&argc, &argv); - set_program_name (argv[0]); - program_name = argv[0]; - - keys = NULL; - keycc = 0; - with_filenames = 0; - eolbyte = '\n'; - filename_mask = ~0; - - max_count = INTMAX_MAX; - - /* The value -1 means to use DEFAULT_CONTEXT. */ - out_after = out_before = -1; - /* Default before/after context: changed by -C/-NUM options */ - default_context = 0; - /* Changed by -o option */ - only_matching = 0; - - /* Internationalization. 
*/ -#if defined HAVE_SETLOCALE - setlocale (LC_ALL, ""); -#endif -#if defined ENABLE_NLS - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); -#endif - - exit_failure = EXIT_TROUBLE; - atexit (clean_up_stdout); - - last_recursive = 0; - prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); - compile = matchers[0].compile; - execute = matchers[0].execute; - - while (prev_optind = optind, - (opt = get_nondigit_option (argc, argv, &default_context)) != -1) - switch (opt) - { - case 'A': - context_length_arg (optarg, &out_after); - break; - - case 'B': - context_length_arg (optarg, &out_before); - break; - - case 'C': - /* Set output match context, but let any explicit leading or - trailing amount specified with -A or -B stand. */ - context_length_arg (optarg, &default_context); - break; - - case 'D': - if (STREQ (optarg, "read")) - devices = READ_DEVICES; - else if (STREQ (optarg, "skip")) - devices = SKIP_DEVICES; - else - error (EXIT_TROUBLE, 0, _("unknown devices method")); - break; - - case 'E': - setmatcher ("egrep"); - break; - - case 'F': - setmatcher ("fgrep"); - break; - - case 'P': - setmatcher ("perl"); - break; - - case 'G': - setmatcher ("grep"); - break; - - case 'X': /* undocumented on purpose */ - setmatcher (optarg); - break; - - case 'H': - with_filenames = 1; - no_filenames = 0; - break; - - case 'I': - binary_files = WITHOUT_MATCH_BINARY_FILES; - break; - - case 'T': - align_tabs = 1; - break; - - case 'U': -#if defined HAVE_DOS_FILE_CONTENTS - dos_use_file_type = DOS_BINARY; -#endif - break; - - case 'u': -#if defined HAVE_DOS_FILE_CONTENTS - dos_report_unix_offset = 1; -#endif - break; - - case 'V': - show_version = 1; - break; - - case 'a': - binary_files = TEXT_BINARY_FILES; - break; - - case 'b': - out_byte = 1; - break; - - case 'c': - count_matches = 1; - break; - - case 'd': - directories = XARGMATCH ("--directories", optarg, - directories_args, directories_types); - if (directories == RECURSE_DIRECTORIES) - 
last_recursive = prev_optind; - break; - - case 'e': - cc = strlen (optarg); - keys = xrealloc (keys, keycc + cc + 1); - strcpy (&keys[keycc], optarg); - keycc += cc; - keys[keycc++] = '\n'; - break; - - case 'f': - fp = STREQ (optarg, "-") ? stdin : fopen (optarg, "r"); - if (!fp) - error (EXIT_TROUBLE, errno, "%s", optarg); - for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) - ; - keys = xrealloc (keys, keyalloc); - oldcc = keycc; - while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) - { - keycc += cc; - if (keycc == keyalloc - 1) - keys = x2nrealloc (keys, &keyalloc, sizeof *keys); - } - fread_errno = errno; - if (ferror (fp)) - error (EXIT_TROUBLE, fread_errno, "%s", optarg); - if (fp != stdin) - fclose (fp); - /* Append final newline if file ended in non-newline. */ - if (oldcc != keycc && keys[keycc - 1] != '\n') - keys[keycc++] = '\n'; - break; - - case 'h': - with_filenames = 0; - no_filenames = 1; - break; - - case 'i': - case 'y': /* For old-timers . . . */ - match_icase = 1; - break; - - case 'L': - /* Like -l, except list files that don't contain matches. - Inspired by the same option in Hume's gre. */ - list_files = -1; - break; - - case 'l': - list_files = 1; - break; - - case 'm': - switch (xstrtoimax (optarg, 0, 10, &max_count, "")) - { - case LONGINT_OK: - case LONGINT_OVERFLOW: - break; - - default: - error (EXIT_TROUBLE, 0, _("invalid max count")); - } - break; - - case 'n': - out_line = 1; - break; - - case 'o': - only_matching = 1; - break; - - case 'q': - exit_on_match = 1; - exit_failure = 0; - break; - - case 'R': - fts_options = basic_fts_options | FTS_LOGICAL; - /* Fall through. 
*/ - case 'r': - directories = RECURSE_DIRECTORIES; - last_recursive = prev_optind; - break; - - case 's': - suppress_errors = 1; - break; - - case 'v': - out_invert = 1; - break; - - case 'w': - match_words = 1; - break; - - case 'x': - match_lines = 1; - break; - - case 'Z': - filename_mask = 0; - break; - - case 'z': - eolbyte = '\0'; - break; - - case BINARY_FILES_OPTION: - if (STREQ (optarg, "binary")) - binary_files = BINARY_BINARY_FILES; - else if (STREQ (optarg, "text")) - binary_files = TEXT_BINARY_FILES; - else if (STREQ (optarg, "without-match")) - binary_files = WITHOUT_MATCH_BINARY_FILES; - else - error (EXIT_TROUBLE, 0, _("unknown binary-files type")); - break; - - case COLOR_OPTION: - if (optarg) - { - if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") - || !strcasecmp (optarg, "force")) - color_option = 1; - else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") - || !strcasecmp (optarg, "none")) - color_option = 0; - else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") - || !strcasecmp (optarg, "if-tty")) - color_option = 2; - else - show_help = 1; - } - else - color_option = 2; - break; - - case EXCLUDE_OPTION: - case INCLUDE_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - add_exclude (excluded_patterns, optarg, - (EXCLUDE_WILDCARDS - | (opt == INCLUDE_OPTION ? 
EXCLUDE_INCLUDE : 0))); - break; - case EXCLUDE_FROM_OPTION: - if (!excluded_patterns) - excluded_patterns = new_exclude (); - if (add_exclude_file (add_exclude, excluded_patterns, optarg, - EXCLUDE_WILDCARDS, '\n') != 0) - { - error (EXIT_TROUBLE, errno, "%s", optarg); - } - break; - - case EXCLUDE_DIRECTORY_OPTION: - if (!excluded_directory_patterns) - excluded_directory_patterns = new_exclude (); - add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS); - break; - - case GROUP_SEPARATOR_OPTION: - group_separator = optarg; - break; - - case LINE_BUFFERED_OPTION: - line_buffered = 1; - break; - - case LABEL_OPTION: - label = optarg; - break; - - case 0: - /* long options */ - break; - - default: - usage (EXIT_TROUBLE); - break; - - } - - if (color_option == 2) - color_option = isatty (STDOUT_FILENO) && should_colorize (); - init_colorize (); - - /* POSIX says that -q overrides -l, which in turn overrides the - other output options. */ - if (exit_on_match) - list_files = 0; - if (exit_on_match | list_files) - { - count_matches = 0; - done_on_match = 1; - } - out_quiet = count_matches | done_on_match; - - if (out_after < 0) - out_after = default_context; - if (out_before < 0) - out_before = default_context; - - if (color_option) - { - /* Legacy. */ - char *userval = getenv ("GREP_COLOR"); - if (userval != NULL && *userval != '\0') - selected_match_color = context_match_color = userval; - - /* New GREP_COLORS has priority. */ - parse_grep_colors (); - } - - if (show_version) - { - version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, - (char *) NULL); - exit (EXIT_SUCCESS); - } - - if (show_help) - usage (EXIT_SUCCESS); - - struct stat tmp_stat; - if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) - out_stat = tmp_stat; - - if (keys) - { - if (keycc == 0) - { - /* No keys were specified (e.g. -f /dev/null). Match nothing. */ - out_invert ^= 1; - match_lines = match_words = 0; - } - else - /* Strip trailing newline. 
*/ - --keycc; - } - else if (optind < argc) - { - /* A copy must be made in case of an xrealloc() or free() later. */ - keycc = strlen (argv[optind]); - keys = xmemdup (argv[optind++], keycc + 1); - } - else - usage (EXIT_TROUBLE); - - /* If case-insensitive fgrep in a multibyte locale, improve - performance by using grep instead. */ - if (match_icase && compile == Fcompile && MB_CUR_MAX > 1) - { - size_t new_keycc; - char *new_keys; - fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); - free (keys); - keys = new_keys; - keycc = new_keycc; - matcher = "grep"; - compile = Gcompile; - execute = EGexecute; - } - - /* Case-insensitive matching is expensive in multibyte locales - because a few characters may change size when converted to upper - or lower case. To accommodate those, search the input one line - at a time, rather than using the much more efficient buffer search. - - Try to convert a regular expression 'foo' (ignoring case) to an - equivalent regular expression '[fF][oO][oO]' (where case matters). - Not only does this avoid the expensive requirement to read and - process a line at a time, it also allows use of the kwset engine, - a win in non-UTF-8 multibyte locales. */ - if (match_icase) - { - size_t new_keycc; - char *new_keys; - /* It is not possible with -F, not useful with -P (pcre) and there is no - point when there is no regexp. It also depends on which constructs - appear in the regexp. See trivial_case_ignore for those details. */ - if (keycc - && ! 
(compile == Fcompile || compile == Pcompile) - && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys)) - { - match_icase = 0; - free (keys); - keys = new_keys; - keycc = new_keycc; - } - } - -#if MBS_SUPPORT - if (MB_CUR_MAX > 1) - build_mbclen_cache (); -#endif - - compile (keys, keycc); - free (keys); - - if ((argc - optind > 1 && !no_filenames) || with_filenames) - out_file = 1; - -#ifdef SET_BINARY - /* Output is set to binary mode because we shouldn't convert - NL to CR-LF pairs, especially when grepping binary files. */ - if (!isatty (1)) - SET_BINARY (1); -#endif - - if (max_count == 0) - exit (EXIT_FAILURE); - - if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) - devices = READ_DEVICES; - - if (optind < argc) - { - status = 1; - do - status &= grep_command_line_arg (argv[optind]); - while (++optind < argc); - } - else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) - { - /* Grep through ".", omitting leading "./" from diagnostics. */ - filename_prefix_len = 2; - status = grep_command_line_arg ("."); - } - else - status = grep_command_line_arg ("-"); - - /* We register via atexit() to test stdout. */ - exit (errseen ? EXIT_TROUBLE : status); -} -/* vim:set shiftwidth=2: */ +const char before_options[] = +N_("PATTERN is, by default, a basic regular expression (BRE).\n"); +const char after_options[] = +N_("'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ +Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"); diff --git a/src/grep.h b/src/grep.h index 4935872..4d642a1 100644 --- a/src/grep.h +++ b/src/grep.h @@ -20,6 +20,23 @@ #ifndef GREP_GREP_H #define GREP_GREP_H 1 +/* Function pointer types. */ +typedef void (*compile_fp_t) (char const *, size_t); +typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); + +/* grep.c expects the matchers vector to be terminated by an entry + with a NULL name, and to contain at least one entry. 
*/ +struct matcher +{ + const char *name; + compile_fp_t compile; + execute_fp_t execute; +}; +extern const struct matcher matchers[]; + +extern const char before_options[]; +extern const char after_options[]; + /* The following flags are exported from grep for the matchers to look at. */ extern int match_icase; /* -i */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..e429380 --- /dev/null +++ b/src/main.c @@ -0,0 +1,2478 @@ +/* grep.c - main driver file for grep. + Copyright (C) 1992, 1997-2002, 2004-2014 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written July 1992 by Mike Haertel. 
*/ + +#include <config.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "mbsupport.h" +#include <wchar.h> +#include <wctype.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdio.h> +#include "system.h" + +#include "argmatch.h" +#include "c-ctype.h" +#include "closeout.h" +#include "colorize.h" +#include "dfa.h" +#include "error.h" +#include "exclude.h" +#include "exitfail.h" +#include "fcntl-safer.h" +#include "fts_.h" +#include "getopt.h" +#include "grep.h" +#include "intprops.h" +#include "progname.h" +#include "propername.h" +#include "quote.h" +#include "safe-read.h" +#include "search.h" +#include "version-etc.h" +#include "xalloc.h" +#include "xstrtol.h" + +#define SEP_CHAR_SELECTED ':' +#define SEP_CHAR_REJECTED '-' +#define SEP_STR_GROUP "--" + +#define AUTHORS \ + proper_name ("Mike Haertel"), \ + _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>") + +/* When stdout is connected to a regular file, save its stat + information here, so that we can automatically skip it, thus + avoiding a potential (racy) infinite loop. */ +static struct stat out_stat; + +/* if non-zero, display usage information and exit */ +static int show_help; + +/* If non-zero, print the version on standard output and exit. */ +static int show_version; + +/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */ +static int suppress_errors; + +/* If nonzero, use color markers. */ +static int color_option; + +/* If nonzero, show only the part of a line matching the expression. */ +static int only_matching; + +/* If nonzero, make sure first content char in a line is on a tab stop. */ +static int align_tabs; + +/* The group separator used when context is requested. */ +static const char *group_separator = SEP_STR_GROUP; + +/* The context and logic for choosing default --color screen attributes + (foreground and background colors, etc.) are the following. 
+ -- There are eight basic colors available, each with its own + nominal luminosity to the human eye and foreground/background + codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], + magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], + yellow [89 %, 33/43], and white [100 %, 37/47]). + -- Sometimes, white as a background is actually implemented using + a shade of light gray, so that a foreground white can be visible + on top of it (but most often not). + -- Sometimes, black as a foreground is actually implemented using + a shade of dark gray, so that it can be visible on top of a + background black (but most often not). + -- Sometimes, more colors are available, as extensions. + -- Other attributes can be selected/deselected (bold [1/22], + underline [4/24], standout/inverse [7/27], blink [5/25], and + invisible/hidden [8/28]). They are sometimes implemented by + using colors instead of what their names imply; e.g., bold is + often achieved by using brighter colors. In practice, only bold + is really available to us, underline sometimes being mapped by + the terminal to some strange color choice, and standout best + being left for use by downstream programs such as less(1). + -- We cannot assume that any of the extensions or special features + are available for the purpose of choosing defaults for everyone. + -- The most prevalent default terminal backgrounds are pure black + and pure white, and are not necessarily the same shades of + those as if they were selected explicitly with SGR sequences. + Some terminals use dark or light pictures as default background, + but those are covered over by an explicit selection of background + color with an SGR sequence; their users will appreciate their + background pictures not being covered like this, if possible. + -- Some uses of color attributes are to make some output items + more understated (e.g., context lines); this cannot be achieved + by changing the background color.
+ -- For these reasons, the grep color defaults should strive not + to change the background color from its default, unless it's + for a short item that should be highlighted, not understated. + -- The grep foreground color defaults (without an explicitly set + background) should provide enough contrast to be readable on any + terminal with either a black (dark) or white (light) background. + This only leaves red, magenta, green, and cyan (and their bold + counterparts) and possibly bold blue. */ +/* The color strings used for matched text. + The user can overwrite them using the deprecated + environment variable GREP_COLOR or the new GREP_COLORS. */ +static const char *selected_match_color = "01;31"; /* bold red */ +static const char *context_match_color = "01;31"; /* bold red */ + +/* Other colors. Defaults look damn good. */ +static const char *filename_color = "35"; /* magenta */ +static const char *line_num_color = "32"; /* green */ +static const char *byte_num_color = "32"; /* green */ +static const char *sep_color = "36"; /* cyan */ +static const char *selected_line_color = ""; /* default color pair */ +static const char *context_line_color = ""; /* default color pair */ + +/* Select Graphic Rendition (SGR, "\33[...m") strings. */ +/* Also Erase in Line (EL) to Right ("\33[K") by default. */ +/* Why have EL to Right after SGR? + -- The behavior of line-wrapping when at the bottom of the + terminal screen and at the end of the current line is often + such that a new line is introduced, entirely cleared with + the current background color which may be different from the + default one (see the boolean back_color_erase terminfo(5) + capability), thus scrolling the display by one line. + The end of this new line will stay in this background color + even after reverting to the default background color with + "\33[m", unless it is explicitly cleared again with "\33[K" + (which is the behavior the user would instinctively expect + from the whole thing).
There may be some unavoidable + background-color flicker at the end of this new line because + of this (when timing with the monitor's redraw is just right). + -- The behavior of HT (tab, "\t") is usually the same as that of + Cursor Forward Tabulation (CHT) with a default parameter + of 1 ("\33[I"), i.e., it performs pure movement to the next + tab stop, without any clearing of either content or screen + attributes (including background color); try + printf 'asdfqwerzxcv\rASDF\tZXCV\n' + in a bash(1) shell to demonstrate this. This is not what the + user would instinctively expect of HT (but is ok for CHT). + The instinctive behavior would include clearing the terminal + cells that are skipped over by HT with blank cells in the + current screen attributes, including background color; + the boolean dest_tabs_magic_smso terminfo(5) capability + indicates this saner behavior for HT, but only some rare + terminals have it (although it also indicates a special + glitch with standout mode in the Teleray terminal for which + it was initially introduced). The remedy is to add "\33[K" + after each SGR sequence, be it START (to fix the behavior + of any HT after that before another SGR) or END (to fix the + behavior of an HT in default background color that would + follow a line-wrapping at the bottom of the screen in another + background color, and to complement doing it after START). + Piping grep's output through a pager such as less(1) avoids + any HT problems since the pager performs tab expansion. + + Generic disadvantages of this remedy are: + -- Some very rare terminals might support SGR but not EL (nobody + will use "grep --color" on a terminal that does not support + SGR in the first place). + -- Having these extra control sequences might somewhat complicate + the task of any program trying to parse "grep --color" + output in order to extract structuring information from it.
+ A specific disadvantage to doing it after SGR START is: + -- Even more possible background color flicker (when timing + with the monitor's redraw is just right), even when not at the + bottom of the screen. + There are no additional disadvantages specific to doing it after + SGR END. + + It would be impractical for GNU grep to become a full-fledged + terminal program linked against ncurses or the like, so it will + not detect terminfo(5) capabilities. */ +static const char *sgr_start = "\33[%sm\33[K"; +static const char *sgr_end = "\33[m\33[K"; + +/* SGR utility functions. */ +static void +pr_sgr_start (char const *s) +{ + if (*s) + print_start_colorize (sgr_start, s); +} +static void +pr_sgr_end (char const *s) +{ + if (*s) + print_end_colorize (sgr_end); +} +static void +pr_sgr_start_if (char const *s) +{ + if (color_option) + pr_sgr_start (s); +} +static void +pr_sgr_end_if (char const *s) +{ + if (color_option) + pr_sgr_end (s); +} + +struct color_cap + { + const char *name; + const char **var; + void (*fct) (void); + }; + +static void +color_cap_mt_fct (void) +{ + /* Our caller just set selected_match_color. */ + context_match_color = selected_match_color; +} + +static void +color_cap_rv_fct (void) +{ + /* By this point, it was 1 (or already -1). */ + color_option = -1; /* That's still != 0. */ +} + +static void +color_cap_ne_fct (void) +{ + sgr_start = "\33[%sm"; + sgr_end = "\33[m"; +} + +/* For GREP_COLORS. 
*/ +static const struct color_cap color_dict[] = + { + { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */ + { "ms", &selected_match_color, NULL }, /* selected matched text */ + { "mc", &context_match_color, NULL }, /* context matched text */ + { "fn", &filename_color, NULL }, /* filename */ + { "ln", &line_num_color, NULL }, /* line number */ + { "bn", &byte_num_color, NULL }, /* byte (sic) offset */ + { "se", &sep_color, NULL }, /* separator */ + { "sl", &selected_line_color, NULL }, /* selected lines */ + { "cx", &context_line_color, NULL }, /* context lines */ + { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */ + { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */ + { NULL, NULL, NULL } + }; + +static struct exclude *excluded_patterns; +static struct exclude *excluded_directory_patterns; +/* Short options. */ +static char const short_options[] = +"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; + +/* Non-boolean long options that have no corresponding short equivalents. */ +enum +{ + BINARY_FILES_OPTION = CHAR_MAX + 1, + COLOR_OPTION, + INCLUDE_OPTION, + EXCLUDE_OPTION, + EXCLUDE_FROM_OPTION, + LINE_BUFFERED_OPTION, + LABEL_OPTION, + EXCLUDE_DIRECTORY_OPTION, + GROUP_SEPARATOR_OPTION +}; + +/* Long options equivalences. 
*/ +static struct option const long_options[] = +{ + {"basic-regexp", no_argument, NULL, 'G'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-regexp", no_argument, NULL, 'F'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"perl-regexp", no_argument, NULL, 'P'}, + {"after-context", required_argument, NULL, 'A'}, + {"before-context", required_argument, NULL, 'B'}, + {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, + {"byte-offset", no_argument, NULL, 'b'}, + {"context", required_argument, NULL, 'C'}, + {"color", optional_argument, NULL, COLOR_OPTION}, + {"colour", optional_argument, NULL, COLOR_OPTION}, + {"count", no_argument, NULL, 'c'}, + {"devices", required_argument, NULL, 'D'}, + {"directories", required_argument, NULL, 'd'}, + {"exclude", required_argument, NULL, EXCLUDE_OPTION}, + {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, + {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION}, + {"file", required_argument, NULL, 'f'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, + {"help", no_argument, &show_help, 1}, + {"include", required_argument, NULL, INCLUDE_OPTION}, + {"ignore-case", no_argument, NULL, 'i'}, + {"initial-tab", no_argument, NULL, 'T'}, + {"label", required_argument, NULL, LABEL_OPTION}, + {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, + {"line-number", no_argument, NULL, 'n'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"max-count", required_argument, NULL, 'm'}, + + {"no-filename", no_argument, NULL, 'h'}, + {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION}, + {"no-messages", no_argument, NULL, 's'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, + {"only-matching", no_argument, NULL, 'o'}, + {"quiet", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"dereference-recursive", 
no_argument, NULL, 'R'}, + {"regexp", required_argument, NULL, 'e'}, + {"invert-match", no_argument, NULL, 'v'}, + {"silent", no_argument, NULL, 'q'}, + {"text", no_argument, NULL, 'a'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"version", no_argument, NULL, 'V'}, + {"with-filename", no_argument, NULL, 'H'}, + {"word-regexp", no_argument, NULL, 'w'}, + {0, 0, 0, 0} +}; + +/* Define flags declared in grep.h. */ +int match_icase; +int match_words; +int match_lines; +unsigned char eolbyte; + +static char const *matcher; + +/* For error messages. */ +/* The input file name, or (if standard input) "-" or a --label argument. */ +static char const *filename; +static size_t filename_prefix_len; +static int errseen; +static int write_error_seen; + +enum directories_type + { + READ_DIRECTORIES = 2, + RECURSE_DIRECTORIES, + SKIP_DIRECTORIES + }; + +/* How to handle directories. */ +static char const *const directories_args[] = +{ + "read", "recurse", "skip", NULL +}; +static enum directories_type const directories_types[] = +{ + READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES +}; +ARGMATCH_VERIFY (directories_args, directories_types); + +static enum directories_type directories = READ_DIRECTORIES; + +enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK }; +static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL; + +/* How to handle devices. */ +static enum + { + READ_COMMAND_LINE_DEVICES, + READ_DEVICES, + SKIP_DEVICES + } devices = READ_COMMAND_LINE_DEVICES; + +static int grepfile (int, char const *, int, int); +static int grepdesc (int, int); +#if defined HAVE_DOS_FILE_CONTENTS +static int undossify_input (char *, size_t); +#endif + +static int +is_device_mode (mode_t m) +{ + return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); +} + +/* Return nonzero if ST->st_size is defined. Assume the file is not a + symbolic link. 
*/ +static int +usable_st_size (struct stat const *st) +{ + return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); +} + +/* Functions we'll use to search. */ +static compile_fp_t compile; +static execute_fp_t execute; + +/* Like error, but suppress the diagnostic if requested. */ +static void +suppressible_error (char const *mesg, int errnum) +{ + if (! suppress_errors) + error (0, errnum, "%s", mesg); + errseen = 1; +} + +/* If there has already been a write error, don't bother closing + standard output, as that might elicit a duplicate diagnostic. */ +static void +clean_up_stdout (void) +{ + if (! write_error_seen) + close_stdout (); +} + +/* Return 1 if a file is known to be binary for the purpose of 'grep'. + BUF, of size BUFSIZE, is the initial buffer read from the file with + descriptor FD and status ST. */ +static int +file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) +{ + #ifndef SEEK_HOLE + enum { SEEK_HOLE = SEEK_END }; + #endif + + /* If -z, test only whether the initial buffer contains '\200'; + knowing about holes won't help. */ + if (! eolbyte) + return memchr (buf, '\200', bufsize) != 0; + + /* If the initial buffer contains a null byte, guess that the file + is binary. */ + if (memchr (buf, '\0', bufsize)) + return 1; + + /* If the file has holes, it must contain a null byte somewhere. */ + if (SEEK_HOLE != SEEK_END && usable_st_size (st)) + { + off_t cur = bufsize; + if (O_BINARY || fd == STDIN_FILENO) + { + cur = lseek (fd, 0, SEEK_CUR); + if (cur < 0) + return 0; + } + + /* Look for a hole after the current location. */ + off_t hole_start = lseek (fd, cur, SEEK_HOLE); + if (0 <= hole_start) + { + if (lseek (fd, cur, SEEK_SET) < 0) + suppressible_error (filename, errno); + if (hole_start < st->st_size) + return 1; + } + } + + /* Guess that the file does not contain binary data. */ + return 0; +} + +/* Convert STR to a nonnegative integer, storing the result in *OUT. 
+ STR must be a valid context length argument; report an error if it + isn't. Silently ceiling *OUT at the maximum value, as that is + practically equivalent to infinity for grep's purposes. */ +static void +context_length_arg (char const *str, intmax_t *out) +{ + switch (xstrtoimax (str, 0, 10, out, "")) + { + case LONGINT_OK: + case LONGINT_OVERFLOW: + if (0 <= *out) + break; + /* Fall through. */ + default: + error (EXIT_TROUBLE, 0, "%s: %s", str, + _("invalid context length argument")); + } +} + +/* Return nonzero if the file with NAME should be skipped. + If COMMAND_LINE is nonzero, it is a command-line argument. + If IS_DIR is nonzero, it is a directory. */ +static int +skipped_file (char const *name, int command_line, int is_dir) +{ + return (is_dir + ? (directories == SKIP_DIRECTORIES + || (! (command_line && filename_prefix_len != 0) + && excluded_directory_patterns + && excluded_file_name (excluded_directory_patterns, name))) + : (excluded_patterns + && excluded_file_name (excluded_patterns, name))); +} + +/* Hairy buffering mechanism for grep. The intent is to keep + all reads aligned on a page boundary and multiples of the + page size, unless a read yields a partial page. */ + +static char *buffer; /* Base of buffer. */ +static size_t bufalloc; /* Allocated buffer size, counting slop. */ +#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ +static int bufdesc; /* File descriptor. */ +static char *bufbeg; /* Beginning of user-visible stuff. */ +static char *buflim; /* Limit of user-visible stuff. */ +static size_t pagesize; /* alignment of memory pages */ +static off_t bufoffset; /* Read offset; defined on regular files. */ +static off_t after_last_match; /* Pointer after last matching line that + would have been output if we were + outputting characters. */ + +/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be + an integer or a pointer. Both args must be free of side effects. 
*/ +#define ALIGN_TO(val, alignment) \ + ((size_t) (val) % (alignment) == 0 \ + ? (val) \ + : (val) + ((alignment) - (size_t) (val) % (alignment))) + +/* Reset the buffer for a new file, returning zero if we should skip it. + Initialize on the first time through. */ +static int +reset (int fd, struct stat const *st) +{ + if (! pagesize) + { + pagesize = getpagesize (); + if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) + abort (); + bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; + buffer = xmalloc (bufalloc); + } + + bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); + bufbeg[-1] = eolbyte; + bufdesc = fd; + + if (S_ISREG (st->st_mode)) + { + if (fd != STDIN_FILENO) + bufoffset = 0; + else + { + bufoffset = lseek (fd, 0, SEEK_CUR); + if (bufoffset < 0) + { + suppressible_error (_("lseek failed"), errno); + return 0; + } + } + } + return 1; +} + +/* Read new stuff into the buffer, saving the specified + amount of old stuff. When we're done, 'bufbeg' points + to the beginning of the buffer contents, and 'buflim' + points just after the end. Return zero if there's an error. */ +static int +fillbuf (size_t save, struct stat const *st) +{ + ssize_t fillsize; + int cc = 1; + char *readbuf; + size_t readsize; + + /* Offset from start of buffer to start of old stuff + that we want to save. */ + size_t saved_offset = buflim - save - buffer; + + if (pagesize <= buffer + bufalloc - buflim) + { + readbuf = buflim; + bufbeg = buflim - save; + } + else + { + size_t minsize = save + pagesize; + size_t newsize; + size_t newalloc; + char *newbuf; + + /* Grow newsize until it is at least as great as minsize. */ + for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2) + if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2) + xalloc_die (); + + /* Try not to allocate more memory than the file size indicates, + as that might cause unnecessary memory exhaustion if the file + is large. 
However, do not use the original file size as a + heuristic if we've already read past the file end, as most + likely the file is growing. */ + if (usable_st_size (st)) + { + off_t to_be_read = st->st_size - bufoffset; + off_t maxsize_off = save + to_be_read; + if (0 <= to_be_read && to_be_read <= maxsize_off + && maxsize_off == (size_t) maxsize_off + && minsize <= (size_t) maxsize_off + && (size_t) maxsize_off < newsize) + newsize = maxsize_off; + } + + /* Add enough room so that the buffer is aligned and has room + for byte sentinels fore and aft. */ + newalloc = newsize + pagesize + 1; + + newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; + readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); + bufbeg = readbuf - save; + memmove (bufbeg, buffer + saved_offset, save); + bufbeg[-1] = eolbyte; + if (newbuf != buffer) + { + free (buffer); + buffer = newbuf; + } + } + + readsize = buffer + bufalloc - readbuf; + readsize -= readsize % pagesize; + + fillsize = safe_read (bufdesc, readbuf, readsize); + if (fillsize < 0) + fillsize = cc = 0; + bufoffset += fillsize; +#if defined HAVE_DOS_FILE_CONTENTS + if (fillsize) + fillsize = undossify_input (readbuf, fillsize); +#endif + buflim = readbuf + fillsize; + return cc; +} + +/* Flags controlling the style of output. */ +static enum +{ + BINARY_BINARY_FILES, + TEXT_BINARY_FILES, + WITHOUT_MATCH_BINARY_FILES +} binary_files; /* How to handle binary files. */ + +static int filename_mask; /* If zero, output nulls after filenames. */ +static int out_quiet; /* Suppress all normal output. */ +static int out_invert; /* Print nonmatching stuff. */ +static int out_file; /* Print filenames. */ +static int out_line; /* Print line numbers. */ +static int out_byte; /* Print byte offsets. */ +static intmax_t out_before; /* Lines of leading context. */ +static intmax_t out_after; /* Lines of trailing context. */ +static int count_matches; /* Count matching lines. */ +static int list_files; /* List matching files. 
*/ +static int no_filenames; /* Suppress file names. */ +static intmax_t max_count; /* Stop after outputting this many + lines from an input file. */ +static int line_buffered; /* If nonzero, use line buffering, i.e. + fflush every line out. */ +static char *label = NULL; /* Fake filename for stdin */ + + +/* Internal variables to keep track of byte count, context, etc. */ +static uintmax_t totalcc; /* Total character count before bufbeg. */ +static char const *lastnl; /* Pointer after last newline counted. */ +static char const *lastout; /* Pointer after last character output; + NULL if no character has been output + or if it's conceptually before bufbeg. */ +static uintmax_t totalnl; /* Total newline count before lastnl. */ +static intmax_t outleft; /* Maximum number of lines to be output. */ +static intmax_t pending; /* Pending lines of output. + Always kept 0 if out_quiet is true. */ +static int done_on_match; /* Stop scanning file on first match. */ +static int exit_on_match; /* Exit on first match. */ + +#if defined HAVE_DOS_FILE_CONTENTS +# include "dosbuf.c" +#endif + +/* Add two numbers that count input bytes or lines, and report an + error if the addition overflows. */ +static uintmax_t +add_count (uintmax_t a, uintmax_t b) +{ + uintmax_t sum = a + b; + if (sum < a) + error (EXIT_TROUBLE, 0, _("input is too large to count")); + return sum; +} +/* Count the EOLBYTE bytes between LASTNL and LIM, add that count to TOTALNL, and set LASTNL to LIM. */ +static void +nlscan (char const *lim) +{ + size_t newlines = 0; + char const *beg; + for (beg = lastnl; beg < lim; beg++) + { + beg = memchr (beg, eolbyte, lim - beg); + if (!beg) + break; + newlines++; + } + totalnl = add_count (totalnl, newlines); + lastnl = lim; +} + +/* Print the current filename. */ +static void +print_filename (void) +{ + pr_sgr_start_if (filename_color); + fputs (filename, stdout); + pr_sgr_end_if (filename_color); +} + +/* Print a character separator. 
*/ +static void +print_sep (char sep) +{ + pr_sgr_start_if (sep_color); + fputc (sep, stdout); + pr_sgr_end_if (sep_color); +} + +/* Print a line number or a byte offset. */ +static void +print_offset (uintmax_t pos, int min_width, const char *color) +{ + /* Do not rely on printf to print pos, since uintmax_t may be longer + than long, and long long is not portable. */ + + char buf[sizeof pos * CHAR_BIT]; + char *p = buf + sizeof buf; + /* Generate the decimal digits from least to most significant, filling BUF backwards. */ + do + { + *--p = '0' + pos % 10; + --min_width; + } + while ((pos /= 10) != 0); + + /* Do this to maximize the probability of alignment across lines. */ + if (align_tabs) + while (--min_width >= 0) + *--p = ' '; + + pr_sgr_start_if (color); + fwrite (p, 1, buf + sizeof buf - p, stdout); + pr_sgr_end_if (color); +} + +/* Print a whole line head (filename, line, byte). */ +static void +print_line_head (char const *beg, char const *lim, int sep) +{ + int pending_sep = 0; + + if (out_file) + { + print_filename (); + if (filename_mask) + pending_sep = 1; + else + fputc (0, stdout); + } + + if (out_line) + { + if (lastnl < lim) + { + nlscan (beg); + totalnl = add_count (totalnl, 1); + lastnl = lim; + } + if (pending_sep) + print_sep (sep); + print_offset (totalnl, 4, line_num_color); + pending_sep = 1; + } + + if (out_byte) + { + uintmax_t pos = add_count (totalcc, beg - bufbeg); +#if defined HAVE_DOS_FILE_CONTENTS + pos = dossified_pos (pos); +#endif + if (pending_sep) + print_sep (sep); + print_offset (pos, 6, byte_num_color); + pending_sep = 1; + } + + if (pending_sep) + { + /* This assumes sep is one column wide. + Try doing this any other way with Unicode + (and its combining and wide characters) + filenames and you're wasting your efforts. 
*/ + if (align_tabs) + fputs ("\t\b", stdout); + + print_sep (sep); + } +} +/* Print the part of the matching line BEG..LIM up to the end of its last non-empty match, bracketing each match with MATCH_COLOR; when only_matching is set, print each match on a line of its own, preceded by a line head. Return a pointer to the first byte not yet printed (LIM when only_matching is set). */ +static const char * +print_line_middle (const char *beg, const char *lim, + const char *line_color, const char *match_color) +{ + size_t match_size; + size_t match_offset; + const char *cur = beg; + const char *mid = NULL; + + while (cur < lim + && ((match_offset = execute (beg, lim - beg, &match_size, + beg + (cur - beg))) != (size_t) -1)) + { + char const *b = beg + match_offset; + + /* Avoid matching the empty line at the end of the buffer. */ + if (b == lim) + break; + + /* Avoid hanging on grep --color "" foo */ + if (match_size == 0) + { + /* Make minimal progress; there may be further non-empty matches. */ + /* XXX - Could really advance by one whole multi-octet character. */ + match_size = 1; + if (!mid) + mid = cur; + } + else + { + /* This function is called on a matching line only, + but is it selected or rejected/context? */ + if (only_matching) + print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED + : SEP_CHAR_SELECTED)); + else + { + pr_sgr_start (line_color); + if (mid) + { + cur = mid; + mid = NULL; + } + fwrite (cur, sizeof (char), b - cur, stdout); + } + + pr_sgr_start_if (match_color); + fwrite (b, sizeof (char), match_size, stdout); + pr_sgr_end_if (match_color); + if (only_matching) + fputs ("\n", stdout); + } + cur = b + match_size; + } + + if (only_matching) + cur = lim; + else if (mid) + cur = mid; + + return cur; +} +/* Print the text from BEG up to LIM, excluding a trailing EOLBYTE and a '\r' that then ends the text, bracketed by pr_sgr_start and pr_sgr_end with LINE_COLOR. Return a pointer just past the text printed, or BEG if that text is empty. */ +static const char * +print_line_tail (const char *beg, const char *lim, const char *line_color) +{ + size_t eol_size; + size_t tail_size; + + eol_size = (lim > beg && lim[-1] == eolbyte); + eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); + tail_size = lim - eol_size - beg; + + if (tail_size > 0) + { + pr_sgr_start (line_color); + fwrite (beg, 1, tail_size, stdout); + beg += tail_size; + pr_sgr_end (line_color); + } + + return beg; +} +/* Print the line from BEG to LIM. SEP is SEP_CHAR_SELECTED or SEP_CHAR_REJECTED; it picks the colors and is the separator passed to print_line_head. Afterwards set LASTOUT to LIM, flush stdout if line_buffered is set, and report a stdout error through error (EXIT_TROUBLE, ...). */ +static void +prline (char const *beg, char const *lim, int sep) +{ + int matching; + 
const char *line_color; + const char *match_color; + + if (!only_matching) + print_line_head (beg, lim, sep); + + matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert; + + if (color_option) + { + line_color = (((sep == SEP_CHAR_SELECTED) + ^ (out_invert && (color_option < 0))) + ? selected_line_color : context_line_color); + match_color = (sep == SEP_CHAR_SELECTED + ? selected_match_color : context_match_color); + } + else + line_color = match_color = NULL; /* Shouldn't be used. */ + + if ((only_matching && matching) + || (color_option && (*line_color || *match_color))) + { + /* We already know that non-matching lines have no match (to colorize). */ + if (matching && (only_matching || *match_color)) + beg = print_line_middle (beg, lim, line_color, match_color); + + if (!only_matching && *line_color) + { + /* This code is exercised at least when grep is invoked like this: + echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */ + beg = print_line_tail (beg, lim, line_color); + } + } + + if (!only_matching && lim > beg) + fwrite (beg, 1, lim - beg, stdout); + + if (ferror (stdout)) + { + write_error_seen = 1; + error (EXIT_TROUBLE, 0, _("write error")); + } + + lastout = lim; + + if (line_buffered) + fflush (stdout); +} + +/* Print pending lines of trailing context prior to LIM. Trailing context ends + at the next matching line when OUTLEFT is 0. */ +static void +prpending (char const *lim) +{ + if (!lastout) + lastout = bufbeg; + while (pending > 0 && lastout < lim) + { + char const *nl = memchr (lastout, eolbyte, lim - lastout); + size_t match_size; + --pending; + if (outleft + || ((execute (lastout, nl + 1 - lastout, + &match_size, NULL) == (size_t) -1) + == !out_invert)) + prline (lastout, nl + 1, SEP_CHAR_REJECTED); + else + pending = 0; + } +} + +/* Print the lines between BEG and LIM. Deal with context crap. + If NLINESP is non-null, store a count of lines between BEG and LIM. 
*/ +static void +prtext (char const *beg, char const *lim, intmax_t *nlinesp) +{ + static int used; /* avoid printing SEP_STR_GROUP before any output */ + char const *bp, *p; + char eol = eolbyte; + intmax_t i, n; + + if (!out_quiet && pending > 0) + prpending (beg); + + p = beg; + + if (!out_quiet) + { + /* Deal with leading context crap. */ + + bp = lastout ? lastout : bufbeg; + for (i = 0; i < out_before; ++i) + if (p > bp) + do + --p; + while (p[-1] != eol); + + /* We print the SEP_STR_GROUP separator only if our output is + discontiguous from the last output in the file. */ + if ((out_before || out_after) && used && p != lastout && group_separator) + { + pr_sgr_start_if (sep_color); + fputs (group_separator, stdout); + pr_sgr_end_if (sep_color); + fputc ('\n', stdout); + } + + while (p < beg) + { + char const *nl = memchr (p, eol, beg - p); + nl++; + prline (p, nl, SEP_CHAR_REJECTED); + p = nl; + } + } + + if (nlinesp) + { + /* Caller wants a line count. */ + for (n = 0; p < lim && n < outleft; n++) + { + char const *nl = memchr (p, eol, lim - p); + nl++; + if (!out_quiet) + prline (p, nl, SEP_CHAR_SELECTED); + p = nl; + } + *nlinesp = n; + + /* relying on it that this function is never called when outleft = 0. */ + after_last_match = bufoffset - (buflim - p); + } + else if (!out_quiet) + prline (beg, lim, SEP_CHAR_SELECTED); + + pending = out_quiet ? 0 : out_after; + used = 1; +} + +/* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there + is no match, return (size_t) -1. Otherwise, set *MATCH_SIZE to the + length of the match and return the offset of the start of the match. */ +static size_t +do_execute (char const *buf, size_t size, size_t *match_size, + char const *start_ptr) +{ + size_t result; + const char *line_next; + + /* With the current implementation, using --ignore-case with a multi-byte + character set is very inefficient when applied to a large buffer + containing many matches. 
We can avoid much of the wasted effort + by matching line-by-line. + + FIXME: this is just an ugly workaround, and it doesn't really + belong here. Also, PCRE is always using this same per-line + matching algorithm. Either we fix -i, or we should refactor + this code---for example, we could add another function pointer + to struct matcher to split the buffer passed to execute. It would + perform the memchr if line-by-line matching is necessary, or just + return buf + size otherwise. */ + if (MB_CUR_MAX == 1 || !match_icase + || ! (matcher + && (STREQ (matcher, "fgrep") || STREQ (matcher, "perl")))) + return execute (buf, size, match_size, start_ptr); + + for (line_next = buf; line_next < buf + size; ) + { + const char *line_buf = line_next; + const char *line_end = memchr (line_buf, eolbyte, + (buf + size) - line_buf); + if (line_end == NULL) + line_next = line_end = buf + size; + else + line_next = line_end + 1; + + if (start_ptr && start_ptr >= line_end) + continue; + + result = execute (line_buf, line_next - line_buf, match_size, start_ptr); + if (result != (size_t) -1) + return (line_buf - buf) + result; + } + + return (size_t) -1; +} + +/* Scan the specified portion of the buffer, matching lines (or + between matching lines if OUT_INVERT is true). Return a count of + lines printed. */ +static intmax_t +grepbuf (char const *beg, char const *lim) +{ + intmax_t nlines, n; + char const *p; + size_t match_offset; + size_t match_size; + + nlines = 0; + p = beg; + while ((match_offset = do_execute (p, lim - p, &match_size, + NULL)) != (size_t) -1) + { + char const *b = p + match_offset; + char const *endp = b + match_size; + /* Avoid matching the empty line at the end of the buffer. 
*/ + if (b == lim) + break; + if (!out_invert) + { + prtext (b, endp, NULL); + nlines++; + outleft--; + if (!outleft || done_on_match) + { + if (exit_on_match) + exit (EXIT_SUCCESS); + after_last_match = bufoffset - (buflim - endp); + return nlines; + } + } + else if (p < b) + { + prtext (p, b, &n); + nlines += n; + outleft -= n; + if (!outleft) + return nlines; + } + p = endp; + } + if (out_invert && p < lim) + { + prtext (p, lim, &n); + nlines += n; + outleft -= n; + } + return nlines; +} + +/* Search a given file. Normally, return a count of lines printed; + but if the file is a directory and we search it recursively, then + return -2 if there was a match, and -1 otherwise. */ +static intmax_t +grep (int fd, struct stat const *st) +{ + intmax_t nlines, i; + int not_text; + size_t residue, save; + char oldc; + char *beg; + char *lim; + char eol = eolbyte; + + if (! reset (fd, st)) + return 0; + + totalcc = 0; + lastout = 0; + totalnl = 0; + outleft = max_count; + after_last_match = 0; + pending = 0; + + nlines = 0; + residue = 0; + save = 0; + + if (! fillbuf (save, st)) + { + suppressible_error (filename, errno); + return 0; + } + + not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet) + || binary_files == WITHOUT_MATCH_BINARY_FILES) + && file_is_binary (bufbeg, buflim - bufbeg, fd, st)); + if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES) + return 0; + done_on_match += not_text; + out_quiet += not_text; + + for (;;) + { + lastnl = bufbeg; + if (lastout) + lastout = bufbeg; + + beg = bufbeg + save; + + /* no more data to scan (eof) except for maybe a residue -> break */ + if (beg == buflim) + break; + + /* Determine new residue (the length of an incomplete line at the end of + the buffer, 0 means there is no incomplete last line). */ + oldc = beg[-1]; + beg[-1] = eol; + /* FIXME: use rawmemrchr if/when it exists, since we have ensured + that this use of memrchr is guaranteed never to return NULL. 
*/ + lim = memrchr (beg - 1, eol, buflim - beg + 1); + ++lim; + beg[-1] = oldc; + if (lim == beg) + lim = beg - residue; + beg -= residue; + residue = buflim - lim; + + if (beg < lim) + { + if (outleft) + nlines += grepbuf (beg, lim); + if (pending) + prpending (lim); + if ((!outleft && !pending) + || (nlines && done_on_match && !out_invert)) + goto finish_grep; + } + + /* The last OUT_BEFORE lines at the end of the buffer will be needed as + leading context if there is a matching line at the begin of the + next data. Make beg point to their begin. */ + i = 0; + beg = lim; + while (i < out_before && beg > bufbeg && beg != lastout) + { + ++i; + do + --beg; + while (beg[-1] != eol); + } + + /* detect if leading context is discontinuous from last printed line. */ + if (beg != lastout) + lastout = 0; + + /* Handle some details and read more data to scan. */ + save = residue + lim - beg; + if (out_byte) + totalcc = add_count (totalcc, buflim - bufbeg - save); + if (out_line) + nlscan (beg); + if (! 
fillbuf (save, st)) + { + suppressible_error (filename, errno); + goto finish_grep; + } + } + if (residue) + { + *buflim++ = eol; + if (outleft) + nlines += grepbuf (bufbeg + save - residue, buflim); + if (pending) + prpending (buflim); + } + + finish_grep: + done_on_match -= not_text; + out_quiet -= not_text; + if ((not_text & ~out_quiet) && nlines != 0) + printf (_("Binary file %s matches\n"), filename); + return nlines; +} +/* Handle one entry ENT returned by fts_read on FTS. COMMAND_LINE is cleared here for entries that are not at FTS_ROOTLEVEL. Return 1 for entries that are not searched; otherwise search the entry with grepfile and return its status. */ +static int +grepdirent (FTS *fts, FTSENT *ent, int command_line) +{ + int follow, dirdesc; + struct stat *st = ent->fts_statp; + command_line &= ent->fts_level == FTS_ROOTLEVEL; + + if (ent->fts_info == FTS_DP) + { + if (directories == RECURSE_DIRECTORIES && command_line) + out_file &= ~ (2 * !no_filenames); + return 1; + } + + if (skipped_file (ent->fts_name, command_line, + (ent->fts_info == FTS_D || ent->fts_info == FTS_DC + || ent->fts_info == FTS_DNR))) + { + fts_set (fts, ent, FTS_SKIP); + return 1; + } + + filename = ent->fts_path + filename_prefix_len; + follow = (fts->fts_options & FTS_LOGICAL + || (fts->fts_options & FTS_COMFOLLOW && command_line)); + + switch (ent->fts_info) + { + case FTS_D: + if (directories == RECURSE_DIRECTORIES) + { + out_file |= 2 * !no_filenames; + return 1; + } + fts_set (fts, ent, FTS_SKIP); + break; + + case FTS_DC: + if (!suppress_errors) + error (0, 0, _("warning: %s: %s"), filename, + _("recursive directory loop")); + return 1; + + case FTS_DNR: + case FTS_ERR: + case FTS_NS: + suppressible_error (filename, ent->fts_errno); + return 1; + + case FTS_DEFAULT: + case FTS_NSOK: + if (devices == SKIP_DEVICES + || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) + { + struct stat st1; + if (! st->st_mode) + { + /* The file type is not already known. Get the file status + before opening, since opening might have side effects + on a device. */ + int flag = follow ? 
0 : AT_SYMLINK_NOFOLLOW; + if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) + { + suppressible_error (filename, errno); + return 1; + } + st = &st1; + } + if (is_device_mode (st->st_mode)) + return 1; + } + break; + + case FTS_F: + case FTS_SLNONE: + break; + + case FTS_SL: + case FTS_W: + return 1; + + default: + abort (); + } + + dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD + ? fts->fts_cwd_fd + : AT_FDCWD); + return grepfile (dirdesc, ent->fts_accpath, follow, command_line); +} +/* Open NAME relative to directory descriptor DIRDESC, passing O_NOFOLLOW unless FOLLOW is nonzero, and search it with grepdesc. Return 1 if the open fails; the failure is reported unless FOLLOW is zero and errno is ELOOP or EMLINK. */ +static int +grepfile (int dirdesc, char const *name, int follow, int command_line) +{ + int desc = openat_safer (dirdesc, name, O_RDONLY | (follow ? 0 : O_NOFOLLOW)); + if (desc < 0) + { + if (follow || (errno != ELOOP && errno != EMLINK)) + suppressible_error (filename, errno); + return 1; + } + return grepdesc (desc, command_line); +} +/* Search the open file descriptor DESC; diagnostics and output use the global FILENAME. A directory is traversed with fts when directories is RECURSE_DIRECTORIES. DESC is closed unless it is STDIN_FILENO. Return 1 if the file was skipped or the count from grep was zero, 0 if that count was positive; a negative count yields count + 2, and a traversal yields the AND of the grepdirent results. */ +static int +grepdesc (int desc, int command_line) +{ + intmax_t count; + int status = 1; + struct stat st; + + /* Get the file status, possibly for the second time. This catches + a race condition if the directory entry changes after the + directory entry is read and before the file is opened. For + example, normally DESC is a directory only at the top level, but + there is an exception if some other process substitutes a + directory for a non-directory while 'grep' is running. */ + if (fstat (desc, &st) != 0) + { + suppressible_error (filename, errno); + goto closeout; + } + + if (desc != STDIN_FILENO && command_line + && skipped_file (filename, 1, S_ISDIR (st.st_mode))) + goto closeout; + + if (desc != STDIN_FILENO + && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) + { + /* Traverse the directory starting with its full name, because + unfortunately fts provides no way to traverse the directory + starting from its file descriptor. */ + + FTS *fts; + FTSENT *ent; + int opts = fts_options & ~(command_line ? 
0 : FTS_COMFOLLOW); + char *fts_arg[2]; + + /* Close DESC now, to conserve file descriptors if the race + condition occurs many times in a deep recursion. */ + if (close (desc) != 0) + suppressible_error (filename, errno); + + fts_arg[0] = (char *) filename; + fts_arg[1] = NULL; + fts = fts_open (fts_arg, opts, NULL); + + if (!fts) + xalloc_die (); + while ((ent = fts_read (fts))) + status &= grepdirent (fts, ent, command_line); + if (errno) + suppressible_error (filename, errno); + if (fts_close (fts) != 0) + suppressible_error (filename, errno); + return status; + } + if (desc != STDIN_FILENO + && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) + || ((devices == SKIP_DEVICES + || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) + && is_device_mode (st.st_mode)))) + goto closeout; + + /* If there is a regular file on stdout and the current file refers + to the same i-node, we have to report the problem and skip it. + Otherwise when matching lines from some other input reach the + disk before we open this file, we can end up reading and matching + those lines and appending them to the file from which we're reading. + Then we'd have what appears to be an infinite loop that'd terminate + only upon filling the output file system or reaching a quota. + However, there is no risk of an infinite loop if grep is generating + no output, i.e., with --silent, --quiet, -q. + Similarly, with any of these: + --max-count=N (-m) (for N >= 2) + --files-with-matches (-l) + --files-without-match (-L) + there is no risk of trouble. + For --max-count=1, grep stops after printing the first match, + so there is no risk of malfunction. But even --max-count=2, with + input==output, while there is no risk of infloop, there is a race + condition that could result in "alternate" output. */ + if (!out_quiet && list_files == 0 && 1 < max_count + && S_ISREG (out_stat.st_mode) && out_stat.st_ino + && SAME_INODE (st, out_stat)) + { + if (! 
suppress_errors) + error (0, 0, _("input file %s is also the output"), quote (filename)); + errseen = 1; + goto closeout; + } + +#if defined SET_BINARY + /* Set input to binary mode. Pipes are simulated with files + on DOS, so this includes the case of "foo | grep bar". */ + if (!isatty (desc)) + SET_BINARY (desc); +#endif + + count = grep (desc, &st); + if (count < 0) + status = count + 2; + else + { + if (count_matches) + { + if (out_file) + { + print_filename (); + if (filename_mask) + print_sep (SEP_CHAR_SELECTED); + else + fputc (0, stdout); + } + printf ("%" PRIdMAX "\n", count); + } + + status = !count; + if (list_files == 1 - 2 * status) + { + print_filename (); + fputc ('\n' & filename_mask, stdout); + } + + if (desc == STDIN_FILENO) + { + off_t required_offset = outleft ? bufoffset : after_last_match; + if (required_offset != bufoffset + && lseek (desc, required_offset, SEEK_SET) < 0 + && S_ISREG (st.st_mode)) + suppressible_error (filename, errno); + } + } + + closeout: + if (desc != STDIN_FILENO && close (desc) != 0) + suppressible_error (filename, errno); + return status; +} +/* Search the command-line argument ARG and return the resulting status. "-" means standard input, named by LABEL if set and "(standard input)" otherwise; any other ARG is opened relative to AT_FDCWD with FOLLOW set. */ +static int +grep_command_line_arg (char const *arg) +{ + if (STREQ (arg, "-")) + { + filename = label ? label : _("(standard input)"); + return grepdesc (STDIN_FILENO, 1); + } + else + { + filename = arg; + return grepfile (AT_FDCWD, arg, 1, 1); + } +} +/* Print usage information and exit with STATUS. A nonzero STATUS gets a short message on stderr; zero gets the full help text on stdout. */ +_Noreturn void usage (int); +void +usage (int status) +{ + if (status != 0) + { + fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), + program_name); + fprintf (stderr, _("Try '%s --help' for more information.\n"), + program_name); + } + else + { + printf (_("Usage: %s [OPTION]... 
PATTERN [FILE]...\n"), program_name); + printf (_("\ +Search for PATTERN in each FILE or standard input.\n")); + fputs (_(before_options), stdout); + printf (_("\ +Example: %s -i 'hello world' menu.h main.c\n\ +\n\ +Regexp selection and interpretation:\n"), program_name); + if (matchers[1].name) + printf (_("\ + -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ + -F, --fixed-strings PATTERN is a set of newline-separated fixed strings\n\ + -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ + -P, --perl-regexp PATTERN is a Perl regular expression\n")); + /* -X is undocumented on purpose. */ + printf (_("\ + -e, --regexp=PATTERN use PATTERN for matching\n\ + -f, --file=FILE obtain PATTERN from FILE\n\ + -i, --ignore-case ignore case distinctions\n\ + -w, --word-regexp force PATTERN to match only whole words\n\ + -x, --line-regexp force PATTERN to match only whole lines\n\ + -z, --null-data a data line ends in 0 byte, not newline\n")); + printf (_("\ +\n\ +Miscellaneous:\n\ + -s, --no-messages suppress error messages\n\ + -v, --invert-match select non-matching lines\n\ + -V, --version display version information and exit\n\ + --help display this help text and exit\n")); + printf (_("\ +\n\ +Output control:\n\ + -m, --max-count=NUM stop after NUM matches\n\ + -b, --byte-offset print the byte offset with output lines\n\ + -n, --line-number print line number with output lines\n\ + --line-buffered flush output on every line\n\ + -H, --with-filename print the file name for each match\n\ + -h, --no-filename suppress the file name prefix on output\n\ + --label=LABEL use LABEL as the standard input file name prefix\n\ +")); + printf (_("\ + -o, --only-matching show only the part of a line matching PATTERN\n\ + -q, --quiet, --silent suppress all normal output\n\ + --binary-files=TYPE assume that binary files are TYPE;\n\ + TYPE is 'binary', 'text', or 'without-match'\n\ + -a, --text equivalent to --binary-files=text\n\ +")); + printf (_("\ 
+ -I equivalent to --binary-files=without-match\n\ + -d, --directories=ACTION how to handle directories;\n\ + ACTION is 'read', 'recurse', or 'skip'\n\ + -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ + ACTION is 'read' or 'skip'\n\ + -r, --recursive like --directories=recurse\n\ + -R, --dereference-recursive likewise, but follow all symlinks\n\ +")); + printf (_("\ + --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ + --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\ + --exclude-from=FILE skip files matching any file pattern from FILE\n\ + --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\ +")); + printf (_("\ + -L, --files-without-match print only names of FILEs containing no match\n\ + -l, --files-with-matches print only names of FILEs containing matches\n\ + -c, --count print only a count of matching lines per FILE\n\ + -T, --initial-tab make tabs line up (if needed)\n\ + -Z, --null print 0 byte after FILE name\n")); + printf (_("\ +\n\ +Context control:\n\ + -B, --before-context=NUM print NUM lines of leading context\n\ + -A, --after-context=NUM print NUM lines of trailing context\n\ + -C, --context=NUM print NUM lines of output context\n\ +")); + printf (_("\ + -NUM same as --context=NUM\n\ + --color[=WHEN],\n\ + --colour[=WHEN] use markers to highlight the matching strings;\n\ + WHEN is 'always', 'never', or 'auto'\n\ + -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ + -u, --unix-byte-offsets report offsets as if CRs were not there\n\ + (MSDOS/Windows)\n\ +\n")); + fputs (_(after_options), stdout); + printf (_("\ +When FILE is -, read standard input. With no FILE, read . if a command-line\n\ +-r is given, - otherwise. 
If fewer than two FILEs are given, assume -h.\n\ +Exit status is 0 if any line is selected, 1 otherwise;\n\ +if any error occurs and -q is not given, the exit status is 2.\n")); + printf (_("\nReport bugs to: %s\n"), PACKAGE_BUGREPORT); + printf (_("GNU Grep home page: <%s>\n"), + "http://www.gnu.org/software/grep/"); + fputs (_("General help using GNU software: <http://www.gnu.org/gethelp/>\n"), + stdout); + + } + exit (status); +} + +/* If M is NULL, initialize the matcher to the default. Otherwise set the + matcher to M if available. Exit in case of conflicts or if M is not + available. */ +static void +setmatcher (char const *m) +{ + unsigned int i; + + if (!m) + { + compile = matchers[0].compile; + execute = matchers[0].execute; + if (!matchers[1].name) + matcher = matchers[0].name; + } + + else if (matcher) + { + if (matcher && STREQ (matcher, m)) + ; + + else if (!matchers[1].name) + error (EXIT_TROUBLE, 0, _("%s can only use the %s pattern syntax"), + program_name, matcher); + else + error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); + } + + else + { + for (i = 0; matchers[i].name; i++) + if (STREQ (m, matchers[i].name)) + { + compile = matchers[i].compile; + execute = matchers[i].execute; + matcher = m; + return; + } + + error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); + } +} + +/* Find the white-space-separated options specified by OPTIONS, and + using BUF to store copies of these options, set ARGV[0], ARGV[1], + etc. to the option copies. Return the number N of options found. + Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] + etc. Backslash can be used to escape whitespace (and backslashes). */ +static size_t +prepend_args (char const *options, char *buf, char **argv) +{ + char const *o = options; + char *b = buf; + size_t n = 0; + + for (;;) + { + while (c_isspace (to_uchar (*o))) + o++; + if (!*o) + return n; + if (argv) + argv[n] = b; + n++; + + do + if ((*b++ = *o++) == '\\' && *o) + b[-1] = *o++; + while (*o && ! 
c_isspace (to_uchar (*o))); + + *b++ = '\0'; + } +} + +/* Prepend the whitespace-separated options in OPTIONS to the argument + vector of a main program with argument count *PARGC and argument + vector *PARGV. Return the number of options prepended. */ +static int +prepend_default_options (char const *options, int *pargc, char ***pargv) +{ + if (options && *options) + { + char *buf = xmalloc (strlen (options) + 1); + size_t prepended = prepend_args (options, buf, NULL); + int argc = *pargc; + char *const *argv = *pargv; + char **pp; + enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; + if (MAX_ARGS - argc < prepended) + xalloc_die (); + pp = xmalloc ((prepended + argc + 1) * sizeof *pp); + *pargc = prepended + argc; + *pargv = pp; + *pp++ = *argv++; + pp += prepend_args (options, buf, pp); + while ((*pp++ = *argv++)) + continue; + return prepended; + } + + return 0; +} + +/* Get the next non-digit option from ARGC and ARGV. + Return -1 if there are no more options. + Process any digit options that were encountered on the way, + and store the resulting integer into *DEFAULT_CONTEXT. */ +static int +get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) +{ + static int prev_digit_optind = -1; + int this_digit_optind, was_digit; + char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; + char *p = buf; + int opt; + + was_digit = 0; + this_digit_optind = optind; + while (1) + { + opt = getopt_long (argc, (char **) argv, short_options, + long_options, NULL); + if ( ! ('0' <= opt && opt <= '9')) + break; + + if (prev_digit_optind != this_digit_optind || !was_digit) + { + /* Reset to start another context length argument. */ + p = buf; + } + else + { + /* Suppress trivial leading zeros, to avoid incorrect + diagnostic on strings like 00000000000. */ + p -= buf[0] == '0'; + } + + if (p == buf + sizeof buf - 4) + { + /* Too many digits. Append "..." to make context_length_arg + complain about "X...", where X contains the digits seen + so far. 
*/ + strcpy (p, "..."); + p += 3; + break; + } + *p++ = opt; + + was_digit = 1; + prev_digit_optind = this_digit_optind; + this_digit_optind = optind; + } + if (p != buf) + { + *p = '\0'; + context_length_arg (buf, default_context); + } + + return opt; +} + +/* Parse GREP_COLORS. The default would look like: + GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' + with boolean capabilities (ne and rv) unset (i.e., omitted). + No character escaping is needed or supported. */ +static void +parse_grep_colors (void) +{ + const char *p; + char *q; + char *name; + char *val; + + p = getenv ("GREP_COLORS"); /* Plural! */ + if (p == NULL || *p == '\0') + return; + + /* Work off a writable copy. */ + q = xstrdup (p); + + name = q; + val = NULL; + /* From now on, be well-formed or you're gone. */ + for (;;) + if (*q == ':' || *q == '\0') + { + char c = *q; + struct color_cap const *cap; + + *q++ = '\0'; /* Terminate name or val. */ + /* Empty name without val (empty cap) + * won't match and will be ignored. */ + for (cap = color_dict; cap->name; cap++) + if (STREQ (cap->name, name)) + break; + /* If name unknown, go on for forward compatibility. */ + if (cap->var && val) + *(cap->var) = val; + if (cap->fct) + cap->fct (); + if (c == '\0') + return; + name = q; + val = NULL; + } + else if (*q == '=') + { + if (q == name || val) + return; + *q++ = '\0'; /* Terminate name. */ + val = q; /* Can be the empty string. */ + } + else if (val == NULL) + q++; /* Accumulate name. */ + else if (*q == ';' || (*q >= '0' && *q <= '9')) + q++; /* Accumulate val. Protect the terminal from being sent crap. */ + else + return; +} +/* Wrappers for mbrtowc and wcrtomb that, when MB_CUR_MAX is 1, use btowc and wctob instead and yield 1. */ +#define MBRTOWC(pwc, s, n, ps) \ + (MB_CUR_MAX == 1 \ + ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \ + : mbrtowc (pwc, s, n, ps)) +#define WCRTOMB(s, wc, ps) \ + (MB_CUR_MAX == 1 \ + ? (*(s) = wctob ((wint_t) (wc)), 1) \ + : wcrtomb (s, wc, ps)) + +/* Change a pattern for fgrep into grep. 
*/ +static void +fgrep_to_grep_pattern (size_t len, char const *keys, + size_t *new_len, char **new_keys) +{ + char *p = *new_keys = xnmalloc (len + 1, 2); + mbstate_t mb_state = { 0 }; + size_t n; + + for (; len; keys += n, len -= n) + { + wchar_t wc; + n = MBRTOWC (&wc, keys, len, &mb_state); + switch (n) + { + case (size_t) -2: + n = len; + /* Fall through. */ + default: + p = mempcpy (p, keys, n); + break; + + case (size_t) -1: + memset (&mb_state, 0, sizeof mb_state); + /* Fall through. */ + case 1: + *p = '\\'; + p += strchr ("$*.[\\^", *keys) != NULL; + /* Fall through. */ + case 0: + *p++ = *keys; + n = 1; + break; + } + } + + *new_len = p - *new_keys; +} + +/* If the newline-separated regular expressions, KEYS (with length, LEN + and no trailing NUL byte), are amenable to transformation into + otherwise equivalent case-ignoring ones, perform the transformation, + put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN, + and return true. Otherwise, return false. */ + +static bool +trivial_case_ignore (size_t len, char const *keys, + size_t *new_len, char **new_keys) +{ + /* FIXME: consider removing the following restriction: + Reject if KEYS contain ASCII '\\' or '['. */ + if (memchr (keys, '\\', len) || memchr (keys, '[', len)) + return false; + + /* Worst case is that each byte B of KEYS is ASCII alphabetic and + CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each + occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which + requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is + bounded above by the constant expression CASE_FOLDED_BUFSIZE * + MB_LEN_MAX + 3. */ + *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3); + char *p = *new_keys; + + mbstate_t mb_state = { 0 }; + while (len) + { + bool initial_state = mbsinit (&mb_state) != 0; + wchar_t wc; + size_t n = MBRTOWC (&wc, keys, len, &mb_state); + + /* For an invalid, incomplete or L'\0', skip this optimization. 
*/ + if ((size_t) -2 <= n) + { + skip_case_ignore_optimization: + free (*new_keys); + return false; + } + + char const *orig = keys; + keys += n; + len -= n; + + wchar_t folded[CASE_FOLDED_BUFSIZE]; + int nfolded = case_folded_counterparts (wc, folded); + if (nfolded <= 0) + { + memcpy (p, orig, n); + p += n; + } + else if (! initial_state) + goto skip_case_ignore_optimization; + else + { + *p++ = '['; + memcpy (p, orig, n); + p += n; + + int i = 0; + do + { + size_t nbytes = WCRTOMB (p, folded[i], &mb_state); + if (nbytes == (size_t) -1) + goto skip_case_ignore_optimization; + p += nbytes; + } + while (++i < nfolded); + + if (! mbsinit (&mb_state)) + goto skip_case_ignore_optimization; + + *p++ = ']'; + } + } + + *new_len = p - *new_keys; + + return true; +} + +int +main (int argc, char **argv) +{ + char *keys; + size_t keycc, oldcc, keyalloc; + int with_filenames; + size_t cc; + int opt, status, prepended; + int prev_optind, last_recursive; + int fread_errno; + intmax_t default_context; + FILE *fp; + exit_failure = EXIT_TROUBLE; + initialize_main (&argc, &argv); + set_program_name (argv[0]); + program_name = argv[0]; + + keys = NULL; + keycc = 0; + with_filenames = 0; + eolbyte = '\n'; + filename_mask = ~0; + + max_count = INTMAX_MAX; + + /* The value -1 means to use DEFAULT_CONTEXT. */ + out_after = out_before = -1; + /* Default before/after context: changed by -C/-NUM options */ + default_context = 0; + /* Changed by -o option */ + only_matching = 0; + + /* Internationalization. 
*/ +#if defined HAVE_SETLOCALE + setlocale (LC_ALL, ""); +#endif +#if defined ENABLE_NLS + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); +#endif + + exit_failure = EXIT_TROUBLE; + atexit (clean_up_stdout); + + last_recursive = 0; + prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); + setmatcher (NULL); + + while (prev_optind = optind, + (opt = get_nondigit_option (argc, argv, &default_context)) != -1) + switch (opt) + { + case 'A': + context_length_arg (optarg, &out_after); + break; + + case 'B': + context_length_arg (optarg, &out_before); + break; + + case 'C': + /* Set output match context, but let any explicit leading or + trailing amount specified with -A or -B stand. */ + context_length_arg (optarg, &default_context); + break; + + case 'D': + if (STREQ (optarg, "read")) + devices = READ_DEVICES; + else if (STREQ (optarg, "skip")) + devices = SKIP_DEVICES; + else + error (EXIT_TROUBLE, 0, _("unknown devices method")); + break; + + case 'E': + setmatcher ("egrep"); + break; + + case 'F': + setmatcher ("fgrep"); + break; + + case 'P': + setmatcher ("perl"); + break; + + case 'G': + setmatcher ("grep"); + break; + + case 'X': /* undocumented on purpose */ + setmatcher (optarg); + break; + + case 'H': + with_filenames = 1; + no_filenames = 0; + break; + + case 'I': + binary_files = WITHOUT_MATCH_BINARY_FILES; + break; + + case 'T': + align_tabs = 1; + break; + + case 'U': +#if defined HAVE_DOS_FILE_CONTENTS + dos_use_file_type = DOS_BINARY; +#endif + break; + + case 'u': +#if defined HAVE_DOS_FILE_CONTENTS + dos_report_unix_offset = 1; +#endif + break; + + case 'V': + show_version = 1; + break; + + case 'a': + binary_files = TEXT_BINARY_FILES; + break; + + case 'b': + out_byte = 1; + break; + + case 'c': + count_matches = 1; + break; + + case 'd': + directories = XARGMATCH ("--directories", optarg, + directories_args, directories_types); + if (directories == RECURSE_DIRECTORIES) + last_recursive = prev_optind; + break; + + 
case 'e': + cc = strlen (optarg); + keys = xrealloc (keys, keycc + cc + 1); + strcpy (&keys[keycc], optarg); + keycc += cc; + keys[keycc++] = '\n'; + break; + + case 'f': + fp = STREQ (optarg, "-") ? stdin : fopen (optarg, "r"); + if (!fp) + error (EXIT_TROUBLE, errno, "%s", optarg); + for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) + ; + keys = xrealloc (keys, keyalloc); + oldcc = keycc; + while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) + { + keycc += cc; + if (keycc == keyalloc - 1) + keys = x2nrealloc (keys, &keyalloc, sizeof *keys); + } + fread_errno = errno; + if (ferror (fp)) + error (EXIT_TROUBLE, fread_errno, "%s", optarg); + if (fp != stdin) + fclose (fp); + /* Append final newline if file ended in non-newline. */ + if (oldcc != keycc && keys[keycc - 1] != '\n') + keys[keycc++] = '\n'; + break; + + case 'h': + with_filenames = 0; + no_filenames = 1; + break; + + case 'i': + case 'y': /* For old-timers . . . */ + match_icase = 1; + break; + + case 'L': + /* Like -l, except list files that don't contain matches. + Inspired by the same option in Hume's gre. */ + list_files = -1; + break; + + case 'l': + list_files = 1; + break; + + case 'm': + switch (xstrtoimax (optarg, 0, 10, &max_count, "")) + { + case LONGINT_OK: + case LONGINT_OVERFLOW: + break; + + default: + error (EXIT_TROUBLE, 0, _("invalid max count")); + } + break; + + case 'n': + out_line = 1; + break; + + case 'o': + only_matching = 1; + break; + + case 'q': + exit_on_match = 1; + exit_failure = 0; + break; + + case 'R': + fts_options = basic_fts_options | FTS_LOGICAL; + /* Fall through. 
*/ + case 'r': + directories = RECURSE_DIRECTORIES; + last_recursive = prev_optind; + break; + + case 's': + suppress_errors = 1; + break; + + case 'v': + out_invert = 1; + break; + + case 'w': + match_words = 1; + break; + + case 'x': + match_lines = 1; + break; + + case 'Z': + filename_mask = 0; + break; + + case 'z': + eolbyte = '\0'; + break; + + case BINARY_FILES_OPTION: + if (STREQ (optarg, "binary")) + binary_files = BINARY_BINARY_FILES; + else if (STREQ (optarg, "text")) + binary_files = TEXT_BINARY_FILES; + else if (STREQ (optarg, "without-match")) + binary_files = WITHOUT_MATCH_BINARY_FILES; + else + error (EXIT_TROUBLE, 0, _("unknown binary-files type")); + break; + + case COLOR_OPTION: + if (optarg) + { + if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") + || !strcasecmp (optarg, "force")) + color_option = 1; + else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") + || !strcasecmp (optarg, "none")) + color_option = 0; + else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") + || !strcasecmp (optarg, "if-tty")) + color_option = 2; + else + show_help = 1; + } + else + color_option = 2; + break; + + case EXCLUDE_OPTION: + case INCLUDE_OPTION: + if (!excluded_patterns) + excluded_patterns = new_exclude (); + add_exclude (excluded_patterns, optarg, + (EXCLUDE_WILDCARDS + | (opt == INCLUDE_OPTION ? 
EXCLUDE_INCLUDE : 0))); + break; + case EXCLUDE_FROM_OPTION: + if (!excluded_patterns) + excluded_patterns = new_exclude (); + if (add_exclude_file (add_exclude, excluded_patterns, optarg, + EXCLUDE_WILDCARDS, '\n') != 0) + { + error (EXIT_TROUBLE, errno, "%s", optarg); + } + break; + + case EXCLUDE_DIRECTORY_OPTION: + if (!excluded_directory_patterns) + excluded_directory_patterns = new_exclude (); + add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS); + break; + + case GROUP_SEPARATOR_OPTION: + group_separator = optarg; + break; + + case LINE_BUFFERED_OPTION: + line_buffered = 1; + break; + + case LABEL_OPTION: + label = optarg; + break; + + case 0: + /* long options */ + break; + + default: + usage (EXIT_TROUBLE); + break; + + } + + if (color_option == 2) + color_option = isatty (STDOUT_FILENO) && should_colorize (); + init_colorize (); + + /* POSIX says that -q overrides -l, which in turn overrides the + other output options. */ + if (exit_on_match) + list_files = 0; + if (exit_on_match | list_files) + { + count_matches = 0; + done_on_match = 1; + } + out_quiet = count_matches | done_on_match; + + if (out_after < 0) + out_after = default_context; + if (out_before < 0) + out_before = default_context; + + if (color_option) + { + /* Legacy. */ + char *userval = getenv ("GREP_COLOR"); + if (userval != NULL && *userval != '\0') + selected_match_color = context_match_color = userval; + + /* New GREP_COLORS has priority. */ + parse_grep_colors (); + } + + if (show_version) + { + version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, + (char *) NULL); + exit (EXIT_SUCCESS); + } + + if (show_help) + usage (EXIT_SUCCESS); + + struct stat tmp_stat; + if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) + out_stat = tmp_stat; + + if (keys) + { + if (keycc == 0) + { + /* No keys were specified (e.g. -f /dev/null). Match nothing. */ + out_invert ^= 1; + match_lines = match_words = 0; + } + else + /* Strip trailing newline. 
*/ + --keycc; + } + else if (optind < argc) + { + /* A copy must be made in case of an xrealloc() or free() later. */ + keycc = strlen (argv[optind]); + keys = xmemdup (argv[optind++], keycc + 1); + } + else + usage (EXIT_TROUBLE); + + /* If case-insensitive fgrep in a multibyte locale, improve + performance by using grep instead. */ + if (match_icase && compile == Fcompile && MB_CUR_MAX > 1) + { + size_t new_keycc; + char *new_keys; + fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); + free (keys); + keys = new_keys; + keycc = new_keycc; + matcher = "grep"; + compile = Gcompile; + execute = EGexecute; + } + + /* Case-insensitive matching is expensive in multibyte locales + because a few characters may change size when converted to upper + or lower case. To accommodate those, search the input one line + at a time, rather than using the much more efficient buffer search. + + Try to convert a regular expression 'foo' (ignoring case) to an + equivalent regular expression '[fF][oO][oO]' (where case matters). + Not only does this avoid the expensive requirement to read and + process a line at a time, it also allows use of the kwset engine, + a win in non-UTF-8 multibyte locales. */ + if (match_icase) + { + size_t new_keycc; + char *new_keys; + /* It is not possible with -F, not useful with -P (pcre) and there is no + point when there is no regexp. It also depends on which constructs + appear in the regexp. See trivial_case_ignore for those details. */ + if (keycc + && ! 
(matcher + && (STREQ (matcher, "fgrep") || STREQ (matcher, "perl"))) + && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys)) + { + match_icase = 0; + free (keys); + keys = new_keys; + keycc = new_keycc; + } + } + +#if MBS_SUPPORT + if (MB_CUR_MAX > 1) + build_mbclen_cache (); +#endif + + compile (keys, keycc); + free (keys); + + if ((argc - optind > 1 && !no_filenames) || with_filenames) + out_file = 1; + +#ifdef SET_BINARY + /* Output is set to binary mode because we shouldn't convert + NL to CR-LF pairs, especially when grepping binary files. */ + if (!isatty (1)) + SET_BINARY (1); +#endif + + if (max_count == 0) + exit (EXIT_FAILURE); + + if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) + devices = READ_DEVICES; + + if (optind < argc) + { + status = 1; + do + status &= grep_command_line_arg (argv[optind]); + while (++optind < argc); + } + else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) + { + /* Grep through ".", omitting leading "./" from diagnostics. */ + filename_prefix_len = 2; + status = grep_command_line_arg ("."); + } + else + status = grep_command_line_arg ("-"); + + /* We register via atexit() to test stdout. */ + exit (errseen ? EXIT_TROUBLE : status); +} +/* vim:set shiftwidth=2: */ diff --git a/src/search.h b/src/search.h index 69e3afd..990da73 100644 --- a/src/search.h +++ b/src/search.h @@ -52,6 +52,8 @@ extern bool is_mb_middle (const char **, const char *, const char *, size_t); /* dfasearch.c */ extern void GEAcompile (char const *, size_t, reg_syntax_t); extern size_t EGexecute (char const *, size_t, size_t *, char const *); +extern void Gcompile (char const *pattern, size_t size); +extern void Ecompile (char const *pattern, size_t size); /* kwsearch.c */ extern void Fcompile (char const *, size_t); -- 1.9.0
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.