This turned into more work than I expected, as I kept finding performance
glitches and/or correctness bugs in the neighborhood. I installed the attached
set of patches. Patch 03 is the crucial one. Patch 10 trivially fixes an earlier
test of mine and I'm too lazy to write a separate email for it.
This fixes the problem for me, so I'm taking the liberty of closing this bug
report.
From 8b24c7008aadf62bd6803778ab04fdd065d573d8 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 19 Nov 2016 01:06:01 -0800
Subject: [PATCH 01/10] grep: avoid unnecessary isatty calls
This fixes an inefficiency that was mistakenly introduced a while
back, when the macro SET_BINARY became defined on all platforms.
* src/grep.c (grepdesc, main): Do not unecessarily call isatty on
POSIXish platforms.
---
src/grep.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 1163eae..201b1d9 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1834,12 +1834,10 @@ grepdesc (int desc, bool command_line)
goto closeout;
}
-#if defined SET_BINARY
/* Set input to binary mode. Pipes are simulated with files
on DOS, so this includes the case of "foo | grep bar". */
- if (!isatty (desc))
- SET_BINARY (desc);
-#endif
+ if (O_BINARY && !isatty (desc))
+ set_binary_mode (desc, O_BINARY);
count = grep (desc, &st);
if (count_matches)
@@ -2801,12 +2799,10 @@ main (int argc, char **argv)
if ((argc - optind > 1 && !no_filenames) || with_filenames)
out_file = 1;
-#ifdef SET_BINARY
/* Output is set to binary mode because we shouldn't convert
NL to CR-LF pairs, especially when grepping binary files. */
- if (!isatty (STDOUT_FILENO))
- SET_BINARY (STDOUT_FILENO);
-#endif
+ if (O_BINARY && !isatty (STDOUT_FILENO))
+ set_binary_mode (STDOUT_FILENO, O_BINARY);
if (max_count == 0)
return EXIT_FAILURE;
--
2.7.4
From c87daf6fdcaa116308663047c2e4fb8ff38011c7 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 19 Nov 2016 01:06:01 -0800
Subject: [PATCH 02/10] grep: improve diagnostic on lseek failure
* src/grep.c (reset): Mention the file name in the (unlikely)
chance of an lseek failure.
---
src/grep.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/grep.c b/src/grep.c
index 201b1d9..cafa0a2 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -861,7 +861,7 @@ reset (int fd, struct stat const *st)
bufoffset = lseek (fd, 0, SEEK_CUR);
if (bufoffset < 0)
{
- suppressible_error (_("lseek failed"), errno);
+ suppressible_error (filename, errno);
return false;
}
}
--
2.7.4
From 80c97aa06e8f0320ff397a74a018eadc6a21f5fa Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 13:41:39 -0800
Subject: [PATCH 03/10] grep: scale back /dev/null speedup
The performance improvement when output is /dev/null (commit
af6af288eac28951b5eee1eaaf373e22b2193b7b dated 2016-05-01)
breaks scripts that run "PROGRAM | grep PATTERN >/dev/null"
where PROGRAM dies when writing into a broken pipe.
Suppress the improvement if standard input is not seekable.
Problem reported by Gary Johnson (Bug#24941).
* NEWS: Document this.
* src/grep.c (seek_failed): New static var.
(seek_data_failed): Move decl earlier, to be next to seek_failed.
(file_must_have_nulls): Skip useless syscalls if seek_failed.
Lessen source-code nesting.
(reset): Set seek_failed and seek_data_failed.
Try lseek even on non-regular files.
(grep): New arg INEOF. All callers changed.
Do not clear seek_data_failed here, since 'reset' now does this.
(finalize_input): New static function.
(grepdesc): Use it.
(main): Do not exit on first match merely because output is
/dev/null.
* tests/grep-dev-null-out: Adjust to new behavior.
---
NEWS | 6 +++
src/grep.c | 135 +++++++++++++++++++++++++++++-------------------
tests/grep-dev-null-out | 3 +-
3 files changed, 91 insertions(+), 53 deletions(-)
diff --git a/NEWS b/NEWS
index a95c875..a165377 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,12 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
+ grep by default now reads all of standard input if it is a pipe,
+ even if this cannot affect grep's output or exit status. This works
+ better with nonportable scripts that run "PROGRAM | grep PATTERN
+ >/dev/null" where PROGRAM dies when writing into a broken pipe.
+ [bug introduced in grep-2.26]
+
grep -Pz no longer rejects patterns containing ^ and $, and is
more cautious about special patterns like (?-m) and (*FAIL).
[bug introduced in grep-2.23]
diff --git a/src/grep.c b/src/grep.c
index cafa0a2..5ce8f95 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -580,6 +580,10 @@ enum { SEEK_DATA = SEEK_SET };
enum { SEEK_HOLE = SEEK_SET };
#endif
+/* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input. */
+static bool seek_failed;
+static bool seek_data_failed;
+
/* Functions we'll use to search. */
typedef void (*compile_fp_t) (char const *, size_t);
typedef size_t (*execute_fp_t) (char *, size_t, size_t *, char const *);
@@ -718,31 +722,26 @@ buf_has_nulls (char *buf, size_t size)
static bool
file_must_have_nulls (size_t size, int fd, struct stat const *st)
{
- if (usable_st_size (st))
+ /* If the file has holes, it must contain a null byte somewhere. */
+ if (SEEK_HOLE != SEEK_SET && !seek_failed
+ && usable_st_size (st) && size < st->st_size)
{
- if (st->st_size <= size)
- return false;
-
- /* If the file has holes, it must contain a null byte somewhere. */
- if (SEEK_HOLE != SEEK_SET)
+ off_t cur = size;
+ if (O_BINARY || fd == STDIN_FILENO)
{
- off_t cur = size;
- if (O_BINARY || fd == STDIN_FILENO)
- {
- cur = lseek (fd, 0, SEEK_CUR);
- if (cur < 0)
- return false;
- }
+ cur = lseek (fd, 0, SEEK_CUR);
+ if (cur < 0)
+ return false;
+ }
- /* Look for a hole after the current location. */
- off_t hole_start = lseek (fd, cur, SEEK_HOLE);
- if (0 <= hole_start)
- {
- if (lseek (fd, cur, SEEK_SET) < 0)
- suppressible_error (filename, errno);
- if (hole_start < st->st_size)
- return true;
- }
+ /* Look for a hole after the current location. */
+ off_t hole_start = lseek (fd, cur, SEEK_HOLE);
+ if (0 <= hole_start)
+ {
+ if (lseek (fd, cur, SEEK_SET) < 0)
+ suppressible_error (filename, errno);
+ if (hole_start < st->st_size)
+ return true;
}
}
@@ -806,13 +805,12 @@ static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */
-static off_t bufoffset; /* Read offset; defined on regular files. */
+static off_t bufoffset; /* Read offset. */
static off_t after_last_match; /* Pointer after last matching line that
would have been output if we were
outputting characters. */
static bool skip_nuls; /* Skip '\0' in data. */
static bool skip_empty_lines; /* Skip empty lines in data. */
-static bool seek_data_failed; /* lseek with SEEK_DATA failed. */
static uintmax_t totalnl; /* Total newline count before lastnl. */
/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
@@ -851,20 +849,20 @@ reset (int fd, struct stat const *st)
bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
bufbeg[-1] = eolbyte;
bufdesc = fd;
+ bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
+ seek_failed = bufoffset < 0;
+
+ /* Assume SEEK_DATA fails if SEEK_CUR does. */
+ seek_data_failed = seek_failed;
- if (S_ISREG (st->st_mode))
+ if (seek_failed)
{
- if (fd != STDIN_FILENO)
- bufoffset = 0;
- else
+ if (errno != ESPIPE)
{
- bufoffset = lseek (fd, 0, SEEK_CUR);
- if (bufoffset < 0)
- {
- suppressible_error (filename, errno);
- return false;
- }
+ suppressible_error (filename, errno);
+ return false;
}
+ bufoffset = 0;
}
return true;
}
@@ -1477,9 +1475,10 @@ grepbuf (char *beg, char const *lim)
return outleft0 - outleft;
}
-/* Search a given (non-directory) file. Return a count of lines printed. */
+/* Search a given (non-directory) file. Return a count of lines printed.
+ Set *INEOF to true if end-of-file reached. */
static intmax_t
-grep (int fd, struct stat const *st)
+grep (int fd, struct stat const *st, bool *ineof)
{
intmax_t nlines, i;
size_t residue, save;
@@ -1507,7 +1506,6 @@ grep (int fd, struct stat const *st)
pending = 0;
skip_nuls = skip_empty_lines && !eol;
encoding_error_output = false;
- seek_data_failed = false;
nlines = 0;
residue = 0;
@@ -1542,7 +1540,10 @@ grep (int fd, struct stat const *st)
/* no more data to scan (eof) except for maybe a residue -> break */
if (beg == buflim)
- break;
+ {
+ *ineof = true;
+ break;
+ }
zap_nuls (beg, buflim, nul_zapper);
@@ -1742,11 +1743,46 @@ grepfile (int dirdesc, char const *name, bool follow, bool command_line)
return grepdesc (desc, command_line);
}
+/* Finish reading from FD, with status ST and where end-of-file has
+ been seen if INEOF. Typically this is a no-op, but when reading
+ from standard input this may adjust the file offset or drain a
+ pipe. */
+
+static void
+finalize_input (int fd, struct stat const *st, bool ineof)
+{
+ if (fd != STDIN_FILENO)
+ return;
+
+ if (outleft)
+ {
+ if (ineof)
+ return;
+ if (seek_failed)
+ {
+ while (fillbuf (0, st))
+ if (bufbeg == buflim)
+ return;
+ }
+ else if (0 <= lseek (fd, 0, SEEK_END))
+ return;
+ }
+ else
+ {
+ if (seek_failed || bufoffset == after_last_match
+ || 0 <= lseek (fd, after_last_match, SEEK_SET))
+ return;
+ }
+
+ suppressible_error (filename, errno);
+}
+
static bool
grepdesc (int desc, bool command_line)
{
intmax_t count;
bool status = true;
+ bool ineof = false;
struct stat st;
/* Get the file status, possibly for the second time. This catches
@@ -1839,7 +1875,7 @@ grepdesc (int desc, bool command_line)
if (O_BINARY && !isatty (desc))
set_binary_mode (desc, O_BINARY);
- count = grep (desc, &st);
+ count = grep (desc, &st, &ineof);
if (count_matches)
{
if (out_file)
@@ -1856,7 +1892,10 @@ grepdesc (int desc, bool command_line)
}
status = !count;
- if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
+
+ if (list_files == LISTFILES_NONE)
+ finalize_input (desc, &st, ineof);
+ else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
{
print_filename ();
putchar_errno ('\n' & filename_mask);
@@ -1864,15 +1903,6 @@ grepdesc (int desc, bool command_line)
fflush_errno ();
}
- if (desc == STDIN_FILENO)
- {
- off_t required_offset = outleft ? bufoffset : after_last_match;
- if (required_offset != bufoffset
- && lseek (desc, required_offset, SEEK_SET) < 0
- && S_ISREG (st.st_mode))
- suppressible_error (filename, errno);
- }
-
closeout:
if (desc != STDIN_FILENO && close (desc) != 0)
suppressible_error (filename, errno);
@@ -2699,6 +2729,7 @@ main (int argc, char **argv)
if (show_help)
usage (EXIT_SUCCESS);
+ bool dev_null_output = false;
bool possibly_tty = false;
struct stat tmp_stat;
if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
@@ -2710,7 +2741,7 @@ main (int argc, char **argv)
struct stat null_stat;
if (stat ("/dev/null", &null_stat) == 0
&& SAME_INODE (tmp_stat, null_stat))
- exit_on_match = true;
+ dev_null_output = true;
else
possibly_tty = true;
}
@@ -2733,9 +2764,9 @@ main (int argc, char **argv)
/* POSIX says -c, -l and -q are mutually exclusive. In this
implementation, -q overrides -l and -L, which in turn override -c. */
- if (exit_on_match)
+ if (exit_on_match | dev_null_output)
list_files = LISTFILES_NONE;
- if (exit_on_match || list_files != LISTFILES_NONE)
+ if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
{
count_matches = false;
done_on_match = true;
diff --git a/tests/grep-dev-null-out b/tests/grep-dev-null-out
index 7f0e1c5..16ddadf 100755
--- a/tests/grep-dev-null-out
+++ b/tests/grep-dev-null-out
@@ -6,6 +6,7 @@
require_timeout_
${AWK-awk} 'BEGIN {while (1) print "x"}' </dev/null |
- timeout 10 grep x >/dev/null || fail=1
+ timeout 1 grep x >/dev/null
+test $? -eq 124 || fail=1
Exit $fail
--
2.7.4
From 4fa6f48b573267e758650e114ec158d97916411e Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 15:11:35 -0800
Subject: [PATCH 04/10] grep: drain the input pipe faster
* src/grep.c (dev_null_output): Now static.
(drain_input): New function, using 'splice' if that makes sense.
(finalize_input): Use it.
(main): Omit now-unnecessary initialization.
---
src/grep.c | 30 ++++++++++++++++++++++++++----
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 5ce8f95..64c72ce 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1031,6 +1031,7 @@ static intmax_t pending; /* Pending lines of output.
Always kept 0 if out_quiet is true. */
static bool done_on_match; /* Stop scanning file on first match. */
static bool exit_on_match; /* Exit on first match. */
+static bool dev_null_output; /* Stdout is known to be /dev/null. */
#include "dosbuf.c"
@@ -1743,6 +1744,29 @@ grepfile (int dirdesc, char const *name, bool follow, bool command_line)
return grepdesc (desc, command_line);
}
+/* Read all data from FD, with status ST. Return true if successful,
+ false (setting errno) otherwise. */
+static bool
+drain_input (int fd, struct stat const *st)
+{
+ ssize_t nbytes;
+ if (S_ISFIFO (st->st_mode) && dev_null_output)
+ {
+#ifdef SPLICE_F_MOVE
+ /* Should be faster, since it need not copy data to user space. */
+ while ((nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
+ INITIAL_BUFSIZE, SPLICE_F_MOVE)))
+ if (nbytes < 0)
+ return false;
+ return true;
+#endif
+ }
+ while ((nbytes = safe_read (fd, buffer, bufalloc)))
+ if (nbytes == SAFE_READ_ERROR)
+ return false;
+ return true;
+}
+
/* Finish reading from FD, with status ST and where end-of-file has
been seen if INEOF. Typically this is a no-op, but when reading
from standard input this may adjust the file offset or drain a
@@ -1760,9 +1784,8 @@ finalize_input (int fd, struct stat const *st, bool ineof)
return;
if (seek_failed)
{
- while (fillbuf (0, st))
- if (bufbeg == buflim)
- return;
+ if (drain_input (fd, st))
+ return;
}
else if (0 <= lseek (fd, 0, SEEK_END))
return;
@@ -2729,7 +2752,6 @@ main (int argc, char **argv)
if (show_help)
usage (EXIT_SUCCESS);
- bool dev_null_output = false;
bool possibly_tty = false;
struct stat tmp_stat;
if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
--
2.7.4
From 2389e561ad0252ab5ea62ab53f19cae0a00ec794 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 17:51:49 -0800
Subject: [PATCH 05/10] grep: avoid unnecessary gettext call
Translate "(standard input)" lazily.
* src/grep.c (input_filename): New function.
(suppressible_error): Remove 1st arg, since it is always
input_filename (). All callers changed.
(suppressible_error, print_filename, grep, grepdesc): Use it.
(grep_command_line_arg): Set filename to NULL if standard
input has no label. Often, this avoids all calls to gettext,
which can be a win as the first call can be expensive.
---
src/grep.c | 51 ++++++++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 21 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 64c72ce..e1b7937 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -502,7 +502,7 @@ char eolbyte;
static char const *matcher;
/* For error messages. */
-/* The input file name, or (if standard input) "-" or a --label argument. */
+/* The input file name, or (if standard input) null or a --label argument. */
static char const *filename;
/* Omit leading "./" from file names in diagnostics. */
static bool omit_dot_slash;
@@ -590,12 +590,20 @@ typedef size_t (*execute_fp_t) (char *, size_t, size_t *, char const *);
static compile_fp_t compile;
static execute_fp_t execute;
-/* Like error, but suppress the diagnostic if requested. */
+static char const *
+input_filename (void)
+{
+ if (!filename)
+ filename = _("(standard input)");
+ return filename;
+}
+
+/* Unless requested, diagnose an error about the input file. */
static void
-suppressible_error (char const *mesg, int errnum)
+suppressible_error (int errnum)
{
if (! suppress_errors)
- error (0, errnum, "%s", mesg);
+ error (0, errnum, "%s", input_filename ());
errseen = true;
}
@@ -739,7 +747,7 @@ file_must_have_nulls (size_t size, int fd, struct stat const *st)
if (0 <= hole_start)
{
if (lseek (fd, cur, SEEK_SET) < 0)
- suppressible_error (filename, errno);
+ suppressible_error (errno);
if (hole_start < st->st_size)
return true;
}
@@ -859,7 +867,7 @@ reset (int fd, struct stat const *st)
{
if (errno != ESPIPE)
{
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return false;
}
bufoffset = 0;
@@ -1056,7 +1064,7 @@ static void
print_filename (void)
{
pr_sgr_start_if (filename_color);
- fputs_errno (filename);
+ fputs_errno (input_filename ());
pr_sgr_end_if (filename_color);
}
@@ -1514,7 +1522,7 @@ grep (int fd, struct stat const *st, bool *ineof)
if (! fillbuf (save, st))
{
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return 0;
}
@@ -1598,7 +1606,7 @@ grep (int fd, struct stat const *st, bool *ineof)
nlscan (beg);
if (! fillbuf (save, st))
{
- suppressible_error (filename, errno);
+ suppressible_error (errno);
goto finish_grep;
}
}
@@ -1617,7 +1625,7 @@ grep (int fd, struct stat const *st, bool *ineof)
if (!out_quiet && (encoding_error_output
|| (0 <= nlines_first_null && nlines_first_null < nlines)))
{
- printf_errno (_("Binary file %s matches\n"), filename);
+ printf_errno (_("Binary file %s matches\n"), input_filename ());
if (line_buffered)
fflush_errno ();
}
@@ -1672,7 +1680,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line)
case FTS_DNR:
case FTS_ERR:
case FTS_NS:
- suppressible_error (filename, ent->fts_errno);
+ suppressible_error (ent->fts_errno);
return true;
case FTS_DEFAULT:
@@ -1689,7 +1697,7 @@ grepdirent (FTS *fts, FTSENT *ent, bool command_line)
int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
{
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return true;
}
st = &st1;
@@ -1738,7 +1746,7 @@ grepfile (int dirdesc, char const *name, bool follow, bool command_line)
if (desc < 0)
{
if (follow || ! open_symlink_nofollow_error (errno))
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return true;
}
return grepdesc (desc, command_line);
@@ -1797,7 +1805,7 @@ finalize_input (int fd, struct stat const *st, bool ineof)
return;
}
- suppressible_error (filename, errno);
+ suppressible_error (errno);
}
static bool
@@ -1816,7 +1824,7 @@ grepdesc (int desc, bool command_line)
directory for a non-directory while 'grep' is running. */
if (fstat (desc, &st) != 0)
{
- suppressible_error (filename, errno);
+ suppressible_error (errno);
goto closeout;
}
@@ -1843,7 +1851,7 @@ grepdesc (int desc, bool command_line)
/* Close DESC now, to conserve file descriptors if the race
condition occurs many times in a deep recursion. */
if (close (desc) != 0)
- suppressible_error (filename, errno);
+ suppressible_error (errno);
fts_arg[0] = (char *) filename;
fts_arg[1] = NULL;
@@ -1854,9 +1862,9 @@ grepdesc (int desc, bool command_line)
while ((ent = fts_read (fts)))
status &= grepdirent (fts, ent, command_line);
if (errno)
- suppressible_error (filename, errno);
+ suppressible_error (errno);
if (fts_close (fts) != 0)
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return status;
}
if (desc != STDIN_FILENO
@@ -1888,7 +1896,8 @@ grepdesc (int desc, bool command_line)
&& S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
{
if (! suppress_errors)
- error (0, 0, _("input file %s is also the output"), quote (filename));
+ error (0, 0, _("input file %s is also the output"),
+ quote (input_filename ()));
errseen = true;
goto closeout;
}
@@ -1928,7 +1937,7 @@ grepdesc (int desc, bool command_line)
closeout:
if (desc != STDIN_FILENO && close (desc) != 0)
- suppressible_error (filename, errno);
+ suppressible_error (errno);
return status;
}
@@ -1937,7 +1946,7 @@ grep_command_line_arg (char const *arg)
{
if (STREQ (arg, "-"))
{
- filename = label ? label : _("(standard input)");
+ filename = label;
return grepdesc (STDIN_FILENO, true);
}
else
--
2.7.4
From 2a45b2fbe2ca6d2d6c161d1593511b92d9e4640e Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 18:25:19 -0800
Subject: [PATCH 06/10] grep: avoid O(N**2) buffer reallocation
* src/grep.c (main): Use x2realloc to avoid O(N**2) performance as
pattern buffers grow.
---
src/grep.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index e1b7937..fe56e9c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2413,9 +2413,9 @@ fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
int
main (int argc, char **argv)
{
- char *keys;
- size_t keycc, oldcc, keyalloc;
- bool with_filenames;
+ char *keys = NULL;
+ size_t keycc = 0, oldcc, keyalloc = 0;
+ bool with_filenames = false;
size_t cc;
int opt, prepended;
int prev_optind, last_recursive;
@@ -2425,9 +2425,6 @@ main (int argc, char **argv)
exit_failure = EXIT_TROUBLE;
initialize_main (&argc, &argv);
- keys = NULL;
- keycc = 0;
- with_filenames = false;
eolbyte = '\n';
filename_mask = ~0;
@@ -2556,8 +2553,12 @@ main (int argc, char **argv)
case 'e':
cc = strlen (optarg);
- keys = xrealloc (keys, keycc + cc + 1);
- strcpy (&keys[keycc], optarg);
+ if (keyalloc < keycc + cc + 1)
+ {
+ keyalloc = keycc + cc + 1;
+ keys = x2realloc (keys, &keyalloc);
+ }
+ memcpy (&keys[keycc], optarg, cc);
keycc += cc;
keys[keycc++] = '\n';
fl_add (keys, keycc - cc - 1, keycc, "");
@@ -2567,15 +2568,14 @@ main (int argc, char **argv)
fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r");
if (!fp)
die (EXIT_TROUBLE, errno, "%s", optarg);
- for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
- ;
- keys = xrealloc (keys, keyalloc);
oldcc = keycc;
- while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
+ for (;; keycc += cc)
{
- keycc += cc;
- if (keycc == keyalloc - 1)
- keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
+ if (keyalloc <= keycc + 1)
+ keys = x2realloc (keys, &keyalloc);
+ cc = fread (keys + keycc, 1, keyalloc - (keycc + 1), fp);
+ if (cc == 0)
+ break;
}
fread_errno = errno;
if (ferror (fp))
@@ -2823,7 +2823,7 @@ main (int argc, char **argv)
}
else if (optind < argc)
{
- /* A copy must be made in case of an xrealloc() or free() later. */
+ /* Make a copy so that it can be reallocated or freed later. */
keycc = strlen (argv[optind]);
keys = xmemdup (argv[optind++], keycc + 1);
fl_add (keys, 0, keycc, "");
--
2.7.4
From fc6fce9a16bc52aca11cd8f1ad2632943fe94201 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 21:05:42 -0800
Subject: [PATCH 07/10] grep: treat -f /dev/null like -m0
* NEWS: Document this.
* src/grep.c (main): With -f /dev/null, don't bother to read the
input. This is what FreeBSD grep does.
* tests/Makefile.am (TESTS): Add skip-read.
* tests/skip-read: New file.
---
NEWS | 5 +++++
src/grep.c | 5 ++++-
tests/Makefile.am | 1 +
tests/skip-read | 15 +++++++++++++++
4 files changed, 25 insertions(+), 1 deletion(-)
create mode 100755 tests/skip-read
diff --git a/NEWS b/NEWS
index a165377..06a186a 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,11 @@ GNU grep NEWS -*- outline -*-
grep's use of getprogname no longer causes a build failure on HP-UX.
+** Improvements
+
+ grep no longer reads the input in a few more cases when it is easy
+ to see that matching cannot succeed, e.g., 'grep -f /dev/null'.
+
* Noteworthy changes in release 2.26 (2016-10-02) [stable]
diff --git a/src/grep.c b/src/grep.c
index fe56e9c..317a0d5 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2866,7 +2866,10 @@ main (int argc, char **argv)
if (O_BINARY && !isatty (STDOUT_FILENO))
set_binary_mode (STDOUT_FILENO, O_BINARY);
- if (max_count == 0)
+ /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
+ /dev/null'), fail without reading the input. */
+ if (max_count == 0
+ || (keycc == 0 && out_invert && !match_lines && !match_words))
return EXIT_FAILURE;
/* Prefer sysconf for page size, as getpagesize typically returns int. */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f4c82f4..b6f0df3 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -143,6 +143,7 @@ TESTS = \
reversed-range-endpoints \
sjis-mb \
skip-device \
+ skip-read \
spencer1 \
spencer1-locale \
status \
diff --git a/tests/skip-read b/tests/skip-read
new file mode 100755
index 0000000..627d362
--- /dev/null
+++ b/tests/skip-read
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Check that grep skips reading in some cases.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+fail=0
+
+for opts in '-m0 y' '-f /dev/null' '-v ""'; do
+ for matcher in '' -E -F; do
+ eval returns_ 1 grep $opts $matcher no-such-file > out || fail=1
+ compare /dev/null out || fail=1
+ done
+done
+
+Exit $fail
--
2.7.4
From e25ea1223d0fb50e759907cdd88e8096b354cbab Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Nov 2016 21:11:30 -0800
Subject: [PATCH 08/10] grep: tune -f /dev/null
* src/grep.c (main): Do the -f /dev/null early-exit checks before
more-expensive tests that involve syscalls.
---
src/grep.c | 80 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 40 insertions(+), 40 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 317a0d5..066df8c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2761,6 +2761,28 @@ main (int argc, char **argv)
if (show_help)
usage (EXIT_SUCCESS);
+ if (keys)
+ {
+ if (keycc == 0)
+ {
+ /* No keys were specified (e.g. -f /dev/null). Match nothing. */
+ out_invert ^= true;
+ match_lines = match_words = false;
+ }
+ else
+ /* Strip trailing newline. */
+ --keycc;
+ }
+ else if (optind < argc)
+ {
+ /* Make a copy so that it can be reallocated or freed later. */
+ keycc = strlen (argv[optind]);
+ keys = xmemdup (argv[optind++], keycc + 1);
+ fl_add (keys, 0, keycc, "");
+ }
+ else
+ usage (EXIT_TROUBLE);
+
bool possibly_tty = false;
struct stat tmp_stat;
if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
@@ -2778,21 +2800,6 @@ main (int argc, char **argv)
}
}
- if (color_option == 2)
- color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
- init_colorize ();
-
- if (color_option)
- {
- /* Legacy. */
- char *userval = getenv ("GREP_COLOR");
- if (userval != NULL && *userval != '\0')
- selected_match_color = context_match_color = userval;
-
- /* New GREP_COLORS has priority. */
- parse_grep_colors ();
- }
-
/* POSIX says -c, -l and -q are mutually exclusive. In this
implementation, -q overrides -l and -L, which in turn override -c. */
if (exit_on_match | dev_null_output)
@@ -2809,27 +2816,26 @@ main (int argc, char **argv)
if (out_before < 0)
out_before = default_context;
- if (keys)
- {
- if (keycc == 0)
- {
- /* No keys were specified (e.g. -f /dev/null). Match nothing. */
- out_invert ^= true;
- match_lines = match_words = false;
- }
- else
- /* Strip trailing newline. */
- --keycc;
- }
- else if (optind < argc)
+ /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
+ /dev/null'), fail without reading the input. */
+ if (max_count == 0
+ || (keycc == 0 && out_invert && !match_lines && !match_words))
+ return EXIT_FAILURE;
+
+ if (color_option == 2)
+ color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
+ init_colorize ();
+
+ if (color_option)
{
- /* Make a copy so that it can be reallocated or freed later. */
- keycc = strlen (argv[optind]);
- keys = xmemdup (argv[optind++], keycc + 1);
- fl_add (keys, 0, keycc, "");
+ /* Legacy. */
+ char *userval = getenv ("GREP_COLOR");
+ if (userval != NULL && *userval != '\0')
+ selected_match_color = context_match_color = userval;
+
+ /* New GREP_COLORS has priority. */
+ parse_grep_colors ();
}
- else
- usage (EXIT_TROUBLE);
initialize_unibyte_mask ();
@@ -2866,12 +2872,6 @@ main (int argc, char **argv)
if (O_BINARY && !isatty (STDOUT_FILENO))
set_binary_mode (STDOUT_FILENO, O_BINARY);
- /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
- /dev/null'), fail without reading the input. */
- if (max_count == 0
- || (keycc == 0 && out_invert && !match_lines && !match_words))
- return EXIT_FAILURE;
-
/* Prefer sysconf for page size, as getpagesize typically returns int. */
#ifdef _SC_PAGESIZE
long psize = sysconf (_SC_PAGESIZE);
--
2.7.4
From 5b8900267ff93688fad2a7ab0b25dc57b42ea9e7 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 19 Nov 2016 01:02:08 -0800
Subject: [PATCH 09/10] grep -f /dev/null -L PAT FILE outputs FILE
* NEWS: Document this.
* src/grep.c (main): Do not exit right away with -L.
* tests/skip-read: Test for the fix.
---
NEWS | 2 ++
src/grep.c | 5 +++--
tests/skip-read | 13 +++++++++++--
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/NEWS b/NEWS
index 06a186a..29a0e8d 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,8 @@ GNU grep NEWS -*- outline -*-
more cautious about special patterns like (?-m) and (*FAIL).
[bug introduced in grep-2.23]
+ grep -m0 -L PAT FILE now outputs "FILE". [bug introduced in grep-2.5]
+
grep's use of getprogname no longer causes a build failure on HP-UX.
** Improvements
diff --git a/src/grep.c b/src/grep.c
index 066df8c..a794af4 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2818,8 +2818,9 @@ main (int argc, char **argv)
/* If it is easy to see that matching cannot succeed (e.g., 'grep -f
/dev/null'), fail without reading the input. */
- if (max_count == 0
- || (keycc == 0 && out_invert && !match_lines && !match_words))
+ if ((max_count == 0
+ || (keycc == 0 && out_invert && !match_lines && !match_words))
+ && list_files != LISTFILES_NONMATCHING)
return EXIT_FAILURE;
if (color_option == 2)
diff --git a/tests/skip-read b/tests/skip-read
index 627d362..1eef87e 100755
--- a/tests/skip-read
+++ b/tests/skip-read
@@ -5,11 +5,20 @@
fail=0
+echo /dev/null >exp || framework_failure_
+
for opts in '-m0 y' '-f /dev/null' '-v ""'; do
for matcher in '' -E -F; do
- eval returns_ 1 grep $opts $matcher no-such-file > out || fail=1
- compare /dev/null out || fail=1
+ for file in /dev/null no-such-file; do
+ eval returns_ 1 grep $opts $matcher no-such-file > out || fail=1
+ compare /dev/null out || fail=1
+ eval returns_ 1 grep -l $opts $matcher /dev/null > out || fail=1
+ compare /dev/null out || fail=1
+ done
+ eval returns_ 1 grep -L $opts $matcher /dev/null > out || fail=1
+ compare exp out || fail=1
done
done
+
Exit $fail
--
2.7.4
From f028b70e4eac297ce37b306fbdc18efbf09f2e4a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 19 Nov 2016 01:23:27 -0800
Subject: [PATCH 10/10] tests: use "returns_" rather than "$?"
* tests/grep-dev-null-out: Use "returns_ 124" rather than testing
$? = 124.
---
tests/grep-dev-null-out | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tests/grep-dev-null-out b/tests/grep-dev-null-out
index 16ddadf..13a4843 100755
--- a/tests/grep-dev-null-out
+++ b/tests/grep-dev-null-out
@@ -6,7 +6,6 @@
require_timeout_
${AWK-awk} 'BEGIN {while (1) print "x"}' </dev/null |
- timeout 1 grep x >/dev/null
-test $? -eq 124 || fail=1
+ returns_ 124 timeout 1 grep x >/dev/null || fail=1
Exit $fail
--
2.7.4