Attached are two related 'grep' patches: one prompted by Bug#33552 ("Possible
bug with handling -I option") and the other by Bug#29668 ("grep: Fatal problem
with (big) file"). Although I'd normally install these on grep master, Jim has
started the ball rolling on the next grep release, so I'll cc this to him to see
whether these patches can be squeezed in before that release.
From a20242b5bbe9a31f990919159b8bc8c0a00450d1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Sep 2020 08:54:09 -0700
Subject: [PATCH 1/2] Suppress "Binary file FOO matches" if -I
Problem reported by Jason Franklin (Bug#33552).
* NEWS: Mention this.
* src/grep.c (grep): Do not output "Binary file FOO matches" if -I.
* tests/encoding-error: Add test for this bug.
---
NEWS | 3 +++
src/grep.c | 5 +++--
tests/encoding-error | 9 +++++++++
3 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/NEWS b/NEWS
index a0514d7..8a25eef 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
+ grep -I no longer issues a spurious "Binary file FOO matches" line.
+ [Bug#33552 introduced in grep 2.23]
+
In UTF-8 locales, grep -w no longer ignores a multibyte word
constituent just before what would otherwise be a word match.
[Bug#43225 introduced in grep 2.28]
diff --git a/src/grep.c b/src/grep.c
index d058a76..8c84a0f 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1641,8 +1641,9 @@ grep (int fd, struct stat const *st, bool *ineof)
finish_grep:
done_on_match = done_on_match_0;
out_quiet = out_quiet_0;
- if (!out_quiet && (encoding_error_output
- || (0 <= nlines_first_null && nlines_first_null < nlines)))
+ if (binary_files == BINARY_BINARY_FILES && !out_quiet
+ && (encoding_error_output
+ || (0 <= nlines_first_null && nlines_first_null < nlines)))
{
printf_errno (_("Binary file %s matches\n"), input_filename ());
if (line_buffered)
diff --git a/tests/encoding-error b/tests/encoding-error
index ba78f91..53f77d4 100755
--- a/tests/encoding-error
+++ b/tests/encoding-error
@@ -28,6 +28,10 @@ grep '^P' in >out || fail=1
printf 'Binary file in matches\n' >exp || framework_failure_
compare exp out || fail=1
+grep -I '^P' in >out 2>err || fail=1
+compare /dev/null out || fail=1
+compare /dev/null err || fail=1
+
grep '^J' in >out || fail=1
compare j out || fail=1
@@ -38,6 +42,11 @@ grep . in >out || fail=1
(cat a j && printf 'Binary file in matches\n') >exp || framework_failure_
compare exp out || fail=1
+grep -I . in >out 2>err || fail=1
+cat a j >exp || framework_failure_
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
grep -a . in >out || fail=1
compare in out
--
2.25.4
From 2d2d1bebbaafccc39c22de776fa71a4c3059cf26 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Sep 2020 07:54:38 -0700
Subject: [PATCH 2/2] Send "Binary file FOO matches" to stderr
* NEWS, doc/grep.texi: Mention this change (Bug#29668).
* src/grep.c (grep): Send "Binary file FOO matches" to stderr
instead of stdout.
* tests/encoding-error, tests/invalid-multibyte-infloop:
* tests/null-byte, tests/pcre-count, tests/surrogate-pair:
* tests/symlink, tests/unibyte-binary:
Adjust tests to match new behavior. In all cases this
simplifies the tests, which is a good sign.
---
NEWS | 8 ++++++++
doc/grep.texi | 3 ++-
src/grep.c | 8 ++------
tests/encoding-error | 5 ++---
tests/invalid-multibyte-infloop | 4 +---
tests/null-byte | 2 +-
tests/pcre-count | 5 ++---
tests/surrogate-pair | 9 ++++-----
tests/symlink | 6 +-----
tests/unibyte-binary | 2 +-
10 files changed, 24 insertions(+), 28 deletions(-)
diff --git a/NEWS b/NEWS
index 8a25eef..fd08663 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,14 @@ GNU grep NEWS -*- outline -*-
** Changes in behavior
+ The "Binary file FOO matches" message is now sent to standard error
+ and FOO is now quoted, to avoid confusion with ordinary output and
+ when file names contain spaces and the like. For example, commands
+ like 'grep PATTERN FILE | wc' no longer add 1 to the count of
+ matching text lines due to the presence of the message. Like other
+ stderr messages, the message is now omitted if the --no-messages
+ (-s) option is given.
+
The --files-without-match (-L) option has reverted to its behavior
in grep 3.1 and earlier. That is, grep -L again succeeds when a
line is selected, not when a file is listed. The behavior in grep
diff --git a/doc/grep.texi b/doc/grep.texi
index 02b1968..a680d39 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -1754,7 +1754,8 @@ to output lines even from files that appear to be binary, use the
@option{-a} or @samp{--binary-files=text} option.
To eliminate the
``Binary file matches'' messages, use the @option{-I} or
-@samp{--binary-files=without-match} option.
+@samp{--binary-files=without-match} option,
+or the @option{-s} or @option{--no-messages} option.
@item
Why doesn't @samp{grep -lv} print non-matching file names?
diff --git a/src/grep.c b/src/grep.c
index 8c84a0f..0a0c71f 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1641,14 +1641,10 @@ grep (int fd, struct stat const *st, bool *ineof)
finish_grep:
done_on_match = done_on_match_0;
out_quiet = out_quiet_0;
- if (binary_files == BINARY_BINARY_FILES && !out_quiet
+ if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
&& (encoding_error_output
|| (0 <= nlines_first_null && nlines_first_null < nlines)))
- {
- printf_errno (_("Binary file %s matches\n"), input_filename ());
- if (line_buffered)
- fflush_errno ();
- }
+ error (0, 0, _("Binary file %s matches"), quote (input_filename ()));
return nlines;
}
diff --git a/tests/encoding-error b/tests/encoding-error
index 53f77d4..61d161a 100755
--- a/tests/encoding-error
+++ b/tests/encoding-error
@@ -25,8 +25,7 @@ grep '^A' in >out || fail=1
compare a out || fail=1
grep '^P' in >out || fail=1
-printf 'Binary file in matches\n' >exp || framework_failure_
-compare exp out || fail=1
+compare /dev/null out || fail=1
grep -I '^P' in >out 2>err || fail=1
compare /dev/null out || fail=1
@@ -39,7 +38,7 @@ returns_ 1 grep '^X' in >out || fail=1
compare /dev/null out || fail=1
grep . in >out || fail=1
-(cat a j && printf 'Binary file in matches\n') >exp || framework_failure_
+cat a j >exp || framework_failure_
compare exp out || fail=1
grep -I . in >out 2>err || fail=1
diff --git a/tests/invalid-multibyte-infloop b/tests/invalid-multibyte-infloop
index 5b3bdfc..b4ad14b 100755
--- a/tests/invalid-multibyte-infloop
+++ b/tests/invalid-multibyte-infloop
@@ -24,12 +24,10 @@ else
test $status -eq 2
fi || fail=1
-echo 'Binary file input matches' >binary-file-matches
-
LC_ALL=en_US.UTF-8 timeout 10 grep -F $(encode A) input > out
status=$?
if test $status -eq 0; then
- compare binary-file-matches out
+ compare /dev/null out
elif test $status -eq 1; then
compare_dev_null_ /dev/null out
else
diff --git a/tests/null-byte b/tests/null-byte
index f338883..c59f56b 100755
--- a/tests/null-byte
+++ b/tests/null-byte
@@ -56,7 +56,7 @@ echo xxx >exp || framework_failure_
grep xxx in >out || fail=1
compare exp out || fail=1
-printf '%s\n' xxx 'Binary file in matches' > exp || framework_failure_
+printf 'xxx\n' > exp || framework_failure_
grep -E 'xxx|z' in >out || fail=1
compare exp out || fail=1
diff --git a/tests/pcre-count b/tests/pcre-count
index e4b55b7..e95ced8 100755
--- a/tests/pcre-count
+++ b/tests/pcre-count
@@ -17,10 +17,9 @@ printf 'a\n%032768d\nb\0\n%032768d\na\n' 0 0 > in || framework_failure_
# grep will discover that the input is a binary file sooner if the
# page size is larger, so allow for either possible output.
-printf 'a\nBinary file in matches\n' >exp1a || framework_failure_
-printf 'Binary file in matches\n' >exp1b || framework_failure_
+printf 'a\n' >exp1a || framework_failure_
LC_ALL=C grep -P 'a' in >out || fail=1
-compare exp1a out || compare exp1b out || fail=1
+compare exp1a out || compare /dev/null out || fail=1
printf '2\n' >exp2 || framework_failure_
LC_ALL=C grep -Pc 'a' in >out || fail=1
diff --git a/tests/surrogate-pair b/tests/surrogate-pair
index d4f0c0b..7066cf4 100755
--- a/tests/surrogate-pair
+++ b/tests/surrogate-pair
@@ -25,10 +25,6 @@ fail=0
printf '\360\220\220\205\n' > in || framework_failure_
-# On platforms where wchar_t is only 16 bits, wchar_t cannot represent
-# the character encoded in 'in', so accept that behavior too.
-printf 'Binary file in matches\n' > out16 || framework_failure_
-
LC_ALL=en_US.UTF-8
export LC_ALL
@@ -41,7 +37,10 @@ compare /dev/null out || fail=1
# Also test whether a surrogate-pair in the search string works.
for opt in '' -i -E -F -iE -iF; do
grep --file=in $opt in > out 2>&1 || fail=1
- compare out in || compare out out16 || fail=1
+
+ # On platforms where wchar_t is only 16 bits, wchar_t cannot represent
+ # the character encoded in 'in', so accept that behavior too.
+ compare out in || compare /dev/null out || fail=1
done
Exit $fail
diff --git a/tests/symlink b/tests/symlink
index 325d9e6..7a5d558 100755
--- a/tests/symlink
+++ b/tests/symlink
@@ -58,11 +58,7 @@ do
printf "$exp" >exp || framework_failure_
- LC_ALL=C sort grepout >out-t || fail=1
-
- # Ignore "Binary file d matches" on systems for which
- # reading from a directory actually succeeds.
- grep -v Binary out-t > out; case $? in 0|1) ;; *) fail=1;; esac
+ LC_ALL=C sort grepout >out || fail=1
compare exp out || fail=1
done
diff --git a/tests/unibyte-binary b/tests/unibyte-binary
index 6fcfd66..8ae0f0f 100755
--- a/tests/unibyte-binary
+++ b/tests/unibyte-binary
@@ -22,7 +22,7 @@ require_unibyte_locale
fail=0
printf 'a\n\200\nb\n' >in || framework_failure_
-printf 'a\nBinary file in matches\n' >exp || framework_failure_
+printf 'a\n' >exp || framework_failure_
grep . in >out || fail=1
# In some unibyte locales, \200 is an encoding error;
--
2.25.4