Attached are two related 'grep' patches: one prompted by Bug#33552 "Possible bug with handling -I option", and the other by Bug#29668 "grep: Fatal problem with (big) file". Although I'd normally install these on grep master, Jim has started the ball rolling on the next grep release, so I'll cc this to him to see whether these patches can be squeezed in before that release.
>From a20242b5bbe9a31f990919159b8bc8c0a00450d1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Sep 2020 08:54:09 -0700
Subject: [PATCH 1/2] Suppress "Binary file FOO matches" if -I

Problem reported by Jason Franklin (Bug#33552).
* NEWS: Mention this.
* src/grep.c (grep): Do not output "Binary file FOO matches" if -I.
* tests/encoding-error: Add test for this bug.
---
 NEWS                 | 3 +++
 src/grep.c           | 5 +++--
 tests/encoding-error | 9 +++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index a0514d7..8a25eef 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@ GNU grep NEWS                                    -*- outline -*-
 
 ** Bug fixes
 
+  grep -I no longer issues a spurious "Binary file FOO matches" line.
+  [Bug#33552 introduced in grep 2.23]
+
   In UTF-8 locales, grep -w no longer ignores a multibyte word
   constituent just before what would otherwise be a word match.
   [Bug#43225 introduced in grep 2.28]
diff --git a/src/grep.c b/src/grep.c
index d058a76..8c84a0f 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1641,8 +1641,9 @@ grep (int fd, struct stat const *st, bool *ineof)
  finish_grep:
   done_on_match = done_on_match_0;
   out_quiet = out_quiet_0;
-  if (!out_quiet && (encoding_error_output
-                     || (0 <= nlines_first_null && nlines_first_null < nlines)))
+  if (binary_files == BINARY_BINARY_FILES && !out_quiet
+      && (encoding_error_output
+          || (0 <= nlines_first_null && nlines_first_null < nlines)))
     {
       printf_errno (_("Binary file %s matches\n"), input_filename ());
       if (line_buffered)
diff --git a/tests/encoding-error b/tests/encoding-error
index ba78f91..53f77d4 100755
--- a/tests/encoding-error
+++ b/tests/encoding-error
@@ -28,6 +28,10 @@ grep '^P' in >out || fail=1
 printf 'Binary file in matches\n' >exp || framework_failure_
 compare exp out || fail=1
 
+grep -I '^P' in >out 2>err || fail=1
+compare /dev/null out || fail=1
+compare /dev/null err || fail=1
+
 grep '^J' in >out || fail=1
 compare j out || fail=1
 
@@ -38,6 +42,11 @@ grep . in >out || fail=1
 (cat a j && printf 'Binary file in matches\n') >exp || framework_failure_
 compare exp out || fail=1
 
+grep -I . in >out 2>err || fail=1
+cat a j >exp || framework_failure_
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
 grep -a . in >out || fail=1
 compare in out
 
-- 
2.25.4

>From 2d2d1bebbaafccc39c22de776fa71a4c3059cf26 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 17 Sep 2020 07:54:38 -0700
Subject: [PATCH 2/2] Send "Binary file FOO matches" to stderr

* NEWS, doc/grep.texi: Mention this change (Bug#29668).
* src/grep.c (grep): Send "Binary file FOO matches" to stderr
instead of stdout.
* tests/encoding-error, tests/invalid-multibyte-infloop:
* tests/null-byte, tests/pcre-count, tests/surrogate-pair:
* tests/symlink, tests/unibyte-binary:
Adjust tests to match new behavior.  In all cases this
simplifies the tests, which is a good sign.
---
 NEWS                            | 8 ++++++++
 doc/grep.texi                   | 3 ++-
 src/grep.c                      | 8 ++------
 tests/encoding-error            | 5 ++---
 tests/invalid-multibyte-infloop | 4 +---
 tests/null-byte                 | 2 +-
 tests/pcre-count                | 5 ++---
 tests/surrogate-pair            | 9 ++++-----
 tests/symlink                   | 6 +-----
 tests/unibyte-binary            | 2 +-
 10 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/NEWS b/NEWS
index 8a25eef..fd08663 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,14 @@ GNU grep NEWS                                    -*- outline -*-
 
 ** Changes in behavior
 
+  The "Binary file FOO matches" message is now sent to standard error
+  and FOO is now quoted, to avoid confusion with ordinary output and
+  when file names contain spaces and the like.  For example, commands
+  like 'grep PATTERN FILE | wc' no longer add 1 to the count of
+  matching text lines due to the presence of the message.  Like other
+  stderr messages, the message is now omitted if the --no-messages
+  (-s) option is given.
+
   The --files-without-match (-L) option has reverted to its behavior
   in grep 3.1 and earlier.  That is, grep -L again succeeds when a
   line is selected, not when a file is listed.  The behavior in grep
diff --git a/doc/grep.texi b/doc/grep.texi
index 02b1968..a680d39 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -1754,7 +1754,8 @@ to output lines even from files that appear to be binary, use the
 @option{-a} or @samp{--binary-files=text} option.
 To eliminate the
 ``Binary file matches'' messages, use the @option{-I} or
-@samp{--binary-files=without-match} option.
+@samp{--binary-files=without-match} option,
+or the @option{-s} or @option{--no-messages} option.
 
 @item
 Why doesn't @samp{grep -lv} print non-matching file names?
diff --git a/src/grep.c b/src/grep.c
index 8c84a0f..0a0c71f 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1641,14 +1641,10 @@ grep (int fd, struct stat const *st, bool *ineof)
  finish_grep:
   done_on_match = done_on_match_0;
   out_quiet = out_quiet_0;
-  if (binary_files == BINARY_BINARY_FILES && !out_quiet
+  if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
       && (encoding_error_output
           || (0 <= nlines_first_null && nlines_first_null < nlines)))
-    {
-      printf_errno (_("Binary file %s matches\n"), input_filename ());
-      if (line_buffered)
-        fflush_errno ();
-    }
+    error (0, 0, _("Binary file %s matches"), quote (input_filename ()));
   return nlines;
 }
 
diff --git a/tests/encoding-error b/tests/encoding-error
index 53f77d4..61d161a 100755
--- a/tests/encoding-error
+++ b/tests/encoding-error
@@ -25,8 +25,7 @@ grep '^A' in >out || fail=1
 compare a out || fail=1
 
 grep '^P' in >out || fail=1
-printf 'Binary file in matches\n' >exp || framework_failure_
-compare exp out || fail=1
+compare /dev/null out || fail=1
 
 grep -I '^P' in >out 2>err || fail=1
 compare /dev/null out || fail=1
@@ -39,7 +38,7 @@ returns_ 1 grep '^X' in >out || fail=1
 compare /dev/null out || fail=1
 
 grep . in >out || fail=1
-(cat a j && printf 'Binary file in matches\n') >exp || framework_failure_
+cat a j >exp || framework_failure_
 compare exp out || fail=1
 
 grep -I . in >out 2>err || fail=1
diff --git a/tests/invalid-multibyte-infloop b/tests/invalid-multibyte-infloop
index 5b3bdfc..b4ad14b 100755
--- a/tests/invalid-multibyte-infloop
+++ b/tests/invalid-multibyte-infloop
@@ -24,12 +24,10 @@ else
   test $status -eq 2
 fi || fail=1
 
-echo 'Binary file input matches' >binary-file-matches
-
 LC_ALL=en_US.UTF-8 timeout 10 grep -F $(encode A) input > out
 status=$?
 if test $status -eq 0; then
-  compare binary-file-matches out
+  compare /dev/null out
 elif test $status -eq 1; then
   compare_dev_null_ /dev/null out
 else
diff --git a/tests/null-byte b/tests/null-byte
index f338883..c59f56b 100755
--- a/tests/null-byte
+++ b/tests/null-byte
@@ -56,7 +56,7 @@ echo xxx >exp || framework_failure_
 grep xxx in >out || fail=1
 compare exp out || fail=1
 
-printf '%s\n' xxx 'Binary file in matches' > exp || framework_failure_
+printf 'xxx\n' > exp || framework_failure_
 grep -E 'xxx|z' in >out || fail=1
 compare exp out || fail=1
 
diff --git a/tests/pcre-count b/tests/pcre-count
index e4b55b7..e95ced8 100755
--- a/tests/pcre-count
+++ b/tests/pcre-count
@@ -17,10 +17,9 @@ printf 'a\n%032768d\nb\0\n%032768d\na\n' 0 0 > in || framework_failure_
 
 # grep will discover that the input is a binary file sooner if the
 # page size is larger, so allow for either possible output.
-printf 'a\nBinary file in matches\n' >exp1a || framework_failure_
-printf 'Binary file in matches\n' >exp1b || framework_failure_
+printf 'a\n' >exp1a || framework_failure_
 LC_ALL=C grep -P 'a' in >out || fail=1
-compare exp1a out || compare exp1b out || fail=1
+compare exp1a out || compare /dev/null out || fail=1
 
 printf '2\n' >exp2 || framework_failure_
 LC_ALL=C grep -Pc 'a' in >out || fail=1
diff --git a/tests/surrogate-pair b/tests/surrogate-pair
index d4f0c0b..7066cf4 100755
--- a/tests/surrogate-pair
+++ b/tests/surrogate-pair
@@ -25,10 +25,6 @@ fail=0
 
 printf '\360\220\220\205\n' > in || framework_failure_
 
-# On platforms where wchar_t is only 16 bits, wchar_t cannot represent
-# the character encoded in 'in', so accept that behavior too.
-printf 'Binary file in matches\n' > out16 || framework_failure_
-
 LC_ALL=en_US.UTF-8
 export LC_ALL
 
@@ -41,7 +37,10 @@ compare /dev/null out || fail=1
 # Also test whether a surrogate-pair in the search string works.
 for opt in '' -i -E -F -iE -iF; do
   grep --file=in $opt in > out 2>&1 || fail=1
-  compare out in || compare out out16 || fail=1
+
+  # On platforms where wchar_t is only 16 bits, wchar_t cannot represent
+  # the character encoded in 'in', so accept that behavior too.
+  compare out in || compare /dev/null out || fail=1
 done
 
 Exit $fail
diff --git a/tests/symlink b/tests/symlink
index 325d9e6..7a5d558 100755
--- a/tests/symlink
+++ b/tests/symlink
@@ -58,11 +58,7 @@ do
 
     printf "$exp" >exp || framework_failure_
 
-    LC_ALL=C sort grepout >out-t || fail=1
-
-    # Ignore "Binary file d matches" on systems for which
-    # reading from a directory actually succeeds.
-    grep -v Binary out-t > out; case $? in 0|1) ;; *) fail=1;; esac
+    LC_ALL=C sort grepout >out || fail=1
 
     compare exp out || fail=1
   done
diff --git a/tests/unibyte-binary b/tests/unibyte-binary
index 6fcfd66..8ae0f0f 100755
--- a/tests/unibyte-binary
+++ b/tests/unibyte-binary
@@ -22,7 +22,7 @@ require_unibyte_locale
 fail=0
 
 printf 'a\n\200\nb\n' >in || framework_failure_
-printf 'a\nBinary file in matches\n' >exp || framework_failure_
+printf 'a\n' >exp || framework_failure_
 grep . in >out || fail=1
 
 # In some unibyte locales, \200 is an encoding error;
-- 
2.25.4

Reply via email to