bug#22655: grep-2.21 (and git master): --null-data and ranges work in an odd way (-P works fine)

Jim Meyering Sat, 20 Feb 2016 20:24:39 -0800

On Sat, Feb 20, 2016 at 8:19 PM, Jim Meyering <j...@meyering.net> wrote:
...
>> The bug also present with PCRE engine:
>>
>>    $ printf '12\n34\0' | LC_ALL=en_US.utf-8 grep -z -P '^[1234]*$' | wc -c
>>    6
>>    $ printf '12\n34\0' | LC_ALL=en_US.utf-8 grep -z -P '^[1-4]*$' | wc -c
>>    6
>
> Thank you for the analysis and the report.
> I have fixed the regex-oriented problem with the attached
> patch, but not yet the case using -P -z (PCRE + --null-data):


FTR, I've also pushed the attached test-improving patch:

From 121d63666fed298769a19354e517fd9a7190a3a2 Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyer...@fb.com>
Date: Sat, 20 Feb 2016 17:19:44 -0800
Subject: [PATCH 1/2] tests: convert "cmd && fail=1" to "returns_ 1 cmd ||
 fail=1"

The latter is robust, while the former can silently ignore
failure due to signals.
* cfg.mk (sc_prohibit_and_fail_1): New rule, copied from coreutils.
* tests/long-pattern-perf: Perform the above substitution.
* tests/mb-non-UTF8-performance: Likewise.
* tests/help-version: Merge from coreutils.
---
 cfg.mk                        | 11 +++++++++
 tests/help-version            | 53 ++++++++++++++++++++-----------------------
 tests/long-pattern-perf       |  2 +-
 tests/mb-non-UTF8-performance |  2 +-
 4 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/cfg.mk b/cfg.mk
index 9c80b2a..3f4ef32 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -120,6 +120,17 @@ sc_THANKS_in_duplicates:
 	    && { echo '$(ME): remove the above names from THANKS.in'	\
 		  1>&2; exit 1; } || :

+# Ensure that tests don't use `cmd ... && fail=1` as that hides crashes.
+# The "exclude" expression allows common idioms like `test ... && fail=1`
+# and the 2>... portion allows commands that redirect stderr and so probably
+# independently check its contents and thus detect any crash messages.
+sc_prohibit_and_fail_1:
+	@prohibit='&& fail=1'						\
+	exclude='(stat|kill|test |EGREP|grep|compare|2> *[^/])'		\
+	halt='&& fail=1 detected. Please use: returns_ 1 ... || fail=1'	\
+	in_vc_files='^tests/'						\
+	  $(_sc_search_regexp)
+
 update-copyright-env = \
   UPDATE_COPYRIGHT_USE_INTERVALS=1 \
   UPDATE_COPYRIGHT_MAX_LINE_LENGTH=79
diff --git a/tests/help-version b/tests/help-version
index 59ecaeb..9d9b147 100755
--- a/tests/help-version
+++ b/tests/help-version
@@ -1,5 +1,5 @@
-#! /bin/sh
-# Make sure all these programs work properly
+#!/bin/sh
+# Make sure all of these programs work properly
 # when invoked with --help or --version.

 # Copyright (C) 2000-2016 Free Software Foundation, Inc.
@@ -17,20 +17,16 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-# Ensure that $SHELL is set to *some* value and exported.
-# This is required for dircolors, which would fail e.g., when
-# invoked via debuild (which removes SHELL from the environment).
-test "x$SHELL" = x && SHELL=/bin/sh
-export SHELL
-
 . "${srcdir=.}/init.sh"; path_prepend_ ../src

+# Terminate any background processes
+cleanup_() { kill $pid 2>/dev/null && wait $pid; }
+
 expected_failure_status_chroot=125
 expected_failure_status_env=125
 expected_failure_status_nice=125
 expected_failure_status_nohup=125
 expected_failure_status_stdbuf=125
-expected_failure_status_su=125
 expected_failure_status_timeout=125
 expected_failure_status_printenv=2
 expected_failure_status_tty=3
@@ -75,14 +71,11 @@ for lang in C fr da; do
   for i in $built_programs; do

     # Skip 'test'; it doesn't accept --help or --version.
-    test $i = test && continue;
+    test $i = test && continue

     # false fails even when invoked with --help or --version.
-    if test $i = false; then
-      env LC_MESSAGES=$lang $i --help >/dev/null && fail=1
-      env LC_MESSAGES=$lang $i --version >/dev/null && fail=1
-      continue
-    fi
+    # true and false are tested with these options separately.
+    test $i = false || test $i = true && continue

     # The just-built install executable is always named 'ginstall'.
     test $i = install && i=ginstall
@@ -97,19 +90,19 @@ for lang in C fr da; do

     # Make sure they fail upon 'disk full' error.
     if test -w /dev/full && test -c /dev/full; then
-      env $i --help    >/dev/full 2>/dev/null && fail=1
-      env $i --version >/dev/full 2>/dev/null && fail=1
-      status=$?
-      test $i = [ && prog=lbracket || prog=$i
+      test $i = [ && prog=lbracket || prog=$(echo $i|sed "s/$EXEEXT$//")
       eval "expected=\$expected_failure_status_$prog"
       test x$expected = x && expected=1
-      if test $status = $expected; then
-        : # ok
-      else
+
+      returns_ $expected env $i --help    >/dev/full 2>/dev/null &&
+      returns_ $expected env $i --version >/dev/full 2>/dev/null ||
+      {
         fail=1
+        env $i --help >/dev/full 2>/dev/null
+        status=$?
         echo "*** $i: bad exit status '$status' (expected $expected)," 1>&2
         echo "  with --help or --version output redirected to /dev/full" 1>&2
-      fi
+      }
     fi
   done
 done
@@ -177,6 +170,7 @@ ln_setup () { args="$tmp_in ln-target"; }
 ginstall_setup () { args="$tmp_in $tmp_in2"; }
 mv_setup () { args="$tmp_in $tmp_in2"; }
 mkdir_setup () { args=$tmp_dir/subdir; }
+realpath_setup () { args=$tmp_in; }
 rmdir_setup () { args=$tmp_dir; }
 rm_setup () { args=$tmp_in; }
 shred_setup () { args=$tmp_in; }
@@ -207,7 +201,6 @@ nohup_setup () { args=--version; }
 printf_setup () { args=foo; }
 seq_setup () { args=10; }
 sleep_setup () { args=0; }
-su_setup () { args=--version; }
 stdbuf_setup () { args="-oL true"; }
 timeout_setup () { args=--version; }

@@ -226,8 +219,9 @@ id_setup () { args=-u; }

 # Use env to avoid invoking built-in sleep of Solaris 11's /bin/sh.
 kill_setup () {
-  env sleep 10m &
-  args=$!
+  external=env
+  $external sleep 10m & pid=$!
+  args=$pid
 }

 link_setup () { args="$tmp_in link-target"; }
@@ -242,11 +236,14 @@ stat_setup () { args=$tmp_in; }
 unlink_setup () { args=$tmp_in; }
 lbracket_setup () { args=": ]"; }

+parted_setup () { args="-s $tmp_in mklabel gpt"
+  dd if=/dev/null of=$tmp_in seek=2000; }
+
 # Ensure that each program "works" (exits successfully) when doing
 # something more than --help or --version.
 for i in $built_programs; do
   # Skip these.
-  case $i in chroot|stty|tty|false|chcon|runcon) continue;; esac
+  case $i in chroot|stty|tty|false|chcon|runcon|coreutils) continue;; esac

   rm -rf $tmp_in $tmp_in2 $tmp_dir $tmp_out $bigZ_in $zin $zin2
   echo z |gzip > $zin
@@ -261,7 +258,7 @@ for i in $built_programs; do
   cp $tmp_in $tmp_in2
   mkdir $tmp_dir
   # echo ================== $i
-  test $i = [ && prog=lbracket || prog=$i
+  test $i = [ && prog=lbracket || prog=$(echo $i|sed "s/$EXEEXT$//")
   if type ${prog}_setup > /dev/null 2>&1; then
     ${prog}_setup
   else
diff --git a/tests/long-pattern-perf b/tests/long-pattern-perf
index b88c03c..0548f34 100755
--- a/tests/long-pattern-perf
+++ b/tests/long-pattern-perf
@@ -38,6 +38,6 @@ b10x_ms=$(user_time_ 1 grep -f re-10x in) || fail=1
 # Increasing the length of the regular expression by a factor
 # of 10 should cause no more than a 10x increase in duration.
 # However, we'll draw the line at 20x to avoid false-positives.
-expr $base_ms '<' $b10x_ms / 20 && fail=1
+returns_ 1 expr $base_ms '<' $b10x_ms / 20 || fail=1

 Exit $fail
diff --git a/tests/mb-non-UTF8-performance b/tests/mb-non-UTF8-performance
index 3f9209c..11d0cf7 100755
--- a/tests/mb-non-UTF8-performance
+++ b/tests/mb-non-UTF8-performance
@@ -43,6 +43,6 @@ mbyte_ms=$(user_time_ 1 grep -i foobar in) || fail=1
 # The duration of the multi-byte run must be no more than 30 times
 # that of the single-byte one.
 # A multiple of 3 seems to be enough for i5,i7, but AMD needs >25.
-expr $ubyte_ms '<' $mbyte_ms / 30 && fail=1
+returns_ 1 expr $ubyte_ms '<' $mbyte_ms / 30 || fail=1

 Exit $fail
-- 
2.6.4

bug#22655: grep-2.21 (and git master): --null-data and ranges work in an odd way (-P works fine)

Reply via email to