Stephane Chazelas wrote:
> 2.25 was OK. git bisect points to commit
> 2769d5331a38d623b67b1860ac46b39ff7e54aca

Thanks for pinpointing the bug. It was my logic error in that commit. Fixed by altering Gnulib as follows:

http://lists.gnu.org/archive/html/bug-gnulib/2016-11/msg00086.html

and by installing the attached patches into grep.
From 00a6d71259ba8432db7eaa2729d215858c4c0cb3 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 20 Nov 2016 20:21:06 -0800
Subject: [PATCH 1/2] build: update gnulib submodule to latest

---
 gnulib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnulib b/gnulib
index 3c72272..60e8ffc 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 3c72272268021349cbc9a442fe033e7ba13a0c17
+Subproject commit 60e8ffca02dd4eac3a87b744f4f9ef68f3dffa35
-- 
2.7.4

From ed6228198180fedc728a4e2981939fa0c902bbf3 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 20 Nov 2016 20:31:01 -0800
Subject: [PATCH 2/2] tests: check for unibyte French range bug

Problem reported by Stephane Chazelas (Bug#24973).
This bug was fixed in Gnulib.
* NEWS: Document the fix.
* tests/init.cfg (require_ru_RU_koi8_r): Remove.
* tests/unibyte-bracket-expr: Add a test for the bug.
Call get-mb-cur-max directly instead of bothering with
require_ru_RU_koi8_r.
---
 NEWS                       |  3 +++
 tests/init.cfg             |  9 -------
 tests/unibyte-bracket-expr | 58 ++++++++++++++++++++++++++++------------------
 3 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/NEWS b/NEWS
index 6138b48..bd1a201 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,9 @@ GNU grep NEWS                                    -*- outline -*-
   >/dev/null" where PROGRAM dies when writing into a broken pipe.
   [bug introduced in grep-2.26]
 
+  grep no longer mishandles ranges in nontrivial unibyte locales.
+  [bug introduced in grep-2.26]
+
   grep -P no longer attempts multiline matches.  This works more
   intuitively with unusual patterns, and means that grep -Pz no longer
   rejects patterns containing ^ and $ and works when combined with -x.
diff --git a/tests/init.cfg b/tests/init.cfg
index 1677ec5..6c7abd2 100644
--- a/tests/init.cfg
+++ b/tests/init.cfg
@@ -74,15 +74,6 @@ require_tr_utf8_locale_()
   esac
 }
 
-require_ru_RU_koi8_r()
-{
-  path_prepend_ .
-  case $(get-mb-cur-max ru_RU.KOI8-R) in
-    1) ;;
-    *) skip_ 'ru_RU.KOI8-R locale not found' ;;
-  esac
-}
-
 require_compiled_in_MB_support()
 {
   require_en_utf8_locale_
diff --git a/tests/unibyte-bracket-expr b/tests/unibyte-bracket-expr
index 68c475c..85aff1c 100755
--- a/tests/unibyte-bracket-expr
+++ b/tests/unibyte-bracket-expr
@@ -1,9 +1,4 @@
 #!/bin/sh
-# Exercise a DFA range bug that arises only with a unibyte encoding
-# for which the wide-char-to-single-byte mapping is nontrivial.
-# E.g., the regexp, [C] would fail to match C in a unibyte locale like
-# ru_RU.KOI8-R for any C whose wide-char representation differed from
-# its single-byte equivalent.
 
 # Copyright (C) 2011-2016 Free Software Foundation, Inc.
 
@@ -21,23 +16,42 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 . "${srcdir=.}/init.sh"; path_prepend_ ../src
-require_ru_RU_koi8_r
-LC_ALL=ru_RU.KOI8-R
-export LC_ALL
-
-fail=0
-
-i=128
-while :; do
-  in=in-$i
-  octal=$(printf '%03o' $i)
-  b=$(printf "\\$octal")
-  echo "$b" > $in || framework_failure_
-  grep "[$b]" $in > out || fail=1
-  compare out $in || fail=1
-
-  test $i = 255 && break
-  i=$(expr $i + 1)
+
+# Add "." to PATH for the use of get-mb-cur-max.
+path_prepend_ .
+
+# Exercise a DFA range bug that arises only with a unibyte encoding
+# for which the wide-char-to-single-byte mapping is nontrivial.
+# E.g., the regexp, [C] would fail to match C in a unibyte locale like
+# ru_RU.KOI8-R for any C whose wide-char representation differed from
+# its single-byte equivalent.
+
+case $(get-mb-cur-max ru_RU.KOI8-R) in
+  1)
+    fail=0
+
+    i=128
+    while :; do
+      in=in-$i
+      octal=$(printf '%03o' $i)
+      b=$(printf "\\$octal")
+      echo "$b" > $in || framework_failure_
+      LC_ALL=ru_RU.KOI8-R grep "[$b]" $in > out || fail=1
+      compare out $in || fail=1
+
+      test $i = 255 && break
+      i=$(expr $i + 1)
+    done;;
+esac
+
+# Exercise a DFA range bug where '[d-f]' did not match accented 'e' in a
+# unibyte French locale.
+
+for locale in fr_FR.iso88591 fr_FR.iso885915@euro fr_FR.ISO8859-1; do
+  case $(get-mb-cur-max $locale) in
+    1)
+       printf '\351\n' | LC_ALL=$locale grep '[d-f]' || fail=1;;
+  esac
 done
 
 Exit $fail
-- 
2.7.4

Reply via email to