Stephane Chazelas wrote:
> 2.25 was OK. git bisect points to commit 2769d5331a38d623b67b1860ac46b39ff7e54aca
Thanks for pinpointing the bug. It was caused by my logic error in that commit. I fixed it by altering Gnulib as follows:
http://lists.gnu.org/archive/html/bug-gnulib/2016-11/msg00086.html
and by installing the attached patches into grep.
>From 00a6d71259ba8432db7eaa2729d215858c4c0cb3 Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Sun, 20 Nov 2016 20:21:06 -0800 Subject: [PATCH 1/2] build: update gnulib submodule to latest --- gnulib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnulib b/gnulib index 3c72272..60e8ffc 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 3c72272268021349cbc9a442fe033e7ba13a0c17 +Subproject commit 60e8ffca02dd4eac3a87b744f4f9ef68f3dffa35 -- 2.7.4
>From ed6228198180fedc728a4e2981939fa0c902bbf3 Mon Sep 17 00:00:00 2001 From: Paul Eggert <egg...@cs.ucla.edu> Date: Sun, 20 Nov 2016 20:31:01 -0800 Subject: [PATCH 2/2] tests: check for unibyte French range bug Problem reported by Stephane Chazelas (Bug#24973). This bug was fixed in Gnulib. * NEWS: Document the fix. * tests/init.cfg (require_ru_RU_koi8_r): Remove. * tests/unibyte-bracket-expr: Add a test for the bug. Call get-mb-cur-max directly instead of bothering with require_ru_RU_koi8_r. --- NEWS | 3 +++ tests/init.cfg | 9 ------- tests/unibyte-bracket-expr | 58 ++++++++++++++++++++++++++++------------------ 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/NEWS b/NEWS index 6138b48..bd1a201 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,9 @@ GNU grep NEWS -*- outline -*- >/dev/null" where PROGRAM dies when writing into a broken pipe. [bug introduced in grep-2.26] + grep no longer mishandles ranges in nontrivial unibyte locales. + [bug introduced in grep-2.26] + grep -P no longer attempts multiline matches. This works more intuitively with unusual patterns, and means that grep -Pz no longer rejects patterns containing ^ and $ and works when combined with -x. diff --git a/tests/init.cfg b/tests/init.cfg index 1677ec5..6c7abd2 100644 --- a/tests/init.cfg +++ b/tests/init.cfg @@ -74,15 +74,6 @@ require_tr_utf8_locale_() esac } -require_ru_RU_koi8_r() -{ - path_prepend_ . - case $(get-mb-cur-max ru_RU.KOI8-R) in - 1) ;; - *) skip_ 'ru_RU.KOI8-R locale not found' ;; - esac -} - require_compiled_in_MB_support() { require_en_utf8_locale_ diff --git a/tests/unibyte-bracket-expr b/tests/unibyte-bracket-expr index 68c475c..85aff1c 100755 --- a/tests/unibyte-bracket-expr +++ b/tests/unibyte-bracket-expr @@ -1,9 +1,4 @@ #!/bin/sh -# Exercise a DFA range bug that arises only with a unibyte encoding -# for which the wide-char-to-single-byte mapping is nontrivial. 
-# E.g., the regexp, [C] would fail to match C in a unibyte locale like -# ru_RU.KOI8-R for any C whose wide-char representation differed from -# its single-byte equivalent. # Copyright (C) 2011-2016 Free Software Foundation, Inc. @@ -21,23 +16,42 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. . "${srcdir=.}/init.sh"; path_prepend_ ../src -require_ru_RU_koi8_r -LC_ALL=ru_RU.KOI8-R -export LC_ALL - -fail=0 - -i=128 -while :; do - in=in-$i - octal=$(printf '%03o' $i) - b=$(printf "\\$octal") - echo "$b" > $in || framework_failure_ - grep "[$b]" $in > out || fail=1 - compare out $in || fail=1 - - test $i = 255 && break - i=$(expr $i + 1) + +# Add "." to PATH for the use of get-mb-cur-max. +path_prepend_ . + +# Exercise a DFA range bug that arises only with a unibyte encoding +# for which the wide-char-to-single-byte mapping is nontrivial. +# E.g., the regexp, [C] would fail to match C in a unibyte locale like +# ru_RU.KOI8-R for any C whose wide-char representation differed from +# its single-byte equivalent. + +case $(get-mb-cur-max ru_RU.KOI8-R) in + 1) + fail=0 + + i=128 + while :; do + in=in-$i + octal=$(printf '%03o' $i) + b=$(printf "\\$octal") + echo "$b" > $in || framework_failure_ + LC_ALL=ru_RU.KOI8-R grep "[$b]" $in > out || fail=1 + compare out $in || fail=1 + + test $i = 255 && break + i=$(expr $i + 1) + done;; +esac + +# Exercise a DFA range bug where '[d-f]' did not match accented 'e' in a +# unibyte French locale. + +for locale in fr_FR.iso88591 fr_FR.iso885915@euro fr_FR.ISO8859-1; do + case $(get-mb-cur-max $locale) in + 1) + printf '\351\n' | LC_ALL=$locale grep '[d-f]' || fail=1;; + esac done Exit $fail -- 2.7.4