From 799ce52011f81607bb2d8aba357112a8682a4118 Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyering@fb.com>
Date: Thu, 20 Dec 2018 20:54:26 -0800
Subject: [PATCH] sed: fix \b DFA-bug in C locale

Under some conditions, \b would mistakenly fail to match. E.g.,
this would mistakenly print "123-x" instead of "123":
  echo 123-x|LC_ALL=C sed 's/.\bx//'
* NEWS (Bug fixes): Mention it.
* gnulib: Update to latest, for DFA regression fix.
* testsuite/word-delim.sh: New file, to test for the dfa.c regression.
* testsuite/local.mk (T): Add it.
Reported by Jan Palus in
https://lists.gnu.org/r/sed-devel/2018-12/msg00022.html
---
 NEWS                    |  8 ++++++++
 gnulib                  |  2 +-
 testsuite/local.mk      |  1 +
 testsuite/word-delim.sh | 19 +++++++++++++++++++
 4 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100755 testsuite/word-delim.sh

diff --git a/NEWS b/NEWS
index 7bc203e..dd75aee 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,14 @@ GNU sed NEWS                                    -*- outline -*-

 * Noteworthy changes in release ?.? (????-??-??) [?]

+** Bug fixes
+
+  With the DFA matcher in the C locale, some uses of \b would fail to match;
+  e.g., the following would mistakenly print "123-x" instead of "123":
+    echo 123-x|LC_ALL=C sed 's/.\bx//'
+  Using a multibyte locale or certain regexp constructs (some ranges,
+  backreferences) would avoid the bug.  [bug introduced in sed 4.6]
+

 * Noteworthy changes in release 4.6 (2018-12-19) [stable]

diff --git a/gnulib b/gnulib
index 453f37e..5d6a3cd 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 453f37e2b6364cb5fdcd79f9330b962c88daab9f
+Subproject commit 5d6a3cdd5c312e77a6d0f0848e3cb79a52e08658
diff --git a/testsuite/local.mk b/testsuite/local.mk
index 8213b06..43623bf 100644
--- a/testsuite/local.mk
+++ b/testsuite/local.mk
@@ -112,6 +112,7 @@ T += testsuite/8bit.sh			\
      testsuite/stdin.sh                 \
      testsuite/utf8-ru.sh		\
      testsuite/uniq.sh			\
+     testsuite/word-delim.sh		\
      testsuite/xemacs.sh

 TESTS = $(SEDTESTS) $(T)
diff --git a/testsuite/word-delim.sh b/testsuite/word-delim.sh
new file mode 100755
index 0000000..ade3137
--- /dev/null
+++ b/testsuite/word-delim.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Exercise the DFA regression in sed-4.6.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# Ensure that \b matches in both the C locale and a multibyte (UTF-8) locale.
+# In the C locale, it failed due to a dfa.c regression in sed-4.6.
+echo 123-x > in || framework_failure_
+echo 123 > exp || framework_failure_
+
+for locale in C en_US.UTF-8; do
+  LC_ALL=$locale sed 's/.\bx//' in > out 2>err || fail=1
+  compare exp out || fail=1
+  compare /dev/null err || fail=1
+done
+
+Exit $fail
-- 
2.20.1.2.gb21ebb671b

