I haven't found a clear test case yet.  However, I found another bug
during the investigation.  It can also be reproduced on grep-2.19 and
grep-2.20, and it is fixed by the patch for this bug.

  $ printf 'a\naa\n' | env LC_ALL=zh_CN src/grep ..
  a
  aa

I added a test case to the previous patch and changed its title.
From 7ff5fba31fb416483e7f084d8f93c1fa6d21c228 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Mon, 29 Sep 2014 08:53:56 +0900
Subject: [PATCH] dfa: fix behavior after a transition for ANYCHAR or MBCSET in
 non-UTF8 multibyte locales

If the previous character is a newline after a transition for ANYCHAR or
MBCSET in non-UTF8 multibyte locales, check whether the current position
is past the end of the input buffer, and transition to the initial state
if newline is not allowed, even if RE_DOT_NEWLINE is set.

* src/dfa.c (dfaexec_main): Implement the above.
* tests/mb-dot-newline: New test.
* tests/Makefile.am (TESTS): Add it.
---
 src/dfa.c            | 30 ++++++++++++++++++++++++++++++
 tests/Makefile.am    |  1 +
 tests/mb-dot-newline | 16 ++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 tests/mb-dot-newline

diff --git a/src/dfa.c b/src/dfa.c
index 4f45fff..7cbe247 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3351,6 +3351,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
               /* Can match with a multibyte character (and multi character
                  collating element).  Transition table might be updated.  */
               s = transit_state (d, s, &p, (unsigned char *) end);
+
+              if (p[-1] == eol)
+                {
+                  if ((char *) p > end)
+                    {
+                      p = NULL;
+                      goto done;
+                    }
+
+                  nlcount++;
+
+                  if (!allow_nl)
+                    s = 0;
+                }
+
               mbp = p;
               trans = d->trans;
             }
@@ -3399,6 +3414,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
               /* Can match with a multibyte character (and multicharacter
                  collating element).  Transition table might be updated.  */
               s = transit_state (d, s, &p, (unsigned char *) end);
+
+              if (p[-1] == eol)
+                {
+                  if ((char *) p > end)
+                    {
+                      p = NULL;
+                      goto done;
+                    }
+
+                  nlcount++;
+
+                  if (!allow_nl)
+                    s = 0;
+                }
+
               mbp = p;
               trans = d->trans;
             }
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d47978f..5de7ec5 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -75,6 +75,7 @@ TESTS =                                               \
   long-line-vs-2GiB-read                       \
   max-count-overread                           \
   max-count-vs-context                         \
+  mb-dot-newline                               \
   mb-non-UTF8-performance                      \
   multibyte-white-space                                \
   multiple-begin-or-end-line                   \
diff --git a/tests/mb-dot-newline b/tests/mb-dot-newline
new file mode 100644
index 0000000..c80fd9c
--- /dev/null
+++ b/tests/mb-dot-newline
@@ -0,0 +1,16 @@
+#!/bin/sh
+# This would fail for grep-2.20
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+printf 'a\naa\n' > in || framework_failure_
+printf 'aa\n' > exp || framework_failure_
+
+fail=0
+
+for LOC in en_US.UTF-8 en_US zh_CN $LOCALE_FR_UTF8 C; do
+  out1=out1-$LOC
+  LC_ALL=$LOC grep '..' in > out || fail=1
+  compare exp out || fail=1
+done
+
+Exit $fail
-- 
2.1.1

Reply via email to