From 65f0652d68d424934c126498e8189374ebf77dc4 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <noritnk@kcn.ne.jp>
Date: Mon, 29 Sep 2014 08:53:56 +0900
Subject: [PATCH] dfa: check end of input buffer after transition in non-UTF8
 multibyte locale

* src/dfa.c (dfaexec_main): Check for end of input buffer after each
transition in a non-UTF8 multibyte locale.
* tests/mb-non-UTF8-overrun: New test.
* tests/Makefile.am (TESTS): Add it.
* src/grep.c (main): With this fix, we no longer need the fourth
byte of "eolbytes".
---
 src/dfa.c                 | 30 ++++++++++++++++++++++++++++++
 src/grep.c                |  4 ++--
 tests/Makefile.am         |  1 +
 tests/mb-non-UTF8-overrun | 30 ++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100755 tests/mb-non-UTF8-overrun

diff --git a/src/dfa.c b/src/dfa.c
index 7692515..fc4755d 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3351,6 +3351,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
               /* Can match with a multibyte character (and multi character
                  collating element).  Transition table might be updated.  */
               s = transit_state (d, s, &p, (unsigned char *) end);
+
+              if (p[-1] == eol)
+                {
+                  if ((char *) p > end)
+                    {
+                      p = NULL;
+                      goto done;
+                    }
+
+                  nlcount++;
+
+                  if (!allow_nl)
+                    s = 0;
+                }
+
               mbp = p;
               trans = d->trans;
             }
@@ -3399,6 +3414,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
               /* Can match with a multibyte character (and multicharacter
                  collating element).  Transition table might be updated.  */
               s = transit_state (d, s, &p, (unsigned char *) end);
+
+              if (p[-1] == eol)
+                {
+                  if ((char *) p > end)
+                    {
+                      p = NULL;
+                      goto done;
+                    }
+
+                  nlcount++;
+
+                  if (!allow_nl)
+                    s = 0;
+                }
+
               mbp = p;
               trans = d->trans;
             }
diff --git a/src/grep.c b/src/grep.c
index 9dcf298..dfc0e51 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2513,8 +2513,8 @@ main (int argc, char **argv)

   compile (keys, keycc);
   free (keys);
-  /* We need one byte prior and at least two after.  */
-  char eolbytes[4] = { 0, eolbyte, 0, 0 };
+  /* We need one byte prior and one after.  */
+  char eolbytes[3] = { 0, eolbyte, 0 };
   size_t match_size;
   skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
                       == out_invert);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d47978f..4b9a931 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -75,6 +75,7 @@ TESTS =						\
   long-line-vs-2GiB-read			\
   max-count-overread				\
   max-count-vs-context				\
+  mb-non-UTF8-overrun				\
   mb-non-UTF8-performance			\
   multibyte-white-space				\
   multiple-begin-or-end-line			\
diff --git a/tests/mb-non-UTF8-overrun b/tests/mb-non-UTF8-overrun
new file mode 100755
index 0000000..1d4d59e
--- /dev/null
+++ b/tests/mb-non-UTF8-overrun
@@ -0,0 +1,30 @@
+#!/bin/sh
+# grep would sometimes read beyond end of input, when using a non-UTF8
+# multibyte locale.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_JP_EUC_locale_
+
+fail=0
+
+# Before the fix, grep read one byte beyond the end of its input buffer here;
+# an ASAN-enabled binary or a run under valgrind would report that and fail.
+grep -z . < /dev/null
+test $? = 1 || fail=1
+
+Exit $fail
-- 
2.1.0

