On Sat, 23 Mar 2019 08:06:35 +0900
Norihiro Tanaka <nori...@kcn.ne.jp> wrote:

> A kwset matcher is not built in a grep matcher after token re-order is
> introduced in commit 5c7a0371823876cca7a1347fa09ca26bbbff0c98 in dfa.
> It caused performance degradation in some typical cases.  This bug is
> introduced in grep-3.2.
> 
> DFAMUST() does not work if tokens which are parsed in dfa matcher are
> re-ordered.  Therefore, change as it is called after parse and before
> tokens re-order.
> 
> BTW, this change does not affect programs that do not use DFAMUST(),
> such as sed or gawk.
> 
> $ yes $(printf '%040d' 0) | head -10000000 >inp
> $ grep-2.2/src/grep 01.2 inp
> real 1.61
> user 1.53
> sys 0.07
> $ grep-2.3/src/grep 01.2 inp
> real 1.57
> user 1.48
> sys 0.08
> $ grep-2.4/src/grep 01.2 inp
> real 1.50
> user 1.44
> sys 0.05
> $ grep-2.4.1/src/grep 01.2 inp
> real 1.53
> user 1.48
> sys 0.05
> $ grep-2.4.2/src/grep 01.2 inp
> real 1.52
> user 1.47
> sys 0.04
> $ grep-2.5.4/src/grep 01.2 inp
> real 1.53
> user 1.47
> sys 0.05
> $ grep-2.6/src/grep 01.2 inp
> real 1.51
> user 1.47
> sys 0.04
> $ grep-2.6.1/src/grep 01.2 inp
> real 1.50
> user 1.44
> sys 0.05
> $ grep-2.6.2/src/grep 01.2 inp
> real 1.52
> user 1.46
> sys 0.05
> $ grep-2.6.3/src/grep 01.2 inp
> real 1.52
> user 1.47
> sys 0.05
> $ grep-2.7/src/grep 01.2 inp
> real 1.53
> user 1.49
> sys 0.04
> $ grep-2.8/src/grep 01.2 inp
> real 1.52
> user 1.46
> sys 0.05
> $ grep-2.9/src/grep 01.2 inp
> real 1.54
> user 1.50
> sys 0.04
> $ grep-2.10/src/grep 01.2 inp
> real 1.51
> user 1.46
> sys 0.05
> $ grep-2.11/src/grep 01.2 inp
> real 1.53
> user 1.48
> sys 0.05
> $ grep-2.12/src/grep 01.2 inp
> real 1.51
> user 1.47
> sys 0.03
> $ grep-2.13/src/grep 01.2 inp
> real 1.52
> user 1.47
> sys 0.03
> $ grep-2.14/src/grep 01.2 inp
> real 1.52
> user 1.47
> sys 0.04
> $ grep-2.15/src/grep 01.2 inp
> real 1.55
> user 1.49
> sys 0.05
> $ grep-2.16/src/grep 01.2 inp
> real 1.53
> user 1.48
> sys 0.04
> $ grep-2.17/src/grep 01.2 inp
> real 1.53
> user 1.48
> sys 0.05
> $ grep-2.18/src/grep 01.2 inp
> real 1.51
> user 1.44
> sys 0.06
> $ grep-2.19/src/grep 01.2 inp
> real 0.06
> user 0.02
> sys 0.04
> $ grep-2.20/src/grep 01.2 inp
> real 0.07
> user 0.01
> sys 0.05
> $ grep-2.21/src/grep 01.2 inp
> real 0.06
> user 0.02
> sys 0.04
> $ grep-2.22/src/grep 01.2 inp
> real 0.06
> user 0.01
> sys 0.05
> $ grep-2.23/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.05
> $ grep-2.24/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.04
> $ grep-2.25/src/grep 01.2 inp
> real 0.09
> user 0.05
> sys 0.04
> $ grep-2.26/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.05
> $ grep-2.27/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.04
> $ grep-2.28/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.04
> $ grep-3.0/src/grep 01.2 inp
> real 0.09
> user 0.04
> sys 0.04
> $ grep-3.1/src/grep 01.2 inp
> real 0.11
> user 0.05
> sys 0.06
> $ grep-3.2/src/grep 01.2 inp
> real 0.37
> user 0.32
> sys 0.04
> $ grep-3.3/src/grep 01.2 inp
> real 0.29
> user 0.25
> sys 0.04
> 
> Thanks,
> Norihiro

Missing a patch for dfa.  Re-send correct patch file.
From d12d3256043a792fe65830ff6469bba6418876e1 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Sat, 23 Mar 2019 08:19:11 +0900
Subject: [PATCH] dfa: separate parse and compile phase

DFAMUST() must be called after parse and before tokens re-order which is
introduced in commit 5c7a0371823876cca7a1347fa09ca26bbbff0c98, but both are
executed in compilation phase.

* lib/dfa.c (dfaparse): Change it to global function.
(dfacomp): If first argument is NULL, skip parse.
* lib/dfa.h: (dfaparse): Add a prototype.
---
 lib/dfa.c |    6 ++++--
 lib/dfa.h |    3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/dfa.c b/lib/dfa.c
index 329a209..1e125b4 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -1969,7 +1969,7 @@ regexp (struct dfa *dfa)
 /* Main entry point for the parser.  S is a string to be parsed, len is the
    length of the string, so s can include NUL characters.  D is a pointer to
    the struct dfa to parse into.  */
-static void
+void
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
   d->lex.ptr = s;
@@ -3745,7 +3745,9 @@ dfassbuild (struct dfa *d)
 void
 dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
 {
-  dfaparse (s, len, d);
+  if (s != NULL)
+    dfaparse (s, len, d);
+
   dfassbuild (d);
 
   if (dfa_supported (d))
diff --git a/lib/dfa.h b/lib/dfa.h
index 60512e2..221f7d1 100644
--- a/lib/dfa.h
+++ b/lib/dfa.h
@@ -71,6 +71,9 @@ extern struct dfamust *dfamust (struct dfa const *);
 /* Free the storage held by the components of a struct dfamust. */
 extern void dfamustfree (struct dfamust *);
 
+/* Parse the given string of given length into the given struct dfa.  */
+extern void dfaparse (char const *, size_t, struct dfa *);
+
 /* Compile the given string of the given length into the given struct dfa.
    Final argument is a flag specifying whether to build a searching or an
    exact matcher. */
-- 
1.7.1

Reply via email to