Sorry that patch took so long to review. I installed it, along with the attached followup patches which are mostly just minor style things (plus fixing the attribution for a patch that I forgot to specify --author for).

I didn't get as much performance improvement on my platform, so I toned down the NEWS item a bit. Still, wow. It is a 2.5x performance improvement for that test case, and it's asymptotically better. Thanks.

From 4e9899f18fdda1fe19a26f7e1a3e40e87f8c7f2c Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 11:30:08 -0700
Subject: [PATCH 1/3] grep: minor cleanups for -F Aho-Corasick

* NEWS: Don't claim 7x, as the value seems to be system-dependent.
* src/kwset.c (struct kwset.kwsexec, bmexec, acexec_trans, acexec):
* src/kwset.c, src/kwset.h (kwsalloc, kwsexec):
Don't put 'const' into the declaration when that is irrelevant to
the API.  More generally, don't bother with 'const' when it's only
a local so it is reasonably obvious to a reader that it is 'const'
anyway.  It would be overkill to add 'const' to all locals that
never change.
* src/kwset.c (U): Avoid unnecessary parens.
(treefails, memoff2_kwset, bmexec_trans, bmexec, cwexec, acexec_trans):
Prefer SIZE_MAX to (size_t) -1.
(bmexec_trans, cwexec, acexec_trans):
Remove attributes for static functions that no longer seem needed.
(memoff2_kwset): Rename from memchr2_kwset, since it returns
an offset, not a pointer.  All uses changed.
(cwexec, acexec_trans) [lint]: Remove initialization that is no
longer needed; at least, GCC 6.1 x86-64 does not need it.
(acexec_trans): Clarify code by using nesting rather than 'continue'.
---
 NEWS        |   7 +--
 src/kwset.c | 153 +++++++++++++++++++++++++++---------------------------------
 src/kwset.h |   6 +--
 3 files changed, 76 insertions(+), 90 deletions(-)

diff --git a/NEWS b/NEWS
index fb6f41e..c3e6000 100644
--- a/NEWS
+++ b/NEWS
@@ -8,9 +8,10 @@ GNU grep NEWS                                    -*- outline -*-
 
 ** Improvements
 
-  grep -F is now typically 7 times faster for a lot of words.  The
-  improvement replace Commentz-Walter algorithm which has been used for
-  a long time to Aho-Corasick algorithm.
+  grep -F is now typically much faster when many patterns are given,
+  as it now uses the Aho-Corasick algorithm instead of the
+  Commentz-Walter algorithm in that case.
+
 
 * Noteworthy changes in release 2.25 (2016-04-21) [stable]
 
diff --git a/src/kwset.c b/src/kwset.c
index a279579..d14972f 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -54,7 +54,7 @@
 #define obstack_chunk_alloc malloc
 #define obstack_chunk_free free
 
-#define U(c) (to_uchar (c))
+#define U(c) to_uchar (c)
 
 /* Balanced tree of edges and labels leaving a given trie node. */
 struct tree
@@ -115,9 +115,8 @@ struct kwset
      algorithm in multiple words.  */
   bool reverse;
 
-  /* kwsexec() implementation.  */
-  size_t (*kwsexec) (kwset_t, char const *, size_t, struct kwsmatch *,
-                     bool);
+  /* kwsexec implementation.  */
+  size_t (*kwsexec) (kwset_t, char const *, size_t, struct kwsmatch *, bool);
 };
 
 /* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
@@ -127,17 +126,14 @@ tr (char const *trans, char c)
   return trans ? trans[U(c)] : c;
 }
 
-static size_t acexec (kwset_t, char const *, size_t, struct kwsmatch *,
-                      bool);
-static size_t cwexec (kwset_t, char const *, size_t, struct kwsmatch *,
-                      bool);
-static size_t bmexec (kwset_t, char const *, size_t, struct kwsmatch *,
-                      bool);
+static size_t acexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+static size_t cwexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+static size_t bmexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
 
 /* Allocate and initialize a keyword set object, returning an opaque
    pointer to it.  */
 kwset_t
-kwsalloc (char const *trans, bool const reverse)
+kwsalloc (char const *trans, bool reverse)
 {
   struct kwset *kwset = xmalloc (sizeof *kwset);
 
@@ -156,10 +152,7 @@ kwsalloc (char const *trans, bool const reverse)
   kwset->target = NULL;
   kwset->trans = trans;
   kwset->reverse = reverse;
-  if (reverse)
-    kwset->kwsexec = cwexec;
-  else
-    kwset->kwsexec = acexec;
+  kwset->kwsexec = reverse ? cwexec : acexec;
 
   return kwset;
 }
@@ -355,7 +348,7 @@ treefails (struct tree const *tree, struct trie const *fail,
         {
           tree->trie->fail = link->trie;
           if (!reverse && link->trie->accepting && !tree->trie->accepting)
-            tree->trie->accepting = -1;
+            tree->trie->accepting = SIZE_MAX;
           return;
         }
       fail = fail->fail;
@@ -430,7 +423,7 @@ kwsprep (kwset_t kwset)
           for (curr = last = kwset->trie; curr; curr = curr->next)
              enqueue (curr->links, &last);
 
-          /* Looking for just one string.  Extract it from the trie. */
+          /* Looking for just one string.  Extract it from the trie.  */
           kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
           for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
             {
@@ -622,7 +615,8 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
 }
 
 /* Return the address of the first byte in the buffer S (of size N)
-   that matches the last byte specified by KWSET, a singleton.  */
+   that matches the last byte specified by KWSET, a singleton.
+   Return NULL if there is no match.  */
 static char const *
 memchr_kwset (char const *s, size_t n, kwset_t kwset)
 {
@@ -640,17 +634,20 @@ memchr_kwset (char const *s, size_t n, kwset_t kwset)
   return n == 0 ? NULL : memchr2 (s, kwset->gc1, kwset->gc1help, n);
 }
 
-/* Return the address of the first byte in the buffer S (of size N)
-   that matches the last byte specified by KWSET, a singleton.  */
+/* Return the offset of the first byte in the buffer S (of size N)
+   that matches the last byte specified by KWSET, a pair.
+   Return SIZE_MAX if there is no match.  */
 static size_t
-memchr2_kwset (char const *s, size_t n, kwset_t kwset,
+memoff2_kwset (char const *s, size_t n, kwset_t kwset,
                struct kwsmatch *kwsmatch)
 {
   struct tree const *link = kwset->trie->links;
   struct tree const *clink = link->llink ? link->llink : link->rlink;
 
   char const *mch = memchr2 (s, link->label, clink->label, n);
-  if (mch)
+  if (! mch)
+    return SIZE_MAX;
+  else
     {
       size_t off = mch - s;
       if (*mch == link->label)
@@ -661,12 +658,10 @@ memchr2_kwset (char const *s, size_t n, kwset_t kwset,
       kwsmatch->size[0] = 1;
       return off;
     }
-  else
-    return -1;
 }
 
 /* Fast Boyer-Moore search (inlinable version).  */
-static inline size_t _GL_ATTRIBUTE_PURE
+static inline size_t
 bmexec_trans (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
@@ -678,11 +673,11 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
   if (len == 0)
     return 0;
   if (len > size)
-    return -1;
+    return SIZE_MAX;
   if (len == 1)
     {
       tp = memchr_kwset (text, size, kwset);
-      return tp ? tp - text : -1;
+      return tp ? tp - text : SIZE_MAX;
     }
 
   d1 = kwset->delta;
@@ -722,7 +717,7 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
                     tp--;
                     tp = memchr_kwset (tp, text + size - tp, kwset);
                     if (! tp)
-                      return -1;
+                      return SIZE_MAX;
                     tp++;
                     if (ep <= tp)
                       break;
@@ -746,21 +741,21 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
         return tp - text;
     }
 
-  return -1;
+  return SIZE_MAX;
 }
 
 /* Fast Boyer-Moore search.  */
 static size_t
 bmexec (kwset_t kwset, char const *text, size_t size,
-        struct kwsmatch *kwsmatch, bool const longest)
+        struct kwsmatch *kwsmatch, bool longest)
 {
   /* Help the compiler inline bmexec_trans in two ways, depending on
      whether kwset->trans is null.  */
   size_t ret = (kwset->trans
-          ? bmexec_trans (kwset, text, size)
-          : bmexec_trans (kwset, text, size));
+                ? bmexec_trans (kwset, text, size)
+                : bmexec_trans (kwset, text, size));
 
-  if (ret != (size_t) -1)
+  if (ret != SIZE_MAX)
     {
        kwsmatch->index = 0;
        kwsmatch->offset[0] = ret;
@@ -771,7 +766,7 @@ bmexec (kwset_t kwset, char const *text, size_t size,
 }
 
 /* Hairy multiple string search with Commentz-Walter algorithm.  */
-static size_t _GL_ARG_NONNULL ((4))
+static size_t
 cwexec (kwset_t kwset, char const *text, size_t len,
         struct kwsmatch *kwsmatch, bool longest)
 {
@@ -786,15 +781,11 @@ cwexec (kwset_t kwset, char const *text, size_t len,
   struct tree const *tree;
   char const *trans;
 
-#ifdef lint
-  accept = NULL;
-#endif
-
-  /* Initialize register copies and look for easy ways out. */
+  /* Initialize register copies and look for easy ways out.  */
   if (len < kwset->mind)
-    return -1;
+    return SIZE_MAX;
   if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
-    return memchr2_kwset (text, len, kwset, kwsmatch);
+    return memoff2_kwset (text, len, kwset, kwsmatch);
   next = kwset->next;
   delta = kwset->delta;
   trans = kwset->trans;
@@ -864,7 +855,7 @@ cwexec (kwset_t kwset, char const *text, size_t len,
       if (mch)
         goto match;
     }
-  return -1;
+  return SIZE_MAX;
 
  match:
   /* Given a known match, find the longest possible match anchored
@@ -934,9 +925,9 @@ cwexec (kwset_t kwset, char const *text, size_t len,
 
 /* Hairy multiple string search with Aho-Corasick algorithm.
    (inlinable version)  */
-static inline size_t _GL_ARG_NONNULL ((4))
+static inline size_t
 acexec_trans (kwset_t kwset, char const *text, size_t len,
-              struct kwsmatch *kwsmatch, bool const longest)
+              struct kwsmatch *kwsmatch, bool longest)
 {
   struct trie * const *next;
   struct trie const *trie, *accept;
@@ -945,15 +936,11 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
   struct tree const *tree;
   char const *trans;
 
-#ifdef lint
-  accept = NULL;
-#endif
-
-  /* Initialize register copies and look for easy ways out. */
+  /* Initialize register copies and look for easy ways out.  */
   if (len < kwset->mind)
-    return -1;
+    return SIZE_MAX;
   if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
-    return memchr2_kwset (text, len, kwset, kwsmatch);
+    return memoff2_kwset (text, len, kwset, kwsmatch);
 
   next = kwset->next;
   trans = kwset->trans;
@@ -1007,14 +994,14 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
       while (!trie)
         {
           if (tp >= lim)
-            return -1;
+            return SIZE_MAX;
           trie = next[U(tr (trans, *tp++))];
         }
 
       if (trie->accepting)
         goto match;
       if (tp >= lim)
-        return -1;
+        return SIZE_MAX;
       c = tr (trans, *tp++);
       tree = trie->links;
 
@@ -1026,26 +1013,26 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
               if (trie->accepting)
                 goto match;
               if (tp >= lim)
-                return -1;
+                return SIZE_MAX;
               c = tr (trans, *tp++);
               tree = trie->links;
-              continue;
             }
-          else if (c < tree->label)
-            tree = tree->llink;
           else
-            tree = tree->rlink;
-          if (tree)
-            continue;
-          trie = trie->fail;
-          if (!trie)
-            break;
-          if (trie->accepting)
             {
-              --tp;
-              goto match;
+              tree = c < tree->label ? tree->llink : tree->rlink;
+              if (! tree)
+                {
+                  trie = trie->fail;
+                  if (!trie)
+                    break;
+                  if (trie->accepting)
+                    {
+                      --tp;
+                      goto match;
+                    }
+                  tree = trie->links;
+                }
             }
-          tree = trie->links;
         }
 
       trie = next[c];
@@ -1053,7 +1040,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
 
  match:
   accept = trie;
-  while (accept->accepting == (size_t) -1)
+  while (accept->accepting == SIZE_MAX)
     accept = accept->fail;
   left = tp - accept->depth;
 
@@ -1067,23 +1054,21 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
           c = tr (trans, *tp++);
           tree = trie->links;
           while (tree && c != tree->label)
-            if (c < tree->label)
-              tree = tree->llink;
-            else
-              tree = tree->rlink;
+            tree = c < tree->label ? tree->llink : tree->rlink;
           if (!tree)
             break;
           trie = tree->trie;
-          if (!trie->accepting)
-            continue;
-          accept1 = trie;
-          while (accept1->accepting == (size_t) -1)
-            accept1 = accept1->fail;
-          left1 = tp - accept1->depth;
-          if (left1 <= left)
+          if (trie->accepting)
             {
-              left = left1;
-              accept = accept1;
+              accept1 = trie;
+              while (accept1->accepting == SIZE_MAX)
+                accept1 = accept1->fail;
+              left1 = tp - accept1->depth;
+              if (left1 <= left)
+                {
+                  left = left1;
+                  accept = accept1;
+                }
             }
         }
     }
@@ -1098,7 +1083,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
 /* Hairy multiple string search with Aho-Corasick algorithm.  */
 static size_t
 acexec (kwset_t kwset, char const *text, size_t size,
-        struct kwsmatch *kwsmatch, bool const longest)
+        struct kwsmatch *kwsmatch, bool longest)
 {
   /* Help the compiler inline bmexec_trans in two ways, depending on
      whether kwset->trans is null.  */
@@ -1109,12 +1094,12 @@ acexec (kwset_t kwset, char const *text, size_t size,
 
 /* Search TEXT for a match of any member of KWSET.
    Return the offset (into TEXT) of the first byte of the matching substring,
-   or (size_t) -1 if no match is found.  Upon a match, store details in
+   or SIZE_MAX if no match is found.  Upon a match, store details in
    *KWSMATCH: index of matched keyword, start offset (same as the return
    value), and length.  */
 size_t
 kwsexec (kwset_t kwset, char const *text, size_t size,
-         struct kwsmatch *kwsmatch, bool const longest)
+         struct kwsmatch *kwsmatch, bool longest)
 {
   return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
 }
diff --git a/src/kwset.h b/src/kwset.h
index 59efa74..d2700e9 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -39,7 +39,7 @@ typedef struct kwset *kwset_t;
 /* Return an opaque pointer to a newly allocated keyword set.  A nonnull arg
    specifies a table of character translations to be applied to all
    pattern and search text.  */
-extern kwset_t kwsalloc (char const *, bool const);
+extern kwset_t kwsalloc (char const *, bool);
 
 /* Incrementally extend the keyword set to include the given string.
    Remember an index number for each keyword included in the set.  */
@@ -54,8 +54,8 @@ extern void kwsprep (kwset_t);
    the matching substring in the integer it points to.  Similarly,
    if foundindex is non-NULL, store the index of the particular
    keyword found therein. */
-extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *,
-                       bool const) _GL_ARG_NONNULL ((4));
+extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *, bool)
+  _GL_ARG_NONNULL ((4));
 
 /* Deallocate the given keyword set and all its associated storage. */
 extern void kwsfree (kwset_t);
-- 
2.5.5

From 81ccbaa009d4147840f480991ed74845841a436e Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 15:24:38 -0700
Subject: [PATCH 2/3] grep: simplify -F Aho-Corasick a bit

This removes some tuning that complicates the code without providing
performance benefits that I could measure (GCC 6.1, x86-64).
(acexec_trans): Do not hand-unroll.  Unduplicate the code for a
transition step.

* src/kwset.c (struct kwset.kwsexec, bmexec, acexec_trans, acexec)
---
 src/kwset.c | 102 ++++++++++++++++--------------------------------------------
 1 file changed, 27 insertions(+), 75 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index d14972f..7391990 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -932,110 +932,62 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
   struct trie * const *next;
   struct trie const *trie, *accept;
   char const *tp, *left, *lim;
-  unsigned char c;
   struct tree const *tree;
   char const *trans;
 
   /* Initialize register copies and look for easy ways out.  */
   if (len < kwset->mind)
     return SIZE_MAX;
-  if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
+  trans = kwset->trans;
+  if (!trans && kwset->maxd == 1 && kwset->words == 2)
     return memoff2_kwset (text, len, kwset, kwsmatch);
 
   next = kwset->next;
-  trans = kwset->trans;
+  trie = kwset->trie;
   lim = text + len;
   tp = text;
 
-  if (kwset->trie->accepting)
+  if (!trie->accepting)
     {
-      trie = kwset->trie;
-      goto match;
-    }
-
-  trie = next[U(tr (trans, *tp++))];
+      unsigned char c = tr (trans, *tp++);
 
-  while (true)
-    {
-      if (tp < lim - 8)
+      while (true)
         {
-          while (!trie)
+          while (! (trie = next[c]))
             {
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              trie = next[U(tr (trans, *tp++))];
-              if (trie)
-                break;
-              if (tp >= lim - 8)
-                break;
-              trie = next[U(tr (trans, *tp++))];
+              if (tp >= lim)
+                return SIZE_MAX;
+              c = tr (trans, *tp++);
             }
-        }
-
-      while (!trie)
-        {
-          if (tp >= lim)
-            return SIZE_MAX;
-          trie = next[U(tr (trans, *tp++))];
-        }
 
-      if (trie->accepting)
-        goto match;
-      if (tp >= lim)
-        return SIZE_MAX;
-      c = tr (trans, *tp++);
-      tree = trie->links;
-
-      while (true)
-        {
-          if (c == tree->label)
+          while (true)
             {
-              trie = tree->trie;
               if (trie->accepting)
                 goto match;
               if (tp >= lim)
                 return SIZE_MAX;
               c = tr (trans, *tp++);
-              tree = trie->links;
-            }
-          else
-            {
-              tree = c < tree->label ? tree->llink : tree->rlink;
-              if (! tree)
+
+              for (tree = trie->links; c != tree->label; )
                 {
-                  trie = trie->fail;
-                  if (!trie)
-                    break;
-                  if (trie->accepting)
+                  tree = c < tree->label ? tree->llink : tree->rlink;
+                  if (! tree)
                     {
-                      --tp;
-                      goto match;
+                      trie = trie->fail;
+                      if (!trie)
+                        goto next_trie;
+                      if (trie->accepting)
+                        {
+                          --tp;
+                          goto match;
+                        }
+                      tree = trie->links;
                     }
-                  tree = trie->links;
                 }
+              trie = tree->trie;
             }
+        next_trie:;
         }
-
-      trie = next[c];
     }
 
  match:
@@ -1051,7 +1003,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
         {
           struct trie const *accept1;
           char const *left1;
-          c = tr (trans, *tp++);
+          unsigned char c = tr (trans, *tp++);
           tree = trie->links;
           while (tree && c != tree->label)
             tree = c < tree->label ? tree->llink : tree->rlink;
-- 
2.5.5

From 3d5b898958b922995935f5d803c271696d127a8a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 15:37:10 -0700
Subject: [PATCH 3/3] maint: correct attribution

* build-aux/git-log-fix: Fix attribution of primary Aho-Corasick patch
---
 build-aux/git-log-fix | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build-aux/git-log-fix b/build-aux/git-log-fix
index b06803a..909a06f 100644
--- a/build-aux/git-log-fix
+++ b/build-aux/git-log-fix
@@ -2,6 +2,10 @@
 # option.  It specifies what changes to make to each given SHA1's commit
 # log and metadata, using Perl-eval'able expressions.
 
+a3d42de483a29c1da381f5c3dad87cbbc86a5c70
+# Correct the author:
+s/Paul Eggert  <eggert\@cs.ucla.edu>/Norihiro Tanaka  <noritnk\@kcn.ne.jp>/
+
 074842d3e3054714a495252e582886f0e4ace4e4
 # Correct spelling of name:
 s/Mercer/Meixner/
-- 
2.5.5

Reply via email to