Sorry that patch took so long to review. I installed it, along with the
attached followup patches which are mostly just minor style things (plus
fixing the attribution for a patch that I forgot to specify --author for).
I didn't get as much performance improvement on my platform, so I toned
down the NEWS item a bit. Still, wow. It is a 2.5x performance
improvement for that test case, and it's asymptotically better. Thanks.
From 4e9899f18fdda1fe19a26f7e1a3e40e87f8c7f2c Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 11:30:08 -0700
Subject: [PATCH 1/3] grep: minor cleanups for -F Aho-Corasick
* NEWS: Don't claim 7x, as the value seems to be system-dependent.
* src/kwset.c (struct kwset.kwsexec, bmexec, acexec_trans, acexec):
* src/kwset.c, src/kwset.h (kwsalloc, kwsexec):
Don't put 'const' into the declaration when that is irrelevant to
the API. More generally, don't bother with 'const' when it's only
a local so it is reasonably obvious to a reader that it is 'const'
anyway. It would be overkill to add 'const' to all locals that
never change.
* src/kwset.c (U): Avoid unnecessary parens.
(treefails, memoff2_kwset, bmexec_trans, bmexec, cwexec, acexec_trans):
Prefer SIZE_MAX to (size_t) -1.
(bmexec_trans, cwexec, acexec_trans):
Remove attributes for static functions that no longer seem needed.
(memoff2_kwset): Rename from memchr2_kwset, since it returns
an offset, not a pointer. All uses changed.
(cwexec, acexec_trans) [lint]: Remove initialization that is no
longer needed; at least, GCC 6.1 x86-64 does not need it.
(acexec_trans): Clarify code by using nesting rather than 'continue'.
---
NEWS | 7 +--
src/kwset.c | 153 +++++++++++++++++++++++++++---------------------------------
src/kwset.h | 6 +--
3 files changed, 76 insertions(+), 90 deletions(-)
diff --git a/NEWS b/NEWS
index fb6f41e..c3e6000 100644
--- a/NEWS
+++ b/NEWS
@@ -8,9 +8,10 @@ GNU grep NEWS -*- outline -*-
** Improvements
- grep -F is now typically 7 times faster for a lot of words. The
- improvement replace Commentz-Walter algorithm which has been used for
- a long time to Aho-Corasick algorithm.
+ grep -F is now typically much faster when many patterns are given,
+ as it now uses the Aho-Corasick algorithm instead of the
+ Commentz-Walter algorithm in that case.
+
* Noteworthy changes in release 2.25 (2016-04-21) [stable]
diff --git a/src/kwset.c b/src/kwset.c
index a279579..d14972f 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -54,7 +54,7 @@
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
-#define U(c) (to_uchar (c))
+#define U(c) to_uchar (c)
/* Balanced tree of edges and labels leaving a given trie node. */
struct tree
@@ -115,9 +115,8 @@ struct kwset
algorithm in multiple words. */
bool reverse;
- /* kwsexec() implementation. */
- size_t (*kwsexec) (kwset_t, char const *, size_t, struct kwsmatch *,
- bool);
+ /* kwsexec implementation. */
+ size_t (*kwsexec) (kwset_t, char const *, size_t, struct kwsmatch *, bool);
};
/* Use TRANS to transliterate C. A null TRANS does no transliteration. */
@@ -127,17 +126,14 @@ tr (char const *trans, char c)
return trans ? trans[U(c)] : c;
}
-static size_t acexec (kwset_t, char const *, size_t, struct kwsmatch *,
- bool);
-static size_t cwexec (kwset_t, char const *, size_t, struct kwsmatch *,
- bool);
-static size_t bmexec (kwset_t, char const *, size_t, struct kwsmatch *,
- bool);
+static size_t acexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+static size_t cwexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
+static size_t bmexec (kwset_t, char const *, size_t, struct kwsmatch *, bool);
/* Allocate and initialize a keyword set object, returning an opaque
pointer to it. */
kwset_t
-kwsalloc (char const *trans, bool const reverse)
+kwsalloc (char const *trans, bool reverse)
{
struct kwset *kwset = xmalloc (sizeof *kwset);
@@ -156,10 +152,7 @@ kwsalloc (char const *trans, bool const reverse)
kwset->target = NULL;
kwset->trans = trans;
kwset->reverse = reverse;
- if (reverse)
- kwset->kwsexec = cwexec;
- else
- kwset->kwsexec = acexec;
+ kwset->kwsexec = reverse ? cwexec : acexec;
return kwset;
}
@@ -355,7 +348,7 @@ treefails (struct tree const *tree, struct trie const *fail,
{
tree->trie->fail = link->trie;
if (!reverse && link->trie->accepting && !tree->trie->accepting)
- tree->trie->accepting = -1;
+ tree->trie->accepting = SIZE_MAX;
return;
}
fail = fail->fail;
@@ -430,7 +423,7 @@ kwsprep (kwset_t kwset)
for (curr = last = kwset->trie; curr; curr = curr->next)
enqueue (curr->links, &last);
- /* Looking for just one string. Extract it from the trie. */
+ /* Looking for just one string. Extract it from the trie. */
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
{
@@ -622,7 +615,8 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
}
/* Return the address of the first byte in the buffer S (of size N)
- that matches the last byte specified by KWSET, a singleton. */
+ that matches the last byte specified by KWSET, a singleton.
+ Return NULL if there is no match. */
static char const *
memchr_kwset (char const *s, size_t n, kwset_t kwset)
{
@@ -640,17 +634,20 @@ memchr_kwset (char const *s, size_t n, kwset_t kwset)
return n == 0 ? NULL : memchr2 (s, kwset->gc1, kwset->gc1help, n);
}
-/* Return the address of the first byte in the buffer S (of size N)
- that matches the last byte specified by KWSET, a singleton. */
+/* Return the offset of the first byte in the buffer S (of size N)
+ that matches the last byte specified by KWSET, a pair.
+ Return SIZE_MAX if there is no match. */
static size_t
-memchr2_kwset (char const *s, size_t n, kwset_t kwset,
+memoff2_kwset (char const *s, size_t n, kwset_t kwset,
struct kwsmatch *kwsmatch)
{
struct tree const *link = kwset->trie->links;
struct tree const *clink = link->llink ? link->llink : link->rlink;
char const *mch = memchr2 (s, link->label, clink->label, n);
- if (mch)
+ if (! mch)
+ return SIZE_MAX;
+ else
{
size_t off = mch - s;
if (*mch == link->label)
@@ -661,12 +658,10 @@ memchr2_kwset (char const *s, size_t n, kwset_t kwset,
kwsmatch->size[0] = 1;
return off;
}
- else
- return -1;
}
/* Fast Boyer-Moore search (inlinable version). */
-static inline size_t _GL_ATTRIBUTE_PURE
+static inline size_t
bmexec_trans (kwset_t kwset, char const *text, size_t size)
{
unsigned char const *d1;
@@ -678,11 +673,11 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
if (len == 0)
return 0;
if (len > size)
- return -1;
+ return SIZE_MAX;
if (len == 1)
{
tp = memchr_kwset (text, size, kwset);
- return tp ? tp - text : -1;
+ return tp ? tp - text : SIZE_MAX;
}
d1 = kwset->delta;
@@ -722,7 +717,7 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
tp--;
tp = memchr_kwset (tp, text + size - tp, kwset);
if (! tp)
- return -1;
+ return SIZE_MAX;
tp++;
if (ep <= tp)
break;
@@ -746,21 +741,21 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size)
return tp - text;
}
- return -1;
+ return SIZE_MAX;
}
/* Fast Boyer-Moore search. */
static size_t
bmexec (kwset_t kwset, char const *text, size_t size,
- struct kwsmatch *kwsmatch, bool const longest)
+ struct kwsmatch *kwsmatch, bool longest)
{
/* Help the compiler inline bmexec_trans in two ways, depending on
whether kwset->trans is null. */
size_t ret = (kwset->trans
- ? bmexec_trans (kwset, text, size)
- : bmexec_trans (kwset, text, size));
+ ? bmexec_trans (kwset, text, size)
+ : bmexec_trans (kwset, text, size));
- if (ret != (size_t) -1)
+ if (ret != SIZE_MAX)
{
kwsmatch->index = 0;
kwsmatch->offset[0] = ret;
@@ -771,7 +766,7 @@ bmexec (kwset_t kwset, char const *text, size_t size,
}
/* Hairy multiple string search with Commentz-Walter algorithm. */
-static size_t _GL_ARG_NONNULL ((4))
+static size_t
cwexec (kwset_t kwset, char const *text, size_t len,
struct kwsmatch *kwsmatch, bool longest)
{
@@ -786,15 +781,11 @@ cwexec (kwset_t kwset, char const *text, size_t len,
struct tree const *tree;
char const *trans;
-#ifdef lint
- accept = NULL;
-#endif
-
- /* Initialize register copies and look for easy ways out. */
+ /* Initialize register copies and look for easy ways out. */
if (len < kwset->mind)
- return -1;
+ return SIZE_MAX;
if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
- return memchr2_kwset (text, len, kwset, kwsmatch);
+ return memoff2_kwset (text, len, kwset, kwsmatch);
next = kwset->next;
delta = kwset->delta;
trans = kwset->trans;
@@ -864,7 +855,7 @@ cwexec (kwset_t kwset, char const *text, size_t len,
if (mch)
goto match;
}
- return -1;
+ return SIZE_MAX;
match:
/* Given a known match, find the longest possible match anchored
@@ -934,9 +925,9 @@ cwexec (kwset_t kwset, char const *text, size_t len,
/* Hairy multiple string search with Aho-Corasick algorithm.
(inlinable version) */
-static inline size_t _GL_ARG_NONNULL ((4))
+static inline size_t
acexec_trans (kwset_t kwset, char const *text, size_t len,
- struct kwsmatch *kwsmatch, bool const longest)
+ struct kwsmatch *kwsmatch, bool longest)
{
struct trie * const *next;
struct trie const *trie, *accept;
@@ -945,15 +936,11 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
struct tree const *tree;
char const *trans;
-#ifdef lint
- accept = NULL;
-#endif
-
- /* Initialize register copies and look for easy ways out. */
+ /* Initialize register copies and look for easy ways out. */
if (len < kwset->mind)
- return -1;
+ return SIZE_MAX;
if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
- return memchr2_kwset (text, len, kwset, kwsmatch);
+ return memoff2_kwset (text, len, kwset, kwsmatch);
next = kwset->next;
trans = kwset->trans;
@@ -1007,14 +994,14 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
while (!trie)
{
if (tp >= lim)
- return -1;
+ return SIZE_MAX;
trie = next[U(tr (trans, *tp++))];
}
if (trie->accepting)
goto match;
if (tp >= lim)
- return -1;
+ return SIZE_MAX;
c = tr (trans, *tp++);
tree = trie->links;
@@ -1026,26 +1013,26 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
if (trie->accepting)
goto match;
if (tp >= lim)
- return -1;
+ return SIZE_MAX;
c = tr (trans, *tp++);
tree = trie->links;
- continue;
}
- else if (c < tree->label)
- tree = tree->llink;
else
- tree = tree->rlink;
- if (tree)
- continue;
- trie = trie->fail;
- if (!trie)
- break;
- if (trie->accepting)
{
- --tp;
- goto match;
+ tree = c < tree->label ? tree->llink : tree->rlink;
+ if (! tree)
+ {
+ trie = trie->fail;
+ if (!trie)
+ break;
+ if (trie->accepting)
+ {
+ --tp;
+ goto match;
+ }
+ tree = trie->links;
+ }
}
- tree = trie->links;
}
trie = next[c];
@@ -1053,7 +1040,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
match:
accept = trie;
- while (accept->accepting == (size_t) -1)
+ while (accept->accepting == SIZE_MAX)
accept = accept->fail;
left = tp - accept->depth;
@@ -1067,23 +1054,21 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
c = tr (trans, *tp++);
tree = trie->links;
while (tree && c != tree->label)
- if (c < tree->label)
- tree = tree->llink;
- else
- tree = tree->rlink;
+ tree = c < tree->label ? tree->llink : tree->rlink;
if (!tree)
break;
trie = tree->trie;
- if (!trie->accepting)
- continue;
- accept1 = trie;
- while (accept1->accepting == (size_t) -1)
- accept1 = accept1->fail;
- left1 = tp - accept1->depth;
- if (left1 <= left)
+ if (trie->accepting)
{
- left = left1;
- accept = accept1;
+ accept1 = trie;
+ while (accept1->accepting == SIZE_MAX)
+ accept1 = accept1->fail;
+ left1 = tp - accept1->depth;
+ if (left1 <= left)
+ {
+ left = left1;
+ accept = accept1;
+ }
}
}
}
@@ -1098,7 +1083,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
/* Hairy multiple string search with Aho-Corasick algorithm. */
static size_t
acexec (kwset_t kwset, char const *text, size_t size,
- struct kwsmatch *kwsmatch, bool const longest)
+ struct kwsmatch *kwsmatch, bool longest)
{
/* Help the compiler inline bmexec_trans in two ways, depending on
whether kwset->trans is null. */
@@ -1109,12 +1094,12 @@ acexec (kwset_t kwset, char const *text, size_t size,
/* Search TEXT for a match of any member of KWSET.
Return the offset (into TEXT) of the first byte of the matching substring,
- or (size_t) -1 if no match is found. Upon a match, store details in
+ or SIZE_MAX if no match is found. Upon a match, store details in
*KWSMATCH: index of matched keyword, start offset (same as the return
value), and length. */
size_t
kwsexec (kwset_t kwset, char const *text, size_t size,
- struct kwsmatch *kwsmatch, bool const longest)
+ struct kwsmatch *kwsmatch, bool longest)
{
return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
}
diff --git a/src/kwset.h b/src/kwset.h
index 59efa74..d2700e9 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -39,7 +39,7 @@ typedef struct kwset *kwset_t;
/* Return an opaque pointer to a newly allocated keyword set. A nonnull arg
specifies a table of character translations to be applied to all
pattern and search text. */
-extern kwset_t kwsalloc (char const *, bool const);
+extern kwset_t kwsalloc (char const *, bool);
/* Incrementally extend the keyword set to include the given string.
Remember an index number for each keyword included in the set. */
@@ -54,8 +54,8 @@ extern void kwsprep (kwset_t);
the matching substring in the integer it points to. Similarly,
if foundindex is non-NULL, store the index of the particular
keyword found therein. */
-extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *,
- bool const) _GL_ARG_NONNULL ((4));
+extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *, bool)
+ _GL_ARG_NONNULL ((4));
/* Deallocate the given keyword set and all its associated storage. */
extern void kwsfree (kwset_t);
--
2.5.5
From 81ccbaa009d4147840f480991ed74845841a436e Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 15:24:38 -0700
Subject: [PATCH 2/3] grep: simplify -F Aho-Corasick a bit
This removes some tuning that complicates the code without providing
performance benefits that I could measure (GCC 6.1, x86-64).
(acexec_trans): Do not hand-unroll. Unduplicate the code for a
transition step.
* src/kwset.c (struct kwset.kwsexec, bmexec, acexec_trans, acexec)
---
src/kwset.c | 102 ++++++++++++++++--------------------------------------------
1 file changed, 27 insertions(+), 75 deletions(-)
diff --git a/src/kwset.c b/src/kwset.c
index d14972f..7391990 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -932,110 +932,62 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
struct trie * const *next;
struct trie const *trie, *accept;
char const *tp, *left, *lim;
- unsigned char c;
struct tree const *tree;
char const *trans;
/* Initialize register copies and look for easy ways out. */
if (len < kwset->mind)
return SIZE_MAX;
- if (!kwset->trans && kwset->maxd == 1 && kwset->words == 2)
+ trans = kwset->trans;
+ if (!trans && kwset->maxd == 1 && kwset->words == 2)
return memoff2_kwset (text, len, kwset, kwsmatch);
next = kwset->next;
- trans = kwset->trans;
+ trie = kwset->trie;
lim = text + len;
tp = text;
- if (kwset->trie->accepting)
+ if (!trie->accepting)
{
- trie = kwset->trie;
- goto match;
- }
-
- trie = next[U(tr (trans, *tp++))];
+ unsigned char c = tr (trans, *tp++);
- while (true)
- {
- if (tp < lim - 8)
+ while (true)
{
- while (!trie)
+ while (! (trie = next[c]))
{
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- trie = next[U(tr (trans, *tp++))];
- if (trie)
- break;
- if (tp >= lim - 8)
- break;
- trie = next[U(tr (trans, *tp++))];
+ if (tp >= lim)
+ return SIZE_MAX;
+ c = tr (trans, *tp++);
}
- }
-
- while (!trie)
- {
- if (tp >= lim)
- return SIZE_MAX;
- trie = next[U(tr (trans, *tp++))];
- }
- if (trie->accepting)
- goto match;
- if (tp >= lim)
- return SIZE_MAX;
- c = tr (trans, *tp++);
- tree = trie->links;
-
- while (true)
- {
- if (c == tree->label)
+ while (true)
{
- trie = tree->trie;
if (trie->accepting)
goto match;
if (tp >= lim)
return SIZE_MAX;
c = tr (trans, *tp++);
- tree = trie->links;
- }
- else
- {
- tree = c < tree->label ? tree->llink : tree->rlink;
- if (! tree)
+
+ for (tree = trie->links; c != tree->label; )
{
- trie = trie->fail;
- if (!trie)
- break;
- if (trie->accepting)
+ tree = c < tree->label ? tree->llink : tree->rlink;
+ if (! tree)
{
- --tp;
- goto match;
+ trie = trie->fail;
+ if (!trie)
+ goto next_trie;
+ if (trie->accepting)
+ {
+ --tp;
+ goto match;
+ }
+ tree = trie->links;
}
- tree = trie->links;
}
+ trie = tree->trie;
}
+ next_trie:;
}
-
- trie = next[c];
}
match:
@@ -1051,7 +1003,7 @@ acexec_trans (kwset_t kwset, char const *text, size_t len,
{
struct trie const *accept1;
char const *left1;
- c = tr (trans, *tp++);
+ unsigned char c = tr (trans, *tp++);
tree = trie->links;
while (tree && c != tree->label)
tree = c < tree->label ? tree->llink : tree->rlink;
--
2.5.5
From 3d5b898958b922995935f5d803c271696d127a8a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 2 Jun 2016 15:37:10 -0700
Subject: [PATCH 3/3] maint: correct attribution
* build-aux/git-log-fix: Fix attribution of primary Aho-Corasick patch
---
build-aux/git-log-fix | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/build-aux/git-log-fix b/build-aux/git-log-fix
index b06803a..909a06f 100644
--- a/build-aux/git-log-fix
+++ b/build-aux/git-log-fix
@@ -2,6 +2,10 @@
# option. It specifies what changes to make to each given SHA1's commit
# log and metadata, using Perl-eval'able expressions.
+a3d42de483a29c1da381f5c3dad87cbbc86a5c70
+# Correct the author:
+s/Paul Eggert <eggert\@cs.ucla.edu>/Norihiro Tanaka <noritnk\@kcn.ne.jp>/
+
074842d3e3054714a495252e582886f0e4ace4e4
# Correct spelling of name:
s/Mercer/Meixner/
--
2.5.5