On Fri, 9 Dec 2016 01:24:19 -0600 Trevor Cordes <g...@tecnopolis.ca> wrote:
> I bisected this bug to commits: > 662b19f2d0edc0bf07f1a2c1421080252df4a37c > 468d5217ed6ec1679512dec208c7f30fb8612957 > (can't narrow it down because the latter doesn't compile for me) The changes switch used algorithm. They convert grep -w -F to grep -w. Try following test case and before and after the changes, please. $ yes $(printf %040d 0) | head -10000000 >k $ time -p env LC_ALL=C grep -w -F 0 k It did not finish in 40s on my machine without the changes, but finished in 0.54s after the changes, and following case finished in 1.35s regardless whether the changes applied or not. $ time -p env LC_ALL=C grep -w 0 k It may be an extreme example, but if a lot of people assume that grep -F is faster than grep, I think that it is a bug. > grep -w -f /usr/share/dict/words /tmp/greptest > (good older version: 2 seconds to complete, minimal memory) > (any version after the above commits: 10 or more minutes, never waited for > it to finish, 1.2GB RAM usage and 100% cpu) I believe that it can not happen, as the changes work for grep -F only. Is it grep -F -w, correctly? grep -F -w is not good at long pattern after the changes. I think that the real issue of bug#21763, bug#22239 and bug#22357 is that dfa uses a lot of memory for a long pattern, but We have no idea to improve it currently. Following case is poor performance and uses a lot of memory regardless whether the changes applied or not. $ env LC_ALL=C grep -w -f /usr/share/dict/words /dev/null However, the poor performance will be improved partly. Following case is returned in 2.4s after patched on my machine. $ env LC_ALL=C grep -F -w -f /usr/share/dict/words /dev/null Thanks, Norihiro
From 19502d13120d612fc89b922c9b28cc3030ea0674 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Sun, 11 Dec 2016 09:35:50 +0900 Subject: [PATCH] dfa: performance improvement for removal of epsilon closure * lib/dfa.c (delete): Use binary search to find deleted index. (replace): New function. It replaces a position with the followed set. (epsclosure): Replace it with a new algorithm. Update caller. --- lib/dfa.c | 140 ++++++++++++++++++++++++++++++++++++------------------------- 1 files changed, 83 insertions(+), 57 deletions(-) diff --git a/lib/dfa.c b/lib/dfa.c index 33754fc..f0da30f 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -2019,17 +2019,62 @@ merge (position_set const *s1, position_set const *s2, position_set *m) } /* Delete a position from a set. */ +static position +delete (size_t del, position_set *s) +{ + position p; + size_t count = s->nelem; + size_t lo = 0, hi = count; + while (lo < hi) + { + size_t mid = (lo + hi) >> 1; + if (s->elems[mid].index > del) + lo = mid + 1; + else + hi = mid; + } + + if (lo < count && del == s->elems[lo].index) + { + p = s->elems[lo]; + for (size_t i = lo + 1; i < s->nelem; i++) + s->elems[i - 1] = s->elems[i]; + --s->nelem; + } + else + { + p.index = -1; + p.constraint = NO_CONSTRAINT; + } + return p; +} + +/* Replace a position with the followed set. */ static void -delete (position p, position_set *s) +replace (position_set *dst, size_t del, position_set *add, + unsigned int constraint, position_set *tmp) { - size_t i; + position pos; - for (i = 0; i < s->nelem; ++i) - if (p.index == s->elems[i].index) - break; - if (i < s->nelem) - for (--s->nelem; i < s->nelem; ++i) - s->elems[i] = s->elems[i + 1]; + pos = delete (del, dst); + + if (pos.index == (size_t) -1) + return; + + copy (add, tmp); + + pos.constraint &= constraint; + + for (size_t i = 0; i < tmp->nelem; i++) + { + tmp->elems[i].constraint &= pos.constraint; + + while (i < tmp->nelem && tmp->elems[i].constraint == 0) + delete (tmp->elems[i].index, tmp); + } + + for (size_t i = 0; i < tmp->nelem; i++) + insert (tmp->elems[i], dst); } /* Find the index of the state corresponding to the given position set with @@ -2121,63 +2166,48 @@ state_index (struct dfa *d, position_set const *s, int context) constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ static void -epsclosure (position_set *s, struct dfa const *d, char *visited) +epsclosure (position_set *initial, struct dfa const *d) { - size_t i, j; - position p, old; - bool initialized = false; - - for (i = 0; i < s->nelem; ++i) - if (d->tokens[s->elems[i].index] >= NOTCHAR - && d->tokens[s->elems[i].index] != BACKREF - && d->tokens[s->elems[i].index] != ANYCHAR - && d->tokens[s->elems[i].index] != MBCSET - && d->tokens[s->elems[i].index] < CSET) + position_set tmp; + alloc_position_set (&tmp, d->nleaves); + for (size_t i = 0; i < d->tindex; ++i) + if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR + && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR + && d->tokens[i] != MBCSET && d->tokens[i] < CSET) { - if (!initialized) - { - memset (visited, 0, d->tindex * sizeof (*visited)); - initialized = true; - } - old = s->elems[i]; - p.constraint = old.constraint; - delete (s->elems[i], s); - if (visited[old.index]) - { - --i; - continue; - } - visited[old.index] = 1; - switch (d->tokens[old.index]) + unsigned int constraint; + switch (d->tokens[i]) { case BEGLINE: - p.constraint &= BEGLINE_CONSTRAINT; + constraint = BEGLINE_CONSTRAINT; break; case ENDLINE: - p.constraint &= ENDLINE_CONSTRAINT; + constraint = ENDLINE_CONSTRAINT; break; case BEGWORD: - p.constraint &= BEGWORD_CONSTRAINT; + constraint = BEGWORD_CONSTRAINT; break; case ENDWORD: - p.constraint &= ENDWORD_CONSTRAINT; + constraint = ENDWORD_CONSTRAINT; break; case LIMWORD: - p.constraint &= LIMWORD_CONSTRAINT; + constraint = LIMWORD_CONSTRAINT; break; case NOTLIMWORD: - p.constraint &= NOTLIMWORD_CONSTRAINT; + constraint = NOTLIMWORD_CONSTRAINT; break; default: + constraint = NO_CONSTRAINT; break; } - for (j = 0; j < d->follows[old.index].nelem; ++j) - { - p.index = d->follows[old.index].elems[j].index; - insert (p, s); - } - /* Force rescan to start at the beginning. */ - i = -1; + + delete (i, &d->follows[i]); + + for (size_t j = 0; j < d->tindex; j++) + if (i != j && d->follows[j].nelem > 0) + replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); + + replace (initial, i, &d->follows[i], constraint, &tmp); } } @@ -2304,7 +2334,6 @@ dfaanalyze (struct dfa *d, bool searchflag) int separate_contexts; /* Context wanted by some position. */ size_t i, j; position *pos; - char *visited = xnmalloc (d->tindex, sizeof *visited); #ifdef DEBUG fprintf (stderr, "dfaanalyze:\n"); @@ -2445,14 +2474,12 @@ dfaanalyze (struct dfa *d, bool searchflag) #endif } - /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. */ +#ifdef DEBUG for (i = 0; i < d->tindex; ++i) if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET || d->tokens[i] >= CSET) { -#ifdef DEBUG fprintf (stderr, "follows(%zu:", i); prtok (d->tokens[i]); fprintf (stderr, "):"); @@ -2462,18 +2489,18 @@ dfaanalyze (struct dfa *d, bool searchflag) prtok (d->tokens[d->follows[i].elems[j].index]); } putc ('\n', stderr); -#endif - copy (&d->follows[i], &merged); - epsclosure (&merged, d, visited); - copy (&merged, &d->follows[i]); } +#endif /* Get the epsilon closure of the firstpos of the regexp. The result will be the set of positions of state 0. */ merged.nelem = 0; for (i = 0; i < stk[-1].nfirstpos; ++i) insert (firstpos[i], &merged); - epsclosure (&merged, d, visited); + + /* For each follow set that is the follow set of a real position, replace + it with its epsilon closure. */ + epsclosure (&merged, d); /* Build the initial state. */ separate_contexts = state_separate_contexts (&merged); @@ -2489,7 +2516,6 @@ dfaanalyze (struct dfa *d, bool searchflag) free (posalloc); free (stkalloc); free (merged.elems); - free (visited); } -- 1.7.1