Norihiro Tanaka wrote:
I think this patch should be suspended because of this issue.
I reported it to glibc developers.  
https://sourceware.org/bugzilla/show_bug.cgi?id=20381

After thinking about it a bit, I came up with a variant of the patch that gives the performance improvement unless -i is used, so I installed the attached patches. The first patch is mostly just refactoring this somewhat-crufty code and fixing an O(N**2) reallocation problem. The second is the real improvement.

The second patch just captures the low-hanging fruit. For example, even with -i we could use a fastmap if all the pattern's letters (including letters matched by ranges) happen to avoid the glibc bug. Something like that might be worth pursuing.

Since the attached patch fixes the test case that prompted the bug report I'm closing the bug. We can reopen it, or open a new one, if someone wants to fix the remaining performance glitches.

Thanks again for all these fixes!
From f3c82fc663f7155e04d94bc0dcdc16bb338605a1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 1 Sep 2016 21:35:53 -0700
Subject: [PATCH 1/2] grep: improve dfasearch storage management

This patch is mostly refactoring, with a bit of performance tweaking.
It is done in preparation for a fix for Bug#24009.
* src/dfasearch.c (patterns): Now of type struct re_pattern_buffer *
instead of an anonymous struct pointer, since there is no longer
any need to keep regs here.  All uses changed.
(GEAcompile): Use patlim instead of a hard-to-follow "total".
Use x2nrealloc to avoid potential O(N**2) reallocation algorithm.
Initialize just the pattern members that need clearing.
(EGexecute): Put regs into a static variable, as this code did
before 2001-02-18, as there is no need to have a separate set of
regs for each pattern.  Explain the "Q@#%!#" comment better.
---
 src/dfasearch.c | 75 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 61b1f70..b5ae623 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -40,13 +40,7 @@ static kwset_t kwset;
 static struct dfa *dfa;
 
 /* The Regex compiled patterns.  */
-static struct
-{
-  /* Regex compiled regexp. */
-  struct re_pattern_buffer regexbuf;
-  struct re_registers regs; /* This is here on account of a BRAIN-DEAD
-                               Q@#%!# library interface in regex.c.  */
-} *patterns;
+static struct re_pattern_buffer *patterns;
 static size_t pcount;
 
 /* Number of compiled fixed strings known to exactly match the regexp.
@@ -122,7 +116,6 @@ kwsmusts (void)
 void
 GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
 {
-  size_t total = size;
   char *motif;
 
   dfa = dfaalloc ();
@@ -137,28 +130,31 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
      this should be a syntax error.  The same for backref, where the
      backref should be local to each pattern.  */
   char const *p = pattern;
+  char const *patlim = pattern + size;
   bool compilation_failed = false;
+  size_t palloc = 0;
+
   do
     {
       size_t len;
-      char const *sep = memchr (p, '\n', total);
+      char const *sep = memchr (p, '\n', patlim - p);
       if (sep)
         {
           len = sep - p;
           sep++;
-          total -= (len + 1);
         }
       else
-        {
-          len = total;
-          total = 0;
-        }
+        len = patlim - p;
 
-      patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns);
-      memset (&patterns[pcount], 0, sizeof patterns[pcount]);
+      if (palloc <= pcount)
+        patterns = x2nrealloc (patterns, &palloc, sizeof *patterns);
+      struct re_pattern_buffer *pat = &patterns[pcount];
+      pat->buffer = NULL;
+      pat->allocated = 0;
+      pat->fastmap = NULL;
+      pat->translate = NULL;
 
-      char const *err = re_compile_pattern (p, len,
-                                            &(patterns[pcount].regexbuf));
+      char const *err = re_compile_pattern (p, len, pat);
       if (err)
         {
           /* With patterns specified only on the command line, emit the bare
@@ -198,7 +194,7 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
 
       strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
                              : (bk ? word_beg_bk : word_beg_no_bk));
-      total = strlen(n);
+      size_t total = strlen (n);
       memcpy (n + total, pattern, size);
       total += size;
       strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
@@ -213,7 +209,7 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
   dfacomp (pattern, size, dfa, 1);
   kwsmusts ();
 
-  free(motif);
+  free (motif);
 }
 
 size_t
@@ -355,17 +351,24 @@ EGexecute (char *buf, size_t size, size_t *match_size,
       best_len = 0;
       for (i = 0; i < pcount; i++)
         {
-          patterns[i].regexbuf.not_eol = 0;
-          patterns[i].regexbuf.newline_anchor = eolbyte == '\n';
-          start = re_search (&(patterns[i].regexbuf),
-                             beg, end - beg - 1,
-                             ptr - beg, end - ptr - 1,
-                             &(patterns[i].regs));
+          /* This is static because of a BRAIN-DEAD Q@#%!# library
+             interface in regex.c, as later calls reuse the
+             dynamically allocated storage that REGS members point at
+             and the API provides no way to free this storage.
+             If grep is ever made multithreaded, REGS would have to be
+             per-thread or the library API changed or the library
+             encapsulation violated.  */
+          static struct re_registers regs;
+
+          patterns[i].not_eol = 0;
+          patterns[i].newline_anchor = eolbyte == '\n';
+          start = re_search (&patterns[i], beg, end - beg - 1,
+                             ptr - beg, end - ptr - 1, &regs);
           if (start < -1)
             xalloc_die ();
           else if (0 <= start)
             {
-              len = patterns[i].regs.end[0] - start;
+              len = regs.end[0] - start;
               match = beg + start;
               if (match > best_match)
                 continue;
@@ -396,11 +399,10 @@ EGexecute (char *buf, size_t size, size_t *match_size,
                       {
                         /* Try a shorter length anchored at the same place. */
                         --len;
-                        patterns[i].regexbuf.not_eol = 1;
-                        shorter_len = re_match (&(patterns[i].regexbuf),
-                                                beg, match + len - ptr,
-                                                match - beg,
-                                                &(patterns[i].regs));
+                        patterns[i].not_eol = 1;
+                        shorter_len = re_match (&patterns[i], beg,
+                                                match + len - ptr, match - beg,
+                                                &regs);
                         if (shorter_len < -1)
                           xalloc_die ();
                       }
@@ -412,18 +414,17 @@ EGexecute (char *buf, size_t size, size_t *match_size,
                         if (match == end - 1)
                           break;
                         match++;
-                        patterns[i].regexbuf.not_eol = 0;
-                        start = re_search (&(patterns[i].regexbuf),
-                                           beg, end - beg - 1,
+                        patterns[i].not_eol = 0;
+                        start = re_search (&patterns[i], beg, end - beg - 1,
                                            match - beg, end - match - 1,
-                                           &(patterns[i].regs));
+                                           &regs);
                         if (start < 0)
                           {
                             if (start < -1)
                               xalloc_die ();
                             break;
                           }
-                        len = patterns[i].regs.end[0] - start;
+                        len = regs.end[0] - start;
                         match = beg + start;
                       }
                   } /* while (match <= best_match) */
-- 
2.7.4

From 77a846927e4be2d6a58df8d65c22ff2ab1540275 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 1 Sep 2016 21:49:51 -0700
Subject: [PATCH 2/2] grep: use regex fastmap unless -i

This builds on a suggestion by Norihiro Tanaka (Bug#24009).
* src/dfasearch.c (GEAcompile): Use a fastmap unless -i.
This improves performance 20x for me using the first benchmark
given in Bug#24009.
---
 src/dfasearch.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/dfasearch.c b/src/dfasearch.c
index b5ae623..0838e1f 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -151,7 +151,10 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
       struct re_pattern_buffer *pat = &patterns[pcount];
       pat->buffer = NULL;
       pat->allocated = 0;
-      pat->fastmap = NULL;
+
+      /* Do not use a fastmap with -i, to work around glibc Bug#20381.  */
+      pat->fastmap = match_icase ? NULL : xmalloc (UCHAR_MAX + 1);
+
       pat->translate = NULL;
 
       char const *err = re_compile_pattern (p, len, pat);
-- 
2.7.4

Reply via email to