bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables

Zev Weiss Thu, 18 Aug 2016 03:52:38 -0700

* src/dfa.c: Replace the utf8 and unibyte_c function-local static
variables with file-scope static variables (using_utf8 and unibyte_c)
initialized by a new function dfa_init(), which must be called before
any other dfa*() function.
(dfa_using_utf8): Rename using_utf8() to dfa_using_utf8() for
consistency with other exported functions.
* src/dfa.h (dfa_using_utf8): Rename using_utf8() to dfa_using_utf8();
also add _GL_ATTRIBUTE_PURE.
(dfa_init): New function.
* src/grep.c (main), tests/dfa-match-aux.c (main): Call dfa_init().
* src/dfasearch.c (EGexecute), src/kwsearch.c (Fexecute),
src/pcresearch.c (Pcompile): Replace using_utf8() with
dfa_using_utf8().
---
 src/dfa.c             | 62 +++++++++++++++++++++++++++------------------------
 src/dfa.h             |  5 ++++-
 src/dfasearch.c       |  2 +-
 src/grep.c            |  2 ++
 src/kwsearch.c        |  2 +-
 src/pcresearch.c      |  2 +-
 tests/dfa-match-aux.c |  2 ++
 7 files changed, 44 insertions(+), 33 deletions(-)


diff --git a/src/dfa.c b/src/dfa.c
index ae1b340..970b51f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -756,6 +756,16 @@ char_context (struct dfa *dfa, unsigned char c)
   return CTX_NONE;
 }
 
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+   assume in a multibyte encoding.  */
+static bool using_utf8;
+
+bool
+dfa_using_utf8 (void)
+{
+  return using_utf8;
+}
+
 /* Entry point to set syntax options.  */
 void
 dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
@@ -788,7 +798,7 @@ dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
 
       /* POSIX requires that the five bytes in "\n\r./" (including the
          terminating NUL) cannot occur inside a multibyte character.  */
-      dfa->syntax.never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
+      dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80
                                      : strchr ("\n\r./", uc) != NULL);
     }
 }
@@ -821,21 +831,21 @@ setbit_case_fold_c (int b, charclass c)
       setbit (i, c);
 }
 
+static void check_utf8 (void)
+{
+  wchar_t wc;
+  mbstate_t mbs = { 0 };
+  using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
 
+static bool unibyte_c;
 
-/* UTF-8 encoding allows some optimizations that we can't otherwise
-   assume in a multibyte encoding.  */
-bool
-using_utf8 (void)
+static void check_unibyte_c (void)
 {
-  static int utf8 = -1;
-  if (utf8 < 0)
-    {
-      wchar_t wc;
-      mbstate_t mbs = { 0 };
-      utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
-    }
-  return utf8;
+  char const *locale = setlocale (LC_ALL, NULL);
+  unibyte_c = (!locale
+               || STREQ (locale, "C")
+               || STREQ (locale, "POSIX"));
 }
 
 /* The current locale is known to be a unibyte locale
@@ -862,20 +872,7 @@ using_simple_locale (struct dfa *dfa)
      && '}' == 125 && '~' == 126)
   };
 
-  if (! native_c_charset || dfa->multibyte)
-    return false;
-  else
-    {
-      static int unibyte_c = -1;
-      if (unibyte_c < 0)
-        {
-          char const *locale = setlocale (LC_ALL, NULL);
-          unibyte_c = (!locale
-                       || STREQ (locale, "C")
-                       || STREQ (locale, "POSIX"));
-        }
-      return unibyte_c;
-    }
+  return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
 }
 
 /* Fetch the next lexical input character.  Set C (of type int) to the
@@ -1842,7 +1839,7 @@ atom (struct dfa *dfa)
 
       dfa->parsestate.tok = lex (dfa);
     }
-  else if (dfa->parsestate.tok == ANYCHAR && using_utf8 ())
+  else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -3523,7 +3520,7 @@ dfaoptimize (struct dfa *d)
   size_t i;
   bool have_backref = false;
 
-  if (!using_utf8 ())
+  if (!using_utf8)
     return;
 
   for (i = 0; i < d->tindex; ++i)
@@ -4201,4 +4198,11 @@ dfaalloc (void)
   return d;
 }
 
+void
+dfa_init (void)
+{
+  check_utf8 ();
+  check_unibyte_c ();
+}
+
 /* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
index 014ae96..585390a 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -100,4 +100,7 @@ extern void dfawarn (const char *);
    The user must supply a dfaerror.  */
 extern _Noreturn void dfaerror (const char *);
 
-extern bool using_utf8 (void);
+extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE;
+
+/* This must be called before calling any of the above dfa*() functions. */
+extern void dfa_init (void);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 3dbf76b..10c4f51 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -277,7 +277,7 @@ EGexecute (char *buf, size_t size, size_t *match_size,
 
               if (exact_kwset_match)
                 {
-                  if (MB_CUR_MAX == 1 || using_utf8 ())
+                  if (MB_CUR_MAX == 1 || dfa_using_utf8 ())
                     goto success;
                   if (mb_start < beg)
                     mb_start = beg;
diff --git a/src/grep.c b/src/grep.c
index a82da61..bd1c5cc 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2351,6 +2351,8 @@ main (int argc, char **argv)
   textdomain (PACKAGE);
 #endif
 
+  dfa_init ();
+
   atexit (clean_up_stdout);
 
   last_recursive = 0;
diff --git a/src/kwsearch.c b/src/kwsearch.c
index d2afa40..fb77280 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -93,7 +93,7 @@ Fexecute (char *buf, size_t size, size_t *match_size,
     mb_check = longest = false;
   else
     {
-      mb_check = MB_CUR_MAX > 1 && !using_utf8 ();
+      mb_check = MB_CUR_MAX > 1 && !dfa_using_utf8 ();
       longest = mb_check || start_ptr || match_words;
     }
 
diff --git a/src/pcresearch.c b/src/pcresearch.c
index f6e72b0..3f76603 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -114,7 +114,7 @@ Pcompile (char const *pattern, size_t size)
 
   if (1 < MB_CUR_MAX)
     {
-      if (! using_utf8 ())
+      if (! dfa_using_utf8 ())
         error (EXIT_TROUBLE, 0,
                _("-P supports only unibyte and UTF-8 locales"));
       multibyte_locale = true;
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index 25b0535..e651735 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -54,6 +54,8 @@ main (int argc, char **argv)
 
   setlocale (LC_ALL, "");
 
+  dfa_init ();
+
   dfa = dfaalloc ();
   dfasyntax (dfa, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
   dfacomp (argv[1], strlen (argv[1]), dfa, 0);
-- 
2.8.1

bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables

Reply via email to