bug#24259: [PATCH 3/6] dfa: thread-safety: move parser state into struct dfa

From: Zev Weiss
Date: Thu, 18 Aug 2016 03:53:19 -0700

* src/dfa.c: Move the global variables holding parser state (`tok'
and `depth') into a new struct (`struct parser_state') and add an
instance of it to struct dfa.  Replace all references to the globals
with references to the new `parsestate' member of struct dfa.
---
 src/dfa.c | 92 ++++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 52 insertions(+), 40 deletions(-)


diff --git a/src/dfa.c b/src/dfa.c
index d100578..858bc55 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -351,6 +351,18 @@ struct lexer_state
                      MB_CUR_MAX > 1.  */
 };
 
+/* Recursive descent parser for regular expressions.  */
+
+struct parser_state
+{
+  token tok;               /* Lookahead token.  */
+  size_t depth;            /* Current depth of a hypothetical stack
+                              holding deferred productions.  This is
+                              used to determine the depth that will be
+                              required of the real stack later on in
+                              dfaanalyze.  */
+};
+
 /* A compiled regular expression.  */
 struct dfa
 {
@@ -362,6 +374,9 @@ struct dfa
   /* Scanner state */
   struct lexer_state lexstate;
 
+  /* Parser state */
+  struct parser_state parsestate;
+
   /* Fields filled by the parser.  */
   token *tokens;                /* Postfix parse array.  */
   size_t tindex;                /* Index for adding new tokens.  */
@@ -1584,15 +1599,6 @@ lex (struct dfa *dfa)
   return END;                   /* keeps pedantic compilers happy.  */
 }
 
-/* Recursive descent parser for regular expressions.  */
-
-static token tok;               /* Lookahead token.  */
-static size_t depth;            /* Current depth of a hypothetical stack
-                                   holding deferred productions.  This is
-                                   used to determine the depth that will be
-                                   required of the real stack later on in
-                                   dfaanalyze.  */
-
 static void
 addtok_mb (struct dfa *dfa, token t, int mbprop)
 {
@@ -1617,7 +1623,7 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
 
     case CAT:
     case OR:
-      --depth;
+      --dfa->parsestate.depth;
       break;
 
     case BACKREF:
@@ -1627,11 +1633,11 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
       ++dfa->nleaves;
       /* fallthrough */
     case EMPTY:
-      ++depth;
+      ++dfa->parsestate.depth;
       break;
     }
-  if (depth > dfa->depth)
-    dfa->depth = depth;
+  if (dfa->parsestate.depth > dfa->depth)
+    dfa->depth = dfa->parsestate.depth;
 }
 
 static void addtok_wc (struct dfa *dfa, wint_t wc);
@@ -1801,7 +1807,7 @@ add_utf8_anychar (struct dfa *dfa)
 static void
 atom (struct dfa *dfa)
 {
-  if (tok == WCHAR)
+  if (dfa->parsestate.tok == WCHAR)
     {
       if (dfa->lexstate.wctok == WEOF)
         addtok (dfa, BACKREF);
@@ -1822,9 +1828,9 @@ atom (struct dfa *dfa)
             }
         }
 
-      tok = lex (dfa);
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if (tok == ANYCHAR && using_utf8 ())
+  else if (dfa->parsestate.tok == ANYCHAR && using_utf8 ())
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -1834,23 +1840,26 @@ atom (struct dfa *dfa)
          UTF-8: it is the most used, and the structure of the encoding
          makes the correctness more obvious.  */
       add_utf8_anychar (dfa);
-      tok = lex (dfa);
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
-           || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-           || tok == ANYCHAR || tok == MBCSET
-           || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+  else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
+           || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
+           || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
+           || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
+           || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
+           || dfa->parsestate.tok == LIMWORD
+           || dfa->parsestate.tok == NOTLIMWORD)
     {
-      addtok (dfa, tok);
-      tok = lex (dfa);
+      addtok (dfa, dfa->parsestate.tok);
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if (tok == LPAREN)
+  else if (dfa->parsestate.tok == LPAREN)
     {
-      tok = lex (dfa);
+      dfa->parsestate.tok = lex (dfa);
       regexp (dfa);
-      if (tok != RPAREN)
+      if (dfa->parsestate.tok != RPAREN)
         dfaerror (_("unbalanced ("));
-      tok = lex (dfa);
+      dfa->parsestate.tok = lex (dfa);
     }
   else
     addtok (dfa, EMPTY);
@@ -1898,8 +1907,10 @@ closure (struct dfa *dfa)
   size_t tindex, ntokens;
 
   atom (dfa);
-  while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
-    if (tok == REPMN && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
+  while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
+         || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
+    if (dfa->parsestate.tok == REPMN
+        && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
       {
         ntokens = nsubtoks (dfa, dfa->tindex);
         tindex = dfa->tindex - ntokens;
@@ -1918,18 +1929,18 @@ closure (struct dfa *dfa)
             addtok (dfa, QMARK);
             addtok (dfa, CAT);
           }
-        tok = lex (dfa);
+        dfa->parsestate.tok = lex (dfa);
       }
-    else if (tok == REPMN)
+    else if (dfa->parsestate.tok == REPMN)
       {
         dfa->tindex -= nsubtoks (dfa, dfa->tindex);
-        tok = lex (dfa);
+        dfa->parsestate.tok = lex (dfa);
         closure (dfa);
       }
     else
       {
-        addtok (dfa, tok);
-        tok = lex (dfa);
+        addtok (dfa, dfa->parsestate.tok);
+        dfa->parsestate.tok = lex (dfa);
       }
 }
 
@@ -1937,7 +1948,8 @@ static void
 branch (struct dfa* dfa)
 {
   closure (dfa);
-  while (tok != RPAREN && tok != OR && tok >= 0)
+  while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
+         && dfa->parsestate.tok >= 0)
     {
       closure (dfa);
       addtok (dfa, CAT);
@@ -1948,9 +1960,9 @@ static void
 regexp (struct dfa *dfa)
 {
   branch (dfa);
-  while (tok == OR)
+  while (dfa->parsestate.tok == OR)
     {
-      tok = lex (dfa);
+      dfa->parsestate.tok = lex (dfa);
       branch (dfa);
       addtok (dfa, OR);
     }
@@ -1976,12 +1988,12 @@ dfaparse (char const *s, size_t len, struct dfa *d)
   if (!syntax_bits_set)
     dfaerror (_("no syntax specified"));
 
-  tok = lex (d);
-  depth = d->depth;
+  d->parsestate.tok = lex (d);
+  d->parsestate.depth = d->depth;
 
   regexp (d);
 
-  if (tok != END)
+  if (d->parsestate.tok != END)
     dfaerror (_("unbalanced )"));
 
   addtok (d, END - d->nregexps);
-- 
2.8.1

bug#24259: [PATCH 3/6] dfa: thread-safety: move parser state into struct dfa

Reply via email to