The transition code for non-UTF8 locales is somewhat complicated.  In addition,
the initial-state optimization is applied only to state 0.

The patch simplifies dfaexec() and extends the optimization to the initial
states for all contexts, i.e. the states with s < D->min_trcount.
From c3ac857ee6f7167daaa242036769f5719732ae43 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Sun, 14 Aug 2016 11:21:48 +0900
Subject: [PATCH 1/2] dfa: simplify and optimize at initial state in execution

* src/dfa.c (skip_remains_mb): Remove argument *wcp.  Update caller.
(dfaexec_main): Simplify and optimize at initial state.
---
 src/dfa.c |   79 +++++++++++++++++-------------------------------------------
 1 files changed, 23 insertions(+), 56 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 30aaecf..59bb3bc 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -3144,16 +3144,14 @@ transit_state (struct dfa *d, state_num s, unsigned 
char const **pp,
    Both P and MBP must be no larger than END.  */
 static unsigned char const *
 skip_remains_mb (struct dfa *d, unsigned char const *p,
-                 unsigned char const *mbp, char const *end, wint_t *wcp)
+                 unsigned char const *mbp, char const *end)
 {
-  wint_t wc = WEOF;
+  wint_t wc;
   if (never_trail[*p])
     return p;
   while (mbp < p)
     mbp += mbs_to_wchar (&wc, (char const *) mbp,
                          end - (char const *) mbp, d);
-  if (wcp != NULL)
-    *wcp = wc;
   return mbp;
 }
 
@@ -3210,46 +3208,22 @@ dfaexec_main (struct dfa *d, char const *begin, char 
*end, bool allow_nl,
 
   for (;;)
     {
-      if (multibyte)
+      while ((t = trans[s]) != NULL)
         {
-          while ((t = trans[s]) != NULL)
+          if (s < d->min_trcount)
             {
-              s1 = s;
-
-              if (s < d->min_trcount)
+              if (!multibyte || d->states[s].mbps.nelem == 0)
                 {
-                  if (d->min_trcount == 1)
-                    {
-                      if (d->states[s].mbps.nelem == 0)
-                        {
-                          do
-                            {
-                              while (t[*p] == 0)
-                                p++;
-                              p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
-                            }
-                          while (t[*p] == 0);
-                        }
-                      else
-                        p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
-                    }
-                  else
-                    {
-                      wint_t wc;
-                      mbp = skip_remains_mb (d, p, mbp, end, &wc);
-
-                      /* If d->min_trcount is greater than 1, maybe
-                         transit to another initial state after skip.  */
-                      if (p < mbp)
-                        {
-                          /* It's CTX_LETTER or CTX_NONE.  CTX_NEWLINE
-                             cannot happen, as we assume that a newline
-                             is always a single byte character.  */
-                          s1 = s = d->initstate_notbol;
-                          p = mbp;
-                        }
-                    }
+                  while (t[*p] == s)
+                    p++;
                 }
+              if (multibyte)
+                p = mbp = skip_remains_mb (d, p, mbp, end);
+            }
+
+          if (multibyte)
+            {
+              s1 = s;
 
               if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl)
                   || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
@@ -3269,22 +3243,7 @@ dfaexec_main (struct dfa *d, char const *begin, char 
*end, bool allow_nl,
                   trans = d->trans;
                 }
             }
-        }
-      else
-        {
-          if (s == 0)
-            {
-              t = trans[s];
-              if (t)
-                {
-                  while (t[*p] == 0)
-                    p++;
-                  s1 = 0;
-                  s = t[*p++];
-                }
-            }
-
-          while ((t = trans[s]) != NULL)
+          else
             {
               s1 = t[*p++];
               t = trans[s1];
@@ -3295,6 +3254,11 @@ dfaexec_main (struct dfa *d, char const *begin, char 
*end, bool allow_nl,
                   s1 = tmp;     /* swap */
                   break;
                 }
+              if (s < d->min_trcount)
+                {
+                  while (t[*p] == s1)
+                    p++;
+                }
               s = t[*p++];
             }
         }
@@ -3319,6 +3283,9 @@ dfaexec_main (struct dfa *d, char const *begin, char 
*end, bool allow_nl,
           if (d->success[s] & sbit[*p])
             goto done;
 
+          if (multibyte && s < d->min_trcount)
+            p = mbp = skip_remains_mb (d, p, mbp, end);
+
           s1 = s;
           if (!multibyte || d->states[s].mbps.nelem == 0
               || (*p == eol && !allow_nl)
-- 
1.7.1

Reply via email to