[PATCH] dfa: narrow the scope of many local variables

2017-01-01 Thread Jim Meyering
FYI, I've just pushed this:

>From 387fd77e70fe9016d30e462d46c1126b32fe449b Mon Sep 17 00:00:00 2001
From: Jim Meyering 
Date: Sat, 31 Dec 2016 08:06:24 -0800
Subject: [PATCH] dfa: narrow the scope of many local variables

* lib/dfa.c: Now that we are no longer constrained to c89, move
declarations of many variables (often indices) "down" into the
scope(s) where used or to the point of definition.  This is a
no-semantic-change diff.
---
 ChangeLog |   8 +++
 lib/dfa.c | 214 --
 2 files changed, 104 insertions(+), 118 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index bbacbbf..a73a546 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-31  Jim Meyering  
+
+   dfa: narrow the scope of many local variables
+   * lib/dfa.c: Now that we are no longer constrained to c89, move
+   declarations of many variables (often indices) "down" into the
+   scope(s) where used or to the point of definition.  This is a
+   no-semantic-change diff.
+
 2017-01-01  Paul Eggert  

version-etc: new year
diff --git a/lib/dfa.c b/lib/dfa.c
index a47f407..b5de330 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -591,8 +591,6 @@ mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct 
dfa *d)
 static void
 prtok (token t)
 {
-  char const *s;
-
   if (t < 0)
 fprintf (stderr, "END");
   else if (t < NOTCHAR)
@@ -602,6 +600,7 @@ prtok (token t)
 }
   else
 {
+  char const *s;
   switch (t)
 {
 case EMPTY:
@@ -695,16 +694,14 @@ zeroset (charclass *s)
 static void
 fillset (charclass *s)
 {
-  int i;
-  for (i = 0; i < CHARCLASS_WORDS; i++)
+  for (int i = 0; i < CHARCLASS_WORDS; i++)
 s->w[i] = CHARCLASS_WORD_MASK;
 }

 static void
 notset (charclass *s)
 {
-  int i;
-  for (i = 0; i < CHARCLASS_WORDS; ++i)
+  for (int i = 0; i < CHARCLASS_WORDS; ++i)
 s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i];
 }

@@ -712,8 +709,7 @@ static bool
 equal (charclass const *s1, charclass const *s2)
 {
   charclass_word w = 0;
-  int i;
-  for (i = 0; i < CHARCLASS_WORDS; i++)
+  for (int i = 0; i < CHARCLASS_WORDS; i++)
 w |= s1->w[i] ^ s2->w[i];
   return w == 0;
 }
@@ -722,8 +718,7 @@ static bool
 emptyset (charclass const *s)
 {
   charclass_word w = 0;
-  int i;
-  for (i = 0; i < CHARCLASS_WORDS; i++)
+  for (int i = 0; i < CHARCLASS_WORDS; i++)
 w |= s->w[i];
   return w == 0;
 }
@@ -860,8 +855,7 @@ static void
 setbit_case_fold_c (int b, charclass *c)
 {
   int ub = toupper (b);
-  int i;
-  for (i = 0; i < NOTCHAR; i++)
+  for (int i = 0; i < NOTCHAR; i++)
 if (toupper (i) == ub)
   setbit (i, c);
 }
@@ -959,8 +953,7 @@ static const struct dfa_ctype prednames[] = {
 static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
 find_pred (const char *str)
 {
-  unsigned int i;
-  for (i = 0; prednames[i].name; ++i)
+  for (unsigned int i = 0; prednames[i].name; ++i)
 if (STREQ (str, prednames[i].name))
   return &prednames[i];
   return NULL;
@@ -972,7 +965,7 @@ static token
 parse_bracket_exp (struct dfa *dfa)
 {
   bool invert;
-  int c, c1, c2;
+  int c;
   charclass ccl;

   /* This is a bracket expression that dfaexec is known to
@@ -1002,6 +995,7 @@ parse_bracket_exp (struct dfa *dfa)
   else
 invert = false;

+  int c1;
   colon_warning_state = (c == ':');
   do
 {
@@ -1055,7 +1049,7 @@ parse_bracket_exp (struct dfa *dfa)
   if (dfa->localeinfo.multibyte && !pred->single_byte_only)
 known_bracket_exp = false;
   else
-for (c2 = 0; c2 < NOTCHAR; ++c2)
+for (int c2 = 0; c2 < NOTCHAR; ++c2)
   if (pred->func (c2))
 setbit (c2, &ccl);
 }
@@ -1073,7 +1067,8 @@ parse_bracket_exp (struct dfa *dfa)
  are already set up.  */
 }

-  if (c == '\\' && (dfa->syntax.syntax_bits & 
RE_BACKSLASH_ESCAPE_IN_LISTS))
+  if (c == '\\'
+  && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
 FETCH_WC (dfa, c, wc, _("unbalanced ["));

   if (c1 == NOTCHAR)
@@ -1082,6 +1077,7 @@ parse_bracket_exp (struct dfa *dfa)
   if (c1 == '-')
 /* build range characters.  */
 {
+  int c2;
   FETCH_WC (dfa, c2, wc2, _("unbalanced ["));

   /* A bracket expression like [a-[.aa.]] matches an unknown set.
@@ -1155,12 +1151,11 @@ parse_bracket_exp (struct dfa *dfa)
   else
 {
   wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
-  unsigned int i;
   unsigned int n = (dfa->syntax.case_fold
 ? case_folded_counterparts (wc, folded + 1) + 1
 : 1);
   folded[0] = wc;
-  for (i = 0; i < n; i++)
+  for (unsigned int i = 0; i < n; i++)
 if (!setbit_wc (folded[i], &ccl))
   {
 dfa->lex.brack.chars
@@ -1222,10 +1217,7 @@ pop_lex_state (s

Re: [PATCH] dfa: narrow the scope of many local variables

2017-01-01 Thread Bruno Haible
Hi Jim,

> move declarations of many variables (often indices) "down" into the
> scope(s) where used or to the point of definition.

Thanks! It does make the code a little more penetrable.

Bruno




suggested improvements to parse-datetime's debug messages ('date --debug')

2017-01-01 Thread Assaf Gordon
Hello,

Attached are four small bug fixes and two additions to the debug messages in 
parse-datetime.y (used in 'date --debug').
The commit message for each commit gives a detail example of how/when it is 
used.
There are no changes to the parsing, only to the debug messages.

Comments and suggestions welcomed,

regards,
  -assaf




gnulib-parse-datetime-improvements-2017-01-01.patch.xz
Description: Binary data





Re: [PATCH] dfa: narrow the scope of many local variables

2017-01-01 Thread Paul Eggert
Thanks. I found some more local vars whose scope could be narrowed, and 
installed the attached followup patch.
>From 359f0b1fcf904bb6f8ece929e27cf892ac4aa04d Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Sun, 1 Jan 2017 20:51:46 -0800
Subject: [PATCH] dfa: narrow more local var scopes

* lib/dfa.c: Move some more local decls down to nearer where
they're needed.
---
 ChangeLog |   6 +++
 lib/dfa.c | 138 +++---
 2 files changed, 75 insertions(+), 69 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a73a546..cb6ebfe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-01-01  Paul Eggert  
+
+	dfa: narrow more local var scopes
+	* lib/dfa.c: Move some more local decls down to nearer where
+	they're needed.
+
 2016-12-31  Jim Meyering  
 
 	dfa: narrow the scope of many local variables
diff --git a/lib/dfa.c b/lib/dfa.c
index b5de330..ed5297e 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -964,7 +964,6 @@ find_pred (const char *str)
 static token
 parse_bracket_exp (struct dfa *dfa)
 {
-  bool invert;
   int c;
   charclass ccl;
 
@@ -986,14 +985,13 @@ parse_bracket_exp (struct dfa *dfa)
   dfa->lex.brack.nchars = 0;
   zeroset (&ccl);
   FETCH_WC (dfa, c, wc, _("unbalanced ["));
-  if (c == '^')
+  bool invert = c == '^';
+  if (invert)
 {
   FETCH_WC (dfa, c, wc, _("unbalanced ["));
   invert = true;
   known_bracket_exp = dfa->simple_locale;
 }
-  else
-invert = false;
 
   int c1;
   colon_warning_state = (c == ':');
@@ -1608,11 +1606,10 @@ addtok (struct dfa *dfa, token t)
   if (dfa->localeinfo.multibyte && t == MBCSET)
 {
   bool need_or = false;
-  ptrdiff_t i;
 
   /* Extract wide characters into alternations for better performance.
  This does not require UTF-8.  */
-  for (i = 0; i < dfa->lex.brack.nchars; i++)
+  for (ptrdiff_t i = 0; i < dfa->lex.brack.nchars; i++)
 {
   addtok_wc (dfa, dfa->lex.brack.chars[i]);
   if (need_or)
@@ -1823,8 +1820,6 @@ atom (struct dfa *dfa)
 static size_t _GL_ATTRIBUTE_PURE
 nsubtoks (struct dfa const *dfa, size_t tindex)
 {
-  size_t ntoks1;
-
   switch (dfa->tokens[tindex - 1])
 {
 default:
@@ -1835,8 +1830,10 @@ nsubtoks (struct dfa const *dfa, size_t tindex)
   return 1 + nsubtoks (dfa, tindex - 1);
 case CAT:
 case OR:
-  ntoks1 = nsubtoks (dfa, tindex - 1);
-  return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
+  {
+size_t ntoks1 = nsubtoks (dfa, tindex - 1);
+return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
+  }
 }
 }
 
@@ -1984,7 +1981,6 @@ insert (position p, position_set *s)
 {
   ptrdiff_t count = s->nelem;
   ptrdiff_t lo = 0, hi = count;
-  ptrdiff_t i;
   while (lo < hi)
 {
   ptrdiff_t mid = (lo + hi) >> 1;
@@ -2000,7 +1996,7 @@ insert (position p, position_set *s)
 }
 
   s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
-  for (i = count; i > lo; i--)
+  for (ptrdiff_t i = count; i > lo; i--)
 s->elems[i] = s->elems[i - 1];
   s->elems[lo] = p;
   ++s->nelem;
@@ -2101,7 +2097,7 @@ state_index (struct dfa *d, position_set const *s, int context)
 {
   size_t hash = 0;
   int constraint = 0;
-  state_num i, j;
+  state_num i;
   token first_end = 0;
 
   for (i = 0; i < s->nelem; ++i)
@@ -2113,6 +2109,7 @@ state_index (struct dfa *d, position_set const *s, int context)
   if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
   || context != d->states[i].context)
 continue;
+  state_num j;
   for (j = 0; j < s->nelem; ++j)
 if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint
 || s->elems[j].index != d->states[i].elems.elems[j].index)
@@ -2123,7 +2120,7 @@ state_index (struct dfa *d, position_set const *s, int context)
 
 #ifdef DEBUG
   fprintf (stderr, "new state %zd\n nextpos:", i);
-  for (j = 0; j < s->nelem; ++j)
+  for (state_num j = 0; j < s->nelem; j++)
 {
   fprintf (stderr, " %zu:", s->elems[j].index);
   prtok (d->tokens[s->elems[j].index]);
@@ -2143,7 +2140,7 @@ state_index (struct dfa *d, position_set const *s, int context)
   fprintf (stderr, "\n");
 #endif
 
-  for (j = 0; j < s->nelem; ++j)
+  for (state_num j = 0; j < s->nelem; j++)
 {
   int c = s->elems[j].constraint;
   if (d->tokens[s->elems[j].index] < 0)
@@ -2258,9 +2255,8 @@ static int _GL_ATTRIBUTE_PURE
 state_separate_contexts (position_set const *s)
 {
   int separate_contexts = 0;
-  size_t j;
 
-  for (j = 0; j < s->nelem; ++j)
+  for (size_t j = 0; j < s->nelem; j++)
 {
   if (PREV_NEWLINE_DEPENDENT (s->elems[j].constraint))
 separate_contexts |= CTX_NEWLINE;
@@ -2344,10 +2340,7 @@ dfaanalyze (struct dfa *d, bool searchflag)
 size_t nlastpos;
   } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
 
-  position_set tmp; /* Temporary set for merging sets.  */
   posi

[PATCH 2/2] dfa: simplify constraint-dependency checking

2017-01-01 Thread Paul Eggert
* lib/dfa.c (prev_newline_constraint, prev_letter_constraint)
(prev_other_constraint): Remove.
(prev_newline_dependent, prev_letter_dependent):
Simplify, to avoid an unnecessary bitwise AND operation.
---
 ChangeLog |  6 ++
 lib/dfa.c | 22 ++
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 85436e1..2b8cbc1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2017-01-01  Paul Eggert  
 
+   dfa: simplify constraint-dependency checking
+   * lib/dfa.c (prev_newline_constraint, prev_letter_constraint)
+   (prev_other_constraint): Remove.
+   (prev_newline_dependent, prev_letter_dependent):
+   Simplify, to avoid an unnecessary bitwise AND operation.
+
dfa: prefer functions and constants to macros
* lib/dfa.c: Prefer constants to macros where either will do.
(streq, isasciidigit, newline_constraint)
diff --git a/lib/dfa.c b/lib/dfa.c
index 5b9de53..cd600aa 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -175,33 +175,15 @@ succeeds_in_context (int constraint, int prev, int curr)
 }
 
 /* The following describe what a constraint depends on.  */
-static int
-prev_newline_constraint (int constraint)
-{
-  return (constraint >> 2) & 0x111;
-}
-static int
-prev_letter_constraint (int constraint)
-{
-  return (constraint >> 1) & 0x111;
-}
-static int
-prev_other_constraint (int constraint)
-{
-  return constraint & 0x111;
-}
-
 static bool
 prev_newline_dependent (int constraint)
 {
-  return (prev_newline_constraint (constraint)
-  != prev_other_constraint (constraint));
+  return ((constraint ^ constraint >> 2) & 0x111) != 0;
 }
 static bool
 prev_letter_dependent (int constraint)
 {
-  return (prev_letter_constraint (constraint)
-  != prev_other_constraint (constraint));
+  return ((constraint ^ constraint >> 1) & 0x111) != 0;
 }
 
 /* Tokens that match the empty string subject to some constraint actually
-- 
2.7.4




[PATCH 1/2] dfa: prefer functions and constants to macros

2017-01-01 Thread Paul Eggert
* lib/dfa.c: Prefer constants to macros where either will do.
(streq, isasciidigit, newline_constraint)
(letter_constraint, other_constraint, succeeds_in_context)
(prev_newline_constraint, prev_letter_constraint)
(prev_other_constraint, prev_newline_dependent)
(prev_letter_dependent, accepting, accepts_in_context):
Now static functions instead of function-like macros.
Use lower-case names accordingly.  All uses changed.
---
 ChangeLog |  10 
 lib/dfa.c | 184 +++---
 2 files changed, 126 insertions(+), 68 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cb6ebfe..85436e1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2017-01-01  Paul Eggert  
 
+   dfa: prefer functions and constants to macros
+   * lib/dfa.c: Prefer constants to macros where either will do.
+   (streq, isasciidigit, newline_constraint)
+   (letter_constraint, other_constraint, succeeds_in_context)
+   (prev_newline_constraint, prev_letter_constraint)
+   (prev_other_constraint, prev_newline_dependent)
+   (prev_letter_dependent, accepting, accepts_in_context):
+   Now static functions instead of function-like macros.
+   Use lower-case names accordingly.  All uses changed.
+
dfa: narrow more local var scopes
* lib/dfa.c: Move some more local decls down to nearer where
they're needed.
diff --git a/lib/dfa.c b/lib/dfa.c
index ed5297e..5b9de53 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -33,17 +33,17 @@
 #include 
 #include 
 
-#define STREQ(a, b) (strcmp (a, b) == 0)
-
-/* ISASCIIDIGIT differs from isdigit, as follows:
-   - Its arg may be any int or unsigned int; it need not be an unsigned char.
-   - It's guaranteed to evaluate its argument exactly once.
-   - It's typically faster.
-   Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
-   only '0' through '9' are digits.  Prefer ISASCIIDIGIT to isdigit unless
-   it's important to use the locale's definition of "digit" even when the
-   host does not conform to Posix.  */
-#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+static bool
+streq (char const *a, char const *b)
+{
+  return strcmp (a, b) == 0;
+}
+
+static bool
+isasciidigit (char c)
+{
+  return '0' <= c && c <= '9';
+}
 
 #include "gettext.h"
 #define _(str) gettext (str)
@@ -126,10 +126,13 @@ to_uchar (char ch)
character is a word constituent.  A state whose context is CTX_ANY
might have transitions from any character.  */
 
-#define CTX_NONE   1
-#define CTX_LETTER 2
-#define CTX_NEWLINE4
-#define CTX_ANY7
+enum
+  {
+CTX_NONE = 1,
+CTX_LETTER = 2,
+CTX_NEWLINE = 4,
+CTX_ANY = 7
+  };
 
 /* Sometimes characters can only be matched depending on the surrounding
context.  Such context decisions depend on what the previous character
@@ -142,48 +145,86 @@ to_uchar (char ch)
bit 4-7  - valid contexts when next character is CTX_LETTER
bit 0-3  - valid contexts when next character is CTX_NONE
 
-   The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
+   succeeds_in_context determines whether a given constraint
succeeds in a particular context.  Prev is a bitmask of possible
context values for the previous character, curr is the (single-bit)
context value for the lookahead character.  */
-#define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf)
-#define LETTER_CONSTRAINT(constraint)  (((constraint) >> 4) & 0xf)
-#define OTHER_CONSTRAINT(constraint)((constraint)   & 0xf)
-
-#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \
-  curr) & CTX_NONE  ? OTHER_CONSTRAINT (constraint) : 0) \
-| ((curr) & CTX_LETTER  ? LETTER_CONSTRAINT (constraint) : 0) \
-| ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT (constraint) : 0)) \
-   & (prev))
-
-/* The following macros describe what a constraint depends on.  */
-#define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111)
-#define PREV_LETTER_CONSTRAINT(constraint)  (((constraint) >> 1) & 0x111)
-#define PREV_OTHER_CONSTRAINT(constraint)((constraint)   & 0x111)
-
-#define PREV_NEWLINE_DEPENDENT(constraint) \
-  (PREV_NEWLINE_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
-#define PREV_LETTER_DEPENDENT(constraint) \
-  (PREV_LETTER_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
+static int
+newline_constraint (int constraint)
+{
+  return (constraint >> 8) & 0xf;
+}
+static int
+letter_constraint (int constraint)
+{
+  return (constraint >> 4) & 0xf;
+}
+static int
+other_constraint (int constraint)
+{
+  return constraint & 0xf;
+}
+
+static bool
+succeeds_in_context (int constraint, int prev, int curr)
+{
+  return !! (((curr & CTX_NONE  ? other_constraint (constraint) : 0) \
+  | (curr & CTX_LETTER  ? letter_constraint (constraint) : 0) \
+  | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \
+ & prev);
+}
+
+/* The fo