[PATCH] dfa: narrow the scope of many local variables
FYI, I've just pushed this: >From 387fd77e70fe9016d30e462d46c1126b32fe449b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 31 Dec 2016 08:06:24 -0800 Subject: [PATCH] dfa: narrow the scope of many local variables * lib/dfa.c: Now that we are no longer constrained to c89, move declarations of many variables (often indices) "down" into the scope(s) where used or to the point of definition. This is a no-semantic-change diff. --- ChangeLog | 8 +++ lib/dfa.c | 214 -- 2 files changed, 104 insertions(+), 118 deletions(-) diff --git a/ChangeLog b/ChangeLog index bbacbbf..a73a546 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2016-12-31 Jim Meyering + + dfa: narrow the scope of many local variables + * lib/dfa.c: Now that we are no longer constrained to c89, move + declarations of many variables (often indices) "down" into the + scope(s) where used or to the point of definition. This is a + no-semantic-change diff. + 2017-01-01 Paul Eggert version-etc: new year diff --git a/lib/dfa.c b/lib/dfa.c index a47f407..b5de330 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -591,8 +591,6 @@ mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) static void prtok (token t) { - char const *s; - if (t < 0) fprintf (stderr, "END"); else if (t < NOTCHAR) @@ -602,6 +600,7 @@ prtok (token t) } else { + char const *s; switch (t) { case EMPTY: @@ -695,16 +694,14 @@ zeroset (charclass *s) static void fillset (charclass *s) { - int i; - for (i = 0; i < CHARCLASS_WORDS; i++) + for (int i = 0; i < CHARCLASS_WORDS; i++) s->w[i] = CHARCLASS_WORD_MASK; } static void notset (charclass *s) { - int i; - for (i = 0; i < CHARCLASS_WORDS; ++i) + for (int i = 0; i < CHARCLASS_WORDS; ++i) s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i]; } @@ -712,8 +709,7 @@ static bool equal (charclass const *s1, charclass const *s2) { charclass_word w = 0; - int i; - for (i = 0; i < CHARCLASS_WORDS; i++) + for (int i = 0; i < CHARCLASS_WORDS; i++) w |= s1->w[i] ^ s2->w[i]; return w == 0; } @@ -722,8 +718,7 @@ static bool emptyset (charclass const *s) { charclass_word w = 0; - int i; - for (i = 0; i < CHARCLASS_WORDS; i++) + for (int i = 0; i < CHARCLASS_WORDS; i++) w |= s->w[i]; return w == 0; } @@ -860,8 +855,7 @@ static void setbit_case_fold_c (int b, charclass *c) { int ub = toupper (b); - int i; - for (i = 0; i < NOTCHAR; i++) + for (int i = 0; i < NOTCHAR; i++) if (toupper (i) == ub) setbit (i, c); } @@ -959,8 +953,7 @@ static const struct dfa_ctype prednames[] = { static const struct dfa_ctype *_GL_ATTRIBUTE_PURE find_pred (const char *str) { - unsigned int i; - for (i = 0; prednames[i].name; ++i) + for (unsigned int i = 0; prednames[i].name; ++i) if (STREQ (str, prednames[i].name)) return &prednames[i]; return NULL; @@ -972,7 +965,7 @@ static token parse_bracket_exp (struct dfa *dfa) { bool invert; - int c, c1, c2; + int c; charclass ccl; /* This is a bracket expression that dfaexec is known to @@ -1002,6 +995,7 @@ parse_bracket_exp (struct dfa *dfa) else invert = false; + int c1; colon_warning_state = (c == ':'); do { @@ -1055,7 +1049,7 @@ parse_bracket_exp (struct dfa *dfa) if (dfa->localeinfo.multibyte && !pred->single_byte_only) known_bracket_exp = false; else -for (c2 = 0; c2 < NOTCHAR; ++c2) +for (int c2 = 0; c2 < NOTCHAR; ++c2) if (pred->func (c2)) setbit (c2, &ccl); } @@ -1073,7 +1067,8 @@ parse_bracket_exp (struct dfa *dfa) are already set up. */ } - if (c == '\\' && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + if (c == '\\' + && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == NOTCHAR) @@ -1082,6 +1077,7 @@ parse_bracket_exp (struct dfa *dfa) if (c1 == '-') /* build range characters. */ { + int c2; FETCH_WC (dfa, c2, wc2, _("unbalanced [")); /* A bracket expression like [a-[.aa.]] matches an unknown set. @@ -1155,12 +1151,11 @@ parse_bracket_exp (struct dfa *dfa) else { wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; - unsigned int i; unsigned int n = (dfa->syntax.case_fold ? case_folded_counterparts (wc, folded + 1) + 1 : 1); folded[0] = wc; - for (i = 0; i < n; i++) + for (unsigned int i = 0; i < n; i++) if (!setbit_wc (folded[i], &ccl)) { dfa->lex.brack.chars @@ -1222,10 +1217,7 @@ pop_lex_state (s
Re: [PATCH] dfa: narrow the scope of many local variables
Hi Jim, > move declarations of many variables (often indices) "down" into the > scope(s) where used or to the point of definition. Thanks! It does make the code a little more penetrable. Bruno
suggested improvements to parse-datetime's debug messages ('date --debug')
Hello, Attached are four small bug fixes and two additions to the debug messages in parse-datetime.y (used in 'date --debug'). The commit message for each commit gives a detail example of how/when it is used. There are no changes to the parsing, only to the debug messages. Comments and suggestions welcomed, regards, -assaf gnulib-parse-datetime-improvements-2017-01-01.patch.xz Description: Binary data
Re: [PATCH] dfa: narrow the scope of many local variables
Thanks. I found some more local vars whose scope could be narrowed, and installed the attached followup patch. >From 359f0b1fcf904bb6f8ece929e27cf892ac4aa04d Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Sun, 1 Jan 2017 20:51:46 -0800 Subject: [PATCH] dfa: narrow more local var scopes * lib/dfa.c: Move some more local decls down to nearer where they're needed. --- ChangeLog | 6 +++ lib/dfa.c | 138 +++--- 2 files changed, 75 insertions(+), 69 deletions(-) diff --git a/ChangeLog b/ChangeLog index a73a546..cb6ebfe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2017-01-01 Paul Eggert + + dfa: narrow more local var scopes + * lib/dfa.c: Move some more local decls down to nearer where + they're needed. + 2016-12-31 Jim Meyering dfa: narrow the scope of many local variables diff --git a/lib/dfa.c b/lib/dfa.c index b5de330..ed5297e 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -964,7 +964,6 @@ find_pred (const char *str) static token parse_bracket_exp (struct dfa *dfa) { - bool invert; int c; charclass ccl; @@ -986,14 +985,13 @@ parse_bracket_exp (struct dfa *dfa) dfa->lex.brack.nchars = 0; zeroset (&ccl); FETCH_WC (dfa, c, wc, _("unbalanced [")); - if (c == '^') + bool invert = c == '^'; + if (invert) { FETCH_WC (dfa, c, wc, _("unbalanced [")); invert = true; known_bracket_exp = dfa->simple_locale; } - else -invert = false; int c1; colon_warning_state = (c == ':'); @@ -1608,11 +1606,10 @@ addtok (struct dfa *dfa, token t) if (dfa->localeinfo.multibyte && t == MBCSET) { bool need_or = false; - ptrdiff_t i; /* Extract wide characters into alternations for better performance. This does not require UTF-8. */ - for (i = 0; i < dfa->lex.brack.nchars; i++) + for (ptrdiff_t i = 0; i < dfa->lex.brack.nchars; i++) { addtok_wc (dfa, dfa->lex.brack.chars[i]); if (need_or) @@ -1823,8 +1820,6 @@ atom (struct dfa *dfa) static size_t _GL_ATTRIBUTE_PURE nsubtoks (struct dfa const *dfa, size_t tindex) { - size_t ntoks1; - switch (dfa->tokens[tindex - 1]) { default: @@ -1835,8 +1830,10 @@ nsubtoks (struct dfa const *dfa, size_t tindex) return 1 + nsubtoks (dfa, tindex - 1); case CAT: case OR: - ntoks1 = nsubtoks (dfa, tindex - 1); - return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); + { +size_t ntoks1 = nsubtoks (dfa, tindex - 1); +return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); + } } } @@ -1984,7 +1981,6 @@ insert (position p, position_set *s) { ptrdiff_t count = s->nelem; ptrdiff_t lo = 0, hi = count; - ptrdiff_t i; while (lo < hi) { ptrdiff_t mid = (lo + hi) >> 1; @@ -2000,7 +1996,7 @@ insert (position p, position_set *s) } s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems); - for (i = count; i > lo; i--) + for (ptrdiff_t i = count; i > lo; i--) s->elems[i] = s->elems[i - 1]; s->elems[lo] = p; ++s->nelem; @@ -2101,7 +2097,7 @@ state_index (struct dfa *d, position_set const *s, int context) { size_t hash = 0; int constraint = 0; - state_num i, j; + state_num i; token first_end = 0; for (i = 0; i < s->nelem; ++i) @@ -2113,6 +2109,7 @@ state_index (struct dfa *d, position_set const *s, int context) if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem || context != d->states[i].context) continue; + state_num j; for (j = 0; j < s->nelem; ++j) if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint || s->elems[j].index != d->states[i].elems.elems[j].index) @@ -2123,7 +2120,7 @@ state_index (struct dfa *d, position_set const *s, int context) #ifdef DEBUG fprintf (stderr, "new state %zd\n nextpos:", i); - for (j = 0; j < s->nelem; ++j) + for (state_num j = 0; j < s->nelem; j++) { fprintf (stderr, " %zu:", s->elems[j].index); prtok (d->tokens[s->elems[j].index]); @@ -2143,7 +2140,7 @@ state_index (struct dfa *d, position_set const *s, int context) fprintf (stderr, "\n"); #endif - for (j = 0; j < s->nelem; ++j) + for (state_num j = 0; j < s->nelem; j++) { int c = s->elems[j].constraint; if (d->tokens[s->elems[j].index] < 0) @@ -2258,9 +2255,8 @@ static int _GL_ATTRIBUTE_PURE state_separate_contexts (position_set const *s) { int separate_contexts = 0; - size_t j; - for (j = 0; j < s->nelem; ++j) + for (size_t j = 0; j < s->nelem; j++) { if (PREV_NEWLINE_DEPENDENT (s->elems[j].constraint)) separate_contexts |= CTX_NEWLINE; @@ -2344,10 +2340,7 @@ dfaanalyze (struct dfa *d, bool searchflag) size_t nlastpos; } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc; - position_set tmp; /* Temporary set for merging sets. */ posi
[PATCH 2/2] dfa: simplify constraint-dependency checking
* lib/dfa.c (prev_newline_constraint, prev_letter_constraint) (prev_other_constraint): Remove. (prev_newline_dependent, prev_letter_dependent): Simplify, to avoid an unnecessary bitwise AND operation. --- ChangeLog | 6 ++ lib/dfa.c | 22 ++ 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 85436e1..2b8cbc1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2017-01-01 Paul Eggert + dfa: simplify constraint-dependency checking + * lib/dfa.c (prev_newline_constraint, prev_letter_constraint) + (prev_other_constraint): Remove. + (prev_newline_dependent, prev_letter_dependent): + Simplify, to avoid an unnecessary bitwise AND operation. + dfa: prefer functions and constants to macros * lib/dfa.c: Prefer constants to macros where either will do. (streq, isasciidigit, newline_constraint) diff --git a/lib/dfa.c b/lib/dfa.c index 5b9de53..cd600aa 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -175,33 +175,15 @@ succeeds_in_context (int constraint, int prev, int curr) } /* The following describe what a constraint depends on. */ -static int -prev_newline_constraint (int constraint) -{ - return (constraint >> 2) & 0x111; -} -static int -prev_letter_constraint (int constraint) -{ - return (constraint >> 1) & 0x111; -} -static int -prev_other_constraint (int constraint) -{ - return constraint & 0x111; -} - static bool prev_newline_dependent (int constraint) { - return (prev_newline_constraint (constraint) - != prev_other_constraint (constraint)); + return ((constraint ^ constraint >> 2) & 0x111) != 0; } static bool prev_letter_dependent (int constraint) { - return (prev_letter_constraint (constraint) - != prev_other_constraint (constraint)); + return ((constraint ^ constraint >> 1) & 0x111) != 0; } /* Tokens that match the empty string subject to some constraint actually -- 2.7.4
[PATCH 1/2] dfa: prefer functions and constants to macros
* lib/dfa.c: Prefer constants to macros where either will do. (streq, isasciidigit, newline_constraint) (letter_constraint, other_constraint, succeeds_in_context) (prev_newline_constraint, prev_letter_constraint) (prev_other_constraint, prev_newline_dependent) (prev_letter_dependent, accepting, accepts_in_context): Now static functions instead of function-like macros. Use lower-case names accordingly. All uses changed. --- ChangeLog | 10 lib/dfa.c | 184 +++--- 2 files changed, 126 insertions(+), 68 deletions(-) diff --git a/ChangeLog b/ChangeLog index cb6ebfe..85436e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2017-01-01 Paul Eggert + dfa: prefer functions and constants to macros + * lib/dfa.c: Prefer constants to macros where either will do. + (streq, isasciidigit, newline_constraint) + (letter_constraint, other_constraint, succeeds_in_context) + (prev_newline_constraint, prev_letter_constraint) + (prev_other_constraint, prev_newline_dependent) + (prev_letter_dependent, accepting, accepts_in_context): + Now static functions instead of function-like macros. + Use lower-case names accordingly. All uses changed. + dfa: narrow more local var scopes * lib/dfa.c: Move some more local decls down to nearer where they're needed. diff --git a/lib/dfa.c b/lib/dfa.c index ed5297e..5b9de53 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -33,17 +33,17 @@ #include #include -#define STREQ(a, b) (strcmp (a, b) == 0) - -/* ISASCIIDIGIT differs from isdigit, as follows: - - Its arg may be any int or unsigned int; it need not be an unsigned char. - - It's guaranteed to evaluate its argument exactly once. - - It's typically faster. - Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that - only '0' through '9' are digits. Prefer ISASCIIDIGIT to isdigit unless - it's important to use the locale's definition of "digit" even when the - host does not conform to Posix. */ -#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) +static bool +streq (char const *a, char const *b) +{ + return strcmp (a, b) == 0; +} + +static bool +isasciidigit (char c) +{ + return '0' <= c && c <= '9'; +} #include "gettext.h" #define _(str) gettext (str) @@ -126,10 +126,13 @@ to_uchar (char ch) character is a word constituent. A state whose context is CTX_ANY might have transitions from any character. */ -#define CTX_NONE 1 -#define CTX_LETTER 2 -#define CTX_NEWLINE4 -#define CTX_ANY7 +enum + { +CTX_NONE = 1, +CTX_LETTER = 2, +CTX_NEWLINE = 4, +CTX_ANY = 7 + }; /* Sometimes characters can only be matched depending on the surrounding context. Such context decisions depend on what the previous character @@ -142,48 +145,86 @@ to_uchar (char ch) bit 4-7 - valid contexts when next character is CTX_LETTER bit 0-3 - valid contexts when next character is CTX_NONE - The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint + succeeds_in_context determines whether a given constraint succeeds in a particular context. Prev is a bitmask of possible context values for the previous character, curr is the (single-bit) context value for the lookahead character. */ -#define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf) -#define LETTER_CONSTRAINT(constraint) (((constraint) >> 4) & 0xf) -#define OTHER_CONSTRAINT(constraint)((constraint) & 0xf) - -#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \ - curr) & CTX_NONE ? OTHER_CONSTRAINT (constraint) : 0) \ -| ((curr) & CTX_LETTER ? LETTER_CONSTRAINT (constraint) : 0) \ -| ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT (constraint) : 0)) \ - & (prev)) - -/* The following macros describe what a constraint depends on. */ -#define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111) -#define PREV_LETTER_CONSTRAINT(constraint) (((constraint) >> 1) & 0x111) -#define PREV_OTHER_CONSTRAINT(constraint)((constraint) & 0x111) - -#define PREV_NEWLINE_DEPENDENT(constraint) \ - (PREV_NEWLINE_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint)) -#define PREV_LETTER_DEPENDENT(constraint) \ - (PREV_LETTER_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint)) +static int +newline_constraint (int constraint) +{ + return (constraint >> 8) & 0xf; +} +static int +letter_constraint (int constraint) +{ + return (constraint >> 4) & 0xf; +} +static int +other_constraint (int constraint) +{ + return constraint & 0xf; +} + +static bool +succeeds_in_context (int constraint, int prev, int curr) +{ + return !! (((curr & CTX_NONE ? other_constraint (constraint) : 0) \ + | (curr & CTX_LETTER ? letter_constraint (constraint) : 0) \ + | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \ + & prev); +} + +/* The fo