* src/dfa.c: Move the global variables holding regex syntax configuration into a new struct (`struct regex_syntax') and add an instance of it to struct dfa. All references to the globals are replaced with references to the dfa struct's new member. As a side effect, a `struct dfa' must be allocated with dfaalloc() and passed to dfasyntax() before dfacomp() is called: dfaalloc() now zero-fills and initializes the struct, and dfacomp() no longer calls dfainit(), which is removed. * src/dfa.h (dfasyntax): Add new struct dfa* parameter. * src/dfasearch.c (GEAcompile): Allocate `dfa' earlier and pass it to dfasyntax(). * tests/dfa-match-aux.c (main): Pass `dfa' to dfasyntax(). --- src/dfa.c | 244 +++++++++++++++++++++++++------------------------- src/dfa.h | 8 +- src/dfasearch.c | 5 +- tests/dfa-match-aux.c | 2 +- 4 files changed, 132 insertions(+), 127 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c index 858bc55..ae1b340 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -328,6 +328,32 @@ struct mb_char_classes size_t nchars; }; +struct regex_syntax +{ + /* Syntax bits controlling the behavior of the lexical analyzer. */ + reg_syntax_t syntax_bits; + bool syntax_bits_set; + + /* Flag for case-folding letters into sets. */ + bool case_fold; + + /* End-of-line byte in data. */ + unsigned char eolbyte; + + /* Cache of char-context values. */ + int sbit[NOTCHAR]; + + /* If never_trail[B], the byte B cannot be a non-initial byte in a + multibyte character. */ + bool never_trail[NOTCHAR]; + + /* Set of characters considered letters. */ + charclass letters; + + /* Set of characters that are newline. */ + charclass newline; +}; + /* Lexical analyzer. All the dross that deals with the obnoxious GNU Regex syntax bits is located here. The poor, suffering reader is referred to the GNU Regex documentation for the @@ -366,6 +392,9 @@ struct parser_state /* A compiled regular expression. */ struct dfa { + /* Syntax configuration */ + struct regex_syntax syntax; + /* Fields filled by the scanner. */ charclass *charclasses; /* Array of character sets for CSET tokens. */ size_t cindex; /* Index for adding new charclasses. */ @@ -711,29 +740,6 @@ dfa_charclass_index (struct dfa *d, charclass const s) return i; } -/* Syntax bits controlling the behavior of the lexical analyzer. */ -static reg_syntax_t syntax_bits; -static bool syntax_bits_set; - -/* Flag for case-folding letters into sets. */ -static bool case_fold; - -/* End-of-line byte in data. */ -static unsigned char eolbyte; - -/* Cache of char-context values. */ -static int sbit[NOTCHAR]; - -/* If never_trail[B], the byte B cannot be a non-initial byte in a - multibyte character. */ -static bool never_trail[NOTCHAR]; - -/* Set of characters considered letters. */ -static charclass letters; - -/* Set of characters that are newline. 
*/ -static charclass newline; - static bool unibyte_word_constituent (unsigned char c) { @@ -741,9 +747,9 @@ unibyte_word_constituent (unsigned char c) } static int -char_context (unsigned char c) +char_context (struct dfa *dfa, unsigned char c) { - if (c == eolbyte) + if (c == dfa->syntax.eolbyte) return CTX_NEWLINE; if (unibyte_word_constituent (c)) return CTX_LETTER; @@ -752,13 +758,13 @@ char_context (unsigned char c) /* Entry point to set syntax options. */ void -dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol) +dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol) { int i; - syntax_bits_set = true; - syntax_bits = bits; - case_fold = fold; - eolbyte = eol; + dfa->syntax.syntax_bits_set = true; + dfa->syntax.syntax_bits = bits; + dfa->syntax.case_fold = fold; + dfa->syntax.eolbyte = eol; for (i = CHAR_MIN; i <= CHAR_MAX; ++i) { @@ -769,21 +775,21 @@ dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol) mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */ - sbit[uc] = char_context (uc); - switch (sbit[uc]) + dfa->syntax.sbit[uc] = char_context (dfa, uc); + switch (dfa->syntax.sbit[uc]) { case CTX_LETTER: - setbit (uc, letters); + setbit (uc, dfa->syntax.letters); break; case CTX_NEWLINE: - setbit (uc, newline); + setbit (uc, dfa->syntax.newline); break; } /* POSIX requires that the five bytes in "\n\r./" (including the terminating NUL) cannot occur inside a multibyte character. */ - never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80 - : strchr ("\n\r./", uc) != NULL); + dfa->syntax.never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80 + : strchr ("\n\r./", uc) != NULL); } } @@ -1062,7 +1068,7 @@ parse_bracket_exp (struct dfa *dfa) { FETCH_WC (dfa, c1, wc1, _("unbalanced [")); - if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES)) + if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES)) || c1 == '.' 
|| c1 == '=') { enum { MAX_BRACKET_STRING_LEN = 32 }; @@ -1091,8 +1097,9 @@ parse_bracket_exp (struct dfa *dfa) worry about that possibility. */ { char const *class - = (case_fold && (STREQ (str, "upper") - || STREQ (str, "lower")) ? "alpha" : str); + = (dfa->syntax.case_fold && (STREQ (str, "upper") + || STREQ (str, "lower")) ? + "alpha" : str); const struct dfa_ctype *pred = find_pred (class); if (!pred) dfaerror (_("invalid character class")); @@ -1118,7 +1125,7 @@ parse_bracket_exp (struct dfa *dfa) are already set up. */ } - if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + if (c == '\\' && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == NOTCHAR) @@ -1147,7 +1154,8 @@ parse_bracket_exp (struct dfa *dfa) } else { - if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + if (c2 == '\\' && (dfa->syntax.syntax_bits + & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH_WC (dfa, c2, wc2, _("unbalanced [")); colon_warning_state |= 8; @@ -1163,7 +1171,7 @@ parse_bracket_exp (struct dfa *dfa) int ci; for (ci = c; ci <= c2; ci++) setbit (ci, ccl); - if (case_fold) + if (dfa->syntax.case_fold) { int uc = toupper (c); int uc2 = toupper (c2); @@ -1187,7 +1195,7 @@ parse_bracket_exp (struct dfa *dfa) if (!dfa->multibyte) { - if (case_fold) + if (dfa->syntax.case_fold) setbit_case_fold_c (c, ccl); else setbit (c, ccl); @@ -1200,7 +1208,7 @@ parse_bracket_exp (struct dfa *dfa) { wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; unsigned int i; - unsigned int n = (case_fold + unsigned int n = (dfa->syntax.case_fold ? 
case_folded_counterparts (wc, folded + 1) + 1 : 1); folded[0] = wc; @@ -1233,7 +1241,7 @@ parse_bracket_exp (struct dfa *dfa) { assert (!dfa->multibyte); notset (ccl); - if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) + if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) clrbit ('\n', ccl); } @@ -1285,7 +1293,7 @@ lex (struct dfa *dfa) case '^': if (backslash) goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS + if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN || dfa->lexstate.lasttok == OR) return dfa->lexstate.lasttok = BEGLINE; @@ -1294,17 +1302,17 @@ lex (struct dfa *dfa) case '$': if (backslash) goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS + if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS || dfa->lexstate.lexleft == 0 - || (syntax_bits & RE_NO_BK_PARENS + || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')' : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\' && dfa->lexstate.lexptr[1] == ')') - || (syntax_bits & RE_NO_BK_VBAR + || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR ? 
dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|' : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\' && dfa->lexstate.lexptr[1] == '|') - || ((syntax_bits & RE_NEWLINE_ALT) + || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT) && dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '\n')) return dfa->lexstate.lasttok = ENDLINE; @@ -1319,7 +1327,7 @@ lex (struct dfa *dfa) case '7': case '8': case '9': - if (backslash && !(syntax_bits & RE_NO_BK_REFS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS)) { dfa->lexstate.laststart = false; return dfa->lexstate.lasttok = BACKREF; @@ -1327,7 +1335,7 @@ lex (struct dfa *dfa) goto normal_char; case '`': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) { /* FIXME: should be beginning of string */ return dfa->lexstate.lasttok = BEGLINE; @@ -1335,7 +1343,7 @@ lex (struct dfa *dfa) goto normal_char; case '\'': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) { /* FIXME: should be end of string */ return dfa->lexstate.lasttok = ENDLINE; @@ -1343,56 +1351,60 @@ lex (struct dfa *dfa) goto normal_char; case '<': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) return dfa->lexstate.lasttok = BEGWORD; goto normal_char; case '>': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) return dfa->lexstate.lasttok = ENDWORD; goto normal_char; case 'b': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) return dfa->lexstate.lasttok = LIMWORD; goto normal_char; case 'B': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) return dfa->lexstate.lasttok = NOTLIMWORD; goto normal_char; case '?': - if (syntax_bits & RE_LIMITED_OPS) + if 
(dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; return dfa->lexstate.lasttok = QMARK; case '*': if (backslash) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; return dfa->lexstate.lasttok = STAR; case '+': - if (syntax_bits & RE_LIMITED_OPS) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; return dfa->lexstate.lasttok = PLUS; case '{': - if (!(syntax_bits & RE_INTERVALS)) + if (!(dfa->syntax.syntax_bits & RE_INTERVALS)) goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; /* Cases: @@ -1439,7 +1451,7 @@ lex (struct dfa *dfa) && (dfa->lexstate.maxrep < 0 || dfa->lexstate.minrep <= dfa->lexstate.maxrep))) { - if (syntax_bits & RE_INVALID_INTERVAL_ORD) + if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD) goto normal_char; dfaerror (_("invalid content of \\{\\}")); } @@ -1452,32 +1464,32 @@ lex (struct dfa *dfa) return dfa->lexstate.lasttok = REPMN; case '|': - if (syntax_bits & 
RE_LIMITED_OPS) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0)) goto normal_char; dfa->lexstate.laststart = true; return dfa->lexstate.lasttok = OR; case '\n': - if (syntax_bits & RE_LIMITED_OPS - || backslash || !(syntax_bits & RE_NEWLINE_ALT)) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS + || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT)) goto normal_char; dfa->lexstate.laststart = true; return dfa->lexstate.lasttok = OR; case '(': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) goto normal_char; ++dfa->lexstate.parens; dfa->lexstate.laststart = true; return dfa->lexstate.lasttok = LPAREN; case ')': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) goto normal_char; if (dfa->lexstate.parens == 0 - && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) + && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_char; --dfa->lexstate.parens; dfa->lexstate.laststart = false; @@ -1495,16 +1507,16 @@ lex (struct dfa *dfa) } zeroset (ccl); notset (ccl); - if (!(syntax_bits & RE_DOT_NEWLINE)) + if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) clrbit ('\n', ccl); - if (syntax_bits & RE_DOT_NOT_NULL) + if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', ccl); dfa->lexstate.laststart = false; return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl); case 's': case 'S': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) goto normal_char; if (!dfa->multibyte) { @@ -1536,7 +1548,7 @@ lex (struct dfa *dfa) case 'w': case 'W': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) goto normal_char; if (!dfa->multibyte) 
@@ -1581,7 +1593,7 @@ lex (struct dfa *dfa) if (dfa->multibyte) return dfa->lexstate.lasttok = WCHAR; - if (case_fold && isalpha (c)) + if (dfa->syntax.case_fold && isalpha (c)) { zeroset (ccl); setbit_case_fold_c (c, ccl); @@ -1741,9 +1753,9 @@ add_utf8_anychar (struct dfa *dfa) copyset (utf8_classes[i], c); if (i == 1) { - if (!(syntax_bits & RE_DOT_NEWLINE)) + if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) clrbit ('\n', c); - if (syntax_bits & RE_DOT_NOT_NULL) + if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', c); } dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c); @@ -1815,7 +1827,7 @@ atom (struct dfa *dfa) { addtok_wc (dfa, dfa->lexstate.wctok); - if (case_fold) + if (dfa->syntax.case_fold) { wchar_t folded[CASE_FOLDED_BUFSIZE]; unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok, @@ -1985,7 +1997,7 @@ dfaparse (char const *s, size_t len, struct dfa *d) memset (&d->mbs, 0, sizeof d->mbs); } - if (!syntax_bits_set) + if (!d->syntax.syntax_bits_set) dfaerror (_("no syntax specified")); d->parsestate.tok = lex (d); @@ -2271,19 +2283,19 @@ epsclosure (position_set *s, struct dfa const *d, char *visited) character included in C. 
*/ static int -charclass_context (charclass c) +charclass_context (struct dfa *dfa, charclass c) { int context = 0; unsigned int j; - if (tstbit (eolbyte, c)) + if (tstbit (dfa->syntax.eolbyte, c)) context |= CTX_NEWLINE; for (j = 0; j < CHARCLASS_WORDS; ++j) { - if (c[j] & letters[j]) + if (c[j] & dfa->syntax.letters[j]) context |= CTX_LETTER; - if (c[j] & ~(letters[j] | newline[j])) + if (c[j] & ~(dfa->syntax.letters[j] | dfa->syntax.newline[j])) context |= CTX_NONE; } @@ -2678,15 +2690,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NEWLINE)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~newline[j]; + matches[j] &= ~d->syntax.newline[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_LETTER)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~letters[j]; + matches[j] &= ~d->syntax.letters[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NONE)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= letters[j] | newline[j]; + matches[j] &= d->syntax.letters[j] | d->syntax.newline[j]; /* If there are no characters left, there's no point in going on. */ for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j) @@ -2792,7 +2804,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) for (i = 0; i < NOTCHAR; ++i) trans[i] = unibyte_word_constituent (i) ? state_letter : state; - trans[eolbyte] = state_newline; + trans[d->syntax.eolbyte] = state_newline; } else for (i = 0; i < NOTCHAR; ++i) @@ -2848,7 +2860,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) } /* Find out if the new state will want any context information. */ - possible_contexts = charclass_context (labels[i]); + possible_contexts = charclass_context (d, labels[i]); separate_contexts = state_separate_contexts (&follows); /* Find the state(s) corresponding to the union of the follows. 
*/ @@ -2895,7 +2907,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) { int c = j * CHARCLASS_WORD_BITS + k; - if (c == eolbyte) + if (c == d->syntax.eolbyte) trans[c] = state_newline; else if (unibyte_word_constituent (c)) trans[c] = state_letter; @@ -3021,8 +3033,8 @@ build_state (state_num s, struct dfa *d) /* Keep the newline transition in a special place so we can use it as a sentinel. */ - d->newlines[s] = trans[eolbyte]; - trans[eolbyte] = -1; + d->newlines[s] = trans[d->syntax.eolbyte]; + trans[d->syntax.eolbyte] = -1; if (ACCEPTING (s, *d)) d->fails[s] = trans; @@ -3041,7 +3053,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp) { state_num *t; - if (**pp == eolbyte) + if (**pp == d->syntax.eolbyte) { /* S is always an initial state in transit_state, so the transition table for the state must have been built already. */ @@ -3084,7 +3096,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp, size_t i, j; int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); - int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE; + int context = wc == d->syntax.eolbyte ? CTX_NEWLINE : CTX_NONE; bool context_newline = context == CTX_NEWLINE; /* This state has some operators which can match a multibyte character. */ @@ -3202,7 +3214,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p, unsigned char const *mbp, char const *end, wint_t *wcp) { wint_t wc = WEOF; - if (never_trail[*p]) + if (d->syntax.never_trail[*p]) return p; while (mbp < p) mbp += mbs_to_wchar (&wc, (char const *) mbp, @@ -3240,7 +3252,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, unsigned char const *p, *mbp; /* Current input character. */ state_num **trans, *t; /* Copy of d->trans so it can be optimized into a register. */ - unsigned char eol = eolbyte; /* Likewise for eolbyte. */ + unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. 
*/ unsigned char saved_end; size_t nlcount = 0; @@ -3307,8 +3319,8 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, } if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl) - || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE)) - || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL)) + || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE)) + || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL)) || (char *) p >= end) { /* If an input character does not match ANYCHAR, do it @@ -3371,14 +3383,14 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, } else if (d->fails[s]) { - if (d->success[s] & sbit[*p]) + if (d->success[s] & d->syntax.sbit[*p]) goto done; s1 = s; if (!multibyte || d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl) - || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE)) - || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL)) + || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE)) + || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL)) || (char *) p >= end) { /* If a input character does not match ANYCHAR, do it @@ -3480,18 +3492,6 @@ free_mbdata (struct dfa *d) } } -/* Initialize the components of a dfa that the other routines don't - initialize for themselves. */ -static void -dfainit (struct dfa *d) -{ - memset (d, 0, sizeof *d); - d->multibyte = MB_CUR_MAX > 1; - d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb; - d->fast = !d->multibyte; - d->lexstate.cur_mb_len = 1; -} - /* Return true if every construct in D is supported by this DFA matcher. 
*/ static bool _GL_ATTRIBUTE_PURE dfa_supported (struct dfa const *d) @@ -3642,7 +3642,6 @@ dfassbuild (struct dfa *d) void dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag) { - dfainit (d); dfaparse (s, len, d); dfassbuild (d); @@ -3958,7 +3957,7 @@ dfamust (struct dfa const *d) bool endline = false; bool need_begline = false; bool need_endline = false; - bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1; + bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; for (ri = 0; ri < d->tindex; ++ri) { @@ -4194,7 +4193,12 @@ dfamustfree (struct dfamust *dm) struct dfa * dfaalloc (void) { - return xmalloc (sizeof (struct dfa)); + struct dfa *d = xcalloc (1, sizeof (struct dfa)); + d->multibyte = MB_CUR_MAX > 1; + d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb; + d->fast = !d->multibyte; + d->lexstate.cur_mb_len = 1; + return d; } /* vim:set shiftwidth=2: */ diff --git a/src/dfa.h b/src/dfa.h index 60da0e4..014ae96 100644 --- a/src/dfa.h +++ b/src/dfa.h @@ -50,10 +50,10 @@ extern struct dfamust *dfamust (struct dfa const *); /* Free the storage held by the components of a struct dfamust. */ extern void dfamustfree (struct dfamust *); -/* dfasyntax() takes three arguments; the first sets the syntax bits described - earlier in this file, the second sets the case-folding flag, and the - third specifies the line terminator. */ -extern void dfasyntax (reg_syntax_t, bool, unsigned char); +/* dfasyntax() takes four arguments; the first is the dfa to operate on, the + second sets the syntax bits described earlier in this file, the third sets + the case-folding flag, and the fourth specifies the line terminator. */ +extern void dfasyntax (struct dfa *, reg_syntax_t, bool, unsigned char); /* Compile the given string of the given length into the given struct dfa. 
Final argument is a flag specifying whether to build a searching or an diff --git a/src/dfasearch.c b/src/dfasearch.c index 222232c..3dbf76b 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -123,10 +123,12 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) size_t total = size; char *motif; + dfa = dfaalloc (); + if (match_icase) syntax_bits |= RE_ICASE; re_set_syntax (syntax_bits); - dfasyntax (syntax_bits, match_icase, eolbyte); + dfasyntax (dfa, syntax_bits, match_icase, eolbyte); /* For GNU regex, pass the patterns separately to detect errors like "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and @@ -206,7 +208,6 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) else motif = NULL; - dfa = dfaalloc (); dfacomp (pattern, size, dfa, 1); kwsmusts (); diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c index af933ff..25b0535 100644 --- a/tests/dfa-match-aux.c +++ b/tests/dfa-match-aux.c @@ -54,8 +54,8 @@ main (int argc, char **argv) setlocale (LC_ALL, ""); - dfasyntax (RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n'); dfa = dfaalloc (); + dfasyntax (dfa, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n'); dfacomp (argv[1], strlen (argv[1]), dfa, 0); beg = argv[2]; -- 2.8.1