I changed the type of `has_backref' to `bool'.

Norihiro
From 11bf4318c360c29a3000afee8ee9f41ec431130e Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Tue, 1 Apr 2014 23:48:16 +0900 Subject: [PATCH] grep: prefer regex to DFA for ANYCHAR in multi-byte locales
* src/dfa.c (dfaexec): prefer regex to DFA for ANYCHAR in multi-byte locales. --- src/dfa.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index b6fbd58..80725ba 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -302,7 +302,8 @@ typedef struct size_t hash; /* Hash of the positions of this state. */ position_set elems; /* Positions this state could match. */ unsigned char context; /* Context from previous state. */ - char backref; /* True if this state matches a \<digit>. */ + bool has_backref; /* True if this state matches a \<digit>. */ + bool has_mbcset; /* True if this state matches a MBCSET. */ unsigned short constraint; /* Constraint for this state to accept. */ token first_end; /* Token value of the first END in elems. */ position_set mbps; /* Positions which can match multibyte @@ -2161,7 +2162,7 @@ state_index (struct dfa *d, position_set const *s, int context) alloc_position_set (&d->states[i].elems, s->nelem); copy (s, &d->states[i].elems); d->states[i].context = context; - d->states[i].backref = 0; + d->states[i].has_backref = false; d->states[i].constraint = 0; d->states[i].first_end = 0; if (MBS_SUPPORT) @@ -2181,7 +2182,7 @@ state_index (struct dfa *d, position_set const *s, int context) else if (d->tokens[s->elems[j].index] == BACKREF) { d->states[i].constraint = NO_CONSTRAINT; - d->states[i].backref = 1; + d->states[i].has_backref = true; } ++d->sindex; @@ -2649,6 +2650,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) if (d->states[s].mbps.nelem == 0) alloc_position_set (&d->states[s].mbps, 1); insert (pos, &(d->states[s].mbps)); + d->states[s].has_mbcset |= (d->tokens[pos.index] == MBCSET); continue; } else @@ -3454,7 +3456,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, better performance (up to 25% better on [a-z], for example) and enables support for collating symbols and equivalence classes. 
*/ - if (backref) + if (d->states[s].has_mbcset && backref) { *backref = 1; free (mblen_buf); @@ -3490,7 +3492,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, if (d->success[s] & sbit[*p]) { if (backref) - *backref = (d->states[s].backref != 0); + *backref = d->states[s].has_backref; if (d->mb_cur_max > 1) { free (mblen_buf); -- 1.9.1