Thanks, I merged those two patches and installed them into the grep
master branch, with small adjustments to the commit messages. I also installed
two fixup patches, mostly addressing style issues. I did notice
one minor technical issue; the second patch had code that looked like this:
+ context = (wc == (wchar_t) eolbyte || wc == 0) ? CTX_NEWLINE : CTX_NONE;
I realize this came from the old wchar_context function, but I don't see
why that "|| wc == 0" is there, so I removed it. The tests still pass.
If you (or someone else) can explain why it's needed I can put it back in.
Attached are the four patches I installed; the first and third are your
patches and the second and fourth are the fixups.
>From f0951ff04e023c24db8755fb5213f54491b795a9 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Wed, 6 Jul 2016 18:41:57 +0200
Subject: [PATCH 1/4] dfa: simplify for non-POSIX locales
Simplify the dfa code, since it no longer supports ranges,
collating elements, and equivalence classes in non-POSIX locales.
* src/dfa.c (struct dfa): Remove mb_match_lens.
(enum status_transit_state, match_anychar)
(check_matching_with_multibyte_ops, transit_state_consume_1char)
(State_transition): Remove.
(transit_state_singlebyte): Accept a pointer-to-pointer position
instead of a pointer, and no longer accept a pointer to the next state.
Return the next state instead of a status_transit_state. All callers
changed.
(transit_state_singlebyte, transit_state): Simplify.
(dfaexec_main): Now call transit_state only when the next character
matches ANYCHAR.
---
src/dfa.c | 321 ++++++++++++++++----------------------------------------------
1 file changed, 81 insertions(+), 240 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 19363ce..74833ba 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -411,9 +411,6 @@ struct dfa
state_num initstate_others; /* Initial state for other contexts. */
position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
on demand. */
- int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or
- MBCSET. Null if mb_follows.elems has not
- been allocated. */
};
/* Some macros for user access to dfa internals. */
@@ -2930,132 +2927,66 @@ build_state (state_num s, struct dfa *d)
/* Multibyte character handling sub-routines for dfaexec. */
-/* Return values of transit_state_singlebyte, and
- transit_state_consume_1char. */
-typedef enum
-{
- TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
- TRANSIT_STATE_DONE, /* State transition has finished. */
- TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
-} status_transit_state;
-
/* Consume a single byte and transit state from 's' to '*next_state'.
This function is almost same as the state transition routin in dfaexec.
But state transition is done just once, otherwise matching succeed or
reach the end of the buffer. */
-static status_transit_state
-transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
- state_num * next_state)
+static state_num
+transit_state_singlebyte (struct dfa *d, state_num const s,
+ unsigned char const **pp)
{
state_num *t;
- state_num works = s;
-
- status_transit_state rval = TRANSIT_STATE_IN_PROGRESS;
- while (rval == TRANSIT_STATE_IN_PROGRESS)
+ if (**pp == eolbyte)
{
- if ((t = d->trans[works]) != NULL)
- {
- works = t[*p];
- rval = TRANSIT_STATE_DONE;
- if (works < 0)
- works = 0;
- }
- else if (works < 0)
- works = 0;
- else if (d->fails[works])
- {
- works = d->fails[works][*p];
- rval = TRANSIT_STATE_DONE;
- }
- else
- {
- build_state (works, d);
- }
- }
- *next_state = works;
- return rval;
-}
+ /* S is always an initial state in transit_state in order that the
+ newline is the single. When transit_state is called, the
+ transition table for the state must have been built already. */
+ assert (d->trans[s] != NULL || d->fails[s] != NULL);
-/* Match a "." against the current context. Return the length of the
- match, in bytes. POS is the position of the ".". */
-static int
-match_anychar (struct dfa *d, state_num s, position pos,
- wint_t wc, size_t mbclen)
-{
- int context;
-
- /* Check syntax bits. */
- if (wc == (wchar_t) '\n')
- {
- if (!(syntax_bits & RE_DOT_NEWLINE))
- return 0;
- }
- else if (wc == (wchar_t) '\0')
- {
- if (syntax_bits & RE_DOT_NOT_NULL)
- return 0;
+ ++*pp;
+ return d->newlines[s];
}
- else if (wc == WEOF)
- return 0;
-
- context = wchar_context (wc);
- if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
- return 0;
-
- return mbclen;
-}
-
-/* Check whether each of 'd->states[s].mbps.elem' can match. Then return the
- array which corresponds to 'd->states[s].mbps.elem'; each element of the
- array contains the number of bytes with which the element can match.
- The caller MUST free the array which this function return. */
-static int *
-check_matching_with_multibyte_ops (struct dfa *d, state_num s,
- char const *p, wint_t wc, size_t mbclen)
-{
- size_t i;
- int *rarray;
-
- rarray = d->mb_match_lens;
- for (i = 0; i < d->states[s].mbps.nelem; ++i)
+ if (d->trans[s] != NULL)
+ t = d->trans[s];
+ else if (d->fails[s] != NULL)
+ t = d->fails[s];
+ else
{
- position pos = d->states[s].mbps.elems[i];
- switch (d->tokens[pos.index])
- {
- case ANYCHAR:
- rarray[i] = match_anychar (d, s, pos, wc, mbclen);
- break;
- default:
- break; /* cannot happen. */
- }
+ build_state (s, d);
+ if (d->trans[s])
+ t = d->trans[s];
+ else if (d->fails[s])
+ t = d->fails[s];
+ else
+ abort ();
}
- return rarray;
-}
-/* Consume a single character and enumerate all of the positions which can
- be the next position from the state 's'.
-
- 'match_lens' is the input. It can be NULL, but it can also be the output
- of check_matching_with_multibyte_ops for optimization.
+ return t[*(*pp)++];
+}
- 'mbclen' and 'pps' are the output. 'mbclen' is the length of the
- character consumed, and 'pps' is the set this function enumerates. */
-static status_transit_state
-transit_state_consume_1char (struct dfa *d, state_num s,
- unsigned char const **pp,
- wint_t wc, size_t mbclen,
- int *match_lens)
+/* Transit state from s, then return new state and update the pointer of
+ the buffer. This function is for a period operator which can match a
+ multi-byte character. */
+static state_num
+transit_state (struct dfa *d, state_num s, unsigned char const **pp,
+ unsigned char const *end)
{
+ state_num s1, s2;
+ int mbclen; /* The length of current input multibyte character. */
+ wint_t wc;
+ int context;
size_t i, j;
int k;
- state_num s1, s2;
- status_transit_state rs = TRANSIT_STATE_DONE;
- if (! match_lens && d->states[s].mbps.nelem != 0)
- match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
- wc, mbclen);
+ /* Note: caller must free the return value of this function. */
+ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+
+ context = wchar_context (wc);
+
+ /* This state has some operators which can match a multibyte character. */
+ d->mb_follows.nelem = 0;
/* Calculate the state which can be reached from the state 's' by
consuming 'mbclen' single bytes from the buffer. */
@@ -3063,7 +2994,7 @@ transit_state_consume_1char (struct dfa *d, state_num s,
for (k = 0; k < mbclen; k++)
{
s2 = s1;
- rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
+ s1 = transit_state_singlebyte (d, s2, pp);
}
copy (&d->states[s1].elems, &d->mb_follows);
@@ -3071,94 +3002,18 @@ transit_state_consume_1char (struct dfa *d, state_num s,
a single character. */
for (i = 0; i < d->states[s].mbps.nelem; i++)
{
- if (match_lens[i] == mbclen)
- for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
- j++)
- insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
- &d->mb_follows);
- }
-
- /* FIXME: this return value is always ignored. */
- return rs;
-}
-
-/* Transit state from s, then return new state and update the pointer of the
- buffer. This function is for some operator which can match with a multi-
- byte character or a collating element (which may be multi characters). */
-static state_num
-transit_state (struct dfa *d, state_num s, unsigned char const **pp,
- unsigned char const *end)
-{
- state_num s1;
- int mbclen; /* The length of current input multibyte character. */
- int maxlen = 0;
- size_t i, j;
- int *match_lens = NULL;
- size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */
- unsigned char const *p1 = *pp;
- wint_t wc;
-
- if (nelem > 0)
- /* This state has (a) multibyte operator(s).
- We check whether each of them can match or not. */
- {
- /* Note: caller must free the return value of this function. */
- mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
- match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
- wc, mbclen);
-
- for (i = 0; i < nelem; i++)
- /* Search the operator which match the longest string,
- in this state. */
- {
- if (match_lens[i] > maxlen)
- maxlen = match_lens[i];
- }
- }
-
- if (nelem == 0 || maxlen == 0)
- /* This state has no multibyte operator which can match.
- We need to check only one single byte character. */
- {
- status_transit_state rs;
- rs = transit_state_singlebyte (d, s, *pp, &s1);
-
- /* We must update the pointer if state transition succeeded. */
- if (rs == TRANSIT_STATE_DONE)
- ++*pp;
-
- return s1;
+ if (!SUCCEEDS_IN_CONTEXT (d->states[s].mbps.elems[i].constraint,
+ d->states[s].context, context))
+ continue;
+ for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
+ j++)
+ insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
+ &d->mb_follows);
}
- /* This state has some operators which can match a multibyte character. */
- d->mb_follows.nelem = 0;
-
- /* 'maxlen' may be longer than the length of a character, because it may
- not be a character but a (multi character) collating element.
- We enumerate all of the positions which 's' can reach by consuming
- 'maxlen' bytes. */
- transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens);
-
s1 = state_index (d, &d->mb_follows, wchar_context (wc));
realloc_trans_if_necessary (d, s1);
- while (*pp - p1 < maxlen)
- {
- mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
- transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL);
-
- for (i = 0; i < nelem; i++)
- {
- if (match_lens[i] == *pp - p1)
- for (j = 0;
- j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
- insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
- &d->mb_follows);
- }
-
- s1 = state_index (d, &d->mb_follows, wchar_context (wc));
- realloc_trans_if_necessary (d, s1);
- }
return s1;
}
@@ -3238,11 +3093,8 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
if (multibyte)
{
memset (&d->mbs, 0, sizeof d->mbs);
- if (! d->mb_match_lens)
- {
- d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens);
- alloc_position_set (&d->mb_follows, d->nleaves);
- }
+ if (d->mb_follows.alloc == 0)
+ alloc_position_set (&d->mb_follows, d->nleaves);
}
for (;;)
@@ -3293,44 +3145,21 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
}
}
- if (d->states[s].mbps.nelem == 0)
+ if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl)
+ || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (char *) p >= end)
+ /* If a input character does not match ANYCHAR, do it
+ like a single-byte character. */
+ s = t[*p++];
+ else
{
- s = t[*p++];
- continue;
+ s = transit_state (d, s, &p, (unsigned char *) end);
+ if (s >= 0 && p[-1] == eol)
+ nlcount++;
+ mbp = p;
+ trans = d->trans;
}
-
- /* The following code is used twice.
- Use a macro to avoid the risk that they diverge. */
-#define State_transition() \
- do { \
- /* Can match with a multibyte character (and multi-character \
- collating element). Transition table might be updated. */ \
- s = transit_state (d, s, &p, (unsigned char *) end); \
- \
- /* If previous character is newline after a transition \
- for ANYCHAR or MBCSET in non-UTF8 multibyte locales, \
- check whether current position is beyond the end of \
- the input buffer. Also, transit to initial state if \
- !ALLOW_NL, even if RE_DOT_NEWLINE is set. */ \
- if (p[-1] == eol) \
- { \
- if ((char *) p > end) \
- { \
- p = NULL; \
- goto done; \
- } \
- \
- nlcount++; \
- \
- if (!allow_nl) \
- s = 0; \
- } \
- \
- mbp = p; \
- trans = d->trans; \
- } while (false)
-
- State_transition();
}
}
else
@@ -3378,10 +3207,24 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
goto done;
s1 = s;
- if (multibyte)
- State_transition();
- else
+ if (!multibyte || d->states[s].mbps.nelem == 0
+ || (*p == eol && !allow_nl)
+ || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (char *) p >= end)
+ /* If a input character does not match ANYCHAR, do it
+ like a single-byte character. */
s = d->fails[s][*p++];
+ else
+ {
+ s = transit_state (d, s, &p, (unsigned char *) end);
+
+ if (s >= 0 && p[-1] == eol)
+ nlcount++;
+
+ mbp = p;
+ trans = d->trans;
+ }
}
else
{
@@ -3461,8 +3304,6 @@ free_mbdata (struct dfa *d)
free (d->mbcsets);
free (d->mb_follows.elems);
- free (d->mb_match_lens);
- d->mb_match_lens = NULL;
}
/* Initialize the components of a dfa that the other routines don't
--
2.5.5
>From 8ac05f9cc80e1f0c1cbaeb4215d59822080421c5 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 6 Jul 2016 18:53:25 +0200
Subject: [PATCH 2/4] dfa: minor cleanups for non-POSIX simplification
* src/dfa.c (transit_state_singlebyte): Remove unnecessary 'const'
from arg; we usually don't bother with 'const' on locals.
(transit_state_singlebyte): Omit '!= NULL' in boolean context.
Use assert rather than abort.
---
src/dfa.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 74833ba..9116df4 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2932,35 +2932,34 @@ build_state (state_num s, struct dfa *d)
But state transition is done just once, otherwise matching succeed or
reach the end of the buffer. */
static state_num
-transit_state_singlebyte (struct dfa *d, state_num const s,
- unsigned char const **pp)
+transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
{
state_num *t;
if (**pp == eolbyte)
{
- /* S is always an initial state in transit_state in order that the
- newline is the single. When transit_state is called, the
+ /* S is always an initial state in transit_state, so the
transition table for the state must have been built already. */
- assert (d->trans[s] != NULL || d->fails[s] != NULL);
+ assert (d->trans[s] || d->fails[s]);
++*pp;
return d->newlines[s];
}
- if (d->trans[s] != NULL)
+ if (d->trans[s])
t = d->trans[s];
- else if (d->fails[s] != NULL)
+ else if (d->fails[s])
t = d->fails[s];
else
{
build_state (s, d);
if (d->trans[s])
t = d->trans[s];
- else if (d->fails[s])
- t = d->fails[s];
else
- abort ();
+ {
+ t = d->fails[s];
+ assert (t);
+ }
}
return t[*(*pp)++];
@@ -3005,8 +3004,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
if (!SUCCEEDS_IN_CONTEXT (d->states[s].mbps.elems[i].constraint,
d->states[s].context, context))
continue;
- for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
- j++)
+ for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem; j++)
insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
&d->mb_follows);
}
@@ -3149,9 +3147,11 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
|| (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
|| (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
- /* If a input character does not match ANYCHAR, do it
- like a single-byte character. */
- s = t[*p++];
+ {
+ /* If an input character does not match ANYCHAR, do it
+ like a single-byte character. */
+ s = t[*p++];
+ }
else
{
s = transit_state (d, s, &p, (unsigned char *) end);
@@ -3212,16 +3212,16 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
|| (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
|| (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
- /* If a input character does not match ANYCHAR, do it
- like a single-byte character. */
- s = d->fails[s][*p++];
+ {
+ /* If a input character does not match ANYCHAR, do it
+ like a single-byte character. */
+ s = d->fails[s][*p++];
+ }
else
{
s = transit_state (d, s, &p, (unsigned char *) end);
-
if (s >= 0 && p[-1] == eol)
nlcount++;
-
mbp = p;
trans = d->trans;
}
--
2.5.5
>From 7c0d855bfa8d6e5aa91ebd60681bc7afbbe1f8d4 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Wed, 6 Jul 2016 19:10:04 +0200
Subject: [PATCH 3/4] dfa: don't distinguish letters in non-POSIX locales
For non-POSIX locales, dfa does not support word delimiters,
so remove the distinction between letters and non-letters.
* src/dfa.c (struct dfa): Remove members initstate_letter,
initstate_others. All uses removed. New member initstate_notbol.
(dfaanalyze, dfaexec_main): Replace old members with new member.
(wchar_context): Remove. Update callers.
---
src/dfa.c | 47 ++++++++++++++++++-----------------------------
1 file changed, 18 insertions(+), 29 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 9116df4..d5ffe72 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -407,9 +407,11 @@ struct dfa
newline is stored separately and handled
as a special case. Newline is also used
as a sentinel at the end of the buffer. */
- state_num initstate_letter; /* Initial state for letter context. */
- state_num initstate_others; /* Initial state for other contexts. */
- position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
+ state_num initstate_notbol; /* Initial state for CTX_LETTER and CTX_NONE
+ context in multibyte locales, in which we
+ do not distinguish between their contexts,
+ as not supported word. */
+ position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
on demand. */
};
@@ -676,16 +678,6 @@ char_context (unsigned char c)
return CTX_NONE;
}
-static int
-wchar_context (wint_t wc)
-{
- if (wc == (wchar_t) eolbyte || wc == 0)
- return CTX_NEWLINE;
- if (wc == L'_' || iswalnum (wc))
- return CTX_LETTER;
- return CTX_NONE;
-}
-
/* Entry point to set syntax options. */
void
dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
@@ -2490,13 +2482,10 @@ dfaanalyze (struct dfa *d, bool searchflag)
separate_contexts = state_separate_contexts (&merged);
if (separate_contexts & CTX_NEWLINE)
state_index (d, &merged, CTX_NEWLINE);
- d->initstate_others = d->min_trcount
+ d->initstate_notbol = d->min_trcount
= state_index (d, &merged, separate_contexts ^ CTX_ANY);
if (separate_contexts & CTX_LETTER)
- d->initstate_letter = d->min_trcount
- = state_index (d, &merged, CTX_LETTER);
- else
- d->initstate_letter = d->initstate_others;
+ d->min_trcount = state_index (d, &merged, CTX_LETTER);
d->min_trcount++;
free (posalloc);
@@ -2978,11 +2967,12 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
int context;
size_t i, j;
int k;
+ int separate_contexts;
/* Note: caller must free the return value of this function. */
mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
- context = wchar_context (wc);
+ context = (wc == (wchar_t) eolbyte || wc == 0) ? CTX_NEWLINE : CTX_NONE;
/* This state has some operators which can match a multibyte character. */
d->mb_follows.nelem = 0;
@@ -3009,7 +2999,11 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
&d->mb_follows);
}
- s1 = state_index (d, &d->mb_follows, wchar_context (wc));
+ separate_contexts = state_separate_contexts (&d->mb_follows);
+ if (context == CTX_NEWLINE && separate_contexts & CTX_NEWLINE)
+ s1 = state_index (d, &d->mb_follows, CTX_NEWLINE);
+ else
+ s1 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY);
realloc_trans_if_necessary (d, s1);
return s1;
@@ -3129,16 +3123,11 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
transit to another initial state after skip. */
if (p < mbp)
{
- int context = wchar_context (wc);
- if (context == CTX_LETTER)
- s = d->initstate_letter;
- else
- /* It's CTX_NONE. CTX_NEWLINE cannot happen,
- as we assume that a newline is always a
- single byte character. */
- s = d->initstate_others;
+ /* It's CTX_LETTER or CTX_NONE. CTX_NEWLINE
+ cannot happen, as we assume that a newline
+ is always a single byte character. */
+ s1 = s = d->initstate_notbol;
p = mbp;
- s1 = s;
}
}
}
--
2.5.5
>From 4a3f8c93d8848ea42cabb747282aa05a32ae8c13 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 6 Jul 2016 19:25:20 +0200
Subject: [PATCH 4/4] dfa: don't treat null bytes specially
* src/dfa.c (transit_state): Do not treat null byte specially
when eolbyte == '\n'.
---
src/dfa.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index d5ffe72..8f9f0bc 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2962,17 +2962,13 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
unsigned char const *end)
{
state_num s1, s2;
- int mbclen; /* The length of current input multibyte character. */
wint_t wc;
- int context;
size_t i, j;
int k;
int separate_contexts;
- /* Note: caller must free the return value of this function. */
- mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
-
- context = (wc == (wchar_t) eolbyte || wc == 0) ? CTX_NEWLINE : CTX_NONE;
+ int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+ int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE;
/* This state has some operators which can match a multibyte character. */
d->mb_follows.nelem = 0;
@@ -3000,10 +2996,9 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
}
separate_contexts = state_separate_contexts (&d->mb_follows);
- if (context == CTX_NEWLINE && separate_contexts & CTX_NEWLINE)
- s1 = state_index (d, &d->mb_follows, CTX_NEWLINE);
- else
- s1 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY);
+ if (! (context == CTX_NEWLINE || separate_contexts & CTX_NEWLINE))
+ context = separate_contexts ^ CTX_ANY;
+ s1 = state_index (d, &d->mb_follows, context);
realloc_trans_if_necessary (d, s1);
return s1;
--
2.5.5