Thanks, I installed that patch. If you configure with --enable-gcc-warnings
(which I recommend), the patch causes GCC to complain that the variable wc2 is
set but not used. I fixed that by installing the attached further patch, which
also adds a minor optimization: [x-x] is now treated as if it were [x].
From 5700656ed2ec6a93e0a5c825b445f639d21a0d6e Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 12 Aug 2015 07:35:03 -0700
Subject: [PATCH] dfa: optimize [x-x]
* src/dfa.c (parse_bracket_exp): Treat [x-x] as if it were [x].
This also pacifies GCC, which otherwise complains about wc2
being set but not used.
---
src/dfa.c | 56 ++++++++++++++++++++++++++++++++------------------------
1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 18c86d7..ac5129b 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1101,41 +1101,49 @@ parse_bracket_exp (void)
c2 = ']';
}
- if (c2 != ']')
+ if (c2 == ']')
+ {
+ /* In the case [x-], the - is an ordinary hyphen,
+ which is left in c1, the lookahead character. */
+ lexptr -= cur_mb_len;
+ lexleft += cur_mb_len;
+ }
+ else
{
if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH_WC (c2, wc2, _("unbalanced ["));
- if (dfa->multibyte)
- known_bracket_exp = false;
- else if (using_simple_locale ())
+ colon_warning_state |= 8;
+ FETCH_WC (c1, wc1, _("unbalanced ["));
+
+ /* Treat [x-y] as a range if x != y. */
+ if (wc != wc2 || wc == WEOF)
{
- for (c1 = c; c1 <= c2; c1++)
- setbit (c1, ccl);
- if (case_fold)
+ if (dfa->multibyte)
+ known_bracket_exp = false;
+ else if (using_simple_locale ())
{
- int uc = toupper (c);
- int uc2 = toupper (c2);
- for (c1 = 0; c1 < NOTCHAR; c1++)
+ int ci;
+ for (ci = c; ci <= c2; ci++)
+ setbit (ci, ccl);
+ if (case_fold)
{
- int uc1 = toupper (c1);
- if (uc <= uc1 && uc1 <= uc2)
- setbit (c1, ccl);
+ int uc = toupper (c);
+ int uc2 = toupper (c2);
+ for (ci = 0; ci < NOTCHAR; ci++)
+ {
+ int uci = toupper (ci);
+ if (uc <= uci && uci <= uc2)
+ setbit (ci, ccl);
+ }
}
}
- }
- else
- known_bracket_exp = false;
+ else
+ known_bracket_exp = false;
- colon_warning_state |= 8;
- FETCH_WC (c1, wc1, _("unbalanced ["));
- continue;
+ continue;
+ }
}
-
- /* In the case [x-], the - is an ordinary hyphen,
- which is left in c1, the lookahead character. */
- lexptr -= cur_mb_len;
- lexleft += cur_mb_len;
}
colon_warning_state |= (c == ':') ? 2 : 4;
--
2.1.0