When trying to use U-Boot's regex facility, it is a rather large gotcha that the [a-z] range syntax is not supported. Implementing it does not require a lot of extra code: we let the regular parsing emit the start and end literal symbols as usual, and add a new "escape" code, RANGE, which is stored between them.
At match time, this means the code will first just see an 'a' and try to match that, and only then recognize that it's actually part of a range and do the 'a' <= ch <= 'z' test. Of course, this means that a '-' in the middle of a [] pair no longer matches a literal dash, but I highly doubt anybody relies on that. Putting it first or last, or escaping it with \, as in most other RE engines, continues to work. Reviewed-by: Simon Glass <s...@chromium.org> Signed-off-by: Rasmus Villemoes <r...@prevas.dk> --- lib/slre.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/slre.c b/lib/slre.c index 87dfde720e9..117815a6d60 100644 --- a/lib/slre.c +++ b/lib/slre.c @@ -30,7 +30,7 @@ #include <slre.h> enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, - STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; + STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT, RANGE}; #ifdef SLRE_TEST static struct { @@ -55,7 +55,8 @@ static struct { {"QUEST", 1, "o"}, /* Match zero or one time, "?" */ {"SPACE", 0, ""}, /* Match whitespace, "\s" */ {"NONSPACE", 0, ""}, /* Match non-space, "\S" */ - {"DIGIT", 0, ""} /* Match digit, "\d" */ + {"DIGIT", 0, ""}, /* Match digit, "\d" */ + {"RANGE", 0, ""}, /* Range separator - */ }; #endif /* SLRE_TEST */ @@ -260,6 +261,15 @@ anyof(struct slre *r, const char **re) return; /* NOTREACHED */ break; + case '-': + if (r->data_size == old_data_size || **re == ']') { + /* First or last character, just match - itself. 
*/ + store_char_in_data(r, '-'); + break; + } + store_char_in_data(r, 0); + store_char_in_data(r, RANGE); + break; case '\\': esc = get_escape_char(re); if ((esc & 0xff) == 0) { @@ -487,6 +497,14 @@ is_any_of(const unsigned char *p, int len, const char *s, int *ofs) if (isdigit(ch)) goto match; break; + case RANGE: + /* + * a-z is represented in the data array as {'a', \0, RANGE, 'z'} + */ + ++i; + if (p[i - 3] <= (unsigned char)ch && (unsigned char)ch <= p[i]) + goto match; + break; } continue; } -- 2.49.0