For simple patterns, grep has an optimization that avoids the regex
engine and runs about 50% faster. The problem is that its idea of a
simple pattern is too restrictive.
This diff switches the logic around from a whitelist of safe characters
to a blacklist of regex metacharacters. We only need to abort the fast
path if we see one of those metacharacters.
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/grep/util.c,v
retrieving revision 1.45
diff -u -p -r1.45 util.c
--- util.c 29 Dec 2012 01:32:44 -0000 1.45
+++ util.c 1 May 2013 00:00:30 -0000
@@ -348,15 +348,8 @@ fastcomp(fastgrep_t *fg, const char *pat
/* Look for ways to cheat...er...avoid the full regex engine. */
for (i = 0; i < fg->patternLen; i++)
{
- /* Can still cheat? */
- if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) ||
- (fg->pattern[i] == '_') || (fg->pattern[i] == ',') ||
- (fg->pattern[i] == '=') || (fg->pattern[i] == '-') ||
- (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) {
- /* As long as it is good, upper case it for later. */
- if (iflag)
- fg->pattern[i] = toupper(fg->pattern[i]);
- } else if (fg->pattern[i] == '.') {
+ switch (fg->pattern[i]) {
+ case '.':
hasDot = i;
if (i < fg->patternLen / 2) {
if (firstHalfDot < 0)
@@ -368,11 +361,23 @@ fastcomp(fastgrep_t *fg, const char *pat
if (firstLastHalfDot < 0)
firstLastHalfDot = i;
}
- } else {
+ break;
+ case '\\':
+ case '[':
+ case '(':
+ case '{':
+ case '?':
+ case '*':
+ case '+':
+ case '|':
/* Free memory and let others know this is empty. */
free(fg->pattern);
fg->pattern = NULL;
return (-1);
+ default:
+ if (iflag)
+ fg->pattern[i] = toupper(fg->pattern[i]);
+ break;
}
}