Module Name: src Committed By: rillig Date: Thu Jun 22 12:59:54 UTC 2023
Modified Files: src/usr.bin/make: str.c src/usr.bin/make/unit-tests: varmod-match.mk Log Message: make: speed up pattern matching in the ':M' and ':N' modifiers In the code coverage report, the highest count for Str_Match goes from 5,298,924 down to 79,646. To generate a diff of this commit: cvs rdiff -u -r1.94 -r1.95 src/usr.bin/make/str.c cvs rdiff -u -r1.13 -r1.14 src/usr.bin/make/unit-tests/varmod-match.mk Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/make/str.c diff -u src/usr.bin/make/str.c:1.94 src/usr.bin/make/str.c:1.95 --- src/usr.bin/make/str.c:1.94 Wed Dec 7 10:28:48 2022 +++ src/usr.bin/make/str.c Thu Jun 22 12:59:54 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $ */ +/* $NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $ */ /* * Copyright (c) 1988, 1989, 1990, 1993 @@ -71,7 +71,7 @@ #include "make.h" /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */ -MAKE_RCSID("$NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $"); +MAKE_RCSID("$NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $"); static HashTable interned_strings; @@ -323,19 +323,18 @@ in_range(char e1, char c, char e2) bool Str_Match(const char *str, const char *pat) { - for (; *pat != '\0'; pat++, str++) { - if (*pat == '*') { /* match any substring */ - pat++; - while (*pat == '*') - pat++; - if (*pat == '\0') - return true; - for (; *str != '\0'; str++) - if (Str_Match(str, pat)) - return true; - return false; - } - + enum { MFL_START, MFL_MIDDLE, MFL_END } mfl; + const char *str1, *pat1; + bool matched; + + mfl = MFL_START; + str1 = str; + pat1 = pat; +match_fixed_length: + str = str1; + pat = pat1; + matched = false; + for (; *pat != '\0' && *pat != '*'; str++, pat++) { if (*str == '\0') return false; @@ -350,7 +349,7 @@ Str_Match(const char *str, const char *p if (*pat == ']' || *pat == '\0') { if (neg) break; - return false; + goto match_done; } if (*pat == *str) break; @@ -364,7 +363,7 @@ Str_Match(const char *str, const char *p pat++; } if (neg && *pat != ']' && *pat != '\0') - return false; + goto match_done; while (*pat != ']' && *pat != '\0') pat++; if (*pat == '\0') @@ -374,11 +373,43 @@ Str_Match(const char *str, const char *p if (*pat == '\\') /* match the next character exactly */ pat++; - if (*pat != *str) + goto match_done; + } + matched = true; + +match_done: + switch (mfl) { + case MFL_START: + if (!matched) return false; + if (*pat == '\0') + return *str == '\0'; + mfl = MFL_MIDDLE; + break; + default: + if (!matched) { + str1++; + goto match_fixed_length; + } + if (*pat == '\0') { + mfl = MFL_END; + str1 = str + strlen(str) - (str - str1); + goto match_fixed_length; + } + break; + case MFL_END: + return matched; } - return *str == '\0'; + + while (*pat == '*') + pat++; + if (*pat == '\0') + return true; + if (*str == '\0') + return false; + str1 = str, pat1 = pat; + goto match_fixed_length; } void Index: src/usr.bin/make/unit-tests/varmod-match.mk diff -u src/usr.bin/make/unit-tests/varmod-match.mk:1.13 src/usr.bin/make/unit-tests/varmod-match.mk:1.14 --- src/usr.bin/make/unit-tests/varmod-match.mk:1.13 Thu Jun 22 09:09:08 2023 +++ src/usr.bin/make/unit-tests/varmod-match.mk Thu Jun 22 12:59:54 2023 @@ -1,4 +1,4 @@ -# $NetBSD: varmod-match.mk,v 1.13 2023/06/22 09:09:08 rillig Exp $ +# $NetBSD: varmod-match.mk,v 1.14 2023/06/22 12:59:54 rillig Exp $ # # Tests for the :M variable modifier, which filters words that match the # given pattern. @@ -33,11 +33,12 @@ NUMBERS= One Two Three Four five six sev .if ${:U****************:M****************b} .endif -# As of 2023-06-22, this expression calls Str_Match 2,621,112 times. -# Adding another '*?' to the pattern calls Str_Match 20,630,572 times. -# Adding another '*?' to the pattern calls Str_Match 136,405,672 times. -# Adding another '*?' to the pattern calls Str_Match 773,168,722 times. -# Adding another '*?' to the pattern calls Str_Match 3,815,481,072 times. +# Before 2023-06-22, this expression called Str_Match 2,621,112 times. +# Adding another '*?' to the pattern called Str_Match 20,630,572 times. +# Adding another '*?' to the pattern called Str_Match 136,405,672 times. +# Adding another '*?' to the pattern called Str_Match 773,168,722 times. +# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times. +# Since 2023-06-22, Str_Match no longer backtracks. .if ${:U..................................................b:M*?*?*?*?*?a} .endif @@ -217,6 +218,13 @@ WORDS= - + x xx 0 1 2 3 4 [x1-3 . error .endif +# *[-x1-3 Incomplete character list after a wildcard, matches those +# words that end with one of the characters from the list. +WORDS= - + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3 +.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3" +. warning ${WORDS:M*[-x1-3} +.endif + # [^-x1-3 # Incomplete negated character list, matches any character # except those elements that can be parsed without lookahead.