Module Name:    src
Committed By:   rillig
Date:           Thu Jun 22 12:59:54 UTC 2023

Modified Files:
        src/usr.bin/make: str.c
        src/usr.bin/make/unit-tests: varmod-match.mk

Log Message:
make: speed up pattern matching in the ':M' and ':N' modifiers

In the code coverage report, the highest count for Str_Match goes from
5,298,924 down to 79,646.


To generate a diff of this commit:
cvs rdiff -u -r1.94 -r1.95 src/usr.bin/make/str.c
cvs rdiff -u -r1.13 -r1.14 src/usr.bin/make/unit-tests/varmod-match.mk

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/make/str.c
diff -u src/usr.bin/make/str.c:1.94 src/usr.bin/make/str.c:1.95
--- src/usr.bin/make/str.c:1.94	Wed Dec  7 10:28:48 2022
+++ src/usr.bin/make/str.c	Thu Jun 22 12:59:54 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $	*/
+/*	$NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -71,7 +71,7 @@
 #include "make.h"
 
 /*	"@(#)str.c	5.8 (Berkeley) 6/1/90"	*/
-MAKE_RCSID("$NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $");
+MAKE_RCSID("$NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $");
 
 
 static HashTable interned_strings;
@@ -323,19 +323,18 @@ in_range(char e1, char c, char e2)
 bool
 Str_Match(const char *str, const char *pat)
 {
-	for (; *pat != '\0'; pat++, str++) {
-		if (*pat == '*') {	/* match any substring */
-			pat++;
-			while (*pat == '*')
-				pat++;
-			if (*pat == '\0')
-				return true;
-			for (; *str != '\0'; str++)
-				if (Str_Match(str, pat))
-					return true;
-			return false;
-		}
-
+	enum { MFL_START, MFL_MIDDLE, MFL_END } mfl;
+	const char *str1, *pat1;
+	bool matched;
+
+	mfl = MFL_START;
+	str1 = str;
+	pat1 = pat;
+match_fixed_length:
+	str = str1;
+	pat = pat1;
+	matched = false;
+	for (; *pat != '\0' && *pat != '*'; str++, pat++) {
 		if (*str == '\0')
 			return false;
 
@@ -350,7 +349,7 @@ Str_Match(const char *str, const char *p
 				if (*pat == ']' || *pat == '\0') {
 					if (neg)
 						break;
-					return false;
+					goto match_done;
 				}
 				if (*pat == *str)
 					break;
@@ -364,7 +363,7 @@ Str_Match(const char *str, const char *p
 				pat++;
 			}
 			if (neg && *pat != ']' && *pat != '\0')
-				return false;
+				goto match_done;
 			while (*pat != ']' && *pat != '\0')
 				pat++;
 			if (*pat == '\0')
@@ -374,11 +373,43 @@ Str_Match(const char *str, const char *p
 
 		if (*pat == '\\')	/* match the next character exactly */
 			pat++;
-
 		if (*pat != *str)
+			goto match_done;
+	}
+	matched = true;
+
+match_done:
+	switch (mfl) {
+	case MFL_START:
+		if (!matched)
 			return false;
+		if (*pat == '\0')
+			return *str == '\0';
+		mfl = MFL_MIDDLE;
+		break;
+	default:
+		if (!matched) {
+			str1++;
+			goto match_fixed_length;
+		}
+		if (*pat == '\0') {
+			mfl = MFL_END;
+			str1 = str + strlen(str) - (str - str1);
+			goto match_fixed_length;
+		}
+		break;
+	case MFL_END:
+		return matched;
 	}
-	return *str == '\0';
+
+	while (*pat == '*')
+		pat++;
+	if (*pat == '\0')
+		return true;
+	if (*str == '\0')
+		return false;
+	str1 = str, pat1 = pat;
+	goto match_fixed_length;
 }
 
 void

Index: src/usr.bin/make/unit-tests/varmod-match.mk
diff -u src/usr.bin/make/unit-tests/varmod-match.mk:1.13 src/usr.bin/make/unit-tests/varmod-match.mk:1.14
--- src/usr.bin/make/unit-tests/varmod-match.mk:1.13	Thu Jun 22 09:09:08 2023
+++ src/usr.bin/make/unit-tests/varmod-match.mk	Thu Jun 22 12:59:54 2023
@@ -1,4 +1,4 @@
-# $NetBSD: varmod-match.mk,v 1.13 2023/06/22 09:09:08 rillig Exp $
+# $NetBSD: varmod-match.mk,v 1.14 2023/06/22 12:59:54 rillig Exp $
 #
 # Tests for the :M variable modifier, which filters words that match the
 # given pattern.
@@ -33,11 +33,12 @@ NUMBERS=	One Two Three Four five six sev
 .if ${:U****************:M****************b}
 .endif
 
-# As of 2023-06-22, this expression calls Str_Match 2,621,112 times.
-# Adding another '*?' to the pattern calls Str_Match 20,630,572 times.
-# Adding another '*?' to the pattern calls Str_Match 136,405,672 times.
-# Adding another '*?' to the pattern calls Str_Match 773,168,722 times.
-# Adding another '*?' to the pattern calls Str_Match 3,815,481,072 times.
+# Before 2023-06-22, this expression called Str_Match 2,621,112 times.
+# Adding another '*?' to the pattern called Str_Match 20,630,572 times.
+# Adding another '*?' to the pattern called Str_Match 136,405,672 times.
+# Adding another '*?' to the pattern called Str_Match 773,168,722 times.
+# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times.
+# Since 2023-06-22, Str_Match no longer backtracks.
 .if ${:U..................................................b:M*?*?*?*?*?a}
 .endif
 
@@ -217,6 +218,13 @@ WORDS=		- + x xx 0 1 2 3 4 [x1-3
 .  error
 .endif
 
+#	*[-x1-3	Incomplete character list after a wildcard, matches those
+#		words that end with one of the characters from the list.
+WORDS=		- + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3
+.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3"
+.  warning ${WORDS:M*[-x1-3}
+.endif
+
 #	[^-x1-3
 #		Incomplete negated character list, matches any character
 #		except those elements that can be parsed without lookahead.

Reply via email to