Module Name: src Committed By: rillig Date: Fri May 13 21:42:30 UTC 2022
Modified Files: src/usr.bin/make: str.c Log Message: make: document platform dependency in string pattern matching No unit test for this edge case since all other unit tests are platform-independent. To reproduce: $ make clean $ make -s PROG=s-make NOMAN=yes USER_CFLAGS=-fsigned-char $ make clean $ make -s PROG=u-make NOMAN=yes USER_CFLAGS=-funsigned-char $ make clean $ range=$(lua -e 'print(("[%c-%c]"):format(0xe4, 0x61))') $ ./s-make -V "\${:UM:M$range}\${:UN:N$range}" M $ ./u-make -V "\${:UM:M$range}\${:UN:N$range}" N To generate a diff of this commit: cvs rdiff -u -r1.90 -r1.91 src/usr.bin/make/str.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/make/str.c diff -u src/usr.bin/make/str.c:1.90 src/usr.bin/make/str.c:1.91 --- src/usr.bin/make/str.c:1.90 Fri May 13 20:37:01 2022 +++ src/usr.bin/make/str.c Fri May 13 21:42:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: str.c,v 1.90 2022/05/13 20:37:01 rillig Exp $ */ +/* $NetBSD: str.c,v 1.91 2022/05/13 21:42:30 rillig Exp $ */ /* * Copyright (c) 1988, 1989, 1990, 1993 @@ -71,7 +71,7 @@ #include "make.h" /* "@(#)str.c 5.8 (Berkeley) 6/1/90" */ -MAKE_RCSID("$NetBSD: str.c,v 1.90 2022/05/13 20:37:01 rillig Exp $"); +MAKE_RCSID("$NetBSD: str.c,v 1.91 2022/05/13 21:42:30 rillig Exp $"); static HashTable interned_strings; @@ -293,6 +293,26 @@ Str_Words(const char *str, bool expand) } /* + * XXX: In the extreme edge case that one of the characters is from the basic + * execution character set and the other isn't, the result of the comparison + * differs depending on whether plain char is signed or unsigned. + * + * An example is the character range from \xE4 to 'a', where \xE4 may come + * from U+00E4 'Latin small letter A with diaeresis'. + * + * If char is signed, \xE4 evaluates to -28, the first half of the condition + * becomes -28 <= '0' && '0' <= 'a', which evaluates to true. + * + * If char is unsigned, \xE4 evaluates to 228, the second half of the + * condition becomes 'a' <= '0' && '0' <= 228, which evaluates to false. + */ +static bool +in_range(char e1, char c, char e2) +{ + return (e1 <= c && c <= e2) || (e2 <= c && c <= e1); +} + +/* * Str_Match -- Test if a string matches a pattern like "*.[ch]". * The following special characters are known *?\[] (as in fnmatch(3)). * @@ -355,9 +375,7 @@ Str_Match(const char *str, const char *p if (pat[1] == '-') { if (pat[2] == '\0') return neg; - if (pat[0] <= *str && *str <= pat[2]) - break; - if (pat[2] <= *str && *str <= pat[0]) + if (in_range(pat[0], *str, pat[2])) break; pat += 2; }