https://github.com/Thibault-Monnier created https://github.com/llvm/llvm-project/pull/171052
This PR optimizes some of the utilities in `CharInfo.h` by replacing lookup-table checks with simple bounds or mask checks where possible. This reduces instruction latency, making compilation faster overall. The change _does_ increase the instruction count, but it appears to be faster nonetheless: [llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/compare.php?from=93e18db3e48dc28818d4880e813b9027bfbf3c16&to=cf26169083f2a68766df65607bec1547d3079aad&stat=cycles) (that comparison is for a different commit, but with essentially the same changes).

@cor3ntin @AaronBallman

>From d7e1102d5985cfbcc5427c8aa1a3dd8127951f26 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 7 Dec 2025 15:37:43 +0100
Subject: [PATCH] Optimize CharInfo.h utilities

---
 clang/include/clang/Basic/CharInfo.h | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h
index 87626eeb8a700..c34bcf4fbf88e 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -89,16 +89,15 @@ LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
+  constexpr unsigned long long Mask = 0b100000000000000000001101000000000;
+  return (c <= 32) && (Mask >> c) & 1;
 }
 
 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_VERT_WS) != 0;
+  return c == '\n' || c == '\r';
 }
 
 /// Return true if this character is horizontal or vertical ASCII whitespace:
@@ -106,26 +105,23 @@ LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
+  constexpr unsigned long long Mask = 0b100000000000000000011111000000000;
+  return (c <= 32) && (Mask >> c) & 1;
 }
 
 /// Return true if this character is an ASCII digit: [0-9]
 LLVM_READONLY inline bool isDigit(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_DIGIT) != 0;
+  return c >= '0' && c <= '9';
 }
 
 /// Return true if this character is a lowercase ASCII letter: [a-z]
 LLVM_READONLY inline bool isLowercase(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_LOWER) != 0;
+  return c >= 'a' && c <= 'z';
 }
 
 /// Return true if this character is an uppercase ASCII letter: [A-Z]
 LLVM_READONLY inline bool isUppercase(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_UPPER) != 0;
+  return c >= 'A' && c <= 'Z';
 }
 
 /// Return true if this character is an ASCII letter: [a-zA-Z]
@@ -158,9 +154,7 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) {
 /// character that should take exactly one column to print in a fixed-width
 /// terminal.
 LLVM_READONLY inline bool isPrintable(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
-                          CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
+  return c >= 32 && c <= 126;
 }
 
 /// Return true if this is the body character of a C preprocessing number,
@@ -236,7 +230,6 @@ LLVM_READONLY inline char toUppercase(char c) {
   return c;
 }
 
-
 /// Return true if this is a valid ASCII identifier.
 ///
 /// Note that this is a very simple check; it does not accept UCNs as valid
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
