https://github.com/Thibault-Monnier created 
https://github.com/llvm/llvm-project/pull/171052

This PR optimizes some of the utilities in `CharInfo.h` by replacing lookup 
table checks with a simple bounds or mask check where possible. This reduces 
instruction latency, which makes compilation faster overall.

This change _does_ increase the instruction count, but it still appears to be 
faster overall; see the cycle counts on 
[llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/compare.php?from=93e18db3e48dc28818d4880e813b9027bfbf3c16&to=cf26169083f2a68766df65607bec1547d3079aad&stat=cycles)
 (measured on a different commit that contains essentially the same changes).

@cor3ntin @AaronBallman

>From d7e1102d5985cfbcc5427c8aa1a3dd8127951f26 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <[email protected]>
Date: Sun, 7 Dec 2025 15:37:43 +0100
Subject: [PATCH] Optimize CharInfo.h utilities

---
 clang/include/clang/Basic/CharInfo.h | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h
index 87626eeb8a700..c34bcf4fbf88e 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -89,16 +89,15 @@ LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
+  constexpr unsigned long long Mask = 0b100000000000000000001101000000000;
+  return (c <= 32) && (Mask >> c) & 1;
 }
 
 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_VERT_WS) != 0;
+  return c == '\n' || c == '\r';
 }
 
 /// Return true if this character is horizontal or vertical ASCII whitespace:
@@ -106,26 +105,23 @@ LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
 ///
 /// Note that this returns false for '\\0'.
 LLVM_READONLY inline bool isWhitespace(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
+  constexpr unsigned long long Mask = 0b100000000000000000011111000000000;
+  return (c <= 32) && (Mask >> c) & 1;
 }
 
 /// Return true if this character is an ASCII digit: [0-9]
 LLVM_READONLY inline bool isDigit(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_DIGIT) != 0;
+  return c >= '0' && c <= '9';
 }
 
 /// Return true if this character is a lowercase ASCII letter: [a-z]
 LLVM_READONLY inline bool isLowercase(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_LOWER) != 0;
+  return c >= 'a' && c <= 'z';
 }
 
 /// Return true if this character is an uppercase ASCII letter: [A-Z]
 LLVM_READONLY inline bool isUppercase(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & CHAR_UPPER) != 0;
+  return c >= 'A' && c <= 'Z';
 }
 
 /// Return true if this character is an ASCII letter: [a-zA-Z]
@@ -158,9 +154,7 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) {
 /// character that should take exactly one column to print in a fixed-width
 /// terminal.
 LLVM_READONLY inline bool isPrintable(unsigned char c) {
-  using namespace charinfo;
-  return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
-                          CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
+  return c >= 32 && c <= 126;
 }
 
 /// Return true if this is the body character of a C preprocessing number,
@@ -236,7 +230,6 @@ LLVM_READONLY inline char toUppercase(char c) {
   return c;
 }
 
-
 /// Return true if this is a valid ASCII identifier.
 ///
 /// Note that this is a very simple check; it does not accept UCNs as valid

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to