https://github.com/kees created https://github.com/llvm/llvm-project/pull/167254
Switch from xxHash64 to FNV-1a (to match the coming GCC KCFI implementation) and to remove the last user of xxHash64. >From e6257e667e5d3697f51af8ba42edf12ee1c0657e Mon Sep 17 00:00:00 2001 From: Kees Cook <[email protected]> Date: Sun, 9 Nov 2025 12:33:29 -0800 Subject: [PATCH 1/4] [CodeGen][KCFI] Emit signed .set directive value LLVM IR's default printing behavior uses APInt::operator<<, which calls APInt::print(OS, /*isSigned=*/true). This means KCFI operand bundles in call instructions print as signed (e.g. [ "kcfi"(i32 -1208803271) ]), and KCFI type metadata prints as signed (e.g. !3 = !{i32 -1208803271}). When emitting the assembly .set directive, KCFI was using getZExtValue() (reasonably, since the hash is a u32). However, this means that FileCheck pattern matching can't match between the .set directive and the IR. Switch the .set direct to use getSExtValue(), which still gets assembled correctly and allows FileCheck matching. We had gotten lucky with the existing tests happening to just not have had the high bit set. Signed-off-by: Kees Cook <[email protected]> --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 98d59b79ab881..f751ae405266a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3186,7 +3186,7 @@ void CodeGenModule::finalizeKCFITypes() { continue; std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + - Name + ", " + Twine(Type->getZExtValue()) + "\n") + Name + ", " + Twine(Type->getSExtValue()) + "\n") .str(); M.appendModuleInlineAsm(Asm); } >From 84398ee00cda4ca79ce69d44d66d7f3fffa689e9 Mon Sep 17 00:00:00 2001 From: Kees Cook <[email protected]> Date: Sun, 9 Nov 2025 13:47:35 -0800 Subject: [PATCH 2/4] [CodeGen][KCFI] Introduce llvm::getKCFITypeID to wrap hash implementation KCFI generates hashes in two places. Instead of exposing the hash implementation in both places, introduce a helper that wraps the specific hash implementation in a single place. Signed-off-by: Kees Cook <[email protected]> --- clang/lib/CodeGen/CodeGenModule.cpp | 5 ++--- llvm/include/llvm/Transforms/Instrumentation/KCFI.h | 5 +++++ llvm/lib/Transforms/Instrumentation/KCFI.cpp | 5 +++++ llvm/lib/Transforms/Utils/ModuleUtils.cpp | 12 ++++++------ 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index f751ae405266a..c992a0bc4aa11 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -67,10 +67,10 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include <optional> #include <set> @@ -2431,8 +2431,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) { if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) Out << ".generalized"; - return llvm::ConstantInt::get(Int32Ty, - static_cast<uint32_t>(llvm::xxHash64(OutName))); + return llvm::ConstantInt::get(Int32Ty, llvm::getKCFITypeID(OutName)); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, diff --git a/llvm/include/llvm/Transforms/Instrumentation/KCFI.h b/llvm/include/llvm/Transforms/Instrumentation/KCFI.h index 2711930a20b5b..7a2d42fa1e1c6 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/KCFI.h +++ b/llvm/include/llvm/Transforms/Instrumentation/KCFI.h @@ -18,10 +18,15 @@ #include "llvm/Support/Compiler.h" namespace llvm { + class KCFIPass : public PassInfoMixin<KCFIPass> { public: static bool isRequired() { return true; } LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + +/// Compute the KCFI type identifier hash for a given mangled type name. +uint32_t getKCFITypeID(StringRef MangledTypeName); + } // namespace llvm #endif // LLVM_TRANSFORMS_INSTRUMENTATION_KCFI_H diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp index f4cb4e2d1c9e1..8d1af59f55978 100644 --- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp +++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/xxhash.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -32,6 +33,10 @@ using namespace llvm; STATISTIC(NumKCFIChecks, "Number of kcfi operands transformed into checks"); +uint32_t llvm::getKCFITypeID(StringRef MangledTypeName) { + return static_cast<uint32_t>(xxHash64(MangledTypeName)); +} + namespace { class DiagnosticInfoKCFI : public DiagnosticInfo { const Twine &Msg; diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 596849ecab742..4b496dbae5a61 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -21,7 +21,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/xxhash.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" using namespace llvm; @@ -208,10 +208,10 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { std::string Type = MangledType.str(); if (M.getModuleFlag("cfi-normalize-integers")) Type += ".normalized"; - F.setMetadata(LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), - static_cast<uint32_t>(xxHash64(Type)))))); + F.setMetadata( + LLVMContext::MD_kcfi_type, + MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), getKCFITypeID(Type))))); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null<ConstantInt>( >From 757926a3cf62d38c6a97ec08f9d5ed4fe62d6b7c Mon Sep 17 00:00:00 2001 From: Kees Cook <[email protected]> Date: Sun, 9 Nov 2025 13:10:06 -0800 Subject: [PATCH 3/4] [CodeGen][KCFI] Replace xxHash64 with FNV-1a The KCFI type hash does not need to be cryptographically secure. To keep Clang KCFI hash-identical to GCC KCFI, switch to FNV-1a for hashing. This also removes the last user of xxHash64. Signed-off-by: Kees Cook <[email protected]> --- clang/test/CodeGen/kcfi-generalize.c | 20 ++++++++++---------- clang/test/CodeGen/kcfi-normalize.c | 20 ++++++++++---------- llvm/lib/Transforms/Instrumentation/KCFI.cpp | 9 +++++++-- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/clang/test/CodeGen/kcfi-generalize.c b/clang/test/CodeGen/kcfi-generalize.c index 5a44d97412af9..47d8128c45bcf 100644 --- a/clang/test/CodeGen/kcfi-generalize.c +++ b/clang/test/CodeGen/kcfi-generalize.c @@ -21,8 +21,8 @@ int** f3(char *a, char **b) { } void g(int** (*fp)(const char *, const char **)) { - // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 1296635908) ] - // GENERALIZED: call {{.*}} [ "kcfi"(i32 -49168686) ] + // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -1900814401) ] + // GENERALIZED: call {{.*}} [ "kcfi"(i32 355875385) ] fp(0, 0); } @@ -33,16 +33,16 @@ union Union { // CHECK: define{{.*}} void @uni({{.*}} !kcfi_type [[TYPE4:![0-9]+]] void uni(void (*fn)(union Union), union Union arg1) { - // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 -587217045) ] - // GENERALIZED: call {{.*}} [ "kcfi"(i32 2139530422) ] + // UNGENERALIZED: call {{.*}} [ "kcfi"(i32 514817671) ] + // GENERALIZED: call {{.*}} [ "kcfi"(i32 1629153266) ] fn(arg1); } -// UNGENERALIZED: [[TYPE]] = !{i32 1296635908} -// GENERALIZED: [[TYPE]] = !{i32 -49168686} +// UNGENERALIZED: [[TYPE]] = !{i32 -1900814401} +// GENERALIZED: [[TYPE]] = !{i32 355875385} -// UNGENERALIZED: [[TYPE3]] = !{i32 874141567} -// GENERALIZED: [[TYPE3]] = !{i32 954385378} +// UNGENERALIZED: [[TYPE3]] = !{i32 1089235487} +// GENERALIZED: [[TYPE3]] = !{i32 1460151842} -// UNGENERALIZED: [[TYPE4]] = !{i32 -1619636625} -// GENERALIZED: [[TYPE4]] = !{i32 -125078496} +// UNGENERALIZED: [[TYPE4]] = !{i32 1937639136} +// GENERALIZED: [[TYPE4]] = !{i32 734921772} diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index bd87f4af534a1..320f9e398954f 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -10,21 +10,21 @@ void foo(void (*fn)(int), int arg) { // CHECK-LABEL: define{{.*}}foo // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE1:[0-9]+]] - // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 1162514891) ] + // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 -402462225) ] fn(arg); } void bar(void (*fn)(int, int), int arg1, int arg2) { // CHECK-LABEL: define{{.*}}bar // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE2:[0-9]+]] - // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 448046469) ] + // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 -972192795) ] fn(arg1, arg2); } void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { // CHECK-LABEL: define{{.*}}baz // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE3:[0-9]+]] - // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -2049681433) ] + // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -1376104717) ] fn(arg1, arg2, arg3); } @@ -36,14 +36,14 @@ union Union { void uni(void (*fn)(union Union), union Union arg1) { // CHECK-LABEL: define{{.*}}uni // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE4:[0-9]+]] - // C: call void %0(ptr %1) [ "kcfi"(i32 1819770848) ] - // CPP: call void %0(ptr %1) [ "kcfi"(i32 -1430221633) ] + // C: call void %0(ptr %1) [ "kcfi"(i32 641309179) ] + // CPP: call void %0(ptr %1) [ "kcfi"(i32 15039153) ] fn(arg1); } // CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1} -// CHECK: ![[TYPE1]] = !{i32 -1143117868} -// CHECK: ![[TYPE2]] = !{i32 -460921415} -// CHECK: ![[TYPE3]] = !{i32 -333839615} -// C: ![[TYPE4]] = !{i32 -650530463} -// CPP: ![[TYPE4]] = !{i32 1766237188} +// CHECK: ![[TYPE1]] = !{i32 -1113907258} +// CHECK: ![[TYPE2]] = !{i32 994987278} +// CHECK: ![[TYPE3]] = !{i32 -886425042} +// C: ![[TYPE4]] = !{i32 -1919128908} +// CPP: ![[TYPE4]] = !{i32 1834954376} diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp index 8d1af59f55978..31c5c972589c2 100644 --- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp +++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp @@ -23,7 +23,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Support/xxhash.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -34,7 +33,13 @@ using namespace llvm; STATISTIC(NumKCFIChecks, "Number of kcfi operands transformed into checks"); uint32_t llvm::getKCFITypeID(StringRef MangledTypeName) { - return static_cast<uint32_t>(xxHash64(MangledTypeName)); + // FNV-1a hash (32-bit) + uint32_t Hash = 2166136261u; // FNV offset basis + for (unsigned char C : MangledTypeName) { + Hash ^= C; + Hash *= 16777619u; // FNV prime + } + return Hash; } namespace { >From 77769ef7e432f07d0dde05e26dc1a4d6a58f3753 Mon Sep 17 00:00:00 2001 From: Kees Cook <[email protected]> Date: Sun, 9 Nov 2025 14:18:16 -0800 Subject: [PATCH 4/4] [Support] Remove unused xxHash64 Now that there are no more users of xxHash64, remove it. Signed-off-by: Kees Cook <[email protected]> --- llvm/include/llvm/Support/xxhash.h | 3 -- llvm/lib/Support/xxhash.cpp | 78 --------------------------- llvm/unittests/Support/xxhashTest.cpp | 8 --- 3 files changed, 89 deletions(-) diff --git a/llvm/include/llvm/Support/xxhash.h b/llvm/include/llvm/Support/xxhash.h index b521adbef3456..9f95e7d4db58d 100644 --- a/llvm/include/llvm/Support/xxhash.h +++ b/llvm/include/llvm/Support/xxhash.h @@ -44,9 +44,6 @@ namespace llvm { -LLVM_ABI uint64_t xxHash64(llvm::StringRef Data); -LLVM_ABI uint64_t xxHash64(llvm::ArrayRef<uint8_t> Data); - LLVM_ABI uint64_t xxh3_64bits(ArrayRef<uint8_t> data); inline uint64_t xxh3_64bits(StringRef data) { return xxh3_64bits(ArrayRef(data.bytes_begin(), data.size())); diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp index cdb76d57e2c1d..79df4bc4d0ec0 100644 --- a/llvm/lib/Support/xxhash.cpp +++ b/llvm/lib/Support/xxhash.cpp @@ -77,20 +77,6 @@ static const uint64_t PRIME64_3 = 1609587929392839161ULL; static const uint64_t PRIME64_4 = 9650029242287828579ULL; static const uint64_t PRIME64_5 = 2870177450012600261ULL; -static uint64_t round(uint64_t Acc, uint64_t Input) { - Acc += Input * PRIME64_2; - Acc = rotl64(Acc, 31); - Acc *= PRIME64_1; - return Acc; -} - -static uint64_t mergeRound(uint64_t Acc, uint64_t Val) { - Val = round(0, Val); - Acc ^= Val; - Acc = Acc * PRIME64_1 + PRIME64_4; - return Acc; -} - static uint64_t XXH64_avalanche(uint64_t hash) { hash ^= hash >> 33; hash *= PRIME64_2; @@ -100,70 +86,6 @@ static uint64_t XXH64_avalanche(uint64_t hash) { return hash; } -uint64_t llvm::xxHash64(StringRef Data) { - size_t Len = Data.size(); - uint64_t Seed = 0; - const unsigned char *P = Data.bytes_begin(); - const unsigned char *const BEnd = Data.bytes_end(); - uint64_t H64; - - if (Len >= 32) { - const unsigned char *const Limit = BEnd - 32; - uint64_t V1 = Seed + PRIME64_1 + PRIME64_2; - uint64_t V2 = Seed + PRIME64_2; - uint64_t V3 = Seed + 0; - uint64_t V4 = Seed - PRIME64_1; - - do { - V1 = round(V1, endian::read64le(P)); - P += 8; - V2 = round(V2, endian::read64le(P)); - P += 8; - V3 = round(V3, endian::read64le(P)); - P += 8; - V4 = round(V4, endian::read64le(P)); - P += 8; - } while (P <= Limit); - - H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18); - H64 = mergeRound(H64, V1); - H64 = mergeRound(H64, V2); - H64 = mergeRound(H64, V3); - H64 = mergeRound(H64, V4); - - } else { - H64 = Seed + PRIME64_5; - } - - H64 += (uint64_t)Len; - - while (reinterpret_cast<uintptr_t>(P) + 8 <= - reinterpret_cast<uintptr_t>(BEnd)) { - uint64_t const K1 = round(0, endian::read64le(P)); - H64 ^= K1; - H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4; - P += 8; - } - - if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) { - H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1; - H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3; - P += 4; - } - - while (P < BEnd) { - H64 ^= (*P) * PRIME64_5; - H64 = rotl64(H64, 11) * PRIME64_1; - P++; - } - - return XXH64_avalanche(H64); -} - -uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) { - return xxHash64({(const char *)Data.data(), Data.size()}); -} - constexpr size_t XXH3_SECRETSIZE_MIN = 136; constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192; diff --git a/llvm/unittests/Support/xxhashTest.cpp b/llvm/unittests/Support/xxhashTest.cpp index 84308ce130e72..dcc654b800484 100644 --- a/llvm/unittests/Support/xxhashTest.cpp +++ b/llvm/unittests/Support/xxhashTest.cpp @@ -31,14 +31,6 @@ static void fillTestBuffer(uint8_t *buffer, size_t len) { } } -TEST(xxhashTest, Basic) { - EXPECT_EQ(0xef46db3751d8e999U, xxHash64(StringRef())); - EXPECT_EQ(0x33bf00a859c4ba3fU, xxHash64("foo")); - EXPECT_EQ(0x48a37c90ad27a659U, xxHash64("bar")); - EXPECT_EQ(0x69196c1b3af0bff9U, - xxHash64("0123456789abcdefghijklmnopqrstuvwxyz")); -} - TEST(xxhashTest, xxh3) { constexpr size_t size = 2243; uint8_t a[size]; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
