https://github.com/gbMattN updated https://github.com/llvm/llvm-project/pull/166381
>From 4481075fce712a3f55493264c11fba6cd4015a4b Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Tue, 4 Nov 2025 15:04:29 +0000 Subject: [PATCH 01/12] [TySan] User-friendly (C style) pointer type names for error reports --- clang/docs/TypeSanitizer.rst | 2 - compiler-rt/test/tysan/print_stacktrace.c | 2 +- compiler-rt/test/tysan/ptr-float.c | 2 +- .../Instrumentation/TypeSanitizer.cpp | 40 ++++++++++++++++++- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst index 3c683a6c24bb4..c2f628cb231db 100644 --- a/clang/docs/TypeSanitizer.rst +++ b/clang/docs/TypeSanitizer.rst @@ -119,8 +119,6 @@ brief dictionary of these terms. * ``omnipotent char``: This is a special type which can alias with anything. Its name comes from the C/C++ type ``char``. -* ``type p[x]``: This signifies pointers to the type. ``x`` is the number of indirections to reach the final value. - As an example, a pointer to a pointer to an integer would be ``type p2 int``. TypeSanitizer is still experimental. User-facing error messages should be improved in the future to remove references to LLVM IR specific terms. diff --git a/compiler-rt/test/tysan/print_stacktrace.c b/compiler-rt/test/tysan/print_stacktrace.c index 3ffb6063377d9..831be5e4afed9 100644 --- a/compiler-rt/test/tysan/print_stacktrace.c +++ b/compiler-rt/test/tysan/print_stacktrace.c @@ -10,7 +10,7 @@ void zero_array() { for (i = 0; i < 1; ++i) P[i] = 0.0f; // CHECK: ERROR: TypeSanitizer: type-aliasing-violation - // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float* // CHECK: {{#0 0x.* in zero_array .*print_stacktrace.c:}}[[@LINE-3]] // CHECK-SHORT-NOT: {{#1 0x.* in main .*print_stacktrace.c}} // CHECK-LONG-NEXT: {{#1 0x.* in main .*print_stacktrace.c}} diff --git a/compiler-rt/test/tysan/ptr-float.c b/compiler-rt/test/tysan/ptr-float.c index aaa9895986988..145d5d8f289ea 100644 --- a/compiler-rt/test/tysan/ptr-float.c +++ b/compiler-rt/test/tysan/ptr-float.c @@ -7,7 +7,7 @@ void zero_array() { for (i = 0; i < 1; ++i) P[i] = 0.0f; // CHECK: ERROR: TypeSanitizer: type-aliasing-violation - // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float* // CHECK: {{#0 0x.* in zero_array .*ptr-float.c:}}[[@LINE-3]] } diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index 87eba5f2c5242..e5109c047584e 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -70,6 +70,12 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation( "function calls. This verifies that they behave the same."), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClUseTBAATypeNames( + "tysan-use-tbaa-type-names", + cl::desc("Print TBAA-style type names for pointers rather than C-style " + "names (e.g. 'p2 int' rather than 'int**')"), + cl::Hidden, cl::init(false)); + STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses"); namespace { @@ -260,6 +266,29 @@ static std::string encodeName(StringRef Name) { return Output; } +/// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). +/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type. +/// If the type name was changed, returns true and stores the new type name in `Dest`. +/// Otherwise, returns false (`Dest` is unchanged). +static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) { + if (!TypeName.consume_front("p")) + return false; + + int Indirection; + if (TypeName.consumeInteger(10, Indirection)) + return false; + + if (!TypeName.consume_front(" ")) + return false; + + Dest.clear(); + Dest.reserve(TypeName.size() + Indirection); // One * per indirection + Dest.append(TypeName); + Dest.append(Indirection, '*'); + + return true; +} + std::string TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD, TypeNameMapTy &TypeNames) { @@ -355,7 +384,16 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // [2, member count, [type pointer, offset]..., name] LLVMContext &C = MD->getContext(); - Constant *NameData = ConstantDataArray::getString(C, NameNode->getString()); + StringRef TypeName = NameNode->getString(); + + // Convert LLVM-internal TBAA-style type names to C-style type names + // (more user-friendly) + std::string CStyleTypeName; + if (!ClUseTBAATypeNames) + if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) + TypeName = CStyleTypeName; + + Constant *NameData = ConstantDataArray::getString(C, TypeName); SmallVector<Type *> TDSubTys; SmallVector<Constant *> TDSubData; >From a71d469a233dffbac5fb2935bbecbc07416597bd Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Tue, 4 Nov 2025 17:21:49 +0000 Subject: [PATCH 02/12] Fix failing test --- clang/test/CodeGen/sanitize-type-globals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp b/clang/test/CodeGen/sanitize-type-globals.cpp index 1154ab4ca5df2..1300396795ff3 100644 --- a/clang/test/CodeGen/sanitize-type-globals.cpp +++ b/clang/test/CodeGen/sanitize-type-globals.cpp @@ -13,7 +13,7 @@ // CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat // CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat // CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 x i8] c"any pointer\00" }, comdat -// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 int\00" }, comdat +// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] c"int*\00" }, comdat // CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat // CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat // CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr @__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata" >From ac99a5be6be662c51d3c838774c76c28e5382bc7 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 6 Nov 2025 13:56:29 +0000 Subject: [PATCH 03/12] Remove command line flag, fix formatting --- .../Instrumentation/TypeSanitizer.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index e5109c047584e..ab59c3e9de151 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -70,12 +70,6 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation( "function calls. This verifies that they behave the same."), cl::Hidden, cl::init(false)); -static cl::opt<bool> ClUseTBAATypeNames( - "tysan-use-tbaa-type-names", - cl::desc("Print TBAA-style type names for pointers rather than C-style " - "names (e.g. 'p2 int' rather than 'int**')"), - cl::Hidden, cl::init(false)); - STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses"); namespace { @@ -267,10 +261,11 @@ static std::string encodeName(StringRef Name) { } /// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). -/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type. -/// If the type name was changed, returns true and stores the new type name in `Dest`. -/// Otherwise, returns false (`Dest` is unchanged). -static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) { +/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly +/// name for this type. If the type name was changed, returns true and stores +/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged). +static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, + std::string &Dest) { if (!TypeName.consume_front("p")) return false; @@ -389,9 +384,8 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // Convert LLVM-internal TBAA-style type names to C-style type names // (more user-friendly) std::string CStyleTypeName; - if (!ClUseTBAATypeNames) - if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) - TypeName = CStyleTypeName; + if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) + TypeName = CStyleTypeName; Constant *NameData = ConstantDataArray::getString(C, TypeName); SmallVector<Type *> TDSubTys; >From 62d29b91c330606b4ce603dc177fe37098448e24 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 20 Nov 2025 11:31:16 +0000 Subject: [PATCH 04/12] Rework pointer typename rewriting to occur in runtime rather than instrumentation, fix demangling for pointer names --- compiler-rt/lib/tysan/tysan.cpp | 54 ++++++++++++++++--- .../Instrumentation/TypeSanitizer.cpp | 34 +----------- 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 1c67adeba0fc5..76fa8f45ebe4f 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -22,6 +22,7 @@ #include "tysan/tysan.h" +#include <ctype.h> #include <stdint.h> #include <string.h> @@ -40,20 +41,62 @@ tysan_copy_types(const void *daddr, const void *saddr, uptr size) { internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr)); } -static const char *getDisplayName(const char *Name) { +/// Struct returned by `parseIndirectionPrefix`. +struct ParseIndirectionPrefixResult { + /// Level of indirection - 0 if the prefix is not found. + size_t Indirection; + /// Pointer to the remaining part of the name after the indirection prefix. + /// (This is the original pointer if the prefix is not found.) + const char *RemainingName; +}; + +/// Parses the "p{indirection} " prefix given to pointer type names in TBAA. +static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { + size_t CharIndex = 0; + + // Parse 'p'. + // This also handles the case of an empty string. + if (Name[CharIndex++] != 'p') + return {0, Name}; + + // Parse indirection level. + size_t Indirection = 0; + while (isdigit(Name[CharIndex])) { + const auto DigitValue = static_cast<size_t>(Name[CharIndex] - '0'); + Indirection = Indirection * 10 + DigitValue; + ++CharIndex; + } + + // Parse space. + if (Name[CharIndex++] != ' ') + return {0, Name}; + + return {Indirection, Name + CharIndex}; +} + +static void printDisplayName(const char *Name) { if (Name[0] == '\0') - return "<anonymous type>"; + Printf("<anonymous type>"); + + // Parse indirection prefix and remove it. + const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name); // Clang generates tags for C++ types that demangle as typeinfo. Remove the // prefix from the generated string. const char *TIPrefix = "typeinfo name for "; size_t TIPrefixLen = strlen(TIPrefix); - const char *DName = Symbolizer::GetOrInit()->Demangle(Name); + const char *DName = Symbolizer::GetOrInit()->Demangle(RemainingName); if (!internal_strncmp(DName, TIPrefix, TIPrefixLen)) DName += TIPrefixLen; - return DName; + // Print type name. + Printf("%s", DName); + + // Print asterisks for indirection (C pointer notation). + for (size_t i = 0; i < Indirection; ++i) { + Printf("*"); + } } static void printTDName(tysan_type_descriptor *td) { @@ -75,8 +118,7 @@ static void printTDName(tysan_type_descriptor *td) { } break; case TYSAN_STRUCT_TD: - Printf("%s", getDisplayName( - (char *)(td->Struct.Members + td->Struct.MemberCount))); + printDisplayName((char *)(td->Struct.Members + td->Struct.MemberCount)); break; } } diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index ab59c3e9de151..87eba5f2c5242 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -260,30 +260,6 @@ static std::string encodeName(StringRef Name) { return Output; } -/// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). -/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly -/// name for this type. If the type name was changed, returns true and stores -/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged). -static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, - std::string &Dest) { - if (!TypeName.consume_front("p")) - return false; - - int Indirection; - if (TypeName.consumeInteger(10, Indirection)) - return false; - - if (!TypeName.consume_front(" ")) - return false; - - Dest.clear(); - Dest.reserve(TypeName.size() + Indirection); // One * per indirection - Dest.append(TypeName); - Dest.append(Indirection, '*'); - - return true; -} - std::string TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD, TypeNameMapTy &TypeNames) { @@ -379,15 +355,7 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // [2, member count, [type pointer, offset]..., name] LLVMContext &C = MD->getContext(); - StringRef TypeName = NameNode->getString(); - - // Convert LLVM-internal TBAA-style type names to C-style type names - // (more user-friendly) - std::string CStyleTypeName; - if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) - TypeName = CStyleTypeName; - - Constant *NameData = ConstantDataArray::getString(C, TypeName); + Constant *NameData = ConstantDataArray::getString(C, NameNode->getString()); SmallVector<Type *> TDSubTys; SmallVector<Constant *> TDSubData; >From c02782655c5a061ee28e3456abe7f188aec082a4 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 20 Nov 2025 11:46:43 +0000 Subject: [PATCH 05/12] Fix test --- clang/test/CodeGen/sanitize-type-globals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp b/clang/test/CodeGen/sanitize-type-globals.cpp index 1300396795ff3..1154ab4ca5df2 100644 --- a/clang/test/CodeGen/sanitize-type-globals.cpp +++ b/clang/test/CodeGen/sanitize-type-globals.cpp @@ -13,7 +13,7 @@ // CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat // CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat // CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 x i8] c"any pointer\00" }, comdat -// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] c"int*\00" }, comdat +// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 int\00" }, comdat // CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat // CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat // CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr @__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata" >From c69a74e435beb2a178cbf301f2498fe9a17d7ad8 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 27 Nov 2025 15:26:53 +0000 Subject: [PATCH 06/12] Add regression test for pointer typename formatting --- .../test/tysan/pointer_typename_demangling.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 compiler-rt/test/tysan/pointer_typename_demangling.cpp diff --git a/compiler-rt/test/tysan/pointer_typename_demangling.cpp b/compiler-rt/test/tysan/pointer_typename_demangling.cpp new file mode 100644 index 0000000000000..72ad420825519 --- /dev/null +++ b/compiler-rt/test/tysan/pointer_typename_demangling.cpp @@ -0,0 +1,14 @@ +// RUN: %clangxx_tysan %s -o %t && %run %t 10 >%t.out.0 2>&1 +// RUN: FileCheck %s < %t.out.0 + +namespace fancy_namespace { +struct fancy_struct { + int member; +}; +} // namespace fancy_namespace + +int main() { + fancy_namespace::fancy_struct *x = new fancy_namespace::fancy_struct{42}; + *(float *)&x = 1.0f; + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type fancy_namespace::fancy_struct* +} >From 08a135196cd5b6e58a878543ca59f8cb5dab1bc3 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Fri, 28 Nov 2025 11:38:51 +0000 Subject: [PATCH 07/12] Refactor type name demangling to write into buffer --- compiler-rt/lib/tysan/tysan.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 76fa8f45ebe4f..f3c1d2c8adf9a 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -74,9 +74,12 @@ static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { return {Indirection, Name + CharIndex}; } -static void printDisplayName(const char *Name) { +static size_t writeDemangledTypeName(char *Buffer, size_t BufferSize, + const char *Name) { if (Name[0] == '\0') - Printf("<anonymous type>"); + return internal_snprintf(Buffer, BufferSize, "<anonymous type>"); + + size_t Written = 0; // Parse indirection prefix and remove it. const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name); @@ -91,12 +94,15 @@ static void printDisplayName(const char *Name) { DName += TIPrefixLen; // Print type name. - Printf("%s", DName); + Written += + internal_snprintf(&Buffer[Written], BufferSize - Written, "%s", DName); // Print asterisks for indirection (C pointer notation). for (size_t i = 0; i < Indirection; ++i) { - Printf("*"); + Written += internal_snprintf(&Buffer[Written], BufferSize - Written, "*"); } + + return Written; } static void printTDName(tysan_type_descriptor *td) { @@ -118,7 +124,12 @@ static void printTDName(tysan_type_descriptor *td) { } break; case TYSAN_STRUCT_TD: - printDisplayName((char *)(td->Struct.Members + td->Struct.MemberCount)); + constexpr size_t BufferSize = 512; + char Buffer[BufferSize]; + writeDemangledTypeName( + Buffer, BufferSize, + (char *)(td->Struct.Members + td->Struct.MemberCount)); + Printf("%s", Buffer); break; } } >From 7a44629225d7a2d12e26279ca04f76aadc7d9883 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Fri, 28 Nov 2025 11:59:42 +0000 Subject: [PATCH 08/12] Add comment explaining purpose of writeDemangledTypeName --- compiler-rt/lib/tysan/tysan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index f3c1d2c8adf9a..1e64e37a6c5db 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -74,6 +74,8 @@ static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { return {Indirection, Name + CharIndex}; } +/// Given a TBAA type descriptor name, this function demangles it, also +/// rewriting the `pN T` pointer notation with more conventional "T*" notation. static size_t writeDemangledTypeName(char *Buffer, size_t BufferSize, const char *Name) { if (Name[0] == '\0') >From 888c3660bb275e7d26effa7bb99ac7b88b530f02 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Fri, 28 Nov 2025 13:30:08 +0000 Subject: [PATCH 09/12] Refactor parseIndirectionPrefix to use internal_simple_strtoll --- compiler-rt/lib/tysan/tysan.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 1e64e37a6c5db..704ae86eb518f 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -52,26 +52,23 @@ struct ParseIndirectionPrefixResult { /// Parses the "p{indirection} " prefix given to pointer type names in TBAA. static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { - size_t CharIndex = 0; + const char *Remaining = Name; // Parse 'p'. // This also handles the case of an empty string. - if (Name[CharIndex++] != 'p') - return {0, Name}; + if (*Remaining != 'p') + return {0, Remaining}; + ++Remaining; // Parse indirection level. - size_t Indirection = 0; - while (isdigit(Name[CharIndex])) { - const auto DigitValue = static_cast<size_t>(Name[CharIndex] - '0'); - Indirection = Indirection * 10 + DigitValue; - ++CharIndex; - } + size_t Indirection = internal_simple_strtoll(Remaining, &Remaining, 10); // Parse space. - if (Name[CharIndex++] != ' ') + if (*Remaining != ' ') return {0, Name}; + ++Remaining; - return {Indirection, Name + CharIndex}; + return {Indirection, Remaining}; } /// Given a TBAA type descriptor name, this function demangles it, also >From 4856f460bcc0b5051d0bdb731a536290678e51c2 Mon Sep 17 00:00:00 2001 From: Benjamin Stott <[email protected]> Date: Tue, 9 Dec 2025 10:28:54 +0000 Subject: [PATCH 10/12] Drop braces Co-authored-by: Florian Hahn <[email protected]> --- compiler-rt/lib/tysan/tysan.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 704ae86eb518f..2089ec43dc745 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -97,9 +97,8 @@ static size_t writeDemangledTypeName(char *Buffer, size_t BufferSize, internal_snprintf(&Buffer[Written], BufferSize - Written, "%s", DName); // Print asterisks for indirection (C pointer notation). - for (size_t i = 0; i < Indirection; ++i) { + for (size_t i = 0; i < Indirection; ++i) Written += internal_snprintf(&Buffer[Written], BufferSize - Written, "*"); - } return Written; } >From b490d5c3dd4b9302e4331d713ef01819abeb2e45 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 11 Dec 2025 10:17:13 +0000 Subject: [PATCH 11/12] Replace stack allocated buffer with vector to eliminate length limit, add test case for double indirection --- .../lib/sanitizer_common/sanitizer_printf.cpp | 14 ++-- .../lib/sanitizer_common/sanitizer_printf.h | 24 +++++++ .../lib/sanitizer_common/sanitizer_vector.h | 4 ++ compiler-rt/lib/tysan/tysan.cpp | 64 ++++++++++++++----- .../tysan/pointer_typename_demangling.cpp | 4 ++ 5 files changed, 87 insertions(+), 23 deletions(-) create mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_printf.h diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp index 24511720bd99f..d276c174be0ed 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp @@ -125,8 +125,8 @@ static int AppendPointer(char **buff, const char *buff_end, u64 ptr_value) { return result; } -int VSNPrintf(char *buff, int buff_length, - const char *format, va_list args) { +int internal_vsnprintf(char* buff, int buff_length, const char* format, + va_list args) { static const char *kPrintfFormatsHelp = "Supported Printf formats: %([0-9]*)?(z|l|ll)?{d,u,x,X}; %p; " "%[-]([0-9]*)?(\\.\\*)?s; %c\nProvided format: "; @@ -282,8 +282,8 @@ static void NOINLINE SharedPrintfCodeNoBuffer(bool append_pid, if (needed_length >= buffer_size) continue; } - needed_length += VSNPrintf(buffer + needed_length, - buffer_size - needed_length, format, args); + needed_length += internal_vsnprintf( + buffer + needed_length, buffer_size - needed_length, format, args); if (needed_length >= buffer_size) continue; // If the message fit into the buffer, print it and exit. @@ -332,7 +332,7 @@ void Report(const char *format, ...) { int internal_snprintf(char *buffer, uptr length, const char *format, ...) { va_list args; va_start(args, format); - int needed_length = VSNPrintf(buffer, length, format, args); + int needed_length = internal_vsnprintf(buffer, length, format, args); va_end(args); return needed_length; } @@ -352,8 +352,8 @@ void InternalScopedString::AppendF(const char *format, ...) { va_list args; va_start(args, format); - uptr sz = VSNPrintf(buffer_.data() + prev_len, buffer_.size() - prev_len, - format, args); + uptr sz = internal_vsnprintf(buffer_.data() + prev_len, + buffer_.size() - prev_len, format, args); va_end(args); if (sz < buffer_.size() - prev_len) { buffer_.resize(prev_len + sz + 1); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_printf.h b/compiler-rt/lib/sanitizer_common/sanitizer_printf.h new file mode 100644 index 0000000000000..fc4470a78204a --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_printf.h @@ -0,0 +1,24 @@ +//===-- sanitizer_printf.h ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is shared between AddressSanitizer and ThreadSanitizer. +// +// Declares the internal vsnprintf function, used inside run-time libraries. +// `internal_snprintf` is declared in sanitizer_libc. +//===----------------------------------------------------------------------===// + +#include <stdarg.h> + +#include "sanitizer_internal_defs.h" + +namespace __sanitizer { + +int internal_vsnprintf(char* buff, int buff_length, const char* format, + va_list args); + +} // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_vector.h b/compiler-rt/lib/sanitizer_common/sanitizer_vector.h index 79ff275660d36..7318ee3f91ea0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_vector.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_vector.h @@ -38,6 +38,10 @@ class Vector { last_ = 0; } + const T* Data() const { return begin_; } + + T* Data() { return begin_; } + uptr Size() const { return end_ - begin_; } diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 2089ec43dc745..47ff8662e886a 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -15,10 +15,13 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flag_parser.h" #include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_printf.h" #include "sanitizer_common/sanitizer_report_decorator.h" #include "sanitizer_common/sanitizer_stacktrace.h" #include "sanitizer_common/sanitizer_symbolizer.h" +#include "sanitizer_common/sanitizer_vector.h" #include "tysan/tysan.h" @@ -41,6 +44,36 @@ tysan_copy_types(const void *daddr, const void *saddr, uptr size) { internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr)); } +/// `printf` into a Vector<char> using sanitizer-internal libraries. +/// No null terminator is appended. +FORMAT(2, 3) +static void printfToVector(Vector<char> &V, const char *FormatString, ...) { + // Unlike regular vsnprintf, internal_vsnprintf does not accept a + // null/zero-length buffer. + char PretendBuffer[2]; + + va_list Args, ArgsCopy; + va_start(Args, FormatString); + va_copy(ArgsCopy, Args); + + const size_t CurrentSize = V.Size(); + const size_t AddedSize = + internal_vsnprintf(PretendBuffer, 2, FormatString, Args); + + // Allocate one byte extra for the null terminator inserted by + // internal_vsnprintf. + V.Resize(CurrentSize + AddedSize + 1); + + internal_vsnprintf(&V[CurrentSize], V.Size() - CurrentSize, FormatString, + ArgsCopy); + + // Remove the null terminator added by internal_vsnprintf. + V.PopBack(); + + va_end(ArgsCopy); + va_end(Args); +} + /// Struct returned by `parseIndirectionPrefix`. struct ParseIndirectionPrefixResult { /// Level of indirection - 0 if the prefix is not found. @@ -73,12 +106,12 @@ static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { /// Given a TBAA type descriptor name, this function demangles it, also /// rewriting the `pN T` pointer notation with more conventional "T*" notation. -static size_t writeDemangledTypeName(char *Buffer, size_t BufferSize, - const char *Name) { - if (Name[0] == '\0') - return internal_snprintf(Buffer, BufferSize, "<anonymous type>"); - - size_t Written = 0; +/// Warning: This function does not add a null terminator to the buffer. +static void writeDemangledTypeName(Vector<char> &Buffer, const char *Name) { + if (Name[0] == '\0') { + printfToVector(Buffer, "<anonymous type>"); + return; + } // Parse indirection prefix and remove it. const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name); @@ -93,14 +126,11 @@ static size_t writeDemangledTypeName(char *Buffer, size_t BufferSize, DName += TIPrefixLen; // Print type name. - Written += - internal_snprintf(&Buffer[Written], BufferSize - Written, "%s", DName); + printfToVector(Buffer, "%s", DName); // Print asterisks for indirection (C pointer notation). for (size_t i = 0; i < Indirection; ++i) - Written += internal_snprintf(&Buffer[Written], BufferSize - Written, "*"); - - return Written; + printfToVector(Buffer, "*"); } static void printTDName(tysan_type_descriptor *td) { @@ -122,12 +152,14 @@ static void printTDName(tysan_type_descriptor *td) { } break; case TYSAN_STRUCT_TD: - constexpr size_t BufferSize = 512; - char Buffer[BufferSize]; + Vector<char> Buffer; writeDemangledTypeName( - Buffer, BufferSize, - (char *)(td->Struct.Members + td->Struct.MemberCount)); - Printf("%s", Buffer); + Buffer, (char *)(td->Struct.Members + td->Struct.MemberCount)); + + // Append null terminator. + Buffer.PushBack('\0'); + Printf("%s", Buffer.Data()); + break; } } diff --git a/compiler-rt/test/tysan/pointer_typename_demangling.cpp b/compiler-rt/test/tysan/pointer_typename_demangling.cpp index 72ad420825519..c395000ca3ad2 100644 --- a/compiler-rt/test/tysan/pointer_typename_demangling.cpp +++ b/compiler-rt/test/tysan/pointer_typename_demangling.cpp @@ -11,4 +11,8 @@ int main() { fancy_namespace::fancy_struct *x = new fancy_namespace::fancy_struct{42}; *(float *)&x = 1.0f; // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type fancy_namespace::fancy_struct* + + fancy_namespace::fancy_struct **double_indirection = &x; + *(float *)&double_indirection = 1.0f; + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type fancy_namespace::fancy_struct** } >From 7e17b7879efb921fbe4ea416ea8f5d0e5919bef8 Mon Sep 17 00:00:00 2001 From: gbMattN <[email protected]> Date: Thu, 22 Jan 2026 17:35:54 +0000 Subject: [PATCH 12/12] Rearange vsnprintf --- .../lib/sanitizer_common/sanitizer_common.h | 5 ++++ .../lib/sanitizer_common/sanitizer_printf.h | 24 ------------------- compiler-rt/lib/tysan/tysan.cpp | 1 - 3 files changed, 5 insertions(+), 25 deletions(-) delete mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_printf.h diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index ba85a0eb5a35e..49fdcd8612130 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -15,6 +15,8 @@ #ifndef SANITIZER_COMMON_H #define SANITIZER_COMMON_H +#include <stdarg.h> + #include "sanitizer_flags.h" #include "sanitizer_internal_defs.h" #include "sanitizer_libc.h" @@ -668,6 +670,9 @@ class InternalScopedString { InternalMmapVector<char> buffer_; }; +int internal_vsnprintf(char* buff, int buff_length, const char* format, + va_list args); + template <class T> struct CompareLess { bool operator()(const T &a, const T &b) const { return a < b; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_printf.h b/compiler-rt/lib/sanitizer_common/sanitizer_printf.h deleted file mode 100644 index fc4470a78204a..0000000000000 --- a/compiler-rt/lib/sanitizer_common/sanitizer_printf.h +++ /dev/null @@ -1,24 +0,0 @@ -//===-- sanitizer_printf.h ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is shared between AddressSanitizer and ThreadSanitizer. -// -// Declares the internal vsnprintf function, used inside run-time libraries. -// `internal_snprintf` is declared in sanitizer_libc. -//===----------------------------------------------------------------------===// - -#include <stdarg.h> - -#include "sanitizer_internal_defs.h" - -namespace __sanitizer { - -int internal_vsnprintf(char* buff, int buff_length, const char* format, - va_list args); - -} // namespace __sanitizer diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 47ff8662e886a..68567216894e8 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -17,7 +17,6 @@ #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_libc.h" -#include "sanitizer_common/sanitizer_printf.h" #include "sanitizer_common/sanitizer_report_decorator.h" #include "sanitizer_common/sanitizer_stacktrace.h" #include "sanitizer_common/sanitizer_symbolizer.h" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
