https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/155483
Part of https://github.com/llvm/llvm-project/pull/149827 Allows us to use the mangling substitution facilities in CPlusPlusLanguage but also SymbolFileDWARF. Added tests now that they're "public". >From 0b07e48a1c338ca449094067fd462f148ef6d073 Mon Sep 17 00:00:00 2001 From: Michael Buch <michaelbuc...@gmail.com> Date: Thu, 14 Aug 2025 10:14:24 +0100 Subject: [PATCH] [lldb][CPlusPlusLanguage] Expose ManglingSubstitutor as static helpers Part of https://github.com/llvm/llvm-project/pull/149827 Allows us to use the mangling substitution facilities in CPlusPlusLanguage but also SymbolFileDWARF. Added tests now that they're "public". --- .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 322 ++++++++++-------- .../Language/CPlusPlus/CPlusPlusLanguage.h | 13 + .../Language/CPlusPlus/CMakeLists.txt | 1 + .../CPlusPlus/CPlusPlusLanguageTest.cpp | 158 +++++++++ 4 files changed, 360 insertions(+), 134 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index b4207439f5285..c39b529f7305a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -604,126 +604,6 @@ bool CPlusPlusLanguage::ExtractContextAndIdentifier( return false; } -namespace { -class NodeAllocator { - llvm::BumpPtrAllocator Alloc; - -public: - void reset() { Alloc.Reset(); } - - template <typename T, typename... Args> T *makeNode(Args &&...args) { - return new (Alloc.Allocate(sizeof(T), alignof(T))) - T(std::forward<Args>(args)...); - } - - void *allocateNodeArray(size_t sz) { - return Alloc.Allocate(sizeof(llvm::itanium_demangle::Node *) * sz, - alignof(llvm::itanium_demangle::Node *)); - } -}; - -template <typename Derived> -class ManglingSubstitutor - : public llvm::itanium_demangle::AbstractManglingParser<Derived, - NodeAllocator> { - using Base = - llvm::itanium_demangle::AbstractManglingParser<Derived, NodeAllocator>; - -public: - ManglingSubstitutor() : Base(nullptr, nullptr) {} - - template <typename... Ts> - ConstString substitute(llvm::StringRef Mangled, Ts &&...Vals) { - this->getDerived().reset(Mangled, std::forward<Ts>(Vals)...); - return substituteImpl(Mangled); - } - -protected: - void reset(llvm::StringRef Mangled) { - Base::reset(Mangled.begin(), Mangled.end()); - Written = Mangled.begin(); - Result.clear(); - Substituted = false; - } - - ConstString substituteImpl(llvm::StringRef Mangled) { - Log *log = GetLog(LLDBLog::Language); - if (this->parse() == nullptr) { - LLDB_LOG(log, "Failed to substitute mangling in {0}", Mangled); - return ConstString(); - } - if (!Substituted) - return ConstString(); - - // Append any trailing unmodified input. - appendUnchangedInput(); - LLDB_LOG(log, "Substituted mangling {0} -> {1}", Mangled, Result); - return ConstString(Result); - } - - void trySubstitute(llvm::StringRef From, llvm::StringRef To) { - if (!llvm::StringRef(currentParserPos(), this->numLeft()).starts_with(From)) - return; - - // We found a match. Append unmodified input up to this point. - appendUnchangedInput(); - - // And then perform the replacement. - Result += To; - Written += From.size(); - Substituted = true; - } - -private: - /// Input character until which we have constructed the respective output - /// already. - const char *Written = ""; - - llvm::SmallString<128> Result; - - /// Whether we have performed any substitutions. - bool Substituted = false; - - const char *currentParserPos() const { return this->First; } - - void appendUnchangedInput() { - Result += - llvm::StringRef(Written, std::distance(Written, currentParserPos())); - Written = currentParserPos(); - } -}; - -/// Given a mangled function `Mangled`, replace all the primitive function type -/// arguments of `Search` with type `Replace`. -class TypeSubstitutor : public ManglingSubstitutor<TypeSubstitutor> { - llvm::StringRef Search; - llvm::StringRef Replace; - -public: - void reset(llvm::StringRef Mangled, llvm::StringRef Search, - llvm::StringRef Replace) { - ManglingSubstitutor::reset(Mangled); - this->Search = Search; - this->Replace = Replace; - } - - llvm::itanium_demangle::Node *parseType() { - trySubstitute(Search, Replace); - return ManglingSubstitutor::parseType(); - } -}; - -class CtorDtorSubstitutor : public ManglingSubstitutor<CtorDtorSubstitutor> { -public: - llvm::itanium_demangle::Node * - parseCtorDtorName(llvm::itanium_demangle::Node *&SoFar, NameState *State) { - trySubstitute("C1", "C2"); - trySubstitute("D1", "D2"); - return ManglingSubstitutor::parseCtorDtorName(SoFar, State); - } -}; -} // namespace - std::vector<ConstString> CPlusPlusLanguage::GenerateAlternateFunctionManglings( const ConstString mangled_name) const { std::vector<ConstString> alternates; @@ -751,29 +631,49 @@ std::vector<ConstString> CPlusPlusLanguage::GenerateAlternateFunctionManglings( alternates.push_back(ConstString(fixed_scratch)); } - TypeSubstitutor TS; + auto *log = GetLog(LLDBLog::Language); + // `char` is implementation defined as either `signed` or `unsigned`. As a // result a char parameter has 3 possible manglings: 'c'-char, 'a'-signed // char, 'h'-unsigned char. If we're looking for symbols with a signed char // parameter, try finding matches which have the general case 'c'. - if (ConstString char_fixup = - TS.substitute(mangled_name.GetStringRef(), "a", "c")) - alternates.push_back(char_fixup); + if (auto char_fixup_or_err = + SubstituteType_ItaniumMangle(mangled_name.GetStringRef(), "a", "c")) { + // LLDB_LOG(log, "Substituted mangling {0} -> {1}", Mangled, Result); + if (*char_fixup_or_err) + alternates.push_back(*char_fixup_or_err); + } else + LLDB_LOG_ERROR(log, char_fixup_or_err.takeError(), + "Failed to substitute 'char' type mangling: {0}"); // long long parameter mangling 'x', may actually just be a long 'l' argument - if (ConstString long_fixup = - TS.substitute(mangled_name.GetStringRef(), "x", "l")) - alternates.push_back(long_fixup); + if (auto long_fixup_or_err = + SubstituteType_ItaniumMangle(mangled_name.GetStringRef(), "x", "l")) { + if (*long_fixup_or_err) + alternates.push_back(*long_fixup_or_err); + } else + LLDB_LOG_ERROR(log, long_fixup_or_err.takeError(), + "Failed to substitute 'long long' type mangling: {0}"); // unsigned long long parameter mangling 'y', may actually just be unsigned // long 'm' argument - if (ConstString ulong_fixup = - TS.substitute(mangled_name.GetStringRef(), "y", "m")) - alternates.push_back(ulong_fixup); - - if (ConstString ctor_fixup = - CtorDtorSubstitutor().substitute(mangled_name.GetStringRef())) - alternates.push_back(ctor_fixup); + if (auto ulong_fixup_or_err = + SubstituteType_ItaniumMangle(mangled_name.GetStringRef(), "y", "m")) { + if (*ulong_fixup_or_err) + alternates.push_back(*ulong_fixup_or_err); + } else + LLDB_LOG_ERROR( + log, ulong_fixup_or_err.takeError(), + "Failed to substitute 'unsigned long long' type mangling: {0}"); + + if (auto ctor_fixup_or_err = SubstituteStructorAliases_ItaniumMangle( + mangled_name.GetStringRef())) { + if (*ctor_fixup_or_err) { + alternates.push_back(*ctor_fixup_or_err); + } + } else + LLDB_LOG_ERROR(log, ctor_fixup_or_err.takeError(), + "Failed to substitute structor alias manglings: {0}"); return alternates; } @@ -2442,6 +2342,160 @@ bool CPlusPlusLanguage::HandleFrameFormatVariable( } } +namespace { +class NodeAllocator { + llvm::BumpPtrAllocator Alloc; + +public: + void reset() { Alloc.Reset(); } + + template <typename T, typename... Args> T *makeNode(Args &&...args) { + return new (Alloc.Allocate(sizeof(T), alignof(T))) + T(std::forward<Args>(args)...); + } + + void *allocateNodeArray(size_t sz) { + return Alloc.Allocate(sizeof(llvm::itanium_demangle::Node *) * sz, + alignof(llvm::itanium_demangle::Node *)); + } +}; + +template <typename Derived> +class ManglingSubstitutor + : public llvm::itanium_demangle::AbstractManglingParser<Derived, + NodeAllocator> { + using Base = + llvm::itanium_demangle::AbstractManglingParser<Derived, NodeAllocator>; + +public: + ManglingSubstitutor() : Base(nullptr, nullptr) {} + + template <typename... Ts> + llvm::Expected<ConstString> substitute(llvm::StringRef Mangled, + Ts &&...Vals) { + this->getDerived().reset(Mangled, std::forward<Ts>(Vals)...); + return substituteImpl(Mangled); + } + +protected: + void reset(llvm::StringRef Mangled) { + Base::reset(Mangled.begin(), Mangled.end()); + Written = Mangled.begin(); + Result.clear(); + Substituted = false; + } + + llvm::Expected<ConstString> substituteImpl(llvm::StringRef Mangled) { + if (this->parse() == nullptr) + return llvm::createStringError( + llvm::formatv("Failed to substitute mangling in '{0}'", Mangled)); + + if (!Substituted) + return ConstString(); + + // Append any trailing unmodified input. + appendUnchangedInput(); + return ConstString(Result); + } + + void trySubstitute(llvm::StringRef From, llvm::StringRef To) { + if (!llvm::StringRef(currentParserPos(), this->numLeft()).starts_with(From)) + return; + + // We found a match. Append unmodified input up to this point. + appendUnchangedInput(); + + // And then perform the replacement. + Result += To; + Written += From.size(); + Substituted = true; + } + +private: + /// Input character until which we have constructed the respective output + /// already. + const char *Written = ""; + + llvm::SmallString<128> Result; + + /// Whether we have performed any substitutions. + bool Substituted = false; + + const char *currentParserPos() const { return this->First; } + + void appendUnchangedInput() { + Result += + llvm::StringRef(Written, std::distance(Written, currentParserPos())); + Written = currentParserPos(); + } +}; + +/// Given a mangled function `Mangled`, replace all the primitive function type +/// arguments of `Search` with type `Replace`. +class TypeSubstitutor : public ManglingSubstitutor<TypeSubstitutor> { + llvm::StringRef Search; + llvm::StringRef Replace; + +public: + void reset(llvm::StringRef Mangled, llvm::StringRef Search, + llvm::StringRef Replace) { + ManglingSubstitutor::reset(Mangled); + this->Search = Search; + this->Replace = Replace; + } + + llvm::itanium_demangle::Node *parseType() { + trySubstitute(Search, Replace); + return ManglingSubstitutor::parseType(); + } +}; + +class CtorDtorSubstitutor : public ManglingSubstitutor<CtorDtorSubstitutor> { + llvm::StringRef Search; + llvm::StringRef Replace; + +public: + void reset(llvm::StringRef Mangled, llvm::StringRef Search, + llvm::StringRef Replace) { + ManglingSubstitutor::reset(Mangled); + this->Search = Search; + this->Replace = Replace; + } + + void reset(llvm::StringRef Mangled) { ManglingSubstitutor::reset(Mangled); } + + llvm::itanium_demangle::Node * + parseCtorDtorName(llvm::itanium_demangle::Node *&SoFar, NameState *State) { + if (!Search.empty() && !Replace.empty()) { + trySubstitute(Search, Replace); + } else { + trySubstitute("D1", "D2"); + trySubstitute("C1", "C2"); + } + return ManglingSubstitutor::parseCtorDtorName(SoFar, State); + } +}; +} // namespace + +llvm::Expected<ConstString> +CPlusPlusLanguage::SubstituteType_ItaniumMangle(llvm::StringRef mangled_name, + llvm::StringRef subst_from, + llvm::StringRef subst_to) { + return TypeSubstitutor().substitute(mangled_name, subst_from, subst_to); +} + +llvm::Expected<ConstString> CPlusPlusLanguage::SubstituteStructor_ItaniumMangle( + llvm::StringRef mangled_name, llvm::StringRef subst_from, + llvm::StringRef subst_to) { + return CtorDtorSubstitutor().substitute(mangled_name, subst_from, subst_to); +} + +llvm::Expected<ConstString> +CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + llvm::StringRef mangled_name) { + return CtorDtorSubstitutor().substitute(mangled_name); +} + #define LLDB_PROPERTIES_language_cplusplus #include "LanguageCPlusPlusProperties.inc" diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 4a30299dd2658..67b28bf202c3a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -164,6 +164,19 @@ class CPlusPlusLanguage : public Language { ConstString FindBestAlternateFunctionMangledName( const Mangled mangled, const SymbolContext &sym_ctx) const override; + static llvm::Expected<ConstString> + SubstituteType_ItaniumMangle(llvm::StringRef mangled_name, + llvm::StringRef subst_from, + llvm::StringRef subst_to); + + static llvm::Expected<ConstString> + SubstituteStructor_ItaniumMangle(llvm::StringRef mangled_name, + llvm::StringRef subst_from, + llvm::StringRef subst_to); + + static llvm::Expected<ConstString> + SubstituteStructorAliases_ItaniumMangle(llvm::StringRef mangled_name); + llvm::StringRef GetInstanceVariableName() override { return "this"; } FormatEntity::Entry GetFunctionNameFormat() const override; diff --git a/lldb/unittests/Language/CPlusPlus/CMakeLists.txt b/lldb/unittests/Language/CPlusPlus/CMakeLists.txt index 4882eafc8d854..1d96fcf3db1b8 100644 --- a/lldb/unittests/Language/CPlusPlus/CMakeLists.txt +++ b/lldb/unittests/Language/CPlusPlus/CMakeLists.txt @@ -3,4 +3,5 @@ add_lldb_unittest(LanguageCPlusPlusTests LINK_LIBS lldbPluginCPlusPlusLanguage + LLVMTestingSupport ) diff --git a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp index 957fb3f600499..92d49eb0f93ff 100644 --- a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -9,6 +9,8 @@ #include "Plugins/Language/CPlusPlus/CPlusPlusNameParser.h" #include "TestingSupport/SubsystemRAII.h" #include "lldb/lldb-enumerations.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include <optional> @@ -427,3 +429,159 @@ TEST(CPlusPlusLanguage, MatchesCxx) { Mangled msvcSymbol("??x@@3AH"); EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(msvcSymbol)); } + +struct ManglingSubstitutorTestCase { + llvm::StringRef mangled; + llvm::StringRef from; + llvm::StringRef to; + llvm::StringRef expected; + bool expect_error; +}; + +struct ManglingSubstitutorTestFixture + : public ::testing::TestWithParam<ManglingSubstitutorTestCase> {}; + +ManglingSubstitutorTestCase g_mangled_substitutor_type_test_cases[] = { + {/*.mangled*/ "_Z3fooa", /*from*/ "a", /*to*/ "c", /*expected*/ "_Z3fooc", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3fooy", /*from*/ "y", /*to*/ "m", /*expected*/ "_Z3foom", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3foox", /*from*/ "x", /*to*/ "l", /*expected*/ "_Z3fool", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3baraa", /*from*/ "a", /*to*/ "c", /*expected*/ "_Z3barcc", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3foov", /*from*/ "x", /*to*/ "l", /*expected*/ "", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3fooB3Tagv", /*from*/ "Tag", /*to*/ "random", + /*expected*/ "", /*expect_error*/ false}, + {/*.mangled*/ "_Z3foocc", /*from*/ "a", /*to*/ "c", /*expected*/ "", + /*expect_error*/ false}, + {/*.mangled*/ "_ZN3fooIaE3barIaEEvaT_", /*from*/ "a", /*to*/ "c", + /*expected*/ "_ZN3fooIcE3barIcEEvcT_", /*expect_error*/ false}, + {/*.mangled*/ "foo", /*from*/ "x", /*to*/ "l", /*expected*/ "", + /*expect_error*/ true}, + {/*.mangled*/ "", /*from*/ "x", /*to*/ "l", /*expected*/ "", + /*expect_error*/ true}, + // FIXME: these two cases are odd behaviours, though not realistic in + // practice. + {/*.mangled*/ "_Z3foox", /*from*/ "", /*to*/ "l", /*expected*/ "_Z3foolx", + /*expect_error*/ false}, + {/*.mangled*/ "_Z3foox", /*from*/ "x", /*to*/ "", /*expected*/ "_Z3foo", + /*expect_error*/ false}}; + +TEST_P(ManglingSubstitutorTestFixture, Type) { + // Tests the CPlusPlusLanguage::SubstituteType_ItaniumMangle API. + + const auto &[mangled, from, to, expected, expect_error] = GetParam(); + + auto subst_or_err = + CPlusPlusLanguage::SubstituteType_ItaniumMangle(mangled, from, to); + if (expect_error) { + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Failed()); + } else { + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_EQ(*subst_or_err, expected); + } +} + +INSTANTIATE_TEST_SUITE_P( + ManglingSubstitutorTypeTests, ManglingSubstitutorTestFixture, + ::testing::ValuesIn(g_mangled_substitutor_type_test_cases)); + +struct ManglingSubstitutorStructorTestFixture + : public ::testing::TestWithParam<ManglingSubstitutorTestCase> {}; + +ManglingSubstitutorTestCase g_mangled_substitutor_structor_test_cases[] = { + {/*.mangled*/ "_ZN3FooC1Ev", /*from*/ "C1", /*to*/ "C2", + /*expected*/ "_ZN3FooC2Ev", /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooC4Ev", /*from*/ "C4", /*to*/ "C2", + /*expected*/ "_ZN3FooC2Ev", /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooC2Ev", /*from*/ "C1", /*to*/ "C2", /*expected*/ "", + /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooD1Ev", /*from*/ "D1", /*to*/ "D2", + /*expected*/ "_ZN3FooD2Ev", /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooD2Ev", /*from*/ "D1", /*to*/ "D2", /*expected*/ "", + /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooD4Ev", /*from*/ "D4", /*to*/ "D2", + /*expected*/ "_ZN3FooD2Ev", /*expect_error*/ false}, + {/*.mangled*/ "_ZN2D12C1C1I2C12D1EE2C12D1", /*from*/ "C1", /*to*/ "C2", + /*expected*/ "_ZN2D12C1C2I2C12D1EE2C12D1", /*expect_error*/ false}, + {/*.mangled*/ "_ZN2D12C1D1I2C12D1EE2C12D1", /*from*/ "D1", /*to*/ "D2", + /*expected*/ "_ZN2D12C1D2I2C12D1EE2C12D1", /*expect_error*/ false}, + {/*.mangled*/ "_ZN3FooC6Ev", /*from*/ "D1", /*to*/ "D2", /*expected*/ "", + /*expect_error*/ true}, +}; + +TEST_P(ManglingSubstitutorStructorTestFixture, Structors) { + // Tests the CPlusPlusLanguage::SubstituteStructor_ItaniumMangle API. + + const auto &[mangled, from, to, expected, expect_error] = GetParam(); + + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructor_ItaniumMangle(mangled, from, to); + if (expect_error) { + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Failed()); + } else { + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_EQ(*subst_or_err, expected); + } +} + +INSTANTIATE_TEST_SUITE_P( + ManglingSubstitutorStructorTests, ManglingSubstitutorStructorTestFixture, + ::testing::ValuesIn(g_mangled_substitutor_structor_test_cases)); + +TEST(CPlusPlusLanguage, ManglingSubstitutor_StructorAlias) { + // Tests the CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle API. + { + // Invalid mangling. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle("Foo"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Failed()); + } + + { + // Ctor C1 alias. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + "_ZN3FooC1Ev"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_EQ(*subst_or_err, "_ZN3FooC2Ev"); + } + + { + // Dtor D1 alias. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + "_ZN3FooD1Ev"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_EQ(*subst_or_err, "_ZN3FooD2Ev"); + } + + { + // Ctor C2 not aliased. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + "_ZN3FooC2Ev"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_FALSE(*subst_or_err); + } + + { + // Dtor D2 not aliased. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + "_ZN3FooD2Ev"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_FALSE(*subst_or_err); + } + + { + // Check that variants in other parts of the name don't get replaced. + auto subst_or_err = + CPlusPlusLanguage::SubstituteStructorAliases_ItaniumMangle( + "_ZN2D12C1C1I2C12D1EE2C12D1"); + EXPECT_THAT_EXPECTED(subst_or_err, llvm::Succeeded()); + EXPECT_EQ(*subst_or_err, "_ZN2D12C1C2I2C12D1EE2C12D1"); + } +} _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits