https://github.com/slydiman updated https://github.com/llvm/llvm-project/pull/132274
>From ba94c683a3d652a14a13313653bc70d5430e7b83 Mon Sep 17 00:00:00 2001 From: Dmitry Vasilyev <dvassil...@accesssoftek.com> Date: Thu, 20 Mar 2025 21:50:51 +0400 Subject: [PATCH] [LLDB] Refactor Module::LookupInfo constructor and move out CPlusPlusLanguage::MethodName to break lldb-server dependencies This patch addresses the issue #129543. After this patch the size of lldb-server is reduced by 9MB. Based on https://github.com/swiftlang/llvm-project/pull/3240 by @bulbazord Alex Langford --- lldb/include/lldb/Target/Language.h | 5 + lldb/source/Core/CMakeLists.txt | 1 - lldb/source/Core/Module.cpp | 130 ++++---- lldb/source/Core/RichManglingContext.cpp | 2 +- .../Clang/ClangExpressionDeclMap.cpp | 11 +- lldb/source/Plugins/Language/CMakeLists.txt | 2 + .../Plugins/Language/CPlusPlus/CMakeLists.txt | 1 + .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 294 ++---------------- .../Language/CPlusPlus/CPlusPlusLanguage.h | 91 +----- .../CPlusPlus/CPlusPlusLanguageMethod.cpp | 279 +++++++++++++++++ .../CPlusPlus/CPlusPlusLanguageMethod.h | 106 +++++++ .../Plugins/Language/ObjC/ObjCLanguage.cpp | 15 + .../Plugins/Language/ObjC/ObjCLanguage.h | 3 + .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 6 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 4 +- 15 files changed, 510 insertions(+), 440 deletions(-) create mode 100644 lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp create mode 100644 lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index b699a90aff8e4..d095499bd596e 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -214,6 +214,11 @@ class Language : public PluginInterface { return std::vector<Language::MethodNameVariant>(); }; + virtual std::pair<lldb::FunctionNameType, llvm::StringRef> + GetFunctionNameInfo(ConstString name) const { + return std::pair{lldb::eFunctionNameTypeNone, llvm::StringRef()}; + }; + /// 
Returns true iff the given symbol name is compatible with the mangling /// scheme of this language. /// diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index 0a08da0fec230..62390104cd588 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -72,7 +72,6 @@ add_lldb_library(lldbCore lldbValueObject lldbVersion lldbPluginCPlusPlusLanguage - lldbPluginObjCLanguage ${LLDB_CURSES_LIBS} CLANG_LIBS diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index 53dc6fcde0381..43635b7e31ca4 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -52,8 +52,7 @@ #include "lldb/Host/windows/PosixApi.h" #endif -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" -#include "Plugins/Language/ObjC/ObjCLanguage.h" +#include "Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" @@ -641,98 +640,75 @@ void Module::FindCompileUnits(const FileSpec &path, Module::LookupInfo::LookupInfo(ConstString name, FunctionNameType name_type_mask, LanguageType language) - : m_name(name), m_lookup_name(), m_language(language) { - const char *name_cstr = name.GetCString(); + : m_name(name), m_lookup_name(name), m_language(language) { llvm::StringRef basename; - llvm::StringRef context; + + std::vector<Language *> languages; + auto collect_language_plugins = [&languages](Language *lang) { + languages.push_back(lang); + return true; + }; if (name_type_mask & eFunctionNameTypeAuto) { - if (CPlusPlusLanguage::IsCPPMangledName(name_cstr)) - m_name_type_mask = eFunctionNameTypeFull; - else if ((language == eLanguageTypeUnknown || - Language::LanguageIsObjC(language)) && - ObjCLanguage::IsPossibleObjCMethodName(name_cstr)) - m_name_type_mask = eFunctionNameTypeFull; - else if (Language::LanguageIsC(language)) { - m_name_type_mask = eFunctionNameTypeFull; + if (language == eLanguageTypeUnknown) { + 
Language::ForEach(collect_language_plugins); + for (Language *lang : languages) { + auto info = lang->GetFunctionNameInfo(name); + if (info.first != eFunctionNameTypeNone) { + m_name_type_mask |= info.first; + basename = info.second; + break; + } + } } else { - if ((language == eLanguageTypeUnknown || - Language::LanguageIsObjC(language)) && - ObjCLanguage::IsPossibleObjCSelector(name_cstr)) - m_name_type_mask |= eFunctionNameTypeSelector; - - CPlusPlusLanguage::MethodName cpp_method(name); - basename = cpp_method.GetBasename(); - if (basename.empty()) { - if (CPlusPlusLanguage::ExtractContextAndIdentifier(name_cstr, context, - basename)) - m_name_type_mask |= (eFunctionNameTypeMethod | eFunctionNameTypeBase); - else - m_name_type_mask |= eFunctionNameTypeFull; - } else { - m_name_type_mask |= (eFunctionNameTypeMethod | eFunctionNameTypeBase); + if (auto *lang = Language::FindPlugin(language)) { + auto info = lang->GetFunctionNameInfo(name); + m_name_type_mask = info.first; + basename = info.second; } } + + // NOTE: There are several ways to get here, but this is a fallback path in + // case the above does not succeed at extracting any useful information from + // the loaded language plugins. + if (m_name_type_mask == eFunctionNameTypeNone) + m_name_type_mask = eFunctionNameTypeFull; + } else { m_name_type_mask = name_type_mask; - if (name_type_mask & eFunctionNameTypeMethod || - name_type_mask & eFunctionNameTypeBase) { - // If they've asked for a CPP method or function name and it can't be - // that, we don't even need to search for CPP methods or names. 
- CPlusPlusLanguage::MethodName cpp_method(name); - if (cpp_method.IsValid()) { - basename = cpp_method.GetBasename(); - - if (!cpp_method.GetQualifiers().empty()) { - // There is a "const" or other qualifier following the end of the - // function parens, this can't be a eFunctionNameTypeBase - m_name_type_mask &= ~(eFunctionNameTypeBase); - if (m_name_type_mask == eFunctionNameTypeNone) - return; + if (language == eLanguageTypeUnknown) { + Language::ForEach(collect_language_plugins); + for (Language *lang : languages) { + auto info = lang->GetFunctionNameInfo(name); + if (info.first & m_name_type_mask) { + m_name_type_mask &= info.first; + basename = info.second; + break; } - } else { - // If the CPP method parser didn't manage to chop this up, try to fill - // in the base name if we can. If a::b::c is passed in, we need to just - // look up "c", and then we'll filter the result later. - CPlusPlusLanguage::ExtractContextAndIdentifier(name_cstr, context, - basename); - } - } - - if (name_type_mask & eFunctionNameTypeSelector) { - if (!ObjCLanguage::IsPossibleObjCSelector(name_cstr)) { - m_name_type_mask &= ~(eFunctionNameTypeSelector); - if (m_name_type_mask == eFunctionNameTypeNone) - return; } - } - - // Still try and get a basename in case someone specifies a name type mask - // of eFunctionNameTypeFull and a name like "A::func" - if (basename.empty()) { - if (name_type_mask & eFunctionNameTypeFull && - !CPlusPlusLanguage::IsCPPMangledName(name_cstr)) { - CPlusPlusLanguage::MethodName cpp_method(name); - basename = cpp_method.GetBasename(); - if (basename.empty()) - CPlusPlusLanguage::ExtractContextAndIdentifier(name_cstr, context, - basename); + } else { + if (auto *lang = Language::FindPlugin(language)) { + auto info = lang->GetFunctionNameInfo(name); + if (info.first & m_name_type_mask) { + // If the user asked for FunctionNameTypes that aren't possible, + // then filter those out. (e.g. 
asking for Selectors on + // C++ symbols, or even if the symbol given can't be a selector in + // ObjC) + m_name_type_mask &= info.first; + basename = info.second; + } } } } if (!basename.empty()) { - // The name supplied was a partial C++ path like "a::count". In this case - // we want to do a lookup on the basename "count" and then make sure any - // matching results contain "a::count" so that it would match "b::a::count" - // and "a::count". This is why we set "match_name_after_lookup" to true + // The name supplied was incomplete for lookup purposes. For example, in C++ + // we may have gotten something like "a::count". In this case, we want to do + // a lookup on the basename "count" and then make sure any matching results + // contain "a::count" so that it would match "b::a::count" and "a::count". + // This is why we set match_name_after_lookup to true. m_lookup_name.SetString(basename); m_match_name_after_lookup = true; - } else { - // The name is already correct, just use the exact name as supplied, and we - // won't need to check if any matches contain "name" - m_lookup_name = name; - m_match_name_after_lookup = false; } } diff --git a/lldb/source/Core/RichManglingContext.cpp b/lldb/source/Core/RichManglingContext.cpp index b68c9e11581b4..a0709903fdae4 100644 --- a/lldb/source/Core/RichManglingContext.cpp +++ b/lldb/source/Core/RichManglingContext.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Core/RichManglingContext.h" -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" +#include "Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h" #include "lldb/Utility/LLDBLog.h" #include "llvm/ADT/StringRef.h" diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 9e96f6557c7ba..aefd42aab0b0c 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ 
b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -56,7 +56,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/RecursiveASTVisitor.h" -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" +#include "Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h" #include "Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.h" #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" @@ -1809,10 +1809,11 @@ void ClangExpressionDeclMap::AddOneFunction(NameSearchContext &context, const auto lang = function->GetCompileUnit()->GetLanguage(); const auto name = function->GetMangled().GetMangledName().AsCString(); - const bool extern_c = (Language::LanguageIsC(lang) && - !CPlusPlusLanguage::IsCPPMangledName(name)) || - (Language::LanguageIsObjC(lang) && - !Language::LanguageIsCPlusPlus(lang)); + const bool extern_c = + (Language::LanguageIsC(lang) && + !CPlusPlusLanguage::MethodName::IsCPPMangledName(name)) || + (Language::LanguageIsObjC(lang) && + !Language::LanguageIsCPlusPlus(lang)); if (!extern_c) { TypeSystem *type_system = function->GetDeclContext().GetTypeSystem(); diff --git a/lldb/source/Plugins/Language/CMakeLists.txt b/lldb/source/Plugins/Language/CMakeLists.txt index 7869074566d1e..f447f694922ed 100644 --- a/lldb/source/Plugins/Language/CMakeLists.txt +++ b/lldb/source/Plugins/Language/CMakeLists.txt @@ -1,4 +1,6 @@ add_subdirectory(ClangCommon) + +# Preserve language plug-ins sorting. 
add_subdirectory(CPlusPlus) add_subdirectory(ObjC) add_subdirectory(ObjCPlusPlus) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt index ccdc4d0ae99b3..5b866ee8edc02 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt +++ b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt @@ -2,6 +2,7 @@ add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN BlockPointer.cpp Coroutines.cpp CPlusPlusLanguage.cpp + CPlusPlusLanguageMethod.cpp CPlusPlusNameParser.cpp CxxStringTypes.cpp GenericBitset.cpp diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 4b045d12ad494..2a4d332de2e1c 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -36,6 +36,7 @@ #include "lldb/ValueObject/ValueObjectVariable.h" #include "BlockPointer.h" +#include "CPlusPlusLanguageMethod.h" #include "CPlusPlusNameParser.h" #include "Coroutines.h" #include "CxxStringTypes.h" @@ -44,7 +45,6 @@ #include "LibCxxAtomic.h" #include "LibCxxVariant.h" #include "LibStdcpp.h" -#include "MSVCUndecoratedNameParser.h" #include "lldb/lldb-enumerations.h" using namespace lldb; @@ -62,9 +62,37 @@ void CPlusPlusLanguage::Terminate() { PluginManager::UnregisterPlugin(CreateInstance); } +std::pair<FunctionNameType, llvm::StringRef> +CPlusPlusLanguage::GetFunctionNameInfo(ConstString name) const { + if (MethodName::IsCPPMangledName(name.GetCString())) + return {eFunctionNameTypeFull, llvm::StringRef()}; + + FunctionNameType func_name_type = eFunctionNameTypeNone; + CPlusPlusLanguage::MethodName method(name); + llvm::StringRef basename = method.GetBasename(); + if (basename.empty()) { + llvm::StringRef context; + func_name_type |= + (CPlusPlusLanguage::MethodName::ExtractContextAndIdentifier( + name.GetCString(), context, basename) + ? 
(eFunctionNameTypeMethod | eFunctionNameTypeBase) + : eFunctionNameTypeFull); + } else { + func_name_type |= (eFunctionNameTypeMethod | eFunctionNameTypeBase); + } + + if (!method.GetQualifiers().empty()) { + // There is a 'const' or other qualifier following the end of the function + // parens, this can't be a eFunctionNameTypeBase. + func_name_type &= ~eFunctionNameTypeBase; + } + + return {func_name_type, basename}; +} + bool CPlusPlusLanguage::SymbolNameFitsToLanguage(Mangled mangled) const { const char *mangled_name = mangled.GetMangledName().GetCString(); - return mangled_name && CPlusPlusLanguage::IsCPPMangledName(mangled_name); + return mangled_name && MethodName::IsCPPMangledName(mangled_name); } ConstString CPlusPlusLanguage::GetDemangledFunctionNameWithoutArguments( @@ -106,74 +134,6 @@ Language *CPlusPlusLanguage::CreateInstance(lldb::LanguageType language) { return nullptr; } -void CPlusPlusLanguage::MethodName::Clear() { - m_full.Clear(); - m_basename = llvm::StringRef(); - m_context = llvm::StringRef(); - m_arguments = llvm::StringRef(); - m_qualifiers = llvm::StringRef(); - m_return_type = llvm::StringRef(); - m_parsed = false; - m_parse_error = false; -} - -static bool ReverseFindMatchingChars(const llvm::StringRef &s, - const llvm::StringRef &left_right_chars, - size_t &left_pos, size_t &right_pos, - size_t pos = llvm::StringRef::npos) { - assert(left_right_chars.size() == 2); - left_pos = llvm::StringRef::npos; - const char left_char = left_right_chars[0]; - const char right_char = left_right_chars[1]; - pos = s.find_last_of(left_right_chars, pos); - if (pos == llvm::StringRef::npos || s[pos] == left_char) - return false; - right_pos = pos; - uint32_t depth = 1; - while (pos > 0 && depth > 0) { - pos = s.find_last_of(left_right_chars, pos); - if (pos == llvm::StringRef::npos) - return false; - if (s[pos] == left_char) { - if (--depth == 0) { - left_pos = pos; - return left_pos < right_pos; - } - } else if (s[pos] == right_char) { - ++depth; - } - 
} - return false; -} - -static bool IsTrivialBasename(const llvm::StringRef &basename) { - // Check that the basename matches with the following regular expression - // "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation - // because it is significantly more efficient then using the general purpose - // regular expression library. - size_t idx = 0; - if (basename.starts_with('~')) - idx = 1; - - if (basename.size() <= idx) - return false; // Empty string or "~" - - if (!std::isalpha(basename[idx]) && basename[idx] != '_') - return false; // First character (after removing the possible '~'') isn't in - // [A-Za-z_] - - // Read all characters matching [A-Za-z_0-9] - ++idx; - while (idx < basename.size()) { - if (!std::isalnum(basename[idx]) && basename[idx] != '_') - break; - ++idx; - } - - // We processed all characters. It is a vaild basename. - return idx == basename.size(); -} - /// Writes out the function name in 'full_name' to 'out_stream' /// but replaces each argument type with the variable name /// and the corresponding pretty-printed value @@ -208,206 +168,12 @@ static bool PrettyPrintFunctionNameWithArgs(Stream &out_stream, return true; } -bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() { - // This method tries to parse simple method definitions which are presumably - // most comman in user programs. Definitions that can be parsed by this - // function don't have return types and templates in the name. 
- // A::B::C::fun(std::vector<T> &) const - size_t arg_start, arg_end; - llvm::StringRef full(m_full.GetCString()); - llvm::StringRef parens("()", 2); - if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { - m_arguments = full.substr(arg_start, arg_end - arg_start + 1); - if (arg_end + 1 < full.size()) - m_qualifiers = full.substr(arg_end + 1).ltrim(); - - if (arg_start == 0) - return false; - size_t basename_end = arg_start; - size_t context_start = 0; - size_t context_end = full.rfind(':', basename_end); - if (context_end == llvm::StringRef::npos) - m_basename = full.substr(0, basename_end); - else { - if (context_start < context_end) - m_context = full.substr(context_start, context_end - 1 - context_start); - const size_t basename_begin = context_end + 1; - m_basename = full.substr(basename_begin, basename_end - basename_begin); - } - - if (IsTrivialBasename(m_basename)) { - return true; - } else { - // The C++ basename doesn't match our regular expressions so this can't - // be a valid C++ method, clear everything out and indicate an error - m_context = llvm::StringRef(); - m_basename = llvm::StringRef(); - m_arguments = llvm::StringRef(); - m_qualifiers = llvm::StringRef(); - m_return_type = llvm::StringRef(); - return false; - } - } - return false; -} - -void CPlusPlusLanguage::MethodName::Parse() { - if (!m_parsed && m_full) { - if (TrySimplifiedParse()) { - m_parse_error = false; - } else { - CPlusPlusNameParser parser(m_full.GetStringRef()); - if (auto function = parser.ParseAsFunctionDefinition()) { - m_basename = function->name.basename; - m_context = function->name.context; - m_arguments = function->arguments; - m_qualifiers = function->qualifiers; - m_return_type = function->return_type; - m_parse_error = false; - } else { - m_parse_error = true; - } - } - m_parsed = true; - } -} - -llvm::StringRef CPlusPlusLanguage::MethodName::GetBasename() { - if (!m_parsed) - Parse(); - return m_basename; -} - -llvm::StringRef 
CPlusPlusLanguage::MethodName::GetContext() { - if (!m_parsed) - Parse(); - return m_context; -} - -llvm::StringRef CPlusPlusLanguage::MethodName::GetArguments() { - if (!m_parsed) - Parse(); - return m_arguments; -} - -llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() { - if (!m_parsed) - Parse(); - return m_qualifiers; -} - -llvm::StringRef CPlusPlusLanguage::MethodName::GetReturnType() { - if (!m_parsed) - Parse(); - return m_return_type; -} - -std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() { - if (!m_parsed) - Parse(); - if (m_context.empty()) - return std::string(m_basename); - - std::string res; - res += m_context; - res += "::"; - res += m_basename; - return res; -} - -llvm::StringRef -CPlusPlusLanguage::MethodName::GetBasenameNoTemplateParameters() { - llvm::StringRef basename = GetBasename(); - size_t arg_start, arg_end; - llvm::StringRef parens("<>", 2); - if (ReverseFindMatchingChars(basename, parens, arg_start, arg_end)) - return basename.substr(0, arg_start); - - return basename; -} - -bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { - if (!m_parsed) - Parse(); - - // If we can't parse the incoming name, then just check that it contains path. - if (m_parse_error) - return m_full.GetStringRef().contains(path); - - llvm::StringRef identifier; - llvm::StringRef context; - std::string path_str = path.str(); - bool success = CPlusPlusLanguage::ExtractContextAndIdentifier( - path_str.c_str(), context, identifier); - if (!success) - return m_full.GetStringRef().contains(path); - - // Basename may include template arguments. - // E.g., - // GetBaseName(): func<int> - // identifier : func - // - // ...but we still want to account for identifiers with template parameter - // lists, e.g., when users set breakpoints on template specializations. - // - // E.g., - // GetBaseName(): func<uint32_t> - // identifier : func<int32_t*> - // - // Try to match the basename with or without template parameters. 
- if (GetBasename() != identifier && - GetBasenameNoTemplateParameters() != identifier) - return false; - - // Incoming path only had an identifier, so we match. - if (context.empty()) - return true; - // Incoming path has context but this method does not, no match. - if (m_context.empty()) - return false; - - llvm::StringRef haystack = m_context; - if (!haystack.consume_back(context)) - return false; - if (haystack.empty() || !isalnum(haystack.back())) - return true; - - return false; -} - -bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) { - // FIXME!! we should really run through all the known C++ Language plugins - // and ask each one if this is a C++ mangled name - - Mangled::ManglingScheme scheme = Mangled::GetManglingScheme(name); - - if (scheme == Mangled::eManglingSchemeNone) - return false; - - return true; -} - bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path, ConstString demangled) const { MethodName demangled_name(demangled); return demangled_name.ContainsPath(path); } -bool CPlusPlusLanguage::ExtractContextAndIdentifier( - const char *name, llvm::StringRef &context, llvm::StringRef &identifier) { - if (MSVCUndecoratedNameParser::IsMSVCUndecoratedName(name)) - return MSVCUndecoratedNameParser::ExtractContextAndIdentifier(name, context, - identifier); - - CPlusPlusNameParser parser(name); - if (auto full_name = parser.ParseAsFullName()) { - identifier = full_name->basename; - context = full_name->context; - return true; - } - return false; -} - namespace { class NodeAllocator { llvm::BumpPtrAllocator Alloc; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 623d481bf117f..2946dca436aa1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -25,75 +25,7 @@ class CPlusPlusLanguage : public Language { ClangHighlighter m_highlighter; public: - class 
MethodName { - public: - MethodName() - : m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers() {} - - MethodName(ConstString s) - : m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(), - m_parsed(false), m_parse_error(false) {} - - void Clear(); - - bool IsValid() { - if (!m_parsed) - Parse(); - if (m_parse_error) - return false; - return (bool)m_full; - } - - ConstString GetFullName() const { return m_full; } - - std::string GetScopeQualifiedName(); - - llvm::StringRef GetBasename(); - - llvm::StringRef GetContext(); - - llvm::StringRef GetArguments(); - - llvm::StringRef GetQualifiers(); - - /// Returns the methods return-type. - /// - /// Currently returns an empty llvm::StringRef - /// if the return-type is a function pointer. - llvm::StringRef GetReturnType(); - - bool ContainsPath(llvm::StringRef path); - - private: - /// Returns the Basename of this method without a template parameter - /// list, if any. - /// - // Examples: - // - // +--------------------------------+---------+ - // | MethodName | Returns | - // +--------------------------------+---------+ - // | void func() | func | - // | void func<int>() | func | - // | void func<std::vector<int>>() | func | - // +--------------------------------+---------+ - llvm::StringRef GetBasenameNoTemplateParameters(); - - protected: - void Parse(); - bool TrySimplifiedParse(); - - ConstString m_full; // Full name: - // "size_t lldb::SBTarget::GetBreakpointAtIndex(unsigned - // int) const" - llvm::StringRef m_basename; // Basename: "GetBreakpointAtIndex" - llvm::StringRef m_context; // Decl context: "lldb::SBTarget" - llvm::StringRef m_arguments; // Arguments: "(unsigned int)" - llvm::StringRef m_qualifiers; // Qualifiers: "const" - llvm::StringRef m_return_type; // Return type: "size_t" - bool m_parsed = false; - bool m_parse_error = false; - }; + class MethodName; CPlusPlusLanguage() = default; @@ -130,6 +62,9 @@ class CPlusPlusLanguage : public Language { static llvm::StringRef 
GetPluginNameStatic() { return "cplusplus"; } + std::pair<lldb::FunctionNameType, llvm::StringRef> + GetFunctionNameInfo(ConstString name) const override; + bool SymbolNameFitsToLanguage(Mangled mangled) const override; bool DemangledNameContainsPath(llvm::StringRef path, @@ -143,24 +78,6 @@ class CPlusPlusLanguage : public Language { FunctionNameRepresentation representation, Stream &s) override; - static bool IsCPPMangledName(llvm::StringRef name); - - // Extract C++ context and identifier from a string using heuristic matching - // (as opposed to - // CPlusPlusLanguage::MethodName which has to have a fully qualified C++ name - // with parens and arguments. - // If the name is a lone C identifier (e.g. C) or a qualified C identifier - // (e.g. A::B::C) it will return true, - // and identifier will be the identifier (C and C respectively) and the - // context will be "" and "A::B" respectively. - // If the name fails the heuristic matching for a qualified or unqualified - // C/C++ identifier, then it will return false - // and identifier and context will be unchanged. - - static bool ExtractContextAndIdentifier(const char *name, - llvm::StringRef &context, - llvm::StringRef &identifier); - std::vector<ConstString> GenerateAlternateFunctionManglings(const ConstString mangled) const override; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp new file mode 100644 index 0000000000000..a0fae81cc9bbe --- /dev/null +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp @@ -0,0 +1,279 @@ +//===-- CPlusPlusLanguageMethod.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CPlusPlusLanguageMethod.h" + +#include "lldb/Core/Mangled.h" + +#include "CPlusPlusNameParser.h" +#include "MSVCUndecoratedNameParser.h" + +using namespace lldb; +using namespace lldb_private; + +static bool ReverseFindMatchingChars(const llvm::StringRef &s, + const llvm::StringRef &left_right_chars, + size_t &left_pos, size_t &right_pos, + size_t pos = llvm::StringRef::npos) { + assert(left_right_chars.size() == 2); + left_pos = llvm::StringRef::npos; + const char left_char = left_right_chars[0]; + const char right_char = left_right_chars[1]; + pos = s.find_last_of(left_right_chars, pos); + if (pos == llvm::StringRef::npos || s[pos] == left_char) + return false; + right_pos = pos; + uint32_t depth = 1; + while (pos > 0 && depth > 0) { + pos = s.find_last_of(left_right_chars, pos); + if (pos == llvm::StringRef::npos) + return false; + if (s[pos] == left_char) { + if (--depth == 0) { + left_pos = pos; + return left_pos < right_pos; + } + } else if (s[pos] == right_char) { + ++depth; + } + } + return false; +} + +static bool IsTrivialBasename(const llvm::StringRef &basename) { + // Check that the basename matches with the following regular expression + // "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation + // because it is significantly more efficient than using the general purpose + // regular expression library.
+ size_t idx = 0; + if (basename.starts_with('~')) + idx = 1; + + if (basename.size() <= idx) + return false; // Empty string or "~" + + if (!std::isalpha(basename[idx]) && basename[idx] != '_') + return false; // First character (after removing the possible '~') isn't in + // [A-Za-z_] + + // Read all characters matching [A-Za-z_0-9] + ++idx; + while (idx < basename.size()) { + if (!std::isalnum(basename[idx]) && basename[idx] != '_') + break; + ++idx; + } + + // We processed all characters. It is a valid basename. + return idx == basename.size(); +} + +bool CPlusPlusLanguage::MethodName::IsCPPMangledName(llvm::StringRef name) { + // FIXME!! we should really run through all the known C++ Language plugins + // and ask each one if this is a C++ mangled name + + Mangled::ManglingScheme scheme = Mangled::GetManglingScheme(name); + + if (scheme == Mangled::eManglingSchemeNone) + return false; + + return true; +} + +bool CPlusPlusLanguage::MethodName::ExtractContextAndIdentifier( + const char *name, llvm::StringRef &context, llvm::StringRef &identifier) { + if (MSVCUndecoratedNameParser::IsMSVCUndecoratedName(name)) + return MSVCUndecoratedNameParser::ExtractContextAndIdentifier(name, context, + identifier); + + CPlusPlusNameParser parser(name); + if (auto full_name = parser.ParseAsFullName()) { + identifier = full_name->basename; + context = full_name->context; + return true; + } + return false; +} + +void CPlusPlusLanguage::MethodName::Clear() { + m_full.Clear(); + m_basename = llvm::StringRef(); + m_context = llvm::StringRef(); + m_arguments = llvm::StringRef(); + m_qualifiers = llvm::StringRef(); + m_return_type = llvm::StringRef(); + m_parsed = false; + m_parse_error = false; +} + +bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() { + // This method tries to parse simple method definitions which are presumably + // most common in user programs. Definitions that can be parsed by this + // function don't have return types and templates in the name.
+ // A::B::C::fun(std::vector<T> &) const + size_t arg_start, arg_end; + llvm::StringRef full(m_full.GetCString()); + llvm::StringRef parens("()", 2); + if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { + m_arguments = full.substr(arg_start, arg_end - arg_start + 1); + if (arg_end + 1 < full.size()) + m_qualifiers = full.substr(arg_end + 1).ltrim(); + + if (arg_start == 0) + return false; + size_t basename_end = arg_start; + size_t context_start = 0; + size_t context_end = full.rfind(':', basename_end); + if (context_end == llvm::StringRef::npos) + m_basename = full.substr(0, basename_end); + else { + if (context_start < context_end) + m_context = full.substr(context_start, context_end - 1 - context_start); + const size_t basename_begin = context_end + 1; + m_basename = full.substr(basename_begin, basename_end - basename_begin); + } + + if (IsTrivialBasename(m_basename)) { + return true; + } else { + // The C++ basename doesn't match our regular expressions so this can't + // be a valid C++ method, clear everything out and indicate an error + m_context = llvm::StringRef(); + m_basename = llvm::StringRef(); + m_arguments = llvm::StringRef(); + m_qualifiers = llvm::StringRef(); + m_return_type = llvm::StringRef(); + return false; + } + } + return false; +} + +void CPlusPlusLanguage::MethodName::Parse() { + if (!m_parsed && m_full) { + if (TrySimplifiedParse()) { + m_parse_error = false; + } else { + CPlusPlusNameParser parser(m_full.GetStringRef()); + if (auto function = parser.ParseAsFunctionDefinition()) { + m_basename = function->name.basename; + m_context = function->name.context; + m_arguments = function->arguments; + m_qualifiers = function->qualifiers; + m_return_type = function->return_type; + m_parse_error = false; + } else { + m_parse_error = true; + } + } + m_parsed = true; + } +} + +llvm::StringRef CPlusPlusLanguage::MethodName::GetBasename() { + if (!m_parsed) + Parse(); + return m_basename; +} + +llvm::StringRef 
CPlusPlusLanguage::MethodName::GetContext() { + if (!m_parsed) + Parse(); + return m_context; +} + +llvm::StringRef CPlusPlusLanguage::MethodName::GetArguments() { + if (!m_parsed) + Parse(); + return m_arguments; +} + +llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() { + if (!m_parsed) + Parse(); + return m_qualifiers; +} + +llvm::StringRef CPlusPlusLanguage::MethodName::GetReturnType() { + if (!m_parsed) + Parse(); + return m_return_type; +} + +std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() { + if (!m_parsed) + Parse(); + if (m_context.empty()) + return std::string(m_basename); + + std::string res; + res += m_context; + res += "::"; + res += m_basename; + return res; +} + +llvm::StringRef +CPlusPlusLanguage::MethodName::GetBasenameNoTemplateParameters() { + llvm::StringRef basename = GetBasename(); + size_t arg_start, arg_end; + llvm::StringRef parens("<>", 2); + if (ReverseFindMatchingChars(basename, parens, arg_start, arg_end)) + return basename.substr(0, arg_start); + + return basename; +} + +bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) { + if (!m_parsed) + Parse(); + + // If we can't parse the incoming name, then just check that it contains path. + if (m_parse_error) + return m_full.GetStringRef().contains(path); + + llvm::StringRef identifier; + llvm::StringRef context; + std::string path_str = path.str(); + bool success = + ExtractContextAndIdentifier(path_str.c_str(), context, identifier); + if (!success) + return m_full.GetStringRef().contains(path); + + // Basename may include template arguments. + // E.g., + // GetBaseName(): func<int> + // identifier : func + // + // ...but we still want to account for identifiers with template parameter + // lists, e.g., when users set breakpoints on template specializations. + // + // E.g., + // GetBaseName(): func<uint32_t> + // identifier : func<int32_t*> + // + // Try to match the basename with or without template parameters. 
+ if (GetBasename() != identifier && + GetBasenameNoTemplateParameters() != identifier) + return false; + + // Incoming path only had an identifier, so we match. + if (context.empty()) + return true; + // Incoming path has context but this method does not, no match. + if (m_context.empty()) + return false; + + llvm::StringRef haystack = m_context; + if (!haystack.consume_back(context)) + return false; + if (haystack.empty() || !isalnum(haystack.back())) + return true; + + return false; +} diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h new file mode 100644 index 0000000000000..d591dcf7f93a6 --- /dev/null +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h @@ -0,0 +1,106 @@ +//===-- CPlusPlusLanguageMethod.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_CPLUSPLUS_CPLUSPLUSLANGUAGEMETHOD_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_CPLUSPLUS_CPLUSPLUSLANGUAGEMETHOD_H + +#include "CPlusPlusLanguage.h" + +namespace lldb_private { + +class CPlusPlusLanguage::MethodName { +public: + MethodName() + : m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers() {} + + MethodName(ConstString s) + : m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(), + m_parsed(false), m_parse_error(false) {} + + void Clear(); + + bool IsValid() { + if (!m_parsed) + Parse(); + if (m_parse_error) + return false; + return (bool)m_full; + } + + ConstString GetFullName() const { return m_full; } + + std::string GetScopeQualifiedName(); + + llvm::StringRef GetBasename(); + + llvm::StringRef GetContext(); + + llvm::StringRef GetArguments(); + + llvm::StringRef GetQualifiers(); + + /// Returns the method's return-type. + /// + /// Currently returns an empty llvm::StringRef + /// if the return-type is a function pointer. + llvm::StringRef GetReturnType(); + + bool ContainsPath(llvm::StringRef path); + + // Extract C++ context and identifier from a string using heuristic matching + // (as opposed to + // CPlusPlusLanguage::MethodName which has to have a fully qualified C++ name + // with parens and arguments). + // If the name is a lone C identifier (e.g. C) or a qualified C identifier + // (e.g. A::B::C) it will return true, + // and identifier will be the identifier (C and C respectively) and the + // context will be "" and "A::B" respectively. + // If the name fails the heuristic matching for a qualified or unqualified + // C/C++ identifier, then it will return false + // and identifier and context will be unchanged.
+ + static bool IsCPPMangledName(llvm::StringRef name); + + static bool ExtractContextAndIdentifier(const char *name, + llvm::StringRef &context, + llvm::StringRef &identifier); + +private: + /// Returns the Basename of this method without a template parameter + /// list, if any. + /// + // Examples: + // + // +--------------------------------+---------+ + // | MethodName | Returns | + // +--------------------------------+---------+ + // | void func() | func | + // | void func<int>() | func | + // | void func<std::vector<int>>() | func | + // +--------------------------------+---------+ + llvm::StringRef GetBasenameNoTemplateParameters(); + +protected: + void Parse(); + bool TrySimplifiedParse(); + + ConstString m_full; // Full name: + // "size_t lldb::SBTarget::GetBreakpointAtIndex(unsigned + // int) const" + llvm::StringRef m_basename; // Basename: "GetBreakpointAtIndex" + llvm::StringRef m_context; // Decl context: "lldb::SBTarget" + llvm::StringRef m_arguments; // Arguments: "(unsigned int)" + llvm::StringRef m_qualifiers; // Qualifiers: "const" + llvm::StringRef m_return_type; // Return type: "size_t" + bool m_parsed = false; + bool m_parse_error = false; +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_CPLUSPLUS_CPLUSPLUSLANGUAGEMETHOD_H diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp index 2ae203405cbba..1e6a969926e3b 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp @@ -222,6 +222,21 @@ ObjCLanguage::GetMethodNameVariants(ConstString method_name) const { return variant_names; } +std::pair<FunctionNameType, llvm::StringRef> +ObjCLanguage::GetFunctionNameInfo(ConstString name) const { + FunctionNameType func_name_type = eFunctionNameTypeNone; + + if (ObjCLanguage::IsPossibleObjCMethodName(name.GetCString())) { + func_name_type = eFunctionNameTypeFull; + } + + if 
(ObjCLanguage::IsPossibleObjCSelector(name.GetCString())) { + func_name_type |= eFunctionNameTypeSelector; + } + + return {func_name_type, llvm::StringRef()}; +} + bool ObjCLanguage::SymbolNameFitsToLanguage(Mangled mangled) const { ConstString demangled_name = mangled.GetDemangledName(); if (!demangled_name) diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index 6d265a9be5277..073ea1d730e52 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -142,6 +142,9 @@ class ObjCLanguage : public Language { std::vector<Language::MethodNameVariant> GetMethodNameVariants(ConstString method_name) const override; + std::pair<lldb::FunctionNameType, llvm::StringRef> + GetFunctionNameInfo(ConstString name) const override; + bool SymbolNameFitsToLanguage(Mangled mangled) const override; lldb::TypeCategoryImplSP GetFormatters() override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index b95159d882bc7..ae388c8c834ae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -31,7 +31,7 @@ #include "lldb/Utility/Timer.h" #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h" -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" +#include "Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" @@ -2333,8 +2333,8 @@ void SymbolFileDWARF::FindGlobalVariables( bool name_is_mangled = Mangled::GetManglingScheme(name.GetStringRef()) != Mangled::eManglingSchemeNone; - if (!CPlusPlusLanguage::ExtractContextAndIdentifier(name.GetCString(), - context, basename)) + if (!CPlusPlusLanguage::MethodName::ExtractContextAndIdentifier( + name.GetCString(), context, basename)) basename = name.GetStringRef(); // Loop invariant: Variables up 
to this index have been checked for context diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 352163ceaae9e..15c6f8f980aa1 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -53,7 +53,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" +#include "Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.h" #include "Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.h" #include "Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h" @@ -1279,7 +1279,7 @@ void SymbolFilePDB::CacheFunctionNames() { if (name.empty()) continue; - if (CPlusPlusLanguage::IsCPPMangledName(name.c_str())) { + if (CPlusPlusLanguage::MethodName::IsCPPMangledName(name.c_str())) { // PDB public symbol has mangled name for its associated function. if (auto vm_addr = pub_sym_up->getVirtualAddress()) { if (auto it = addr_ids.find(vm_addr); it != addr_ids.end()) _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits