https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/137072
When doing function lookup in an LLDB module by name, we may need to hand-parse C++ method names to determine properties of the function name such as its "basename". LLDB currently has two ways of parsing methods: 1. Using `TrySimplifiedParse`, which was historically the go-to parser 2. Using the `CPlusPlusNameParser`, which is the more recent parser and has more knowledge of C++ syntax When `CPlusPlusNameParser` was introduced (https://reviews.llvm.org/D31451), `TrySimplifiedParse` was kept around as the first parser to keep performance in check. Though the performance difference didn't seem very convincing from the numbers provided. 11s vs 10s when setting a breakpoint in a project with 500k functions. The main difference being that we're invoking the clang::Lexer in the new parser. It would be nice to just get rid of it entirely. There's an edge-case for which I added a test-case where the `TrySimplifiedParse` code-path succeeds but incorrectly deduces the basename of `operator()`. Happy to do benchmarking to see what the performance impact of this is these days. >From 91fc35b07ca2a282886b8c340d3dd37a6c881519 Mon Sep 17 00:00:00 2001 From: Michael Buch <michaelbuc...@gmail.com> Date: Wed, 23 Apr 2025 22:48:12 +0100 Subject: [PATCH] [lldb][CPlusPlus] Remove TrySimplifiedParse when hand-parsing C++ function names When doing function lookup in an LLDB module by name, we may need to hand-parse C++ method names to determine properties of the function name such as its "basename". LLDB currently has two ways of parsing methods: 1. Using `TrySimplifiedParse`, which was historically the go-to parser 2. Using the `CPlusPlusNameParser`, which is the more recent parser and has more knowledge of C++ syntax When `CPlusPlusNameParser` was introduced (https://reviews.llvm.org/D31451), `TrySimplifiedParse` was kept around as the first parser to keep performance in check. Though the performance difference didn't seem very convincing from the numbers provided. 11s vs 10s when setting a breakpoint in a project with 500k functions. The main difference being that we're invoking the clang::Lexer in the new parser. It would be nice to just get rid of it entirely. There's an edge-case for which I added a test-case where the `TrySimplifiedParse` code-path succeeds but incorrectly deduces the basename of `operator()`. Happy to do benchmarking to see what the performance impact of this is these days. --- .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 92 ++----------------- lldb/test/API/lang/cpp/operators/main.cpp | 2 + 2 files changed, 11 insertions(+), 83 deletions(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 40b6563aeb410..f8b53c0837a54 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -173,34 +173,6 @@ static bool ReverseFindMatchingChars(const llvm::StringRef &s, return false; } -static bool IsTrivialBasename(const llvm::StringRef &basename) { - // Check that the basename matches with the following regular expression - // "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation - // because it is significantly more efficient then using the general purpose - // regular expression library. - size_t idx = 0; - if (basename.starts_with('~')) - idx = 1; - - if (basename.size() <= idx) - return false; // Empty string or "~" - - if (!std::isalpha(basename[idx]) && basename[idx] != '_') - return false; // First character (after removing the possible '~'') isn't in - // [A-Za-z_] - - // Read all characters matching [A-Za-z_0-9] - ++idx; - while (idx < basename.size()) { - if (!std::isalnum(basename[idx]) && basename[idx] != '_') - break; - ++idx; - } - - // We processed all characters. It is a vaild basename. - return idx == basename.size(); -} - /// Writes out the function name in 'full_name' to 'out_stream' /// but replaces each argument type with the variable name /// and the corresponding pretty-printed value @@ -235,66 +207,20 @@ static bool PrettyPrintFunctionNameWithArgs(Stream &out_stream, return true; } -bool CPlusPlusLanguage::CxxMethodName::TrySimplifiedParse() { - // This method tries to parse simple method definitions which are presumably - // most comman in user programs. Definitions that can be parsed by this - // function don't have return types and templates in the name. - // A::B::C::fun(std::vector<T> &) const - size_t arg_start, arg_end; - llvm::StringRef full(m_full.GetCString()); - llvm::StringRef parens("()", 2); - if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { - m_arguments = full.substr(arg_start, arg_end - arg_start + 1); - if (arg_end + 1 < full.size()) - m_qualifiers = full.substr(arg_end + 1).ltrim(); - - if (arg_start == 0) - return false; - size_t basename_end = arg_start; - size_t context_start = 0; - size_t context_end = full.rfind(':', basename_end); - if (context_end == llvm::StringRef::npos) - m_basename = full.substr(0, basename_end); - else { - if (context_start < context_end) - m_context = full.substr(context_start, context_end - 1 - context_start); - const size_t basename_begin = context_end + 1; - m_basename = full.substr(basename_begin, basename_end - basename_begin); - } - - if (IsTrivialBasename(m_basename)) { - return true; - } else { - // The C++ basename doesn't match our regular expressions so this can't - // be a valid C++ method, clear everything out and indicate an error - m_context = llvm::StringRef(); - m_basename = llvm::StringRef(); - m_arguments = llvm::StringRef(); - m_qualifiers = llvm::StringRef(); - m_return_type = llvm::StringRef(); - return false; - } - } - return false; -} - void CPlusPlusLanguage::CxxMethodName::Parse() { if (!m_parsed && m_full) { - if (TrySimplifiedParse()) { + CPlusPlusNameParser parser(m_full.GetStringRef()); + if (auto function = parser.ParseAsFunctionDefinition()) { + m_basename = function->name.basename; + m_context = function->name.context; + m_arguments = function->arguments; + m_qualifiers = function->qualifiers; + m_return_type = function->return_type; m_parse_error = false; } else { - CPlusPlusNameParser parser(m_full.GetStringRef()); - if (auto function = parser.ParseAsFunctionDefinition()) { - m_basename = function->name.basename; - m_context = function->name.context; - m_arguments = function->arguments; - m_qualifiers = function->qualifiers; - m_return_type = function->return_type; - m_parse_error = false; - } else { - m_parse_error = true; - } + m_parse_error = true; } + if (m_context.empty()) { m_scope_qualified = std::string(m_basename); } else { diff --git a/lldb/test/API/lang/cpp/operators/main.cpp b/lldb/test/API/lang/cpp/operators/main.cpp index c52ef1c8cac47..139c16b02ffe9 100644 --- a/lldb/test/API/lang/cpp/operators/main.cpp +++ b/lldb/test/API/lang/cpp/operators/main.cpp @@ -174,6 +174,8 @@ int main(int argc, char **argv) { //% self.expect("expr (new struct C[1]); side_effect", endstr=" = 4\n") //% self.expect("expr delete c2; side_effect", endstr=" = 1\n") //% self.expect("expr delete[] c3; side_effect", endstr=" = 2\n") + //% self.expect("image lookup -n operator()", substrs=["C::operator()(int)"]) + //% self.expect("image lookup -n C::operator()", substrs=["C::operator()(int)"]) delete c2; delete[] c3; return 0; _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits