https://github.com/augusto2112 updated 
https://github.com/llvm/llvm-project/pull/181952

>From e09ca06c816e716146c6745e95faee6e9fd86503 Mon Sep 17 00:00:00 2001
From: Augusto Noronha <[email protected]>
Date: Tue, 17 Feb 2026 17:23:32 -0800
Subject: [PATCH] [lldb] Speed up SymbolContextList::AppendIfUnique

d7fb086668dff68 changed some calls from SymbolContextList::Append to
SymbolContextList::AppendIfUnique. This has unfortunately caused a huge
slow down in operations involving a large amount of symbol contexts (for
example, trying to autocomplete from an empty input "b <TAB>" will add
every function to the list), since AppendIfUnique scans the entire symbol
context list. Speed this up by adding a hash set to quickly answer whether
a symbol context is on the list or not.

This takes the time from running "b <TAB>" when debugging yaml2obj on my
machine from 600 seconds down to 13, which is about the same as before
d7fb086668dff68.

Note that AppendIfUnique has a logic error, which has been present since
its introduction. This has to do with the behavior controlled by
"merge_symbol_into_function", which will try to merge symbols with symbol
context containing the equivalent function to that symbol.

The previous patch tried to correct this by adding
CompareConsideringPossiblyNullSymbol(), which is not quite correct.

With CompareConsideringPossiblyNullSymbol(), if symbols are added in
this order:

- Symbol context without symbol.
- Equivalent symbol context with symbol.

The list will have only one symbol context WITHOUT the symbol.

If we stop using CompareConsideringPossiblyNullSymbol() and instead go
back to the == operator which d7fb086668dff68 introduced, with symbols
added in this order, the following will happen:

- Symbol context without symbol.
- Equivalent symbol context with symbol.
- The bare symbol, with "merge_symbol_into_function = true", the list
  will have the same symbol twice.

This patch does not attempt to solve this, and instead focuses on the
performance issue d7fb086668dff68 introduced.

rdar://170477680
---
 lldb/include/lldb/Symbol/SymbolContext.h |  15 ++++
 lldb/source/Symbol/SymbolContext.cpp     | 106 +++++++++++++++++++----
 2 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/lldb/include/lldb/Symbol/SymbolContext.h 
b/lldb/include/lldb/Symbol/SymbolContext.h
index 0834825cdbd25..f63d23eaefae3 100644
--- a/lldb/include/lldb/Symbol/SymbolContext.h
+++ b/lldb/include/lldb/Symbol/SymbolContext.h
@@ -19,6 +19,7 @@
 #include "lldb/Utility/Iterable.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/lldb-private.h"
+#include "llvm/ADT/DenseSet.h"
 
 namespace lldb_private {
 
@@ -483,6 +484,20 @@ class SymbolContextList {
   // Member variables.
   collection m_symbol_contexts; ///< The list of symbol contexts.
 
+private:
+  struct SymbolContextInfo {
+    static SymbolContext getEmptyKey();
+    static SymbolContext getTombstoneKey();
+    static unsigned getHashValue(const SymbolContext &sc);
+    static bool isEqual(const SymbolContext &lhs, const SymbolContext &rhs);
+  };
+
+  /// Threshold above which we switch from linear scan to hash-set lookup
+  /// for uniqueness checks.
+  static constexpr size_t kSetThreshold = 1024;
+
+  llvm::SmallDenseSet<SymbolContext, 1, SymbolContextInfo> m_lookup_set;
+
 public:
   const_iterator begin() const { return m_symbol_contexts.begin(); }
   const_iterator end() const { return m_symbol_contexts.end(); }
diff --git a/lldb/source/Symbol/SymbolContext.cpp 
b/lldb/source/Symbol/SymbolContext.cpp
index ead924afa9fc2..62b4091c6b5fb 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -28,6 +28,8 @@
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/StreamString.h"
 #include "lldb/lldb-enumerations.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -1191,18 +1193,66 @@ void SymbolContextSpecifier::GetDescription(
 //  SymbolContextList
 //
 
+// SymbolContextInfo implementations for DenseSet
+SymbolContext SymbolContextList::SymbolContextInfo::getEmptyKey() {
+  SymbolContext sc;
+  sc.function = llvm::DenseMapInfo<Function *>::getEmptyKey();
+  return sc;
+}
+
+SymbolContext SymbolContextList::SymbolContextInfo::getTombstoneKey() {
+  SymbolContext sc;
+  sc.function = llvm::DenseMapInfo<Function *>::getTombstoneKey();
+  return sc;
+}
+
+unsigned
+SymbolContextList::SymbolContextInfo::getHashValue(const SymbolContext &sc) {
+  // Hash all fields EXCEPT symbol, since CompareConsideringPossiblyNullSymbol
+  // ignores them.
+  auto line_entry_hash =
+      sc.line_entry.IsValid()
+          ? llvm::hash_combine(
+                sc.line_entry.range.GetBaseAddress().GetFileAddress(),
+                sc.line_entry.range.GetByteSize(),
+                sc.line_entry.is_terminal_entry, sc.line_entry.line,
+                sc.line_entry.column)
+          : llvm::hash_value(0);
+  return static_cast<unsigned>(llvm::hash_combine(
+      sc.function, sc.module_sp.get(), sc.comp_unit, sc.target_sp.get(),
+      line_entry_hash, sc.variable, sc.block));
+}
+
+bool SymbolContextList::SymbolContextInfo::isEqual(const SymbolContext &lhs,
+                                                   const SymbolContext &rhs) {
+  // Check for empty/tombstone keys first, since these are invalid pointers we
+  // don't want to accidentally dereference them in
+  // CompareConsideringPossiblyNullSymbol.
+  if (lhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() ||
+      rhs.function == llvm::DenseMapInfo<Function *>::getEmptyKey() ||
+      lhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey() ||
+      rhs.function == llvm::DenseMapInfo<Function *>::getTombstoneKey())
+    return lhs.function == rhs.function;
+
+  return SymbolContext::CompareConsideringPossiblyNullSymbol(lhs, rhs);
+}
+
 SymbolContextList::SymbolContextList() : m_symbol_contexts() {}
 
 SymbolContextList::~SymbolContextList() = default;
 
 void SymbolContextList::Append(const SymbolContext &sc) {
   m_symbol_contexts.push_back(sc);
+  if (!m_lookup_set.empty())
+    m_lookup_set.insert(sc);
 }
 
 void SymbolContextList::Append(const SymbolContextList &sc_list) {
-  collection::const_iterator pos, end = sc_list.m_symbol_contexts.end();
-  for (pos = sc_list.m_symbol_contexts.begin(); pos != end; ++pos)
-    m_symbol_contexts.push_back(*pos);
+  for (const auto &sc : sc_list.m_symbol_contexts) {
+    m_symbol_contexts.push_back(sc);
+    if (!m_lookup_set.empty())
+      m_lookup_set.insert(sc);
+  }
 }
 
 uint32_t SymbolContextList::AppendIfUnique(const SymbolContextList &sc_list,
@@ -1218,30 +1268,44 @@ uint32_t SymbolContextList::AppendIfUnique(const 
SymbolContextList &sc_list,
 
 bool SymbolContextList::AppendIfUnique(const SymbolContext &sc,
                                        bool merge_symbol_into_function) {
-  collection::iterator pos, end = m_symbol_contexts.end();
-  for (pos = m_symbol_contexts.begin(); pos != end; ++pos) {
-    // Because symbol contexts might first be built without the symbol,
-    // which is then appended later on, compare the symbol contexts taking into
-    // accout that one (or either) of them might not have a symbol yet.
-    if (SymbolContext::CompareConsideringPossiblyNullSymbol(*pos, sc))
+
+  // Look in the set if it's populated, otherwise, fall back to linear scan.
+  if (!m_lookup_set.empty()) {
+    if (m_lookup_set.count(sc))
+      return false;
+  } else if (m_symbol_contexts.size() < kSetThreshold) {
+    for (const auto &existing : m_symbol_contexts) {
+      if (SymbolContext::CompareConsideringPossiblyNullSymbol(existing, sc))
+        return false;
+    }
+  } else {
+    // We've just crossed the threshold. Populate the set from existing items.
+    // Since the set won't be empty after this every new append operation will
+    // keep the set in sync with the vector.
+    for (const auto &existing : m_symbol_contexts)
+      m_lookup_set.insert(existing);
+    if (m_lookup_set.count(sc))
       return false;
   }
+
+  // This path is only taken when sc is an "isolated symbol" (only symbol field
+  // is set), so it's not the hot path.
   if (merge_symbol_into_function && sc.symbol != nullptr &&
       sc.comp_unit == nullptr && sc.function == nullptr &&
       sc.block == nullptr && !sc.line_entry.IsValid()) {
     if (sc.symbol->ValueIsAddress()) {
-      for (pos = m_symbol_contexts.begin(); pos != end; ++pos) {
+      for (auto &pos : m_symbol_contexts) {
         // Don't merge symbols into inlined function symbol contexts
-        if (pos->block && pos->block->GetContainingInlinedBlock())
+        if (pos.block && pos.block->GetContainingInlinedBlock())
           continue;
 
-        if (pos->function) {
-          if (pos->function->GetAddress() == sc.symbol->GetAddressRef()) {
+        if (pos.function) {
+          if (pos.function->GetAddress() == sc.symbol->GetAddressRef()) {
             // Do we already have a function with this symbol?
-            if (pos->symbol == sc.symbol)
+            if (pos.symbol == sc.symbol)
               return false;
-            if (pos->symbol == nullptr) {
-              pos->symbol = sc.symbol;
+            if (pos.symbol == nullptr) {
+              pos.symbol = sc.symbol;
               return false;
             }
           }
@@ -1249,11 +1313,17 @@ bool SymbolContextList::AppendIfUnique(const 
SymbolContext &sc,
       }
     }
   }
+
   m_symbol_contexts.push_back(sc);
+  if (!m_lookup_set.empty())
+    m_lookup_set.insert(sc);
   return true;
 }
 
-void SymbolContextList::Clear() { m_symbol_contexts.clear(); }
+void SymbolContextList::Clear() {
+  m_symbol_contexts.clear();
+  m_lookup_set.clear();
+}
 
 void SymbolContextList::Dump(Stream *s, Target *target) const {
 
@@ -1281,6 +1351,8 @@ bool SymbolContextList::GetContextAtIndex(size_t idx, 
SymbolContext &sc) const {
 
 bool SymbolContextList::RemoveContextAtIndex(size_t idx) {
   if (idx < m_symbol_contexts.size()) {
+    if (!m_lookup_set.empty())
+      m_lookup_set.erase(m_symbol_contexts[idx]);
     m_symbol_contexts.erase(m_symbol_contexts.begin() + idx);
     return true;
   }

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to