hokein created this revision.
hokein added a reviewer: kadircet.
Herald added subscribers: arphaman, mgrang, jkorous, MaskRay, ioeric, 
ilya-biryukov.
Herald added a project: clang.

Currently, we only deduplicate refs when we flush the final results. During
indexing we may accumulate a huge number of duplicate refs (refs from headers),
e.g. when running clangd-indexer on Chromium.

With this change, clangd-indexer can index the whole Chromium project
(48 threads, 40 GB peak memory usage).


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D59092

Files:
  clangd/index/Ref.cpp
  clangd/index/Ref.h
  clangd/indexer/IndexerMain.cpp


Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -56,7 +56,6 @@
                [&](RefSlab S) {
                  std::lock_guard<std::mutex> Lock(SymbolsMu);
                  for (const auto &Sym : S) {
-                   // No need to merge as currently all Refs are from main file.
                    for (const auto &Ref : Sym.second)
                      Refs.insert(Sym.first, Ref);
                  }
Index: clangd/index/Ref.h
===================================================================
--- clangd/index/Ref.h
+++ clangd/index/Ref.h
@@ -16,6 +16,7 @@
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
+#include <set>
 #include <utility>
 
 namespace clang {
@@ -99,7 +100,7 @@
   private:
     llvm::BumpPtrAllocator Arena;
     llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
-    llvm::DenseMap<SymbolID, std::vector<Ref>> Refs;
+    llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
   };
 
 private:
Index: clangd/index/Ref.cpp
===================================================================
--- clangd/index/Ref.cpp
+++ clangd/index/Ref.cpp
@@ -33,9 +33,12 @@
 
 void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
   auto &M = Refs[ID];
-  M.push_back(S);
-  M.back().Location.FileURI =
-      UniqueStrings.save(M.back().Location.FileURI).data();
+  if (M.count(S))
+    return;
+  Ref R = S;
+  R.Location.FileURI =
+      UniqueStrings.save(R.Location.FileURI).data();
+  M.insert(std::move(R));
 }
 
 RefSlab RefSlab::Builder::build() && {
@@ -45,11 +48,7 @@
   Result.reserve(Refs.size());
   size_t NumRefs = 0;
   for (auto &Sym : Refs) {
-    auto &SymRefs = Sym.second;
-    llvm::sort(SymRefs);
-    // FIXME: do we really need to dedup?
-    SymRefs.erase(std::unique(SymRefs.begin(), SymRefs.end()), SymRefs.end());
-
+    std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
     NumRefs += SymRefs.size();
     Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
   }


Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -56,7 +56,6 @@
                [&](RefSlab S) {
                  std::lock_guard<std::mutex> Lock(SymbolsMu);
                  for (const auto &Sym : S) {
-                   // No need to merge as currently all Refs are from main file.
                    for (const auto &Ref : Sym.second)
                      Refs.insert(Sym.first, Ref);
                  }
Index: clangd/index/Ref.h
===================================================================
--- clangd/index/Ref.h
+++ clangd/index/Ref.h
@@ -16,6 +16,7 @@
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
+#include <set>
 #include <utility>
 
 namespace clang {
@@ -99,7 +100,7 @@
   private:
     llvm::BumpPtrAllocator Arena;
     llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
-    llvm::DenseMap<SymbolID, std::vector<Ref>> Refs;
+    llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
   };
 
 private:
Index: clangd/index/Ref.cpp
===================================================================
--- clangd/index/Ref.cpp
+++ clangd/index/Ref.cpp
@@ -33,9 +33,12 @@
 
 void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
   auto &M = Refs[ID];
-  M.push_back(S);
-  M.back().Location.FileURI =
-      UniqueStrings.save(M.back().Location.FileURI).data();
+  if (M.count(S))
+    return;
+  Ref R = S;
+  R.Location.FileURI =
+      UniqueStrings.save(R.Location.FileURI).data();
+  M.insert(std::move(R));
 }
 
 RefSlab RefSlab::Builder::build() && {
@@ -45,11 +48,7 @@
   Result.reserve(Refs.size());
   size_t NumRefs = 0;
   for (auto &Sym : Refs) {
-    auto &SymRefs = Sym.second;
-    llvm::sort(SymRefs);
-    // FIXME: do we really need to dedup?
-    SymRefs.erase(std::unique(SymRefs.begin(), SymRefs.end()), SymRefs.end());
-
+    std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
     NumRefs += SymRefs.size();
     Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to