kadircet created this revision.
kadircet added a reviewer: sammccall.
Herald added subscribers: cfe-commits, usaxena95, arphaman, jkorous, MaskRay, 
ilya-biryukov.
Herald added a project: clang.

This reduces memory usage by dynamic index from more than 400MB to 32MB
when all files in clang-tools-extra/clangd/*.cpp are active in clangd.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77732

Files:
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/clangd/unittests/FileIndexTests.cpp
  clang-tools-extra/clangd/unittests/TestTU.cpp

Index: clang-tools-extra/clangd/unittests/TestTU.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/TestTU.cpp
+++ clang-tools-extra/clangd/unittests/TestTU.cpp
@@ -116,9 +116,10 @@
 std::unique_ptr<SymbolIndex> TestTU::index() const {
   auto AST = build();
   auto Idx = std::make_unique<FileIndex>(/*UseDex=*/true);
-  Idx->updatePreamble(Filename, /*Version=*/"null", AST.getASTContext(),
-                      AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
-  Idx->updateMain(Filename, AST);
+  Idx->updatePreamble(testPath(Filename), /*Version=*/"null",
+                      AST.getASTContext(), AST.getPreprocessorPtr(),
+                      AST.getCanonicalIncludes());
+  Idx->updateMain(testPath(Filename), AST);
   return std::move(Idx);
 }
 
Index: clang-tools-extra/clangd/unittests/FileIndexTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -151,8 +151,9 @@
   File.HeaderFilename = (Basename + ".h").str();
   File.HeaderCode = std::string(Code);
   auto AST = File.build();
-  M.updatePreamble(File.Filename, /*Version=*/"null", AST.getASTContext(),
-                   AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
+  M.updatePreamble(testPath(File.Filename), /*Version=*/"null",
+                   AST.getASTContext(), AST.getPreprocessorPtr(),
+                   AST.getCanonicalIncludes());
 }
 
 TEST(FileIndexTest, CustomizedURIScheme) {
@@ -393,8 +394,9 @@
   TU.HeaderCode = "class A {}; class B : public A {};";
   auto AST = TU.build();
   FileIndex Index;
-  Index.updatePreamble(TU.Filename, /*Version=*/"null", AST.getASTContext(),
-                       AST.getPreprocessorPtr(), AST.getCanonicalIncludes());
+  Index.updatePreamble(testPath(TU.Filename), /*Version=*/"null",
+                       AST.getASTContext(), AST.getPreprocessorPtr(),
+                       AST.getCanonicalIncludes());
   SymbolID A = findSymbol(TU.headerSymbols(), "A").ID;
   uint32_t Results = 0;
   RelationsRequest Req;
Index: clang-tools-extra/clangd/index/FileIndex.h
===================================================================
--- clang-tools-extra/clangd/index/FileIndex.h
+++ clang-tools-extra/clangd/index/FileIndex.h
@@ -109,17 +109,15 @@
 private:
   bool UseDex; // FIXME: this should be always on.
 
-  // Contains information from each file's preamble only.
-  // These are large, but update fairly infrequently (preambles are stable).
+  // Contains information from each file's preamble only. Symbols and relations
+  // are sharded per declaration file to deduplicate multiple symbols and reduce
+  // memory usage.
   // Missing information:
   //  - symbol refs (these are always "from the main file")
   //  - definition locations in the main file
   //
-  // FIXME: Because the preambles for different TUs have large overlap and
-  // FileIndex doesn't deduplicate, this uses lots of extra RAM.
-  // The biggest obstacle in fixing this: the obvious approach of partitioning
-  // by declaring file (rather than main file) fails if headers provide
-  // different symbols based on preprocessor state.
+  // FIXME: We ignore symbol resulting from different PP states while parsing a
+  // file and store only the last one.
   FileSymbols PreambleSymbols;
   SwapIndex PreambleIndex;
 
Index: clang-tools-extra/clangd/index/FileIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/FileIndex.cpp
+++ clang-tools-extra/clangd/index/FileIndex.cpp
@@ -10,11 +10,14 @@
 #include "CollectMacros.h"
 #include "Logger.h"
 #include "ParsedAST.h"
+#include "Path.h"
 #include "SymbolCollector.h"
 #include "index/CanonicalIncludes.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
+#include "index/Relation.h"
+#include "index/Symbol.h"
 #include "index/SymbolOrigin.h"
 #include "index/dex/Dex.h"
 #include "clang/AST/ASTContext.h"
@@ -23,19 +26,21 @@
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include <memory>
+#include <tuple>
 
 namespace clang {
 namespace clangd {
+namespace {
 
-static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
-                              llvm::ArrayRef<Decl *> DeclsToIndex,
-                              const MainFileMacros *MacroRefsToIndex,
-                              const CanonicalIncludes &Includes,
-                              bool IsIndexMainAST, llvm::StringRef Version) {
+SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
+                       llvm::ArrayRef<Decl *> DeclsToIndex,
+                       const MainFileMacros *MacroRefsToIndex,
+                       const CanonicalIncludes &Includes, bool IsIndexMainAST,
+                       llvm::StringRef Version) {
   SymbolCollector::Options CollectorOpts;
   CollectorOpts.CollectIncludePath = true;
   CollectorOpts.Includes = &Includes;
@@ -85,6 +90,53 @@
                          std::move(Relations));
 }
 
+/// Takes slabs coming from a TU(multiple files) and shards them per declaration
+/// location. Afterwards updates \p FileSyms.
+/// \p HintPath is used to convert file uris stored in symbols into absolute
+/// paths used by \p FileSyms.
+void shardSlabToFiles(const SymbolSlab &Syms, const RelationSlab &Rels,
+                      FileSymbols &FileSyms, llvm::StringRef HintPath) {
+  llvm::StringMap<SymbolSlab::Builder> FileToSyms;
+  llvm::StringMap<RelationSlab::Builder> FileToRels;
+  llvm::DenseMap<SymbolID, llvm::StringRef> SymbolIDToFile;
+  llvm::StringMap<std::string> URIToPathCache;
+  auto FileForSymbol = [&](const Symbol &S) -> llvm::StringRef {
+    llvm::StringRef FileURI = S.CanonicalDeclaration.FileURI;
+    // Symbols might contain empty URIs when they are not coming from a file,
+    // e.g. command line macros.
+    if (FileURI.empty())
+      return "";
+    auto I = URIToPathCache.try_emplace(FileURI);
+    if (I.second) {
+      auto URIPath = URI::resolve(FileURI, HintPath);
+      assert(URIPath);
+      I.first->second = *URIPath;
+    }
+    return I.first->second;
+  };
+  for (const auto &S : Syms) {
+    auto File = FileForSymbol(S);
+    FileToSyms[File].insert(S);
+    SymbolIDToFile[S.ID] = File;
+  }
+  for (const auto &R : Rels) {
+    auto File = SymbolIDToFile.lookup(R.Subject);
+    assert(!File.empty() && "unknown symbol in relation slab");
+    FileToRels[File].insert(R);
+  }
+  for (auto &Entry : FileToSyms) {
+    auto File = Entry.first();
+    auto &SymSlab = Entry.getValue();
+    auto &RelSlab = FileToRels[File];
+    FileSyms.update(File,
+                    std::make_unique<SymbolSlab>(std::move(SymSlab).build()),
+                    std::make_unique<RefSlab>(),
+                    std::make_unique<RelationSlab>(std::move(RelSlab).build()),
+                    /*CountReferences=*/false);
+  }
+}
+} // namespace
+
 SlabTuple indexMainDecls(ParsedAST &AST) {
   return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(),
                       AST.getLocalTopLevelDecls(), &AST.getMacros(),
@@ -254,15 +306,17 @@
                                ASTContext &AST,
                                std::shared_ptr<Preprocessor> PP,
                                const CanonicalIncludes &Includes) {
-  auto Slabs = indexHeaderSymbols(Version, AST, std::move(PP), Includes);
-  PreambleSymbols.update(
-      Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Slabs))),
-      std::make_unique<RefSlab>(),
-      std::make_unique<RelationSlab>(std::move(std::get<2>(Slabs))),
-      /*CountReferences=*/false);
+  SymbolSlab Syms;
+  RelationSlab Rels;
+  std::tie(Syms, std::ignore, Rels) =
+      indexHeaderSymbols(Version, AST, std::move(PP), Includes);
+  shardSlabToFiles(Syms, Rels, PreambleSymbols, Path);
   PreambleIndex.reset(
       PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
                                  DuplicateHandling::PickOne));
+  vlog("Build dynamic index for header symbols with estimated memory usage of "
+       "{0} bytes",
+       PreambleIndex.estimateMemoryUsage());
 }
 
 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
@@ -274,6 +328,9 @@
       /*CountReferences=*/true);
   MainFileIndex.reset(
       MainFileSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge));
+  vlog("Build dynamic index for mainfile symbols with estimated memory usage "
+       "of {0} bytes",
+       MainFileIndex.estimateMemoryUsage());
 }
 
 } // namespace clangd
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to