ioeric updated this revision to Diff 170440.
ioeric marked 2 inline comments as done.
ioeric added a comment.

- address review comments
- minor cleanup


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D53433

Files:
  clangd/index/Background.cpp
  clangd/index/Background.h
  clangd/index/FileIndex.cpp
  clangd/index/FileIndex.h
  clangd/index/IndexAction.cpp
  clangd/index/IndexAction.h
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  clangd/indexer/IndexerMain.cpp
  unittests/clangd/BackgroundIndexTests.cpp
  unittests/clangd/FileIndexTests.cpp
  unittests/clangd/SyncAPI.cpp
  unittests/clangd/SyncAPI.h

Index: unittests/clangd/SyncAPI.h
===================================================================
--- unittests/clangd/SyncAPI.h
+++ unittests/clangd/SyncAPI.h
@@ -52,6 +52,7 @@
 
 SymbolSlab runFuzzyFind(const SymbolIndex &Index, StringRef Query);
 SymbolSlab runFuzzyFind(const SymbolIndex &Index, const FuzzyFindRequest &Req);
+RefSlab getRefs(const SymbolIndex &Index, SymbolID ID);
 
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/SyncAPI.cpp
===================================================================
--- unittests/clangd/SyncAPI.cpp
+++ unittests/clangd/SyncAPI.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyncAPI.h"
+#include "index/Index.h"
 
 using namespace llvm;
 namespace clang {
@@ -138,5 +139,14 @@
   return std::move(Builder).build();
 }
 
+RefSlab getRefs(const SymbolIndex &Index, SymbolID ID) {
+  RefsRequest Req;
+  Req.IDs = {ID};
+  RefSlab::Builder Slab;
+  Index.refs(Req, [&](const Ref &S) { Slab.insert(ID, S); });
+  return std::move(Slab).build();
+}
+
+
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/FileIndexTests.cpp
===================================================================
--- unittests/clangd/FileIndexTests.cpp
+++ unittests/clangd/FileIndexTests.cpp
@@ -14,6 +14,7 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "index/FileIndex.h"
+#include "index/Index.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Frontend/Utils.h"
@@ -39,6 +40,7 @@
 }
 MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
 MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; }
+MATCHER_P(DefURI, U, "") { return arg.Definition.FileURI == U; }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
 
 using namespace llvm;
@@ -73,14 +75,6 @@
   return llvm::make_unique<RefSlab>(std::move(Slab).build());
 }
 
-RefSlab getRefs(const SymbolIndex &I, SymbolID ID) {
-  RefsRequest Req;
-  Req.IDs = {ID};
-  RefSlab::Builder Slab;
-  I.refs(Req, [&](const Ref &S) { Slab.insert(ID, S); });
-  return std::move(Slab).build();
-}
-
 TEST(FileSymbolsTest, UpdateAndGet) {
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
@@ -102,6 +96,26 @@
                                      QName("4"), QName("5")));
 }
 
+TEST(FileSymbolsTest, MergeOverlap) {
+  FileSymbols FS;
+  auto OneSymboSlab = [](Symbol Sym) {
+    SymbolSlab::Builder S;
+    S.insert(Sym);
+    return make_unique<SymbolSlab>(std::move(S).build());
+  };
+  auto X1 = symbol("x");
+  X1.CanonicalDeclaration.FileURI = "file:///x1";
+  auto X2 = symbol("x");
+  X2.Definition.FileURI = "file:///x2";
+
+  FS.update("f1", OneSymboSlab(X1), nullptr);
+  FS.update("f2", OneSymboSlab(X2), nullptr);
+  for (auto Type : {IndexType::Light, IndexType::Heavy})
+    EXPECT_THAT(runFuzzyFind(*FS.buildIndex(Type, /*MergeSymbols=*/true), "x"),
+                UnorderedElementsAre(AllOf(QName("x"), DeclURI("file:///x1"),
+                                           DefURI("file:///x2"))));
+}
+
 TEST(FileSymbolsTest, SnapshotAliveAfterRemove) {
   FileSymbols FS;
 
Index: unittests/clangd/BackgroundIndexTests.cpp
===================================================================
--- unittests/clangd/BackgroundIndexTests.cpp
+++ unittests/clangd/BackgroundIndexTests.cpp
@@ -4,33 +4,74 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::_;
+using testing::AllOf;
+using testing::Not;
 using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 
 MATCHER_P(Named, N, "") { return arg.Name == N; }
+MATCHER(Declared, "") { return !arg.CanonicalDeclaration.FileURI.empty(); }
+MATCHER(Defined, "") { return !arg.Definition.FileURI.empty(); }
+
+MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; }
+testing::Matcher<const RefSlab &>
+RefsAre(std::vector<testing::Matcher<Ref>> Matchers) {
+  return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers)));
+}
 
 TEST(BackgroundIndexTest, IndexTwoFiles) {
   MockFSProvider FS;
   // a.h yields different symbols when included by A.cc vs B.cc.
   // Currently we store symbols for each TU, so we get both.
-  FS.Files[testPath("root/A.h")] = "void a_h(); void NAME(){}";
-  FS.Files[testPath("root/A.cc")] = "#include \"A.h\"";
-  FS.Files[testPath("root/B.cc")] = "#define NAME bar\n#include \"A.h\"";
-  BackgroundIndex Idx(Context::empty(), "", FS);
+  FS.Files[testPath("root/A.h")] = R"(
+      void common();
+      void f_b();
+      #if CC == A
+        class A_H {};
+      #elif CC == B
+        class B_H {};
+      #else
+        class _H {};
+      #endif
+      )";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+  FS.Files[testPath("root/B.cc")] =
+      "#define CC B\n#include \"A.h\"\nvoid f_b() { (void)common; }";
+  BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"});
 
   tooling::CompileCommand Cmd;
   Cmd.Filename = testPath("root/A.cc");
   Cmd.Directory = testPath("root");
-  Cmd.CommandLine = {"clang++", "-DNAME=foo", testPath("root/A.cc")};
+  Cmd.CommandLine = {"clang++", "-DCC=A", testPath("root/A.cc")};
   Idx.enqueue(testPath("root"), Cmd);
-  Cmd.CommandLine.back() = Cmd.Filename = testPath("root/B.cc");
+
+  Idx.blockUntilIdleForTest();
+  EXPECT_THAT(
+      runFuzzyFind(Idx, ""),
+      UnorderedElementsAre(Named("common"), Named("A_H"),
+                           AllOf(Named("f_b"), Declared(), Not(Defined()))));
+
+  Cmd.Filename = testPath("root/B.cc");
+  Cmd.CommandLine = {"clang++", Cmd.Filename};
   Idx.enqueue(testPath("root"), Cmd);
 
   Idx.blockUntilIdleForTest();
+  // B_H is dropped as we don't collect symbols from A.h in this compilation.
   EXPECT_THAT(runFuzzyFind(Idx, ""),
-              UnorderedElementsAre(Named("a_h"), Named("foo"), Named("bar")));
+              UnorderedElementsAre(Named("common"), Named("A_H"),
+                                   AllOf(Named("f_b"), Declared(), Defined())));
+
+  auto Syms = runFuzzyFind(Idx, "common");
+  EXPECT_THAT(Syms, UnorderedElementsAre(Named("common")));
+  auto Common = *Syms.begin();
+  EXPECT_THAT(getRefs(Idx, Common.ID),
+              RefsAre({FileURI("file:///clangd-test/root/A.h"),
+                       FileURI("file:///clangd-test/root/A.cc"),
+                       FileURI("file:///clangd-test/root/B.cc")}));
 }
 
 } // namespace clangd
Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -62,7 +62,8 @@
                    for (const auto &Ref : Sym.second)
                      Refs.insert(Sym.first, Ref);
                  }
-               })
+               },
+               /*FileDigestsCallback=*/nullptr)
         .release();
   }
 
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -10,12 +10,14 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
 
 #include "CanonicalIncludes.h"
+#include "FileIndex.h"
 #include "Index.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/ADT/DenseSet.h"
 
 namespace clang {
 namespace clangd {
@@ -96,6 +98,7 @@
 
   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
   RefSlab takeRefs() { return std::move(Refs).build(); }
+  FileDigests takeFileDigests() { return std::move(IndexedFileDigests); }
 
   void finish() override;
 
@@ -109,6 +112,7 @@
   // Only symbols declared in preamble (from #include) and referenced from the
   // main file will be included.
   RefSlab::Builder Refs;
+  FileDigests IndexedFileDigests;
   ASTContext *ASTCtx;
   std::shared_ptr<Preprocessor> PP;
   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -24,10 +24,12 @@
 #include "clang/Basic/Specifiers.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Index/USRGeneration.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
 
 using namespace llvm;
 namespace clang {
@@ -203,18 +205,33 @@
           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
 }
 
+void digestFile(const SourceManager &SM, SourceLocation Loc,
+                llvm::StringRef FileURI, FileDigests *Digest) {
+  if (Digest->count(FileURI) != 0)
+    return;
+  FileID FID = SM.getFileID(Loc);
+  if (FID.isInvalid())
+    return;
+  bool Invalid = false;
+  StringRef Content = SM.getBufferData(FID, &Invalid);
+  if (!Invalid)
+    (*Digest)[FileURI] = digest(Content);
+}
+
 // Return the symbol location of the token at \p TokLoc.
 Optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc,
                                           const SourceManager &SM,
                                           const SymbolCollector::Options &Opts,
                                           const clang::LangOptions &LangOpts,
+                                          FileDigests &Digest,
                                           std::string &FileURIStorage) {
   auto U = toURI(SM, SM.getFilename(TokLoc), Opts);
   if (!U)
     return None;
   FileURIStorage = std::move(*U);
   SymbolLocation Result;
   Result.FileURI = FileURIStorage;
+  digestFile(SM, TokLoc, Result.FileURI, &Digest);
   auto Range = getTokenRange(TokLoc, SM, LangOpts);
   Result.Start = Range.first;
   Result.End = Range.second;
@@ -426,8 +443,9 @@
   S.Flags |= Symbol::IndexedForCodeCompletion;
   S.SymInfo = index::getSymbolInfoForMacro(*MI);
   std::string FileURI;
-  if (auto DeclLoc = getTokenLocation(MI->getDefinitionLoc(), SM, Opts,
-                                      PP->getLangOpts(), FileURI))
+  if (auto DeclLoc =
+          getTokenLocation(MI->getDefinitionLoc(), SM, Opts, PP->getLangOpts(),
+                           IndexedFileDigests, FileURI))
     S.CanonicalDeclaration = *DeclLoc;
 
   CodeCompletionResult SymbolCompletion(Name);
@@ -504,6 +522,7 @@
         for (const auto &LocAndRole : It.second) {
           auto FileID = SM.getFileID(LocAndRole.first);
           if (auto FileURI = GetURI(FileID)) {
+            digestFile(SM, LocAndRole.first, *FileURI, &IndexedFileDigests);
             auto Range =
                 getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
             Ref R;
@@ -541,8 +560,9 @@
     S.Flags |= Symbol::ImplementationDetail;
   S.SymInfo = index::getSymbolInfo(&ND);
   std::string FileURI;
-  if (auto DeclLoc = getTokenLocation(findNameLoc(&ND), SM, Opts,
-                                      ASTCtx->getLangOpts(), FileURI))
+  if (auto DeclLoc =
+          getTokenLocation(findNameLoc(&ND), SM, Opts, ASTCtx->getLangOpts(),
+                           IndexedFileDigests, FileURI))
     S.CanonicalDeclaration = *DeclLoc;
 
   // Add completion info.
@@ -593,9 +613,9 @@
   // in clang::index. We should only see one definition.
   Symbol S = DeclSym;
   std::string FileURI;
-  if (auto DefLoc = getTokenLocation(findNameLoc(&ND),
-                                     ND.getASTContext().getSourceManager(),
-                                     Opts, ASTCtx->getLangOpts(), FileURI))
+  if (auto DefLoc = getTokenLocation(
+          findNameLoc(&ND), ND.getASTContext().getSourceManager(), Opts,
+          ASTCtx->getLangOpts(), IndexedFileDigests, FileURI))
     S.Definition = *DefLoc;
   Symbols.insert(S);
 }
Index: clangd/index/IndexAction.h
===================================================================
--- clangd/index/IndexAction.h
+++ clangd/index/IndexAction.h
@@ -9,6 +9,7 @@
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_ACTION_H
+#include "FileIndex.h"
 #include "SymbolCollector.h"
 #include "clang/Frontend/FrontendActions.h"
 
@@ -23,10 +24,13 @@
 //   - references are always counted
 //   - all references are collected (if RefsCallback is non-null)
 //   - the symbol origin is always Static
-std::unique_ptr<FrontendAction>
-createStaticIndexingAction(SymbolCollector::Options Opts,
-                           std::function<void(SymbolSlab)> SymbolsCallback,
-                           std::function<void(RefSlab)> RefsCallback);
+// FIXME: replace callbacks with a single callback that passes a struct that
+// contains all index results.
+std::unique_ptr<FrontendAction> createStaticIndexingAction(
+    SymbolCollector::Options Opts,
+    std::function<void(SymbolSlab)> SymbolsCallback,
+    std::function<void(RefSlab)> RefsCallback,
+    std::function<void(FileDigests)> FileDigestsCallback);
 
 } // namespace clangd
 } // namespace clang
Index: clangd/index/IndexAction.cpp
===================================================================
--- clangd/index/IndexAction.cpp
+++ clangd/index/IndexAction.cpp
@@ -1,4 +1,5 @@
 #include "IndexAction.h"
+#include "FileIndex.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexingAction.h"
@@ -16,10 +17,12 @@
               std::unique_ptr<CanonicalIncludes> Includes,
               const index::IndexingOptions &Opts,
               std::function<void(SymbolSlab)> SymbolsCallback,
-              std::function<void(RefSlab)> RefsCallback)
+              std::function<void(RefSlab)> RefsCallback,
+              std::function<void(FileDigests)> FileDigestsCallback)
       : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)),
         SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback),
-        Collector(C), Includes(std::move(Includes)),
+        FileDigestsCallback(FileDigestsCallback), Collector(C),
+        Includes(std::move(Includes)),
         PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
 
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
@@ -46,22 +49,26 @@
     SymbolsCallback(Collector->takeSymbols());
     if (RefsCallback != nullptr)
       RefsCallback(Collector->takeRefs());
+    if (FileDigestsCallback)
+      FileDigestsCallback(Collector->takeFileDigests());
   }
 
 private:
   std::function<void(SymbolSlab)> SymbolsCallback;
   std::function<void(RefSlab)> RefsCallback;
+  std::function<void(FileDigests)> FileDigestsCallback;
   std::shared_ptr<SymbolCollector> Collector;
   std::unique_ptr<CanonicalIncludes> Includes;
   std::unique_ptr<CommentHandler> PragmaHandler;
 };
 
 } // namespace
 
-std::unique_ptr<FrontendAction>
-createStaticIndexingAction(SymbolCollector::Options Opts,
-                           std::function<void(SymbolSlab)> SymbolsCallback,
-                           std::function<void(RefSlab)> RefsCallback) {
+std::unique_ptr<FrontendAction> createStaticIndexingAction(
+    SymbolCollector::Options Opts,
+    std::function<void(SymbolSlab)> SymbolsCallback,
+    std::function<void(RefSlab)> RefsCallback,
+    std::function<void(FileDigests)> FileDigestsCallback) {
   index::IndexingOptions IndexOpts;
   IndexOpts.SystemSymbolFilter =
       index::IndexingOptions::SystemSymbolFilterKind::All;
@@ -77,7 +84,7 @@
   Opts.Includes = Includes.get();
   return llvm::make_unique<IndexAction>(
       std::make_shared<SymbolCollector>(std::move(Opts)), std::move(Includes),
-      IndexOpts, SymbolsCallback, RefsCallback);
+      IndexOpts, SymbolsCallback, RefsCallback, FileDigestsCallback);
 }
 
 } // namespace clangd
Index: clangd/index/FileIndex.h
===================================================================
--- clangd/index/FileIndex.h
+++ clangd/index/FileIndex.h
@@ -21,6 +21,9 @@
 #include "MemIndex.h"
 #include "Merge.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SHA1.h"
 #include <memory>
 
 namespace clang {
@@ -34,6 +37,13 @@
   Heavy,
 };
 
+using FileDigest = decltype(llvm::SHA1::hash({}));
+using FileDigests = llvm::StringMap<FileDigest>;
+
+inline FileDigest digest(llvm::StringRef Content) {
+  return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
 /// A container of Symbols from several source files. It can be updated
 /// at source-file granularity, replacing all symbols from one file with a new
 /// set.
@@ -55,8 +65,12 @@
               std::unique_ptr<RefSlab> Refs);
 
   // The index keeps the symbols alive.
+  // If \p MergeSymbols is true, this will merge symbols from different files;
+  // otherwise, a random one is picked, which is less accurate but faster to
+  // build.
   std::unique_ptr<SymbolIndex>
-  buildIndex(IndexType, ArrayRef<std::string> URISchemes = {});
+  buildIndex(IndexType, bool MergeSymbols = false,
+             ArrayRef<std::string> URISchemes = {});
 
 private:
   mutable std::mutex Mutex;
Index: clangd/index/FileIndex.cpp
===================================================================
--- clangd/index/FileIndex.cpp
+++ clangd/index/FileIndex.cpp
@@ -12,11 +12,16 @@
 #include "Logger.h"
 #include "SymbolCollector.h"
 #include "index/Index.h"
+#include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "index/dex/Dex.h"
 #include "clang/Index/IndexingAction.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include <memory>
 
 using namespace llvm;
@@ -101,7 +106,8 @@
 }
 
 std::unique_ptr<SymbolIndex>
-FileSymbols::buildIndex(IndexType Type, ArrayRef<std::string> URISchemes) {
+FileSymbols::buildIndex(IndexType Type, bool MergeSymbols,
+                        ArrayRef<std::string> URISchemes) {
   std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
   std::vector<std::shared_ptr<RefSlab>> RefSlabs;
   {
@@ -111,6 +117,21 @@
     for (const auto &FileAndRefs : FileToRefs)
       RefSlabs.push_back(FileAndRefs.second);
   }
+  if (MergeSymbols) {
+    // Merge slabs into a single slab and keep only the merged slab alive.
+    SymbolSlab::Builder Merged;
+    for (const auto &Slab : SymbolSlabs) {
+      for (const auto &Sym : *Slab) {
+        if (const auto *Existing = Merged.find(Sym.ID))
+          Merged.insert(mergeSymbol(*Existing, Sym));
+        else
+          Merged.insert(Sym);
+      }
+    }
+    auto Slab = std::make_shared<SymbolSlab>(std::move(Merged).build());
+    SymbolSlabs = {Slab};
+    // FIXME: aggregate symbol reference count based on references.
+  }
   std::vector<const Symbol *> AllSymbols;
   for (const auto &Slab : SymbolSlabs)
     for (const auto &Sym : *Slab)
@@ -176,16 +197,18 @@
   PreambleSymbols.update(Path,
                          llvm::make_unique<SymbolSlab>(std::move(Symbols)),
                          llvm::make_unique<RefSlab>());
-  PreambleIndex.reset(PreambleSymbols.buildIndex(
-      UseDex ? IndexType::Heavy : IndexType::Light, URISchemes));
+  PreambleIndex.reset(
+      PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
+                                 /*MergeSymbols=*/false, URISchemes));
 }
 
 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
   auto Contents = indexMainDecls(AST, URISchemes);
   MainFileSymbols.update(
       Path, llvm::make_unique<SymbolSlab>(std::move(Contents.first)),
       llvm::make_unique<RefSlab>(std::move(Contents.second)));
-  MainFileIndex.reset(MainFileSymbols.buildIndex(IndexType::Light, URISchemes));
+  MainFileIndex.reset(MainFileSymbols.buildIndex(
+      IndexType::Light, /*MergeSymbols=*/false, URISchemes));
 }
 
 } // namespace clangd
Index: clangd/index/Background.h
===================================================================
--- clangd/index/Background.h
+++ clangd/index/Background.h
@@ -15,6 +15,7 @@
 #include "index/FileIndex.h"
 #include "index/Index.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SHA1.h"
 #include <condition_variable>
 #include <deque>
@@ -33,8 +34,7 @@
 public:
   // FIXME: resource-dir injection should be hoisted somewhere common.
   BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
-                  const FileSystemProvider &,
-                  ArrayRef<std::string> URISchemes = {});
+                  const FileSystemProvider &, ArrayRef<std::string> URISchemes);
   ~BackgroundIndex(); // Blocks while the current task finishes.
 
   // Enqueue a translation unit for indexing.
@@ -53,17 +53,19 @@
   void blockUntilIdleForTest();
 
 private:
+  void update(SymbolSlab Symbols, RefSlab Refs, FileDigests Digests);
+
   // configuration
   std::string ResourceDir;
   const FileSystemProvider &FSProvider;
   Context BackgroundContext;
   std::vector<std::string> URISchemes;
 
   // index state
   llvm::Error index(tooling::CompileCommand);
-  FileSymbols IndexedSymbols; // Index contents.
-  using Hash = decltype(llvm::SHA1::hash({}));
-  llvm::StringMap<Hash> FileHash; // Digest of indexed file.
+
+  FileSymbols IndexedSymbols;
+  FileDigests IndexedFileDigests; // Keyed by file URIs.
 
   // queue management
   using Task = std::function<void()>; // FIXME: use multiple worker threads.
Index: clangd/index/Background.cpp
===================================================================
--- clangd/index/Background.cpp
+++ clangd/index/Background.cpp
@@ -12,9 +12,11 @@
 #include "Compiler.h"
 #include "Logger.h"
 #include "Trace.h"
+#include "URI.h"
 #include "index/IndexAction.h"
 #include "index/MemIndex.h"
 #include "index/Serialization.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/SHA1.h"
 #include <random>
 
@@ -112,6 +114,61 @@
       std::move(Cmd)));
 }
 
+// Given index results from an index action, update symbols/references for files
+// that have changed.
+void BackgroundIndex::update(SymbolSlab Symbols, RefSlab Refs,
+                             FileDigests Digests) {
+  // Find files that have not been seen before or whose digests have changed.
+  llvm::DenseSet<llvm::StringRef> FilesToUpdate;
+  for (const auto &D : Digests) {
+    auto I = IndexedFileDigests.find(D.first());
+    if (I == IndexedFileDigests.end() || I->second != D.second)
+      FilesToUpdate.insert(D.first());
+  }
+
+  // Partition symbols/references into files.
+  struct File {
+    llvm::DenseSet<const Symbol *> Symbols;
+    llvm::DenseSet<const Ref *> Refs;
+  };
+  llvm::StringMap<File> Files;
+  for (const auto &Sym : Symbols) {
+    if (FilesToUpdate.count(Sym.CanonicalDeclaration.FileURI) != 0)
+      Files[Sym.CanonicalDeclaration.FileURI].Symbols.insert(&Sym);
+    // For symbols with different declaration and definition locations, we store
+    // the full symbol in both the header file and the implementation file, so
+    // that merging can tell the preferred symbols (from canonical headers) from
+    // other symbols (e.g. forward declarations).
+    if (Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI &&
+        FilesToUpdate.count(Sym.Definition.FileURI) != 0)
+      Files[Sym.Definition.FileURI].Symbols.insert(&Sym);
+  }
+  llvm::DenseMap<const Ref *, SymbolID> RefToIDs;
+  for (const auto &SymRefs : Refs) {
+    for (const auto &R : SymRefs.second) {
+      if (FilesToUpdate.count(R.Location.FileURI) != 0) {
+        auto &F = Files[R.Location.FileURI];
+        RefToIDs[&R] = SymRefs.first;
+        F.Refs.insert(&R);
+      }
+    }
+  }
+
+  // Build and store new slabs for each updated file.
+  for (const auto &F : Files) {
+    StringRef FileURI = F.first();
+    SymbolSlab::Builder Syms;
+    RefSlab::Builder Refs;
+    for (const auto *S : F.second.Symbols)
+      Syms.insert(*S);
+    for (const auto *R : F.second.Refs)
+      Refs.insert(RefToIDs[R], *R);
+    IndexedSymbols.update(
+        FileURI, llvm::make_unique<SymbolSlab>(std::move(Syms).build()),
+        llvm::make_unique<RefSlab>(std::move(Refs).build()));
+  }
+}
+
 Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
   trace::Span Tracer("BackgroundIndex");
   SPAN_ATTACH(Tracer, "file", Cmd.Filename);
@@ -127,10 +184,13 @@
   auto Buf = FS->getBufferForFile(AbsolutePath);
   if (!Buf)
     return errorCodeToError(Buf.getError());
-  StringRef Contents = Buf->get()->getBuffer();
-  auto Hash = SHA1::hash({(const uint8_t *)Contents.data(), Contents.size()});
+  auto Hash = digest(Buf->get()->getBuffer());
 
-  if (FileHash.lookup(AbsolutePath) == Hash) {
+  auto MainURI = URI::create(AbsolutePath, URISchemes);
+  if (!MainURI)
+    return MainURI.takeError();
+  std::string MainURIStr = MainURI->toString();
+  if (IndexedFileDigests.lookup(MainURIStr) == Hash) {
     vlog("No need to index {0}, already up to date", AbsolutePath);
     return Error::success();
   }
@@ -155,10 +215,12 @@
   SymbolCollector::Options IndexOpts;
   SymbolSlab Symbols;
   RefSlab Refs;
+  FileDigests Digests;
   IndexFileIn IndexData;
   auto Action = createStaticIndexingAction(
       IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
-      [&](RefSlab R) { Refs = std::move(R); });
+      [&](RefSlab R) { Refs = std::move(R); },
+      [&](FileDigests D) { Digests = std::move(D); });
 
   // We're going to run clang here, and it could potentially crash.
   // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
@@ -177,16 +239,15 @@
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  // FIXME: partition the symbols by file rather than TU, to avoid duplication.
-  IndexedSymbols.update(AbsolutePath,
-                        llvm::make_unique<SymbolSlab>(std::move(Symbols)),
-                        llvm::make_unique<RefSlab>(std::move(Refs)));
-  FileHash[AbsolutePath] = Hash;
+  update(std::move(Symbols), std::move(Refs), std::move(Digests));
+  IndexedFileDigests[MainURIStr] = Hash;
 
   // FIXME: this should rebuild once-in-a-while, not after every file.
   //       At that point we should use Dex, too.
   vlog("Rebuilding automatic index");
-  reset(IndexedSymbols.buildIndex(IndexType::Light, URISchemes));
+  reset(IndexedSymbols.buildIndex(IndexType::Light, /*MergeSymbols=*/true,
+                                  URISchemes));
+
   return Error::success();
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to