kadircet updated this revision to Diff 174019.
kadircet added a comment.

- Get rid of getIndexStorage and use IndexStorageCreator directly.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D54269

Files:
  clangd/index/Background.cpp
  clangd/index/Background.h
  unittests/clangd/BackgroundIndexTests.cpp

Index: unittests/clangd/BackgroundIndexTests.cpp
===================================================================
--- unittests/clangd/BackgroundIndexTests.cpp
+++ unittests/clangd/BackgroundIndexTests.cpp
@@ -76,5 +76,73 @@
                        FileURI("unittest:///root/B.cc")}));
 }
 
+TEST(BackgroundIndexTest, ShardStorageWriteTest) {
+  class MemoryShardStorage : public BackgroundIndexStorage {
+    mutable std::mutex StorageMu;
+    llvm::StringMap<std::string> &Storage;
+    size_t &CacheHits; // Number of successful loadShard() calls.
+
+  public:
+    MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits)
+        : Storage(Storage), CacheHits(CacheHits) {}
+    llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                           IndexFileOut Shard) const override {
+      std::lock_guard<std::mutex> Lock(StorageMu);
+      std::string &str = Storage[ShardIdentifier];
+      llvm::raw_string_ostream OS(str);
+      OS << Shard;
+      OS.flush();
+      return llvm::Error::success();
+    }
+    std::unique_ptr<IndexFileIn>
+    loadShard(llvm::StringRef ShardIdentifier) const override {
+      std::lock_guard<std::mutex> Lock(StorageMu);
+      if (Storage.find(ShardIdentifier) == Storage.end()) {
+        elog("Shard {0}: not found.", ShardIdentifier);
+        return nullptr;
+      }
+      auto IndexFile = readIndexFile(Storage[ShardIdentifier]);
+      if (!IndexFile) {
+        elog("Error while reading {0}: {1}.", ShardIdentifier,
+             IndexFile.takeError());
+        return nullptr;
+      }
+      CacheHits++;
+      return llvm::make_unique<IndexFileIn>(std::move(*IndexFile));
+    }
+  };
+  MockFSProvider FS;
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      void common();
+      void f_b();
+      class A_CC {};
+      )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  // Not a function-local static: MSS refers to the locals declared above.
+  MemoryShardStorage MSS(Storage, CacheHits);
+  BackgroundIndex::IndexStorageFactory MemoryStorageFactory =
+      [&MSS](llvm::StringRef) { return &MSS; };
+
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  // Check nothing is loaded from Storage, but A.cc and A.h have been stored.
+  {
+    BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+                        MemoryStorageFactory);
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+  EXPECT_EQ(CacheHits, 0U);
+  EXPECT_EQ(Storage.size(), 2U);
+  EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end());
+  EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end());
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/index/Background.h
===================================================================
--- clangd/index/Background.h
+++ clangd/index/Background.h
@@ -14,6 +14,7 @@
 #include "FSProvider.h"
 #include "index/FileIndex.h"
 #include "index/Index.h"
+#include "index/Serialization.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SHA1.h"
@@ -27,15 +28,41 @@
 namespace clang {
 namespace clangd {
 
+// Handles storage and retrieval of index shards. Both store and load
+// operations can be called from multiple threads concurrently.
+class BackgroundIndexStorage {
+public:
+  virtual ~BackgroundIndexStorage() = default; // Destroyed via base pointers.
+
+  // Shards are independent, keyed by identifier - in practice a file name.
+  virtual llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                                 IndexFileOut Shard) const = 0;
+
+  // Loads the shard stored under ShardIdentifier, nullptr if that fails.
+  virtual std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const = 0;
+
+  // Returns the storage saving shards under CDBDirectory + ".clangd-index/".
+  // The result is owned by a process-wide cache; callers must not delete it.
+  static BackgroundIndexStorage *
+  createDiskStorage(llvm::StringRef CDBDirectory);
+};
+
 // Builds an in-memory index by by running the static indexer action over
 // all commands in a compilation database. Indexing happens in the background.
 // FIXME: it should also persist its state on disk for fast start.
 // FIXME: it should watch for changes to files on disk.
 class BackgroundIndex : public SwapIndex {
 public:
+  // A factory function used to create IndexStorage units for each compilation
+  // database. Those databases are identified by the directory they are found
+  // in. The function must be thread-safe.
+  using IndexStorageFactory =
+      std::function<BackgroundIndexStorage *(llvm::StringRef)>;
   // FIXME: resource-dir injection should be hoisted somewhere common.
-  BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
+  BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir,
                   const FileSystemProvider &, ArrayRef<std::string> URISchemes,
+                  IndexStorageFactory IndexStorageCreator = nullptr,
                   size_t ThreadPoolSize = llvm::hardware_concurrency());
   ~BackgroundIndex(); // Blocks while the current task finishes.
 
@@ -59,25 +86,31 @@
 private:
   /// Given index results from a TU, only update files in \p FilesToUpdate.
   void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
-              const llvm::StringMap<FileDigest> &FilesToUpdate);
+              const llvm::StringMap<FileDigest> &FilesToUpdate,
+              BackgroundIndexStorage *IndexStorage);
 
   // configuration
   std::string ResourceDir;
   const FileSystemProvider &FSProvider;
   Context BackgroundContext;
   std::vector<std::string> URISchemes;
 
   // index state
-  llvm::Error index(tooling::CompileCommand);
+  llvm::Error index(tooling::CompileCommand,
+                    BackgroundIndexStorage *IndexStorage);
 
   FileSymbols IndexedSymbols;
   llvm::StringMap<FileDigest> IndexedFileDigests; // Key is absolute file path.
   std::mutex DigestsMu;
 
+  // index storage
+  IndexStorageFactory IndexStorageCreator;
+
   // queue management
   using Task = std::function<void()>;
   void run(); // Main loop executed by Thread. Runs tasks from Queue.
-  void enqueueLocked(tooling::CompileCommand Cmd);
+  void enqueueLocked(tooling::CompileCommand Cmd,
+                     BackgroundIndexStorage *IndexStorage);
   std::mutex QueueMu;
   unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks.
   std::condition_variable QueueCV;
Index: clangd/index/Background.cpp
===================================================================
--- clangd/index/Background.cpp
+++ clangd/index/Background.cpp
@@ -24,28 +24,139 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SHA1.h"
+
+#include <memory>
+#include <queue>
 #include <random>
 #include <string>
 
 using namespace llvm;
 namespace clang {
 namespace clangd {
 
+namespace {
+
+static BackgroundIndex::FileDigest digest(StringRef Content) {
+  return SHA1::hash({Content.bytes_begin(), Content.size()}); // Raw bytes.
+}
+
+static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
+                                                        FileID FID) {
+  bool BufferInvalid = false; // Out-parameter filled in by getBufferData.
+  const StringRef Buffer = SM.getBufferData(FID, &BufferInvalid);
+  if (!BufferInvalid)
+    return digest(Buffer);
+  return None; // No usable buffer for FID, so there is nothing to hash.
+}
+
+// Returns "<ShardRoot>/<basename(FilePath)><hex SHA1(FilePath)>.idx".
+std::string getShardPathFromFilePath(StringRef ShardRoot, StringRef FilePath) {
+  llvm::SmallString<128> ShardRootSS(ShardRoot);
+  sys::path::append(ShardRootSS, sys::path::filename(FilePath) +
+                                     toHex(digest(FilePath)) + ".idx");
+  return ShardRootSS.str(); // The joined path, not the bare ShardRoot.
+}
+
+// Uses disk as a storage for index shards. Creates a directory called
+// ".clangd-index/" under the path provided during construction.
+class DiskBackedIndexStorage : public BackgroundIndexStorage {
+  std::string DiskShardRoot;
+
+public:
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+    auto Buffer = MemoryBuffer::getFile(ShardPath);
+    if (!Buffer)
+      return nullptr;
+    if (auto I = readIndexFile(Buffer->get()->getBuffer()))
+      return llvm::make_unique<IndexFileIn>(std::move(*I));
+    else
+      elog("Error while reading shard {0}: {1}", ShardIdentifier,
+           I.takeError());
+    return nullptr;
+  }
+
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+    std::error_code EC;
+    llvm::raw_fd_ostream OS(ShardPath, EC);
+    if (EC)
+      return errorCodeToError(EC);
+    OS << Shard;
+    OS.close(); // Flush now so that write errors are visible below.
+    if (!OS.has_error())
+      return llvm::Error::success();
+    // ~raw_fd_ostream aborts on errors that were never cleared.
+    EC = OS.error();
+    OS.clear_error();
+    return errorCodeToError(EC);
+  }
+
+  // Sets DiskShardRoot to (Directory + ".clangd-index/"), the base directory
+  // for all shard files; create_directory() tolerates an existing directory.
+  explicit DiskBackedIndexStorage(llvm::StringRef Directory) {
+    llvm::SmallString<128> CDBDirectory(Directory);
+    sys::path::append(CDBDirectory, ".clangd-index/");
+    DiskShardRoot = CDBDirectory.str();
+    if (std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot))
+      elog("Failed to create {0}: {1}", DiskShardRoot, EC.message());
+  }
+};
+
+// Storage that keeps nothing: every call is only reported through vlog().
+class LoggingIndexStorage : public BackgroundIndexStorage {
+public:
+  static BackgroundIndexStorage *createLoggingStorage(StringRef CDBDirectory) {
+    static LoggingIndexStorage Instance; // Shared by all CDB directories.
+    vlog("Creating Logging Storage for: {0}", CDBDirectory);
+    return &Instance;
+  }
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    vlog("Called storeShard({0}, ...)", ShardIdentifier);
+    return llvm::Error::success();
+  }
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    vlog("called loadShard({0})", ShardIdentifier);
+    return nullptr;
+  }
+};
+
+// Returns Cmd.Filename made absolute by resolving it against Cmd.Directory.
+std::string getAbsoluteFilePath(const tooling::CompileCommand &Cmd) {
+  // An absolute filename needs no resolution.
+  if (sys::path::is_absolute(Cmd.Filename))
+    return Cmd.Filename;
+  // Relative filenames are resolved against the command's working directory.
+  SmallString<128> Resolved(Cmd.Directory);
+  sys::path::append(Resolved, Cmd.Filename);
+  return Resolved.str();
+}
+
+} // namespace
+
 BackgroundIndex::BackgroundIndex(Context BackgroundContext,
                                  StringRef ResourceDir,
                                  const FileSystemProvider &FSProvider,
                                  ArrayRef<std::string> URISchemes,
+                                 IndexStorageFactory IndexStorageCreator,
                                  size_t ThreadPoolSize)
     : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
       FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
-      URISchemes(URISchemes) {
+      URISchemes(URISchemes),
+      IndexStorageCreator(IndexStorageCreator
+                              ? std::move(IndexStorageCreator)
+                              : LoggingIndexStorage::createLoggingStorage) {
   assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
   while (ThreadPoolSize--) {
     ThreadPool.emplace_back([this] { run(); });
     // Set priority to low, since background indexing is a long running task we
     // do not want to eat up cpu when there are any other high priority threads.
     // FIXME: In the future we might want a more general way of handling this to
-    // support a tasks with various priorities.
+    // support tasks with various priorities.
     setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
   }
 }
@@ -97,9 +208,10 @@
 
 void BackgroundIndex::enqueue(StringRef Directory,
                               tooling::CompileCommand Cmd) {
+  BackgroundIndexStorage *IndexStorage = IndexStorageCreator(Directory);
   {
     std::lock_guard<std::mutex> Lock(QueueMu);
-    enqueueLocked(std::move(Cmd));
+    enqueueLocked(std::move(Cmd), IndexStorage);
   }
   QueueCV.notify_all();
 }
@@ -110,42 +222,31 @@
   // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
   // from disk and enqueue the commands asynchronously?
   auto Cmds = CDB.getAllCompileCommands();
+  BackgroundIndexStorage *IndexStorage = IndexStorageCreator(Directory);
   SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size()));
   std::mt19937 Generator(std::random_device{}());
   std::shuffle(Cmds.begin(), Cmds.end(), Generator);
   log("Enqueueing {0} commands for indexing from {1}", Cmds.size(), Directory);
   {
     std::lock_guard<std::mutex> Lock(QueueMu);
     for (auto &Cmd : Cmds)
-      enqueueLocked(std::move(Cmd));
+      enqueueLocked(std::move(Cmd), IndexStorage);
   }
   QueueCV.notify_all();
 }
 
-void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd,
+                                    BackgroundIndexStorage *IndexStorage) {
   Queue.push_back(Bind(
-      [this](tooling::CompileCommand Cmd) {
+      [this, IndexStorage](tooling::CompileCommand Cmd) {
         std::string Filename = Cmd.Filename;
         Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
-        if (auto Error = index(std::move(Cmd)))
+        if (auto Error = index(std::move(Cmd), IndexStorage))
           log("Indexing {0} failed: {1}", Filename, std::move(Error));
       },
       std::move(Cmd)));
 }
 
-static BackgroundIndex::FileDigest digest(StringRef Content) {
-  return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
-static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
-                                                        FileID FID) {
-  bool Invalid = false;
-  StringRef Content = SM.getBufferData(FID, &Invalid);
-  if (Invalid)
-    return None;
-  return digest(Content);
-}
-
 // Resolves URI to file paths with cache.
 class URIToFileCache {
 public:
@@ -179,7 +280,8 @@
 /// Given index results from a TU, only update files in \p FilesToUpdate.
 void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
                              RefSlab Refs,
-                             const StringMap<FileDigest> &FilesToUpdate) {
+                             const StringMap<FileDigest> &FilesToUpdate,
+                             BackgroundIndexStorage *IndexStorage) {
   // Partition symbols/references into files.
   struct File {
     DenseSet<const Symbol *> Symbols;
@@ -227,20 +329,34 @@
     for (const auto *R : F.second.Refs)
       Refs.insert(RefToIDs[R], *R);
 
+    auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
+    auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
+
+    auto Hash = FilesToUpdate.lookup(Path);
+    // We need to store shards before updating the index, since the latter
+    // consumes slabs.
+    // FIXME: Store Hash in the Shard.
+    if (IndexStorage) {
+      IndexFileOut Shard;
+      Shard.Symbols = SS.get();
+      Shard.Refs = RS.get();
+      if (auto Error = IndexStorage->storeShard(Path, Shard))
+        elog("Failed to store shard for {0}: {1}", Path, std::move(Error));
+    }
+
     std::lock_guard<std::mutex> Lock(DigestsMu);
     // This can override a newer version that is added in another thread,
     // if this thread sees the older version but finishes later. This should be
     // rare in practice.
-    IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
-    IndexedSymbols.update(Path,
-                          make_unique<SymbolSlab>(std::move(Syms).build()),
-                          make_unique<RefSlab>(std::move(Refs).build()));
+    IndexedFileDigests[Path] = Hash;
+    IndexedSymbols.update(Path, std::move(SS), std::move(RS));
   }
 }
 
 // Creates a filter to not collect index results from files with unchanged
 // digests.
-// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate.
+// \p FileDigests contains file digests for the current indexed files, and all
+// changed files will be added to \p FilesToUpdate.
 decltype(SymbolCollector::Options::FileFilter) createFileFilter(
     const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
     llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
@@ -269,16 +385,11 @@
   };
 }
 
-Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
+Error BackgroundIndex::index(tooling::CompileCommand Cmd,
+                             BackgroundIndexStorage *IndexStorage) {
   trace::Span Tracer("BackgroundIndex");
   SPAN_ATTACH(Tracer, "file", Cmd.Filename);
-  SmallString<128> AbsolutePath;
-  if (sys::path::is_absolute(Cmd.Filename)) {
-    AbsolutePath = Cmd.Filename;
-  } else {
-    AbsolutePath = Cmd.Directory;
-    sys::path::append(AbsolutePath, Cmd.Filename);
-  }
+  const std::string AbsolutePath = getAbsoluteFilePath(Cmd);
 
   auto FS = FSProvider.getFileSystem();
   auto Buf = FS->getBufferForFile(AbsolutePath);
@@ -342,7 +453,8 @@
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate);
+  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
+         IndexStorage);
   {
     // Make sure hash for the main file is always updated even if there is no
     // index data in it.
@@ -359,5 +471,19 @@
   return Error::success();
 }
 
+BackgroundIndexStorage *
+BackgroundIndexStorage::createDiskStorage(llvm::StringRef CDBDirectory) {
+  // One storage per CDB directory, created on first request and then reused;
+  // the mutex below serializes concurrent callers.
+  static llvm::StringMap<std::unique_ptr<BackgroundIndexStorage>> Storages;
+  static std::mutex StoragesMu;
+
+  std::lock_guard<std::mutex> Guard(StoragesMu);
+  std::unique_ptr<BackgroundIndexStorage> &Slot = Storages[CDBDirectory];
+  if (!Slot)
+    Slot = llvm::make_unique<DiskBackedIndexStorage>(CDBDirectory);
+  return Slot.get();
+}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to