Author: kadircet Date: Thu Nov 15 02:31:10 2018 New Revision: 346938 URL: http://llvm.org/viewvc/llvm-project?rev=346938&view=rev Log: Introduce shard storage to auto-index.
Reviewers: sammccall, ioeric Subscribers: ilya-biryukov, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D54269 Modified: clang-tools-extra/trunk/clangd/index/Background.cpp clang-tools-extra/trunk/clangd/index/Background.h clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Modified: clang-tools-extra/trunk/clangd/index/Background.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=346938&r1=346937&r2=346938&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Background.cpp Thu Nov 15 02:31:10 2018 @@ -26,26 +26,52 @@ #include "llvm/Support/SHA1.h" #include <random> #include <string> +#include <queue> +#include <memory> using namespace llvm; namespace clang { namespace clangd { -BackgroundIndex::BackgroundIndex(Context BackgroundContext, - StringRef ResourceDir, - const FileSystemProvider &FSProvider, - ArrayRef<std::string> URISchemes, - size_t ThreadPoolSize) +namespace { + +static BackgroundIndex::FileDigest digest(StringRef Content) { + return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); +} + +static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM, + FileID FID) { + bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (Invalid) + return None; + return digest(Content); +} + +llvm::SmallString<128> +getShardPathFromFilePath(llvm::SmallString<128> ShardRoot, + llvm::StringRef FilePath) { + sys::path::append(ShardRoot, sys::path::filename(FilePath) + + toHex(digest(FilePath)) + ".idx"); + return ShardRoot; +} + +} // namespace + +BackgroundIndex::BackgroundIndex( + Context BackgroundContext, StringRef ResourceDir, + const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes, + std::unique_ptr<ShardStorage> IndexShardStorage, size_t ThreadPoolSize) 
: SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir), FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)), - URISchemes(URISchemes) { + URISchemes(URISchemes), IndexShardStorage(std::move(IndexShardStorage)) { assert(ThreadPoolSize > 0 && "Thread pool size can't be zero."); while (ThreadPoolSize--) { ThreadPool.emplace_back([this] { run(); }); // Set priority to low, since background indexing is a long running task we // do not want to eat up cpu when there are any other high priority threads. // FIXME: In the future we might want a more general way of handling this to - // support a tasks with various priorities. + // support tasks with various priorities. setThreadPriority(ThreadPool.back(), ThreadPriority::Low); } } @@ -123,6 +149,12 @@ void BackgroundIndex::enqueueAll(StringR } void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) { + // Initialize storage to project root. Since Initialize is no-op for multiple + // calls we can simply call it for each file. + if (IndexShardStorage && !IndexShardStorage->initialize(Cmd.Directory)) { + elog("Failed to initialize shard storage"); + IndexShardStorage.reset(); + } Queue.push_back(Bind( [this](tooling::CompileCommand Cmd) { std::string Filename = Cmd.Filename; @@ -133,19 +165,6 @@ void BackgroundIndex::enqueueLocked(tool std::move(Cmd))); } -static BackgroundIndex::FileDigest digest(StringRef Content) { - return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); -} - -static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM, - FileID FID) { - bool Invalid = false; - StringRef Content = SM.getBufferData(FID, &Invalid); - if (Invalid) - return None; - return digest(Content); -} - // Resolves URI to file paths with cache. 
class URIToFileCache { public: @@ -227,14 +246,25 @@ void BackgroundIndex::update(StringRef M for (const auto *R : F.second.Refs) Refs.insert(RefToIDs[R], *R); + auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build()); + auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build()); + + auto Hash = FilesToUpdate.lookup(Path); + // Put shards into storage for subsequent use. + // FIXME: Store Hash in the Shard. + if (IndexShardStorage) { + IndexFileOut Shard; + Shard.Symbols = SS.get(); + Shard.Refs = RS.get(); + IndexShardStorage->storeShard(Path, Shard); + } + std::lock_guard<std::mutex> Lock(DigestsMu); // This can override a newer version that is added in another thread, // if this thread sees the older version but finishes later. This should be // rare in practice. - IndexedFileDigests[Path] = FilesToUpdate.lookup(Path); - IndexedSymbols.update(Path, - make_unique<SymbolSlab>(std::move(Syms).build()), - make_unique<RefSlab>(std::move(Refs).build())); + IndexedFileDigests[Path] = Hash; + IndexedSymbols.update(Path, std::move(SS), std::move(RS)); } } @@ -293,6 +323,18 @@ Error BackgroundIndex::index(tooling::Co if (IndexedFileDigests.lookup(AbsolutePath) == Hash) { vlog("No need to index {0}, already up to date", AbsolutePath); return Error::success(); + } else if (IndexShardStorage) { // Check if shard storage has the index. + auto Shard = IndexShardStorage->retrieveShard(AbsolutePath, Hash); + if (Shard) { + // FIXME: We might still want to re-index headers. 
+ IndexedFileDigests[AbsolutePath] = Hash; + IndexedSymbols.update( + AbsolutePath, make_unique<SymbolSlab>(std::move(*Shard->Symbols)), + make_unique<RefSlab>(std::move(*Shard->Refs))); + + vlog("Loaded {0} from storage", AbsolutePath); + return Error::success(); + } } DigestsSnapshot = IndexedFileDigests; @@ -359,5 +401,59 @@ Error BackgroundIndex::index(tooling::Co return Error::success(); } +llvm::Expected<IndexFileIn> +DiskShardStorage::retrieveShard(llvm::StringRef ShardIdentifier, + FileDigest Hash) const { + assert(Initialized && "Not initialized?"); + llvm::SmallString<128> ShardPath; + { + std::lock_guard<std::mutex> Lock(DiskShardRootMu); + ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + } + auto Buffer = MemoryBuffer::getFile(ShardPath); + if (!Buffer) { + elog("Couldn't retrieve {0}: {1}", ShardPath, Buffer.getError().message()); + return llvm::make_error<llvm::StringError>(Buffer.getError()); + } + // FIXME: Change readIndexFile to also look at the Hash of the source that + // generated the index and skip if there is a mismatch. 
+ return readIndexFile(Buffer->get()->getBuffer()); +} + +bool DiskShardStorage::storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const { + assert(Initialized && "Not initialized?"); + llvm::SmallString<128> ShardPath; + { + std::lock_guard<std::mutex> Lock(DiskShardRootMu); + ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + } + std::error_code EC; + llvm::raw_fd_ostream OS(ShardPath, EC); + if (EC) { + elog("Failed to open {0} for writing: {1}", ShardPath, EC.message()); + return false; + } + OS << Shard; + return true; +} + +bool DiskShardStorage::initialize(llvm::StringRef Directory) { + if (Initialized) + return true; + std::lock_guard<std::mutex> Lock(DiskShardRootMu); + DiskShardRoot = Directory; + sys::path::append(DiskShardRoot, ".clangd-index/"); + if (!llvm::sys::fs::exists(DiskShardRoot)) { + std::error_code OK; + std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot); + if (EC != OK) { + elog("Failed to create {0}: {1}", DiskShardRoot, EC.message()); + return Initialized = false; + } + } + return Initialized = true; +} + } // namespace clangd } // namespace clang Modified: clang-tools-extra/trunk/clangd/index/Background.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=346938&r1=346937&r2=346938&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.h (original) +++ clang-tools-extra/trunk/clangd/index/Background.h Thu Nov 15 02:31:10 2018 @@ -14,6 +14,7 @@ #include "FSProvider.h" #include "index/FileIndex.h" #include "index/Index.h" +#include "index/Serialization.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/SHA1.h" @@ -27,6 +28,17 @@ namespace clang { namespace clangd { +// Base class for Shard Storage operations. See DiskShardStorage for more info. 
+class ShardStorage { +public: + using FileDigest = decltype(llvm::SHA1::hash({})); + virtual bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const = 0; + virtual llvm::Expected<IndexFileIn> + retrieveShard(llvm::StringRef ShardIdentifier, FileDigest Hash) const = 0; + virtual bool initialize(llvm::StringRef Directory) = 0; +}; + // Builds an in-memory index by by running the static indexer action over // all commands in a compilation database. Indexing happens in the background. // FIXME: it should also persist its state on disk for fast start. @@ -34,8 +46,9 @@ namespace clangd { class BackgroundIndex : public SwapIndex { public: // FIXME: resource-dir injection should be hoisted somewhere common. - BackgroundIndex(Context BackgroundContext, StringRef ResourceDir, + BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir, const FileSystemProvider &, ArrayRef<std::string> URISchemes, + std::unique_ptr<ShardStorage> IndexShardStorage = nullptr, size_t ThreadPoolSize = llvm::hardware_concurrency()); ~BackgroundIndex(); // Blocks while the current task finishes. @@ -66,6 +79,7 @@ private: const FileSystemProvider &FSProvider; Context BackgroundContext; std::vector<std::string> URISchemes; + std::unique_ptr<ShardStorage> IndexShardStorage; // index state llvm::Error index(tooling::CompileCommand); @@ -86,6 +100,30 @@ private: std::vector<std::thread> ThreadPool; // FIXME: Abstract this away. }; +// Handles storage and retrieval of index shards into disk. Requires Initialize +// to be called before storing or retrieval. Creates a directory called +// ".clangd-index/" under the path provided during initialize. This class is +// thread-safe. +class DiskShardStorage : public ShardStorage { + mutable std::mutex DiskShardRootMu; + llvm::SmallString<128> DiskShardRoot; + bool Initialized; + +public: + // Retrieves the shard if found and contents are consistent with the provided + // Hash. 
+ llvm::Expected<IndexFileIn> retrieveShard(llvm::StringRef ShardIdentifier, + FileDigest Hash) const; + + // Stores given shard with name ShardIdentifier under initialized directory. + bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const; + + // Initializes DiskShardRoot to (Directory + ".clangd-index/") which is the + // base directory for all shard files. After the initialization succeeds all + // subsequent calls are no-ops. + bool initialize(llvm::StringRef Directory); +}; + } // namespace clangd } // namespace clang Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=346938&r1=346937&r2=346938&view=diff ============================================================================== --- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original) +++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Thu Nov 15 02:31:10 2018 @@ -78,5 +78,79 @@ TEST(BackgroundIndexTest, IndexTwoFiles) FileURI("unittest:///root/B.cc")})); } +TEST(BackgroundIndexTest, ShardStorageTest) { + class MemoryShardStorage : public ShardStorage { + mutable std::mutex StorageMu; + llvm::StringMap<std::string> &Storage; + size_t& CacheHits; + + public: + MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits) + : Storage(Storage), CacheHits(CacheHits) {} + + bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const { + std::lock_guard<std::mutex> Lock(StorageMu); + std::string &str = Storage[ShardIdentifier]; + llvm::raw_string_ostream OS(str); + OS << Shard; + OS.flush(); + return true; + } + llvm::Expected<IndexFileIn> retrieveShard(llvm::StringRef ShardIdentifier, + FileDigest Hash) const { + std::lock_guard<std::mutex> Lock(StorageMu); + if (Storage.find(ShardIdentifier) == Storage.end()) + return llvm::make_error<llvm::StringError>( + "Shard not found.", 
llvm::inconvertibleErrorCode()); + auto IndexFile = readIndexFile(Storage[ShardIdentifier]); + if(!IndexFile) + return IndexFile; + CacheHits++; + return IndexFile; + } + bool initialize(llvm::StringRef Directory) { return true; } + }; + MockFSProvider FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }"; + llvm::StringMap<std::string> Storage; + size_t CacheHits = 0; + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + { + BackgroundIndex Idx( + Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, + /*IndexShardStorage=*/ + llvm::make_unique<MemoryShardStorage>(Storage, CacheHits)); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + EXPECT_EQ(CacheHits, 0U); + EXPECT_EQ(Storage.size(), 2U); + EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); + EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); + + { + BackgroundIndex Idx( + Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, + /*IndexShardStorage=*/ + llvm::make_unique<MemoryShardStorage>(Storage, CacheHits)); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + EXPECT_EQ(CacheHits, 1U); + EXPECT_EQ(Storage.size(), 2U); + EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); + EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); + // B_CC is dropped as we don't collect symbols from A.h in this compilation. +} + } // namespace clangd } // namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits