ioeric created this revision. ioeric added reviewers: ilya-biryukov, hokein. Herald added subscribers: cfe-commits, arphaman, jkorous, MaskRay.
Currently, the dynamic index collects symbols for the entire TU for each open/active file. When a static index is enabled, this can be wasteful because (intuitively) most files in a TU should remain the same. Duplicate symbols in both the dynamic index and the static index can lead to unnecessary memory usage, and merging duplicated symbols can contribute to latency. To avoid indexing the same files in both the dynamic index and the static index, we also collect hash values/digests for all headers in the TU at indexing time and expose an interface from the index to get digests for files. The dynamic/file index can then simply drop symbols from files whose digests are the same as those in the static index. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D50331 Files: clangd/ClangdServer.cpp clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp clangd/index/FileIndex.cpp clangd/index/FileIndex.h clangd/index/Index.cpp clangd/index/Index.h clangd/index/MemIndex.cpp clangd/index/MemIndex.h clangd/index/Merge.cpp clangd/index/SymbolCollector.cpp clangd/index/SymbolCollector.h clangd/index/SymbolYAML.cpp clangd/tool/ClangdMain.cpp unittests/clangd/CodeCompleteTests.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp
Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -679,73 +679,51 @@ } TEST_F(SymbolCollectorTest, YAMLConversions) { - const std::string YAML1 = R"( ---- -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 -Name: 'Foo1' -Scope: 'clang::' -SymInfo: - Kind: Function - Lang: Cpp -CanonicalDeclaration: - FileURI: file:///path/foo.h - Start: - Line: 1 - Column: 0 - End: - Line: 1 - Column: 1 -IsIndexedForCodeCompletion: true -Detail: - Documentation: 'Foo doc' - ReturnType: 'int' + const std::string YAML = R"(--- +Symbols: + - ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 + Name: 'Foo1' + Scope: 'clang::' + SymInfo: + Kind: Function + Lang: Cpp + CanonicalDeclaration: + FileURI: file:///path/foo.h + Start: + Line: 1 + Column: 0 + End: + Line: 1 + Column: 1 + IsIndexedForCodeCompletion: true + Detail: + Documentation: 'Foo doc' + ReturnType: 'int' +FileDigests: + - URI: 'file:///a' + Digest: 123 ... )"; - const std::string YAML2 = R"( ---- -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 -Name: 'Foo2' -Scope: 'clang::' -SymInfo: - Kind: Function - Lang: Cpp -CanonicalDeclaration: - FileURI: file:///path/bar.h - Start: - Line: 1 - Column: 0 - End: - Line: 1 - Column: 1 -IsIndexedForCodeCompletion: false -Signature: '-sig' -CompletionSnippetSuffix: '-snippet' -... 
-)"; - - auto Symbols1 = symbolsFromYAML(YAML1); - - EXPECT_THAT(Symbols1, - UnorderedElementsAre(AllOf(QName("clang::Foo1"), Labeled("Foo1"), - Doc("Foo doc"), ReturnType("int"), - DeclURI("file:///path/foo.h"), - ForCodeCompletion(true)))); - auto Symbols2 = symbolsFromYAML(YAML2); - EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf( - QName("clang::Foo2"), Labeled("Foo2-sig"), - Not(HasReturnType()), DeclURI("file:///path/bar.h"), - ForCodeCompletion(false)))); - - std::string ConcatenatedYAML; - { - llvm::raw_string_ostream OS(ConcatenatedYAML); - SymbolsToYAML(Symbols1, OS); - SymbolsToYAML(Symbols2, OS); - } - auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML); - EXPECT_THAT(ConcatenatedSymbols, - UnorderedElementsAre(QName("clang::Foo1"), - QName("clang::Foo2"))); + auto Symbols = symbolsFromYAML(YAML); + + auto SymbolsExpected = [](const SymbolSlab &Symbols) { + EXPECT_THAT(Symbols, + UnorderedElementsAre( + AllOf(QName("clang::Foo1"), Labeled("Foo1"), Doc("Foo doc"), + ReturnType("int"), DeclURI("file:///path/foo.h"), + ForCodeCompletion(true)))); + auto D = Symbols.fileDigests().find("file:///a"); + assert(D != Symbols.fileDigests().end()); + EXPECT_EQ(D->second, 123ull); + }; + SymbolsExpected(Symbols); + + // Convert symbols to yaml again and back. + std::string Yout; + llvm::raw_string_ostream OS(Yout); + SymbolsToYAML(Symbols, OS); + OS.flush(); + SymbolsExpected(symbolsFromYAML(Yout)); } TEST_F(SymbolCollectorTest, IncludeHeaderSameAsFileURI) { Index: unittests/clangd/IndexTests.cpp =================================================================== --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -54,14 +54,14 @@ struct SlabAndPointers { SymbolSlab Slab; - std::vector<const Symbol *> Pointers; + MemIndex::SymbolSlabView View; }; // Create a slab of symbols with the given qualified names as both IDs and // names. The life time of the slab is managed by the returned shared pointer. 
// If \p WeakSymbols is provided, it will be pointed to the managed object in // the returned shared pointer. -std::shared_ptr<std::vector<const Symbol *>> +std::shared_ptr<MemIndex::SymbolSlabView> generateSymbols(std::vector<std::string> QualifiedNames, std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) { SymbolSlab::Builder Slab; @@ -71,16 +71,15 @@ auto Storage = std::make_shared<SlabAndPointers>(); Storage->Slab = std::move(Slab).build(); for (const auto &Sym : Storage->Slab) - Storage->Pointers.push_back(&Sym); + Storage->View.Pointers.push_back(&Sym); if (WeakSymbols) *WeakSymbols = Storage; - auto *Pointers = &Storage->Pointers; - return {std::move(Storage), Pointers}; + return {std::move(Storage), &Storage->View}; } // Create a slab of symbols with IDs and names [Begin, End], otherwise identical // to the `generateSymbols` above. -std::shared_ptr<std::vector<const Symbol *>> +std::shared_ptr<MemIndex::SymbolSlabView> generateNumSymbols(int Begin, int End, std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) { std::vector<std::string> Names; @@ -124,9 +123,9 @@ // Inject some duplicates and make sure we only match the same symbol once. 
auto Sym = symbol("7"); - Symbols->push_back(&Sym); - Symbols->push_back(&Sym); - Symbols->push_back(&Sym); + Symbols->Pointers.push_back(&Sym); + Symbols->Pointers.push_back(&Sym); + Symbols->Pointers.push_back(&Sym); FuzzyFindRequest Req; Req.Query = "7"; Index: unittests/clangd/FileIndexTests.cpp =================================================================== --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -11,10 +11,13 @@ #include "TestFS.h" #include "TestTU.h" #include "index/FileIndex.h" +#include "index/Index.h" +#include "index/MemIndex.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/PCHContainerOperations.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/Hashing.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -40,9 +43,9 @@ } std::vector<std::string> -getSymbolNames(const std::vector<const Symbol *> &Symbols) { +getSymbolNames(const MemIndex::SymbolSlabView &Symbols) { std::vector<std::string> Names; - for (const Symbol *Sym : Symbols) + for (const Symbol *Sym : Symbols.Pointers) Names.push_back(Sym->Name); return Names; } @@ -270,6 +273,38 @@ UnorderedElementsAre("ns_in_header", "ns_in_header::func_in_header")); } +TEST(FileIndexTest, DropSymbolFromIndexedFileWithSameDigest) { + StringRef URI = "unittest:///foo.h"; + StringRef Header = "namespace ns { class Foo {}; }"; + + SymbolSlab::Builder WithSameDigest; + WithSameDigest.addFileDigest(URI, llvm::hash_value(Header)); + auto MemIdx = MemIndex::build(std::move(WithSameDigest).build()); + + FileIndex M(/*URISchemes=*/{"unittest"}, MemIdx.get()); + update(M, "foo", Header); + FuzzyFindRequest Req; + Req.Query = ""; + Req.Scopes = {"ns::"}; + EXPECT_THAT(match(M, Req), UnorderedElementsAre()); +} + +TEST(FileIndexTest, NoDropSymbolFromFileWithDifferentDigest) { + StringRef URI = "unittest:///foo.h"; + StringRef Header = "namespace ns { class Foo {}; }"; + + SymbolSlab::Builder 
WithDifferentDigest; + WithDifferentDigest.addFileDigest(URI, llvm::hash_value(Header) + 1); + auto MemIdx = MemIndex::build(std::move(WithDifferentDigest).build()); + + FileIndex M(/*URISchemes=*/{"unittest"}, MemIdx.get()); + update(M, "foo", Header); + FuzzyFindRequest Req; + Req.Query = ""; + Req.Scopes = {"ns::"}; + EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns::Foo")); +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/CodeCompleteTests.cpp =================================================================== --- unittests/clangd/CodeCompleteTests.cpp +++ unittests/clangd/CodeCompleteTests.cpp @@ -893,6 +893,11 @@ void lookup(const LookupRequest &, llvm::function_ref<void(const Symbol &)>) const override {} + void + fileDigests(const FileDigestRequest &, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)>) + const override {} + const std::vector<FuzzyFindRequest> allRequests() const { return Requests; } private: Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -40,12 +40,7 @@ llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; return nullptr; } - auto Slab = symbolsFromYAML(Buffer.get()->getBuffer()); - SymbolSlab::Builder SymsBuilder; - for (auto Sym : Slab) - SymsBuilder.insert(Sym); - - return MemIndex::build(std::move(SymsBuilder).build()); + return MemIndex::build(symbolsFromYAML(Buffer.get()->getBuffer())); } } // namespace Index: clangd/index/SymbolYAML.cpp =================================================================== --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -13,18 +13,34 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <vector> + +namespace clangd_yaml_internal { +struct NormalizedDigest { + llvm::StringRef URI; + size_t Digest; +}; + +struct NormalizedSymbolSlab { + 
std::vector<clang::clangd::Symbol> Syms; + std::vector<NormalizedDigest> Digests; +}; +} // namespace clangd_yaml_internal LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(clangd_yaml_internal::NormalizedDigest) namespace llvm { namespace yaml { using clang::clangd::Symbol; using clang::clangd::SymbolID; using clang::clangd::SymbolLocation; +using clang::clangd::SymbolSlab; using clang::index::SymbolInfo; -using clang::index::SymbolLanguage; using clang::index::SymbolKind; +using clang::index::SymbolLanguage; // Helper to (de)serialize the SymbolID. We serialize it as a hex string. struct NormalizedSymbolID { @@ -162,6 +178,21 @@ } }; +template <> struct MappingTraits<clangd_yaml_internal::NormalizedDigest> { + static void mapping(IO &IO, clangd_yaml_internal::NormalizedDigest &Digest) { + IO.mapRequired("URI", Digest.URI); + IO.mapRequired("Digest", Digest.Digest); + } +}; + +template <> struct MappingTraits<clangd_yaml_internal::NormalizedSymbolSlab> { + static void mapping(IO &IO, + clangd_yaml_internal::NormalizedSymbolSlab &NSlab) { + IO.mapRequired("Symbols", NSlab.Syms); + IO.mapRequired("FileDigests", NSlab.Digests); + } +}; + } // namespace yaml } // namespace llvm @@ -172,12 +203,14 @@ // Store data of pointer fields (excl. `StringRef`) like `Detail`. llvm::BumpPtrAllocator Arena; llvm::yaml::Input Yin(YAMLContent, &Arena); - std::vector<Symbol> S; - Yin >> S; + clangd_yaml_internal::NormalizedSymbolSlab NSlab; + Yin >> NSlab; SymbolSlab::Builder Syms; - for (auto &Sym : S) + for (auto &Sym : NSlab.Syms) Syms.insert(Sym); + for (auto &Digest : NSlab.Digests) + Syms.addFileDigest(Digest.URI, Digest.Digest); return std::move(Syms).build(); } @@ -191,8 +224,12 @@ void SymbolsToYAML(const SymbolSlab& Symbols, llvm::raw_ostream &OS) { llvm::yaml::Output Yout(OS); - for (Symbol S : Symbols) // copy: Yout<< requires mutability. 
- Yout << S; + clangd_yaml_internal::NormalizedSymbolSlab NSlab; + for (auto &Sym : Symbols) + NSlab.Syms.push_back(Sym); + for (auto &Digest : Symbols.fileDigests()) + NSlab.Digests.push_back({Digest.first, Digest.second}); + Yout << NSlab; } std::string SymbolToYAML(Symbol Sym) { Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -11,6 +11,8 @@ #include "Index.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" @@ -91,6 +93,7 @@ private: const Symbol *addDeclaration(const NamedDecl &, SymbolID); void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); + void addFileDigest(StringRef URI, const SourceManager &SM, FileID FID); // All Symbols collected from the AST. SymbolSlab::Builder Symbols; @@ -108,6 +111,8 @@ // canonical by clang but should not be considered canonical in the index // unless it's a definition. llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls; + // URIs whose digests have been recorded. Reset on finish(). 
+ llvm::DenseSet<StringRef> Digested; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -15,12 +15,14 @@ #include "../SourceCode.h" #include "../URI.h" #include "CanonicalIncludes.h" +#include "SymbolYAML.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/USRGeneration.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -327,17 +329,24 @@ const NamedDecl &OriginalDecl = *cast<NamedDecl>(ASTNode.OrigD); const Symbol *BasicSymbol = Symbols.find(ID); + FileID FID = SM.getFileID(findNameLoc(ND)); if (!BasicSymbol) // Regardless of role, ND is the canonical declaration. BasicSymbol = addDeclaration(*ND, std::move(ID)); - else if (isPreferredDeclaration(OriginalDecl, Roles)) + else if (isPreferredDeclaration(OriginalDecl, Roles)) { // If OriginalDecl is preferred, replace the existing canonical // declaration (e.g. a class forward declaration). There should be at most // one duplicate as we expect to see only one preferred declaration per // TU, because in practice they are definitions. 
BasicSymbol = addDeclaration(OriginalDecl, std::move(ID)); + FID = SM.getFileID(findNameLoc(&OriginalDecl)); + } + + if (!BasicSymbol->CanonicalDeclaration.FileURI.empty()) + addFileDigest(BasicSymbol->CanonicalDeclaration.FileURI, SM, FID); if (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) addDefinition(OriginalDecl, *BasicSymbol); + return true; } @@ -386,10 +395,14 @@ S.IsIndexedForCodeCompletion = true; S.SymInfo = index::getSymbolInfoForMacro(*MI); std::string FileURI; - if (auto DeclLoc = getTokenLocation(MI->getDefinitionLoc(), SM, Opts, - PP->getLangOpts(), FileURI)) + SourceLocation MLoc = MI->getDefinitionLoc(); + if (auto DeclLoc = + getTokenLocation(MLoc, SM, Opts, PP->getLangOpts(), FileURI)) S.CanonicalDeclaration = *DeclLoc; + if (!S.CanonicalDeclaration.FileURI.empty()) + addFileDigest(S.CanonicalDeclaration.FileURI, SM, SM.getFileID(MLoc)); + CodeCompletionResult SymbolCompletion(Name); const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro( *PP, *CompletionAllocator, *CompletionTUInfo); @@ -439,6 +452,8 @@ } ReferencedDecls.clear(); ReferencedMacros.clear(); + CanonicalDecls.clear(); + Digested.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, @@ -514,5 +529,15 @@ Symbols.insert(S); } +void SymbolCollector::addFileDigest(StringRef URI, const SourceManager &SM, + FileID FID) { + if (FID.isInvalid() || !Digested.insert(URI).second) + return; // Ignore if file is invalid or already recorded. 
+ bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (!Invalid) + Symbols.addFileDigest(URI, llvm::hash_value(Content)); +} + } // namespace clangd } // namespace clang Index: clangd/index/Merge.cpp =================================================================== --- clangd/index/Merge.cpp +++ clangd/index/Merge.cpp @@ -74,6 +74,20 @@ Callback(*Sym); } + void fileDigests(const FileDigestRequest &Req, + llvm::function_ref<void(StringRef URI, FileDigest Digest)> + Callback) const override { + llvm::StringMap<FileDigest> Digests; + // Query the static index first so that dynamic index can override digests + // for the common files later. + Static->fileDigests(Req, + [&](StringRef U, FileDigest D) { Digests[U] = D; }); + Dynamic->fileDigests(Req, + [&](StringRef U, FileDigest D) { Digests[U] = D; }); + for (const auto &Digest : Digests) + Callback(Digest.first(), Digest.second); + } + private: const SymbolIndex *Dynamic, *Static; }; Index: clangd/index/MemIndex.h =================================================================== --- clangd/index/MemIndex.h +++ clangd/index/MemIndex.h @@ -20,23 +20,32 @@ /// can be easily managed in memory. class MemIndex : public SymbolIndex { public: - /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain - /// accessible as long as `Symbols` is kept alive. - void build(std::shared_ptr<std::vector<const Symbol *>> Symbols); + /// A view of a set symbols. This does not own the underlying data of symbols. + struct SymbolSlabView { + std::vector<const Symbol *> Pointers; + SymbolSlab::FileDigests Digests; + }; + /// \brief (Re-)Build index for `Symbols`. All symbol pointers and references + /// must remain accessible as long as `Symbols` is kept alive. + void build(std::shared_ptr<SymbolSlabView> Symbols); /// \brief Build index from a symbol slab. 
static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab); bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref<void(const Symbol &)> Callback) const override; - virtual void - lookup(const LookupRequest &Req, - llvm::function_ref<void(const Symbol &)> Callback) const override; + void lookup(const LookupRequest &Req, + llvm::function_ref<void(const Symbol &)> Callback) const override; + + void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)> + Callback) const override; private: - std::shared_ptr<std::vector<const Symbol *>> Symbols; + std::shared_ptr<SymbolSlabView> Symbols; // Index is a set of symbols that are deduplicated by symbol IDs. // FIXME: build smarter index structure. llvm::DenseMap<SymbolID, const Symbol *> Index; Index: clangd/index/MemIndex.cpp =================================================================== --- clangd/index/MemIndex.cpp +++ clangd/index/MemIndex.cpp @@ -15,9 +15,9 @@ namespace clang { namespace clangd { -void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) { +void MemIndex::build(std::shared_ptr<SymbolSlabView> Syms) { llvm::DenseMap<SymbolID, const Symbol *> TempIndex; - for (const Symbol *Sym : *Syms) + for (const Symbol *Sym : Syms->Pointers) TempIndex[Sym->ID] = Sym; // Swap out the old symbols and index. 
@@ -64,24 +64,39 @@ void MemIndex::lookup(const LookupRequest &Req, llvm::function_ref<void(const Symbol &)> Callback) const { + std::lock_guard<std::mutex> Lock(Mutex); for (const auto &ID : Req.IDs) { auto I = Index.find(ID); if (I != Index.end()) Callback(*I->second); } } +void MemIndex::fileDigests( + const FileDigestRequest &Req, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)> Callback) + const { + std::lock_guard<std::mutex> Lock(Mutex); + for (auto &U : Req.URIs) { + auto I = Symbols->Digests.find(StringRef(U.getKey())); + if (I != Symbols->Digests.end()) + Callback(I->first, I->second); + } +} + std::unique_ptr<SymbolIndex> MemIndex::build(SymbolSlab Slab) { struct Snapshot { SymbolSlab Slab; - std::vector<const Symbol *> Pointers; + SymbolSlabView View; }; auto Snap = std::make_shared<Snapshot>(); Snap->Slab = std::move(Slab); for (auto &Sym : Snap->Slab) - Snap->Pointers.push_back(&Sym); - auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap), - &Snap->Pointers); + Snap->View.Pointers.push_back(&Sym); + const auto &SlabDigests = Snap->Slab.fileDigests(); + Snap->View.Digests.insert(SlabDigests.begin(), SlabDigests.end()); + + auto S = std::shared_ptr<SymbolSlabView>(std::move(Snap), &Snap->View); auto MemIdx = llvm::make_unique<MemIndex>(); MemIdx->build(std::move(S)); return std::move(MemIdx); Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -12,12 +12,17 @@ #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include <array> +#include <cstdint> #include <string> namespace clang { @@ -233,19 +238,25 @@ // and signals -> 
score, so it can be reused for Sema completions. double quality(const Symbol &S); +using FileDigest = uint64_t; + // An immutable symbol container that stores a set of symbols. // The container will maintain the lifetime of the symbols. class SymbolSlab { public: using const_iterator = std::vector<Symbol>::const_iterator; using iterator = const_iterator; + using FileDigests = llvm::DenseMap<llvm::StringRef, FileDigest>; SymbolSlab() = default; const_iterator begin() const { return Symbols.begin(); } const_iterator end() const { return Symbols.end(); } const_iterator find(const SymbolID &SymID) const; + // Note that the string reference keys do not own the underlying string data. + const FileDigests &fileDigests() const { return Digests; } + size_t size() const { return Symbols.size(); } // Estimates the total memory usage. size_t bytes() const { @@ -267,6 +278,9 @@ return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; } + // Adds the digest for the header file with \p URI. + void addFileDigest(llvm::StringRef URI, size_t Digest); + // Consumes the builder to finalize the slab. SymbolSlab build() &&; @@ -277,14 +291,19 @@ std::vector<Symbol> Symbols; // Values are indices into Symbols vector. llvm::DenseMap<SymbolID, size_t> SymbolIndex; + FileDigests Digests; }; private: - SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) - : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} + SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols, + FileDigests Digests) + : Arena(std::move(Arena)), Symbols(std::move(Symbols)), + Digests(std::move(Digests)) {} llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. + // Digests of all header files in TU. 
+ FileDigests Digests; }; struct FuzzyFindRequest { @@ -312,6 +331,10 @@ llvm::DenseSet<SymbolID> IDs; }; +struct FileDigestRequest { + llvm::StringSet<> URIs; +}; + /// \brief Interface for symbol indexes that can be used for searching or /// matching symbols among a set of symbols based on names or unique IDs. class SymbolIndex { @@ -334,6 +357,13 @@ lookup(const LookupRequest &Req, llvm::function_ref<void(const Symbol &)> Callback) const = 0; + /// Gets digests of all files in the index. If a file is not indexed, no + /// digest will be returned. + virtual void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)>) + const = 0; + // FIXME: add interfaces for more index use cases: // - getAllOccurrences(SymbolID); }; Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" @@ -76,17 +77,20 @@ return Symbols.end(); } +// Returns a reference to the same string as \p V owned by the arena. +static StringRef InternStr(DenseSet<StringRef> &Strings, + BumpPtrAllocator &Arena, StringRef &V) { + auto R = Strings.insert(V); + if (R.second) { // New entry added to the table, copy the string. + *R.first = V.copy(Arena); + } + return *R.first; +} + // Copy the underlying data of the symbol into the owned arena. static void own(Symbol &S, DenseSet<StringRef> &Strings, BumpPtrAllocator &Arena) { - // Intern replaces V with a reference to the same string owned by the arena. - auto Intern = [&](StringRef &V) { - auto R = Strings.insert(V); - if (R.second) { // New entry added to the table, copy the string. 
- *R.first = V.copy(Arena); - } - V = *R.first; - }; + auto Intern = [&](StringRef &V) { V = InternStr(Strings, Arena, V); }; // We need to copy every StringRef field onto the arena. Intern(S.Name); @@ -121,6 +125,10 @@ } } +void SymbolSlab::Builder::addFileDigest(llvm::StringRef URI, size_t Digest) { + Digests[InternStr(Strings, Arena, URI)] = Digest; +} + SymbolSlab SymbolSlab::Builder::build() && { Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. // Sort symbols so the slab can binary search over them. @@ -131,7 +139,10 @@ DenseSet<StringRef> Strings; for (auto &S : Symbols) own(S, Strings, NewArena); - return SymbolSlab(std::move(NewArena), std::move(Symbols)); + for (auto &D : Digests) + D.getFirst() = InternStr(Strings, NewArena, D.getFirst()); + return SymbolSlab(std::move(NewArena), std::move(Symbols), + std::move(Digests)); } } // namespace clangd Index: clangd/index/FileIndex.h =================================================================== --- clangd/index/FileIndex.h +++ clangd/index/FileIndex.h @@ -20,6 +20,7 @@ #include "Index.h" #include "MemIndex.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/DenseMap.h" namespace clang { namespace clangd { @@ -44,7 +45,7 @@ void update(PathRef Path, std::unique_ptr<SymbolSlab> Slab); // The shared_ptr keeps the symbols alive - std::shared_ptr<std::vector<const Symbol *>> allSymbols(); + std::shared_ptr<MemIndex::SymbolSlabView> allSymbols(); private: mutable std::mutex Mutex; @@ -56,9 +57,13 @@ /// \brief This manages symbls from files and an in-memory index on all symbols. class FileIndex : public SymbolIndex { public: - /// If URISchemes is empty, the default schemes in SymbolCollector will be + /// If \p URISchemes is empty, the default schemes in SymbolCollector will be /// used. - FileIndex(std::vector<std::string> URISchemes = {}); + /// \p OverlayedIndex is optional underlying index overlayed by the file + /// index. 
If an overlayed index is provided, this will ignore symbols from + /// headers whose file digests are the same. + FileIndex(std::vector<std::string> URISchemes = {}, + const SymbolIndex *OverlayedIndex = nullptr); /// \brief Update symbols in \p Path with symbols in \p AST. If \p AST is /// nullptr, this removes all symbols in the file. @@ -73,10 +78,16 @@ void lookup(const LookupRequest &Req, llvm::function_ref<void(const Symbol &)> Callback) const override; + void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)> + Callback) const override; + private: FileSymbols FSymbols; MemIndex Index; std::vector<std::string> URISchemes; + const SymbolIndex *OverlayedIndex; }; /// Retrieves namespace and class level symbols in \p AST. Index: clangd/index/FileIndex.cpp =================================================================== --- clangd/index/FileIndex.cpp +++ clangd/index/FileIndex.cpp @@ -8,9 +8,14 @@ //===----------------------------------------------------------------------===// #include "FileIndex.h" +#include "Index.h" +#include "MemIndex.h" #include "SymbolCollector.h" #include "clang/Index/IndexingAction.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" namespace clang { namespace clangd { @@ -45,8 +50,9 @@ return Collector.takeSymbols(); } -FileIndex::FileIndex(std::vector<std::string> URISchemes) - : URISchemes(std::move(URISchemes)) {} +FileIndex::FileIndex(std::vector<std::string> URISchemes, + const SymbolIndex *OverlayedIndex) + : URISchemes(std::move(URISchemes)), OverlayedIndex(OverlayedIndex) {} void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Slab) { std::lock_guard<std::mutex> Lock(Mutex); @@ -56,27 +62,54 @@ FileToSlabs[Path] = std::move(Slab); } -std::shared_ptr<std::vector<const Symbol *>> FileSymbols::allSymbols() { +std::shared_ptr<MemIndex::SymbolSlabView> 
FileSymbols::allSymbols() { // The snapshot manages life time of symbol slabs and provides pointers of all // symbols in all slabs. struct Snapshot { - std::vector<const Symbol *> Pointers; + MemIndex::SymbolSlabView View; std::vector<std::shared_ptr<SymbolSlab>> KeepAlive; }; + auto Snap = std::make_shared<Snapshot>(); { std::lock_guard<std::mutex> Lock(Mutex); for (const auto &FileAndSlab : FileToSlabs) { Snap->KeepAlive.push_back(FileAndSlab.second); for (const auto &Iter : *FileAndSlab.second) - Snap->Pointers.push_back(&Iter); + Snap->View.Pointers.push_back(&Iter); + const auto &SlabDigests = FileAndSlab.second->fileDigests(); + Snap->View.Digests.insert(SlabDigests.begin(), SlabDigests.end()); } } - auto *Pointers = &Snap->Pointers; + auto *View = &Snap->View; // Use aliasing constructor to keep the snapshot alive along with the // pointers. - return {std::move(Snap), Pointers}; + return {std::move(Snap), View}; +} + +static SymbolSlab dropSymbolsFromIndexedFiles(const SymbolIndex &Index, + SymbolSlab Slab) { + const auto &SlabDigests = Slab.fileDigests(); + FileDigestRequest Req; + for (const auto &Digest : SlabDigests) + Req.URIs.insert(Digest.first); + + llvm::StringSet<> IndexedURIs; + Index.fileDigests(Req, [&](StringRef URI, FileDigest Digest) { + auto I = SlabDigests.find(URI); + if ((I != SlabDigests.end()) && (I->second == Digest)) + IndexedURIs.insert(URI); + }); + if (IndexedURIs.empty()) + return Slab; + SymbolSlab::Builder Syms; + for (const auto &Sym : Slab) + if (IndexedURIs.find(Sym.CanonicalDeclaration.FileURI) == IndexedURIs.end()) + Syms.insert(Sym); + for (const auto &Digest : SlabDigests) + Syms.addFileDigest(Digest.first, Digest.second); + return std::move(Syms).build(); } void FileIndex::update(PathRef Path, ASTContext *AST, @@ -86,7 +119,10 @@ } else { assert(PP); auto Slab = llvm::make_unique<SymbolSlab>(); - *Slab = indexAST(*AST, PP, URISchemes); + auto ASTSlab = indexAST(*AST, PP, URISchemes); + *Slab = OverlayedIndex ? 
dropSymbolsFromIndexedFiles(*OverlayedIndex, + std::move(ASTSlab)) + : std::move(ASTSlab); FSymbols.update(Path, std::move(Slab)); } auto Symbols = FSymbols.allSymbols(); @@ -105,5 +141,12 @@ Index.lookup(Req, Callback); } +void FileIndex::fileDigests( + const FileDigestRequest &Req, + llvm::function_ref<void(llvm::StringRef URI, FileDigest Digest)> Callback) + const { + Index.fileDigests(Req, Callback); +} + } // namespace clangd } // namespace clang Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/YAMLTraits.h" +#include <sstream> +#include <string> using namespace llvm; using namespace clang::tooling; @@ -40,6 +42,9 @@ namespace clangd { namespace { +// The index to prepend to +const llvm::StringRef kFileDigestKeyPrefix = "digest:"; + static llvm::cl::opt<std::string> AssumedHeaderDir( "assume-header-dir", llvm::cl::desc("The index includes header that a symbol is defined in. 
" @@ -92,9 +97,11 @@ } auto Symbols = Collector->takeSymbols(); - for (const auto &Sym : Symbols) { + for (const auto &Sym : Symbols) Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym)); - } + for (const auto &Digest : Symbols.fileDigests()) + Ctx->reportResult((kFileDigestKeyPrefix + Digest.first).str(), + std::to_string(Digest.second)); } private: @@ -130,15 +137,22 @@ llvm::BumpPtrAllocator Arena; Symbol::Details Scratch; Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) { - Arena.Reset(); - llvm::yaml::Input Yin(Value, &Arena); - auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); - clang::clangd::SymbolID ID; - Key >> ID; - if (const auto *Existing = UniqueSymbols.find(ID)) - UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); - else - UniqueSymbols.insert(Sym); + if (Key.startswith(kFileDigestKeyPrefix)) { + size_t Digest; + std::stringstream(Value) >> Digest; + UniqueSymbols.addFileDigest(Key.substr(kFileDigestKeyPrefix.size()), + Digest); + } else { + Arena.Reset(); + llvm::yaml::Input Yin(Value, &Arena); + auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); + clang::clangd::SymbolID ID; + Key >> ID; + if (const auto *Existing = UniqueSymbols.find(ID)) + UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); + else + UniqueSymbols.insert(Sym); + } }); return std::move(UniqueSymbols).build(); } Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -83,8 +83,9 @@ : CDB(CDB), DiagConsumer(DiagConsumer), FSProvider(FSProvider), ResourceDir(Opts.ResourceDir ? Opts.ResourceDir->str() : getStandardResourceDir()), - FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.URISchemes) - : nullptr), + FileIdx(Opts.BuildDynamicSymbolIndex + ? 
new FileIndex(Opts.URISchemes, Opts.StaticIndex) + : nullptr), PCHs(std::make_shared<PCHContainerOperations>()), // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits