Hi Kadir, I noticed in our buildbots that run with sanitizers that we got a new complaint with this commit. It's still there on current top-of-tree now so the fixes you pushed after this patch doesn't seem to address this one:
==89727==ERROR: LeakSanitizer: detected memory leaks Direct leak of 136 byte(s) in 1 object(s) allocated from: #0 0x5b9838 in operator new(unsigned long) /repo/uabkaka/tmp/llvm/projects/compiler- rt/lib/asan/asan_new_delete.cc:106 #1 0xc50879 in make_unique<clang::clangd::RefSlab, clang::clangd::RefSlab> /proj/flexasic/app/llvm/8.0/bin/../include/c++/v1/memory:3132:28 #2 0xc50879 in refSlab /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../../clang-tools-extra/clangd/unittests/FileIndexTests.cpp:87 #3 0xc50879 in clang::clangd::(anonymous namespace)::FileShardedIndexTest_Sharding_Test::TestBody() /repo/bbiswjenk/fem023-eiffel003/workspace/llvm/llvm-master-sanitize- asan/llvm/build-all-asan/../../clang-tools- extra/clangd/unittests/FileIndexTests.cpp:535 #4 0x1620190 in HandleExceptionsInMethodIfSupported<testing::Test, void> /repo/bbiswjenk/fem023-eiffel003/workspace/llvm/llvm-master- sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc #5 0x1620190 in testing::Test::Run() /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc:2474 #6 0x1623875 in testing::TestInfo::Run() /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc:2656:11 #7 0x1624cf0 in testing::TestCase::Run() /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc:2774:28 #8 0x1643714 in testing::internal::UnitTestImpl::RunAllTests() /repo/bbiswjenk/fem023-eiffel003/workspace/llvm/llvm-master-sanitize- asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc:4649:43 #9 0x16428c0 in HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool> /repo/bbiswjenk/fem023-eiffel003/workspace/llvm/llvm-master- sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc #10 0x16428c0 in testing::UnitTest::Run() /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/src/gtest.cc:4257 #11 0x16048f0 in RUN_ALL_TESTS /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/googletest/include/gtest/gtest.h:2233:46 #12 0x16048f0 in main /repo/bbiswjenk/fem023- eiffel003/workspace/llvm/llvm-master-sanitize-asan/llvm/build-all- asan/../utils/unittest/UnitTestMain/TestMain.cpp:50 #13 0x7f558b333544 in __libc_start_main (/lib64/libc.so.6+0x22544) SUMMARY: AddressSanitizer: 136 byte(s) leaked in 1 allocation(s). /Mikael On Wed, 2020-04-15 at 00:15 -0700, Kadir Cetinkaya via cfe-commits wrote: > Author: Kadir Cetinkaya > Date: 2020-04-15T09:10:10+02:00 > New Revision: dffa9dfbda56820c02e357ad34c24ce8759b4d26 > > URL: > https://protect2.fireeye.com/v1/url?k=1ed901c9-4253d508-1ed94152-863d9bcb726f-37cce004cef08ce5&q=1&e=d51603fa-ef7d-4c66-99fd-b868e84ed659&u=https%3A%2F%2Fgithub.com%2Fllvm%2Fllvm-project%2Fcommit%2Fdffa9dfbda56820c02e357ad34c24ce8759b4d26 > DIFF: > https://protect2.fireeye.com/v1/url?k=27955bfa-7b1f8f3b-27951b61-863d9bcb726f-307938d8468e38e8&q=1&e=d51603fa-ef7d-4c66-99fd-b868e84ed659&u=https%3A%2F%2Fgithub.com%2Fllvm%2Fllvm-project%2Fcommit%2Fdffa9dfbda56820c02e357ad34c24ce8759b4d26.diff > > LOG: [clangd] Shard preamble symbols in dynamic index > > Summary: > This reduces memory usage by dynamic index from more than 400MB to > 32MB > when all files in clang-tools-extra/clangd/*.cpp are active in > clangd. > > Reviewers: sammccall > > Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, > cfe-commits > > Tags: #clang > > Differential Revision: https://reviews.llvm.org/D77732 > > Added: > > > Modified: > clang-tools-extra/clangd/index/Background.cpp > clang-tools-extra/clangd/index/FileIndex.cpp > clang-tools-extra/clangd/index/FileIndex.h > clang-tools-extra/clangd/index/SymbolCollector.cpp > clang-tools-extra/clangd/unittests/FileIndexTests.cpp > clang-tools-extra/clangd/unittests/TestTU.cpp > > Removed: > > > > ##################################################################### > ########### > diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang- > tools-extra/clangd/index/Background.cpp > index 1c26dd48093e..4c5719d0526c 100644 > --- a/clang-tools-extra/clangd/index/Background.cpp > +++ b/clang-tools-extra/clangd/index/Background.cpp > @@ -61,51 +61,6 @@ namespace clang { > namespace clangd { > namespace { > > -// Resolves URI to file paths with cache. > -class URIToFileCache { > -public: > - URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {} > - > - llvm::StringRef resolve(llvm::StringRef FileURI) { > - auto I = URIToPathCache.try_emplace(FileURI); > - if (I.second) { > - auto Path = URI::resolve(FileURI, HintPath); > - if (!Path) { > - elog("Failed to resolve URI {0}: {1}", FileURI, > Path.takeError()); > - assert(false && "Failed to resolve URI"); > - return ""; > - } > - I.first->second = *Path; > - } > - return I.first->second; > - } > - > -private: > - std::string HintPath; > - llvm::StringMap<std::string> URIToPathCache; > -}; > - > -// We keep only the node "U" and its edges. Any node other than "U" > will be > -// empty in the resultant graph. > -IncludeGraph getSubGraph(const URI &U, const IncludeGraph > &FullGraph) { > - IncludeGraph IG; > - > - std::string FileURI = U.toString(); > - auto Entry = IG.try_emplace(FileURI).first; > - auto &Node = Entry->getValue(); > - Node = FullGraph.lookup(Entry->getKey()); > - Node.URI = Entry->getKey(); > - > - // URIs inside nodes must point into the keys of the same > IncludeGraph. > - for (auto &Include : Node.DirectIncludes) { > - auto I = IG.try_emplace(Include).first; > - I->getValue().URI = I->getKey(); > - Include = I->getKey(); > - } > - > - return IG; > -} > - > // We cannot use vfs->makeAbsolute because Cmd.FileName is either > absolute or > // relative to Cmd.Directory, which might not be the same as current > working > // directory. > @@ -219,108 +174,44 @@ void BackgroundIndex::update( > llvm::StringRef MainFile, IndexFileIn Index, > const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, > bool HadErrors) { > - // Partition symbols/references into files. > - struct File { > - llvm::DenseSet<const Symbol *> Symbols; > - llvm::DenseSet<const Ref *> Refs; > - llvm::DenseSet<const Relation *> Relations; > - FileDigest Digest; > - }; > - llvm::StringMap<File> Files; > - URIToFileCache URICache(MainFile); > + llvm::StringMap<FileDigest> FilesToUpdate; > for (const auto &IndexIt : *Index.Sources) { > const auto &IGN = IndexIt.getValue(); > // Note that sources do not contain any information regarding > missing > // headers, since we don't even know what absolute path they > should fall in. > - const auto AbsPath = URICache.resolve(IGN.URI); > + auto AbsPath = llvm::cantFail(URI::resolve(IGN.URI, MainFile), > + "Failed to resovle URI"); > const auto DigestIt = ShardVersionsSnapshot.find(AbsPath); > // File has > diff erent contents, or indexing was successful this time. > if (DigestIt == ShardVersionsSnapshot.end() || > DigestIt->getValue().Digest != IGN.Digest || > (DigestIt->getValue().HadErrors && !HadErrors)) > - Files.try_emplace(AbsPath).first->getValue().Digest = > IGN.Digest; > - } > - // This map is used to figure out where to store relations. > - llvm::DenseMap<SymbolID, File *> SymbolIDToFile; > - for (const auto &Sym : *Index.Symbols) { > - if (Sym.CanonicalDeclaration) { > - auto DeclPath = > URICache.resolve(Sym.CanonicalDeclaration.FileURI); > - const auto FileIt = Files.find(DeclPath); > - if (FileIt != Files.end()) { > - FileIt->second.Symbols.insert(&Sym); > - SymbolIDToFile[Sym.ID] = &FileIt->second; > - } > - } > - // For symbols with > diff erent declaration and definition locations, we store > - // the full symbol in both the header file and the > implementation file, so > - // that merging can tell the preferred symbols (from canonical > headers) from > - // other symbols (e.g. forward declarations). > - if (Sym.Definition && > - Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) > { > - auto DefPath = URICache.resolve(Sym.Definition.FileURI); > - const auto FileIt = Files.find(DefPath); > - if (FileIt != Files.end()) > - FileIt->second.Symbols.insert(&Sym); > - } > - } > - llvm::DenseMap<const Ref *, SymbolID> RefToIDs; > - for (const auto &SymRefs : *Index.Refs) { > - for (const auto &R : SymRefs.second) { > - auto Path = URICache.resolve(R.Location.FileURI); > - const auto FileIt = Files.find(Path); > - if (FileIt != Files.end()) { > - auto &F = FileIt->getValue(); > - RefToIDs[&R] = SymRefs.first; > - F.Refs.insert(&R); > - } > - } > - } > - for (const auto &Rel : *Index.Relations) { > - const auto FileIt = SymbolIDToFile.find(Rel.Subject); > - if (FileIt != SymbolIDToFile.end()) > - FileIt->second->Relations.insert(&Rel); > + FilesToUpdate[AbsPath] = IGN.Digest; > } > > - // Build and store new slabs for each updated file. > - for (const auto &FileIt : Files) { > - llvm::StringRef Path = FileIt.getKey(); > - SymbolSlab::Builder Syms; > - RefSlab::Builder Refs; > - RelationSlab::Builder Relations; > - for (const auto *S : FileIt.second.Symbols) > - Syms.insert(*S); > - for (const auto *R : FileIt.second.Refs) > - Refs.insert(RefToIDs[R], *R); > - for (const auto *Rel : FileIt.second.Relations) > - Relations.insert(*Rel); > - auto SS = std::make_unique<SymbolSlab>(std::move(Syms).build()); > - auto RS = std::make_unique<RefSlab>(std::move(Refs).build()); > - auto RelS = > std::make_unique<RelationSlab>(std::move(Relations).build()); > - auto IG = std::make_unique<IncludeGraph>( > - getSubGraph(URI::create(Path), Index.Sources.getValue())); > + // Shard slabs into files. > + FileShardedIndex ShardedIndex(std::move(Index), MainFile); > > - // We need to store shards before updating the index, since the > latter > - // consumes slabs. > - // FIXME: Also skip serializing the shard if it is already up- > to-date. > - BackgroundIndexStorage *IndexStorage = > IndexStorageFactory(Path); > - IndexFileOut Shard; > - Shard.Symbols = SS.get(); > - Shard.Refs = RS.get(); > - Shard.Relations = RelS.get(); > - Shard.Sources = IG.get(); > + // Build and store new slabs for each updated file. > + for (const auto &FileIt : FilesToUpdate) { > + PathRef Path = FileIt.first(); > + auto IF = ShardedIndex.getShard(Path); > > // Only store command line hash for main files of the TU, since > our > // current model keeps only one version of a header file. > - if (Path == MainFile) > - Shard.Cmd = Index.Cmd.getPointer(); > + if (Path != MainFile) > + IF.Cmd.reset(); > > - if (auto Error = IndexStorage->storeShard(Path, Shard)) > + // We need to store shards before updating the index, since the > latter > + // consumes slabs. > + // FIXME: Also skip serializing the shard if it is already up- > to-date. > + if (auto Error = IndexStorageFactory(Path)->storeShard(Path, > IF)) > elog("Failed to write background-index shard for file {0}: > {1}", Path, > std::move(Error)); > > { > std::lock_guard<std::mutex> Lock(ShardVersionsMu); > - auto Hash = FileIt.second.Digest; > + const auto &Hash = FileIt.getValue(); > auto DigestIt = ShardVersions.try_emplace(Path); > ShardVersion &SV = DigestIt.first->second; > // Skip if file is already up to date, unless previous index > was broken > @@ -333,8 +224,11 @@ void BackgroundIndex::update( > // This can override a newer version that is added in another > thread, if > // this thread sees the older version but finishes later. This > should be > // rare in practice. > - IndexedSymbols.update(Path, std::move(SS), std::move(RS), > std::move(RelS), > - Path == MainFile); > + IndexedSymbols.update( > + Path, > std::make_unique<SymbolSlab>(std::move(*IF.Symbols)), > + std::make_unique<RefSlab>(std::move(*IF.Refs)), > + std::make_unique<RelationSlab>(std::move(*IF.Relations)), > + Path == MainFile); > } > } > } > > diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang- > tools-extra/clangd/index/FileIndex.cpp > index 92e7316105c7..d644268b8da9 100644 > --- a/clang-tools-extra/clangd/index/FileIndex.cpp > +++ b/clang-tools-extra/clangd/index/FileIndex.cpp > @@ -10,11 +10,17 @@ > #include "CollectMacros.h" > #include "Logger.h" > #include "ParsedAST.h" > +#include "Path.h" > #include "SymbolCollector.h" > #include "index/CanonicalIncludes.h" > #include "index/Index.h" > #include "index/MemIndex.h" > #include "index/Merge.h" > +#include "index/Ref.h" > +#include "index/Relation.h" > +#include "index/Serialization.h" > +#include "index/Symbol.h" > +#include "index/SymbolID.h" > #include "index/SymbolOrigin.h" > #include "index/dex/Dex.h" > #include "clang/AST/ASTContext.h" > @@ -23,19 +29,24 @@ > #include "clang/Lex/MacroInfo.h" > #include "clang/Lex/Preprocessor.h" > #include "llvm/ADT/DenseMap.h" > -#include "llvm/ADT/DenseSet.h" > #include "llvm/ADT/STLExtras.h" > +#include "llvm/ADT/StringMap.h" > #include "llvm/ADT/StringRef.h" > +#include "llvm/Support/Error.h" > #include <memory> > +#include <tuple> > +#include <utility> > +#include <vector> > > namespace clang { > namespace clangd { > +namespace { > > -static SlabTuple indexSymbols(ASTContext &AST, > std::shared_ptr<Preprocessor> PP, > - llvm::ArrayRef<Decl *> DeclsToIndex, > - const MainFileMacros > *MacroRefsToIndex, > - const CanonicalIncludes &Includes, > - bool IsIndexMainAST, llvm::StringRef > Version) { > +SlabTuple indexSymbols(ASTContext &AST, > std::shared_ptr<Preprocessor> PP, > + llvm::ArrayRef<Decl *> DeclsToIndex, > + const MainFileMacros *MacroRefsToIndex, > + const CanonicalIncludes &Includes, bool > IsIndexMainAST, > + llvm::StringRef Version) { > SymbolCollector::Options CollectorOpts; > CollectorOpts.CollectIncludePath = true; > CollectorOpts.Includes = &Includes; > @@ -85,6 +96,131 @@ static SlabTuple indexSymbols(ASTContext &AST, > std::shared_ptr<Preprocessor> PP, > std::move(Relations)); > } > > +// Resolves URI to file paths with cache. > +class URIToFileCache { > +public: > + URIToFileCache(PathRef HintPath) : HintPath(HintPath) {} > + > + llvm::StringRef operator[](llvm::StringRef FileURI) { > + if (FileURI.empty()) > + return ""; > + auto I = URIToPathCache.try_emplace(FileURI); > + if (I.second) { > + I.first->second = llvm::cantFail(URI::resolve(FileURI, > HintPath), > + "Failed to resolve URI"); > + } > + return I.first->second; > + } > + > +private: > + PathRef HintPath; > + llvm::StringMap<std::string> URIToPathCache; > +}; > + > +// We keep only the node "U" and its edges. Any node other than "U" > will be > +// empty in the resultant graph. > +IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph > &FullGraph) { > + IncludeGraph IG; > + > + auto Entry = IG.try_emplace(URI).first; > + auto &Node = Entry->getValue(); > + Node = FullGraph.lookup(Entry->getKey()); > + Node.URI = Entry->getKey(); > + > + // URIs inside nodes must point into the keys of the same > IncludeGraph. > + for (auto &Include : Node.DirectIncludes) { > + auto I = IG.try_emplace(Include).first; > + I->getValue().URI = I->getKey(); > + Include = I->getKey(); > + } > + return IG; > +} > +} // namespace > + > +FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef > HintPath) > + : Index(std::move(Input)) { > + URIToFileCache UriToFile(HintPath); > + // Used to build RelationSlabs. > + llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile; > + > + // Attribute each Symbol to both their declaration and definition > locations. > + if (Index.Symbols) { > + for (const auto &S : *Index.Symbols) { > + auto File = UriToFile[S.CanonicalDeclaration.FileURI]; > + auto It = Shards.try_emplace(File); > + It.first->getValue().Symbols.insert(&S); > + SymbolIDToFile[S.ID] = &It.first->getValue(); > + // Only bother if definition file is > diff erent than declaration file. > + if (S.Definition && > + S.Definition.FileURI != S.CanonicalDeclaration.FileURI) { > + auto File = UriToFile[S.Definition.FileURI]; > + auto It = Shards.try_emplace(File); > + It.first->getValue().Symbols.insert(&S); > + } > + } > + } > + // Attribute references into each file they occured in. > + if (Index.Refs) { > + for (const auto &SymRefs : *Index.Refs) { > + for (const auto &R : SymRefs.second) { > + auto File = UriToFile[R.Location.FileURI]; > + const auto It = Shards.try_emplace(File); > + It.first->getValue().Refs.insert(&R); > + RefToSymID[&R] = SymRefs.first; > + } > + } > + } > + // Attribute relations to the file declaraing their Subject as > Object might > + // not have been indexed, see SymbolCollector::processRelations > for details. > + if (Index.Relations) { > + for (const auto &R : *Index.Relations) { > + auto *File = SymbolIDToFile.lookup(R.Subject); > + assert(File && "unknown subject in relation"); > + File->Relations.insert(&R); > + } > + } > + // Store only the direct includes of a file in a shard. > + if (Index.Sources) { > + const auto &FullGraph = *Index.Sources; > + for (const auto &It : FullGraph) { > + auto File = UriToFile[It.first()]; > + auto ShardIt = Shards.try_emplace(File); > + ShardIt.first->getValue().IG = getSubGraph(It.first(), > FullGraph); > + } > + } > +} > +std::vector<PathRef> FileShardedIndex::getAllFiles() const { > + return {Shards.keys().begin(), Shards.keys().end()}; > +} > + > +IndexFileIn FileShardedIndex::getShard(PathRef File) const { > + auto It = Shards.find(File); > + assert(It != Shards.end() && "received unknown file"); > + > + IndexFileIn IF; > + IF.Sources = It->getValue().IG; > + IF.Cmd = Index.Cmd; > + > + SymbolSlab::Builder SymB; > + for (const auto *S : It->getValue().Symbols) > + SymB.insert(*S); > + IF.Symbols = std::move(SymB).build(); > + > + RefSlab::Builder RefB; > + for (const auto *Ref : It->getValue().Refs) { > + auto SID = RefToSymID.lookup(Ref); > + RefB.insert(SID, *Ref); > + } > + IF.Refs = std::move(RefB).build(); > + > + RelationSlab::Builder RelB; > + for (const auto *Rel : It->getValue().Relations) { > + RelB.insert(*Rel); > + } > + IF.Relations = std::move(RelB).build(); > + return IF; > +} > + > SlabTuple indexMainDecls(ParsedAST &AST) { > return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(), > AST.getLocalTopLevelDecls(), &AST.getMacros(), > @@ -254,15 +390,24 @@ void FileIndex::updatePreamble(PathRef Path, > llvm::StringRef Version, > ASTContext &AST, > std::shared_ptr<Preprocessor> PP, > const CanonicalIncludes &Includes) { > - auto Slabs = indexHeaderSymbols(Version, AST, std::move(PP), > Includes); > - PreambleSymbols.update( > - Path, > std::make_unique<SymbolSlab>(std::move(std::get<0>(Slabs))), > - std::make_unique<RefSlab>(), > - std::make_unique<RelationSlab>(std::move(std::get<2>(Slabs))), > - /*CountReferences=*/false); > + IndexFileIn IF; > + std::tie(IF.Symbols, std::ignore, IF.Relations) = > + indexHeaderSymbols(Version, AST, std::move(PP), Includes); > + FileShardedIndex ShardedIndex(std::move(IF), Path); > + for (PathRef File : ShardedIndex.getAllFiles()) { > + auto IF = ShardedIndex.getShard(File); > + PreambleSymbols.update( > + File, std::make_unique<SymbolSlab>(std::move(*IF.Symbols)), > + std::make_unique<RefSlab>(), > + std::make_unique<RelationSlab>(std::move(*IF.Relations)), > + /*CountReferences=*/false); > + } > PreambleIndex.reset( > PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : > IndexType::Light, > DuplicateHandling::PickOne)); > + vlog("Build dynamic index for header symbols with estimated memory > usage of " > + "{0} bytes", > + PreambleIndex.estimateMemoryUsage()); > } > > void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { > @@ -274,6 +419,9 @@ void FileIndex::updateMain(PathRef Path, > ParsedAST &AST) { > /*CountReferences=*/true); > MainFileIndex.reset( > MainFileSymbols.buildIndex(IndexType::Light, > DuplicateHandling::Merge)); > + vlog("Build dynamic index for main-file symbols with estimated > memory usage " > + "of {0} bytes", > + MainFileIndex.estimateMemoryUsage()); > } > > } // namespace clangd > > diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang- > tools-extra/clangd/index/FileIndex.h > index ec44be9b89c2..539f232a7523 100644 > --- a/clang-tools-extra/clangd/index/FileIndex.h > +++ b/clang-tools-extra/clangd/index/FileIndex.h > @@ -15,14 +15,24 @@ > #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H > #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H > > +#include "Headers.h" > #include "Index.h" > #include "MemIndex.h" > #include "Merge.h" > #include "Path.h" > #include "index/CanonicalIncludes.h" > +#include "index/Ref.h" > +#include "index/Relation.h" > +#include "index/Serialization.h" > #include "index/Symbol.h" > #include "clang/Lex/Preprocessor.h" > +#include "clang/Tooling/CompilationDatabase.h" > +#include "llvm/ADT/DenseSet.h" > +#include "llvm/ADT/Optional.h" > +#include "llvm/ADT/StringMap.h" > +#include "llvm/ADT/StringRef.h" > #include <memory> > +#include <vector> > > namespace clang { > class ASTContext; > @@ -109,17 +119,15 @@ class FileIndex : public MergedIndex { > private: > bool UseDex; // FIXME: this should be always on. > > - // Contains information from each file's preamble only. > - // These are large, but update fairly infrequently (preambles are > stable). > + // Contains information from each file's preamble only. Symbols > and relations > + // are sharded per declaration file to deduplicate multiple > symbols and reduce > + // memory usage. > // Missing information: > // - symbol refs (these are always "from the main file") > // - definition locations in the main file > // > - // FIXME: Because the preambles for > diff erent TUs have large overlap and > - // FileIndex doesn't deduplicate, this uses lots of extra RAM. > - // The biggest obstacle in fixing this: the obvious approach of > partitioning > - // by declaring file (rather than main file) fails if headers > provide > - // > diff erent symbols based on preprocessor state. > + // Note that we store only one version of a header, hence symbols > appearing in > + // > diff erent PP states will be missing. > FileSymbols PreambleSymbols; > SwapIndex PreambleIndex; > > @@ -146,6 +154,43 @@ SlabTuple indexHeaderSymbols(llvm::StringRef > Version, ASTContext &AST, > std::shared_ptr<Preprocessor> PP, > const CanonicalIncludes &Includes); > > +/// Takes slabs coming from a TU (multiple files) and shards them > per > +/// declaration location. > +struct FileShardedIndex { > + /// \p HintPath is used to convert file URIs stored in symbols > into absolute > + /// paths. > + explicit FileShardedIndex(IndexFileIn Input, PathRef HintPath); > + > + /// Returns absolute paths for all files that has a shard. > + std::vector<PathRef> getAllFiles() const; > + > + /// Generates index shard for the \p File. Note that this function > results in > + /// a copy of all the relevant data. > + /// Returned index will always have Symbol/Refs/Relation Slabs > set, even if > + /// they are empty. > + IndexFileIn getShard(PathRef File) const; > + > +private: > + // Contains all the information that belongs to a single file. > + struct FileShard { > + // Either declared or defined in the file. > + llvm::DenseSet<const Symbol *> Symbols; > + // Reference occurs in the file. > + llvm::DenseSet<const Ref *> Refs; > + // Subject is declared in the file. > + llvm::DenseSet<const Relation *> Relations; > + // Contains edges for only the direct includes. > + IncludeGraph IG; > + }; > + > + // Keeps all the information alive. > + const IndexFileIn Index; > + // Mapping from absolute paths to slab information. > + llvm::StringMap<FileShard> Shards; > + // Used to build RefSlabs. > + llvm::DenseMap<const Ref *, SymbolID> RefToSymID; > +}; > + > } // namespace clangd > } // namespace clang > > > diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp > b/clang-tools-extra/clangd/index/SymbolCollector.cpp > index 06190914ee8d..471061672107 100644 > --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp > +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp > @@ -283,6 +283,18 @@ bool SymbolCollector::handleDeclOccurrence( > if (!ID) > return true; > > + // ND is the canonical (i.e. first) declaration. If it's in the > main file > + // (which is not a header), then no public declaration was > visible, so assume > + // it's main-file only. > + bool IsMainFileOnly = > + SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) > && > + !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())- > >getName(), > + ASTCtx->getLangOpts()); > + // In C, printf is a redecl of an implicit builtin! So check OrigD > instead. > + if (ASTNode.OrigD->isImplicit() || > + !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly)) > + return true; > + > // Note: we need to process relations for all decl occurrences, > including > // refs, because the indexing code only populates relations for > specific > // occurrences. For example, RelationBaseOf is only populated for > the > @@ -297,17 +309,6 @@ bool SymbolCollector::handleDeclOccurrence( > if (IsOnlyRef && !CollectRef) > return true; > > - // ND is the canonical (i.e. first) declaration. If it's in the > main file > - // (which is not a header), then no public declaration was > visible, so assume > - // it's main-file only. > - bool IsMainFileOnly = > - SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) > && > - !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())- > >getName(), > - ASTCtx->getLangOpts()); > - // In C, printf is a redecl of an implicit builtin! So check OrigD > instead. > - if (ASTNode.OrigD->isImplicit() || > - !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly)) > - return true; > // Do not store references to main-file symbols. > // Unlike other fields, e.g. Symbols (which use spelling > locations), we use > // file locations for references (as it aligns the behavior of > clangd's > > diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp > b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp > index 5203fb67307c..dc39ad2acf25 100644 > --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp > +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp > @@ -9,13 +9,19 @@ > #include "AST.h" > #include "Annotations.h" > #include "Compiler.h" > +#include "Headers.h" > #include "ParsedAST.h" > #include "SyncAPI.h" > #include "TestFS.h" > #include "TestTU.h" > +#include "URI.h" > #include "index/CanonicalIncludes.h" > #include "index/FileIndex.h" > #include "index/Index.h" > +#include "index/Ref.h" > +#include "index/Relation.h" > +#include "index/Serialization.h" > +#include "index/Symbol.h" > #include "clang/Frontend/CompilerInvocation.h" > #include "clang/Frontend/Utils.h" > #include "clang/Index/IndexSymbol.h" > @@ -23,6 +29,7 @@ > #include "clang/Tooling/CompilationDatabase.h" > #include "gmock/gmock.h" > #include "gtest/gtest.h" > +#include <utility> > > using ::testing::_; > using ::testing::AllOf; > @@ -151,8 +158,9 @@ void update(FileIndex &M, llvm::StringRef > Basename, llvm::StringRef Code) { > File.HeaderFilename = (Basename + ".h").str(); > File.HeaderCode = std::string(Code); > auto AST = File.build(); > - M.updatePreamble(File.Filename, /*Version=*/"null", > AST.getASTContext(), > - AST.getPreprocessorPtr(), > AST.getCanonicalIncludes()); > + M.updatePreamble(testPath(File.Filename), /*Version=*/"null", > + AST.getASTContext(), AST.getPreprocessorPtr(), > + AST.getCanonicalIncludes()); > } > > TEST(FileIndexTest, CustomizedURIScheme) { > @@ -393,8 +401,9 @@ TEST(FileIndexTest, Relations) { > TU.HeaderCode = "class A {}; class B : public A {};"; > auto AST = TU.build(); > FileIndex Index; > - Index.updatePreamble(TU.Filename, /*Version=*/"null", > AST.getASTContext(), > - AST.getPreprocessorPtr(), > AST.getCanonicalIncludes()); > + Index.updatePreamble(testPath(TU.Filename), /*Version=*/"null", > + AST.getASTContext(), > AST.getPreprocessorPtr(), > + AST.getCanonicalIncludes()); > SymbolID A = findSymbol(TU.headerSymbols(), "A").ID; > uint32_t Results = 0; > RelationsRequest Req; > @@ -477,6 +486,128 @@ TEST(FileSymbolsTest, > CountReferencesWithRefSlabs) { > AllOf(QName("2"), NumReferences(1u)), > AllOf(QName("3"), NumReferences(1u)))); > } > + > +TEST(FileIndexTest, StalePreambleSymbolsDeleted) { > + FileIndex M; > + TestTU File; > + File.HeaderFilename = "a.h"; > + > + File.Filename = "f1.cpp"; > + File.HeaderCode = "int a;"; > + auto AST = File.build(); > + M.updatePreamble(testPath(File.Filename), /*Version=*/"null", > + AST.getASTContext(), AST.getPreprocessorPtr(), > + AST.getCanonicalIncludes()); > + EXPECT_THAT(runFuzzyFind(M, ""), > UnorderedElementsAre(QName("a"))); > + > + File.Filename = "f2.cpp"; > + File.HeaderCode = "int b;"; > + AST = File.build(); > + M.updatePreamble(testPath(File.Filename), /*Version=*/"null", > + AST.getASTContext(), AST.getPreprocessorPtr(), > + AST.getCanonicalIncludes()); > + EXPECT_THAT(runFuzzyFind(M, ""), > UnorderedElementsAre(QName("b"))); > +} > + > +TEST(FileShardedIndexTest, Sharding) { > + auto AHeaderUri = URI::create(testPath("a.h")).toString(); > + auto BHeaderUri = URI::create(testPath("b.h")).toString(); > + auto BSourceUri = URI::create(testPath("b.cc")).toString(); > + > + auto Sym1 = symbol("1"); > + Sym1.CanonicalDeclaration.FileURI = AHeaderUri.c_str(); > + > + auto Sym2 = symbol("2"); > + Sym2.CanonicalDeclaration.FileURI = BHeaderUri.c_str(); > + Sym2.Definition.FileURI = BSourceUri.c_str(); > + > + IndexFileIn IF; > + { > + SymbolSlab::Builder B; > + // Should be stored in only a.h > + B.insert(Sym1); > + // Should be stored in both b.h and b.cc > + B.insert(Sym2); > + IF.Symbols = std::move(B).build(); > + } > + { > + // Should be stored in b.cc > + IF.Refs = std::move(*refSlab(Sym1.ID, > BSourceUri.c_str()).release()); > + } > + { > + RelationSlab::Builder B; > + // Should be stored in a.h > + B.insert(Relation{Sym1.ID, RelationKind::BaseOf, Sym2.ID}); > + // Should be stored in b.h > + B.insert(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID}); > + IF.Relations = std::move(B).build(); > + } > + > + IF.Sources.emplace(); > + IncludeGraph &IG = *IF.Sources; > + { > + // b.cc includes b.h > + auto &Node = IG[BSourceUri]; > + Node.DirectIncludes = {BHeaderUri}; > + Node.URI = BSourceUri; > + } > + { > + // b.h includes a.h > + auto &Node = IG[BHeaderUri]; > + Node.DirectIncludes = {AHeaderUri}; > + Node.URI = BHeaderUri; > + } > + { > + // a.h includes nothing. > + auto &Node = IG[AHeaderUri]; > + Node.DirectIncludes = {}; > + Node.URI = AHeaderUri; > + } > + > + IF.Cmd = tooling::CompileCommand(testRoot(), "b.cc", {"clang"}, > "out"); > + > + FileShardedIndex ShardedIndex(std::move(IF), testPath("b.cc")); > + ASSERT_THAT( > + ShardedIndex.getAllFiles(), > + UnorderedElementsAre(testPath("a.h"), testPath("b.h"), > testPath("b.cc"))); > + > + { > + auto Shard = ShardedIndex.getShard(testPath("a.h")); > + EXPECT_THAT(Shard.Symbols.getValue(), > UnorderedElementsAre(QName("1"))); > + EXPECT_THAT(Shard.Refs.getValue(), IsEmpty()); > + EXPECT_THAT( > + Shard.Relations.getValue(), > + UnorderedElementsAre(Relation{Sym1.ID, RelationKind::BaseOf, > Sym2.ID})); > + ASSERT_THAT(Shard.Sources.getValue().keys(), > + UnorderedElementsAre(AHeaderUri)); > + EXPECT_THAT(Shard.Sources->lookup(AHeaderUri).DirectIncludes, > IsEmpty()); > + EXPECT_TRUE(Shard.Cmd.hasValue()); > + } > + { > + auto Shard = ShardedIndex.getShard(testPath("b.h")); > + EXPECT_THAT(Shard.Symbols.getValue(), > UnorderedElementsAre(QName("2"))); > + EXPECT_THAT(Shard.Refs.getValue(), IsEmpty()); > + EXPECT_THAT( > + Shard.Relations.getValue(), > + UnorderedElementsAre(Relation{Sym2.ID, RelationKind::BaseOf, > Sym1.ID})); > + ASSERT_THAT(Shard.Sources.getValue().keys(), > + UnorderedElementsAre(BHeaderUri, AHeaderUri)); > + EXPECT_THAT(Shard.Sources->lookup(BHeaderUri).DirectIncludes, > + UnorderedElementsAre(AHeaderUri)); > + EXPECT_TRUE(Shard.Cmd.hasValue()); > + } > + { > + auto Shard = ShardedIndex.getShard(testPath("b.cc")); > + EXPECT_THAT(Shard.Symbols.getValue(), > UnorderedElementsAre(QName("2"))); > + EXPECT_THAT(Shard.Refs.getValue(), > UnorderedElementsAre(Pair(Sym1.ID, _))); > + EXPECT_THAT(Shard.Relations.getValue(), IsEmpty()); > + ASSERT_THAT(Shard.Sources.getValue().keys(), > + UnorderedElementsAre(BSourceUri, BHeaderUri)); > + EXPECT_THAT(Shard.Sources->lookup(BSourceUri).DirectIncludes, > + UnorderedElementsAre(BHeaderUri)); > + EXPECT_TRUE(Shard.Cmd.hasValue()); > + } > +} > } // namespace > } // namespace clangd > } // namespace clang > > diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang- > tools-extra/clangd/unittests/TestTU.cpp > index fe3e8f01cca8..640d067261bd 100644 > --- a/clang-tools-extra/clangd/unittests/TestTU.cpp > +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp > @@ -116,9 +116,10 @@ SymbolSlab TestTU::headerSymbols() const { > std::unique_ptr<SymbolIndex> TestTU::index() const { > auto AST = build(); > auto Idx = std::make_unique<FileIndex>(/*UseDex=*/true); > - Idx->updatePreamble(Filename, /*Version=*/"null", > AST.getASTContext(), > - AST.getPreprocessorPtr(), > AST.getCanonicalIncludes()); > - Idx->updateMain(Filename, AST); > + Idx->updatePreamble(testPath(Filename), /*Version=*/"null", > + AST.getASTContext(), AST.getPreprocessorPtr(), > + AST.getCanonicalIncludes()); > + Idx->updateMain(testPath(Filename), AST); > return std::move(Idx); > } > > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits