https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/125988
>From 42eb3826ed79de5aabb7f0197cfda2ad62d9735d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Tue, 11 Feb 2025 11:21:34 +0800 Subject: [PATCH 1/2] [clangd] [C++20] [Modules] Introduce cache for scanning modules --- clang-tools-extra/clangd/ModulesBuilder.cpp | 115 +++++++++++++++--- clang-tools-extra/clangd/ProjectModules.h | 6 +- .../clangd/ScanningProjectModules.cpp | 15 ++- 3 files changed, 112 insertions(+), 24 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index bee31fe51555e..121a4cec4c8c2 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -360,7 +360,8 @@ void ModuleFileCache::remove(StringRef ModuleName) { /// Collect the directly and indirectly required module names for \param /// ModuleName in topological order. The \param ModuleName is guaranteed to /// be the last element in \param ModuleNames. -llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB, +llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource, + ProjectModules &MDB, StringRef ModuleName) { llvm::SmallVector<llvm::StringRef> ModuleNames; llvm::StringSet<> ModuleNamesSet; @@ -368,8 +369,8 @@ llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB, auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { ModuleNamesSet.insert(ModuleName); - for (StringRef RequiredModuleName : - MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName))) + for (StringRef RequiredModuleName : MDB.getRequiredModules( + MDB.getSourceForModuleName(ModuleName, RequiredSource))) if (ModuleNamesSet.insert(RequiredModuleName).second) Visitor(RequiredModuleName, Visitor); @@ -380,30 +381,114 @@ llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB, return ModuleNames; } +class CachingProjectModules : public ProjectModules { +public: + CachingProjectModules(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} + + std::vector<std::string> getRequiredModules(PathRef File) override { + std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File); + if (!MDB) { + elog("Failed to get Project Modules information for {0}", File); + return {}; + } + return MDB->getRequiredModules(File); + } + + std::string getModuleNameForSource(PathRef File) override { + std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File); + if (!MDB) { + elog("Failed to get Project Modules information for {0}", File); + return {}; + } + return MDB->getModuleNameForSource(File); + } + + void setCommandMangler(CommandMangler M) override { + // GlobalCompilationDatabase::getProjectModules() will set mangler + // for the underlying ProjectModules. + } + + std::string getSourceForModuleName(llvm::StringRef ModuleName, + PathRef RequiredSrcFile) override { + std::string CachedResult; + { + std::lock_guard<std::mutex> Lock(CacheMutex); + auto Iter = ModuleNameToSourceCache.find(ModuleName); + if (Iter != ModuleNameToSourceCache.end()) + CachedResult = Iter->second; + } + + std::unique_ptr<ProjectModules> MDB = + CDB.getProjectModules(RequiredSrcFile); + if (!MDB) { + elog("Failed to get Project Modules information for {0}", + RequiredSrcFile); + return {}; + } + + // Verify Cached Result by seeing if the source declaring the same module + // as we query. + if (!CachedResult.empty()) { + std::string ModuleNameOfCachedSource = + MDB->getModuleNameForSource(CachedResult); + if (ModuleNameOfCachedSource == ModuleName) + return CachedResult; + else { + // Cached Result is invalid. Clear it. + + std::lock_guard<std::mutex> Lock(CacheMutex); + ModuleNameToSourceCache.erase(ModuleName); + } + } + + auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile); + + { + std::lock_guard<std::mutex> Lock(CacheMutex); + ModuleNameToSourceCache.insert({ModuleName, Result}); + } + + return Result; + } + +private: + const GlobalCompilationDatabase &CDB; + + std::mutex CacheMutex; + llvm::StringMap<std::string> ModuleNameToSourceCache; +}; + } // namespace class ModulesBuilder::ModulesBuilderImpl { public: - ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {} + ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) + : CachedProjectModules(CDB), Cache(CDB) {} + + CachingProjectModules &getCachedProjectModules() { + return CachedProjectModules; + } const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); } llvm::Error - getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS, - ProjectModules &MDB, + getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName, + const ThreadsafeFS &TFS, ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles); private: + CachingProjectModules CachedProjectModules; ModuleFileCache Cache; }; llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( - StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB, - ReusablePrerequisiteModules &BuiltModuleFiles) { + PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS, + ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) return llvm::Error::success(); - PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName); + std::string ModuleUnitFileName = + MDB.getSourceForModuleName(ModuleName, RequiredSource); /// It is possible that we're meeting third party modules (modules whose /// source are not in the project. e.g, the std module may be a third-party /// module for most project) or something wrong with the implementation of @@ -416,7 +501,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( llvm::formatv("Don't get the module unit for module {0}", ModuleName)); // Get Required modules in topological order. - auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName); + auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName); for (llvm::StringRef ReqModuleName : ReqModuleNames) { if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) continue; @@ -449,13 +534,9 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( std::unique_ptr<PrerequisiteModules> ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) { - std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); - if (!MDB) { - elog("Failed to get Project Modules information for {0}", File); - return std::make_unique<FailedPrerequisiteModules>(); - } + ProjectModules &MDB = Impl->getCachedProjectModules(); - std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File); + std::vector<std::string> RequiredModuleNames = MDB.getRequiredModules(File); if (RequiredModuleNames.empty()) return std::make_unique<ReusablePrerequisiteModules>(); @@ -463,7 +544,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { // Return early if there is any error. if (llvm::Error Err = Impl->getOrBuildModuleFile( - RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) { + File, RequiredModuleName, TFS, MDB, *RequiredModules.get())) { elog("Failed to build module {0}; due to {1}", RequiredModuleName, toString(std::move(Err))); return std::make_unique<FailedPrerequisiteModules>(); diff --git a/clang-tools-extra/clangd/ProjectModules.h b/clang-tools-extra/clangd/ProjectModules.h index 48d52ac9deb89..41812674f12f4 100644 --- a/clang-tools-extra/clangd/ProjectModules.h +++ b/clang-tools-extra/clangd/ProjectModules.h @@ -42,9 +42,9 @@ class ProjectModules { llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>; virtual std::vector<std::string> getRequiredModules(PathRef File) = 0; - virtual PathRef - getSourceForModuleName(llvm::StringRef ModuleName, - PathRef RequiredSrcFile = PathRef()) = 0; + virtual std::string getModuleNameForSource(PathRef File) = 0; + virtual std::string getSourceForModuleName(llvm::StringRef ModuleName, + PathRef RequiredSrcFile) = 0; virtual void setCommandMangler(CommandMangler Mangler) {} diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp index e4dc11c1c2895..b99d35989cebf 100644 --- a/clang-tools-extra/clangd/ScanningProjectModules.cpp +++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp @@ -189,11 +189,18 @@ class ScanningAllProjectModules : public ProjectModules { /// RequiredSourceFile is not used intentionally. See the comments of /// ModuleDependencyScanner for detail. - PathRef - getSourceForModuleName(llvm::StringRef ModuleName, - PathRef RequiredSourceFile = PathRef()) override { + std::string getSourceForModuleName(llvm::StringRef ModuleName, + PathRef RequiredSourceFile) override { Scanner.globalScan(Mangler); - return Scanner.getSourceForModuleName(ModuleName); + return Scanner.getSourceForModuleName(ModuleName).str(); + } + + std::string getModuleNameForSource(PathRef File) override { + auto ScanningResult = Scanner.scan(File, Mangler); + if (!ScanningResult || !ScanningResult->ModuleName) + return {}; + + return *ScanningResult->ModuleName; } private: >From 5de01442644acafa9c1844080dd43315849b81ed Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Fri, 14 Feb 2025 10:42:41 +0800 Subject: [PATCH 2/2] Update --- clang-tools-extra/clangd/ModulesBuilder.cpp | 149 +++++++++--------- .../unittests/PrerequisiteModulesTest.cpp | 60 ++++++- 2 files changed, 131 insertions(+), 78 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index 121a4cec4c8c2..08a7b250a8119 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -357,74 +357,51 @@ void ModuleFileCache::remove(StringRef ModuleName) { ModuleFiles.erase(ModuleName); } -/// Collect the directly and indirectly required module names for \param -/// ModuleName in topological order. The \param ModuleName is guaranteed to -/// be the last element in \param ModuleNames. -llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource, - ProjectModules &MDB, - StringRef ModuleName) { - llvm::SmallVector<llvm::StringRef> ModuleNames; - llvm::StringSet<> ModuleNamesSet; - - auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { - ModuleNamesSet.insert(ModuleName); +class ModuleNameToSourceCache { +public: + std::string getSourceForModuleName(llvm::StringRef ModuleName) { + std::lock_guard<std::mutex> Lock(CacheMutex); + auto Iter = ModuleNameToSourceCache.find(ModuleName); + if (Iter != ModuleNameToSourceCache.end()) + return Iter->second; + return ""; + } - for (StringRef RequiredModuleName : MDB.getRequiredModules( - MDB.getSourceForModuleName(ModuleName, RequiredSource))) - if (ModuleNamesSet.insert(RequiredModuleName).second) - Visitor(RequiredModuleName, Visitor); + void addEntry(llvm::StringRef ModuleName, PathRef Source) { + std::lock_guard<std::mutex> Lock(CacheMutex); + ModuleNameToSourceCache[ModuleName] = Source.str(); + } - ModuleNames.push_back(ModuleName); - }; - VisitDeps(ModuleName, VisitDeps); + void eraseEntry(llvm::StringRef ModuleName) { + std::lock_guard<std::mutex> Lock(CacheMutex); + ModuleNameToSourceCache.erase(ModuleName); + } - return ModuleNames; -} +private: + std::mutex CacheMutex; + llvm::StringMap<std::string> ModuleNameToSourceCache; +}; class CachingProjectModules : public ProjectModules { public: - CachingProjectModules(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} + CachingProjectModules(std::unique_ptr<ProjectModules> MDB, + ModuleNameToSourceCache &Cache) + : MDB(std::move(MDB)), Cache(Cache) { + assert(this->MDB && "CachingProjectModules should only be created with a " + "valid underlying ProjectModules"); + } std::vector<std::string> getRequiredModules(PathRef File) override { - std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File); - if (!MDB) { - elog("Failed to get Project Modules information for {0}", File); - return {}; - } return MDB->getRequiredModules(File); } std::string getModuleNameForSource(PathRef File) override { - std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File); - if (!MDB) { - elog("Failed to get Project Modules information for {0}", File); - return {}; - } return MDB->getModuleNameForSource(File); } - void setCommandMangler(CommandMangler M) override { - // GlobalCompilationDatabase::getProjectModules() will set mangler - // for the underlying ProjectModules. - } - std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSrcFile) override { - std::string CachedResult; - { - std::lock_guard<std::mutex> Lock(CacheMutex); - auto Iter = ModuleNameToSourceCache.find(ModuleName); - if (Iter != ModuleNameToSourceCache.end()) - CachedResult = Iter->second; - } - - std::unique_ptr<ProjectModules> MDB = - CDB.getProjectModules(RequiredSrcFile); - if (!MDB) { - elog("Failed to get Project Modules information for {0}", - RequiredSrcFile); - return {}; - } + std::string CachedResult = Cache.getSourceForModuleName(ModuleName); // Verify Cached Result by seeing if the source declaring the same module // as we query. @@ -433,57 +410,70 @@ class CachingProjectModules : public ProjectModules { MDB->getModuleNameForSource(CachedResult); if (ModuleNameOfCachedSource == ModuleName) return CachedResult; - else { - // Cached Result is invalid. Clear it. - std::lock_guard<std::mutex> Lock(CacheMutex); - ModuleNameToSourceCache.erase(ModuleName); - } + // Cached Result is invalid. Clear it. + Cache.eraseEntry(ModuleName); } auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile); - - { - std::lock_guard<std::mutex> Lock(CacheMutex); - ModuleNameToSourceCache.insert({ModuleName, Result}); - } + Cache.addEntry(ModuleName, Result); return Result; } private: - const GlobalCompilationDatabase &CDB; - - std::mutex CacheMutex; - llvm::StringMap<std::string> ModuleNameToSourceCache; + std::unique_ptr<ProjectModules> MDB; + ModuleNameToSourceCache &Cache; }; +/// Collect the directly and indirectly required module names for \param +/// ModuleName in topological order. The \param ModuleName is guaranteed to +/// be the last element in \param ModuleNames. +llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource, + CachingProjectModules &MDB, + StringRef ModuleName) { + llvm::SmallVector<llvm::StringRef> ModuleNames; + llvm::StringSet<> ModuleNamesSet; + + auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { + ModuleNamesSet.insert(ModuleName); + + for (StringRef RequiredModuleName : MDB.getRequiredModules( + MDB.getSourceForModuleName(ModuleName, RequiredSource))) + if (ModuleNamesSet.insert(RequiredModuleName).second) + Visitor(RequiredModuleName, Visitor); + + ModuleNames.push_back(ModuleName); + }; + VisitDeps(ModuleName, VisitDeps); + + return ModuleNames; +} + } // namespace class ModulesBuilder::ModulesBuilderImpl { public: - ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) - : CachedProjectModules(CDB), Cache(CDB) {} + ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {} - CachingProjectModules &getCachedProjectModules() { - return CachedProjectModules; + ModuleNameToSourceCache &getProjectModulesCache() { + return ProjectModulesCache; } - const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); } llvm::Error getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName, - const ThreadsafeFS &TFS, ProjectModules &MDB, + const ThreadsafeFS &TFS, CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles); private: - CachingProjectModules CachedProjectModules; ModuleFileCache Cache; + ModuleNameToSourceCache ProjectModulesCache; }; llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS, - ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { + CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) return llvm::Error::success(); @@ -534,9 +524,16 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( std::unique_ptr<PrerequisiteModules> ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) { - ProjectModules &MDB = Impl->getCachedProjectModules(); + std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); + if (!MDB) { + elog("Failed to get Project Modules information for {0}", File); + return std::make_unique<FailedPrerequisiteModules>(); + } + CachingProjectModules CachedMDB(std::move(MDB), + Impl->getProjectModulesCache()); - std::vector<std::string> RequiredModuleNames = MDB.getRequiredModules(File); + std::vector<std::string> RequiredModuleNames = + CachedMDB.getRequiredModules(File); if (RequiredModuleNames.empty()) return std::make_unique<ReusablePrerequisiteModules>(); @@ -544,7 +541,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { // Return early if there is any error. if (llvm::Error Err = Impl->getOrBuildModuleFile( - File, RequiredModuleName, TFS, MDB, *RequiredModules.get())) { + File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) { elog("Failed to build module {0}; due to {1}", RequiredModuleName, toString(std::move(Err))); return std::make_unique<FailedPrerequisiteModules>(); diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp index 51723c797eabc..27f4c817a8ff3 100644 --- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp +++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp @@ -27,12 +27,41 @@ namespace clang::clangd { namespace { +class GlobalScanningCounterProjectModules : public ProjectModules { +public: + GlobalScanningCounterProjectModules( + std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count) + : Underlying(std::move(Underlying)), Count(Count) {} + + std::vector<std::string> getRequiredModules(PathRef File) override { + return Underlying->getRequiredModules(File); + } + + std::string getModuleNameForSource(PathRef File) override { + return Underlying->getModuleNameForSource(File); + } + + void setCommandMangler(CommandMangler Mangler) override { + Underlying->setCommandMangler(std::move(Mangler)); + } + + std::string getSourceForModuleName(llvm::StringRef ModuleName, + PathRef RequiredSrcFile) override { + Count++; + return Underlying->getSourceForModuleName(ModuleName, RequiredSrcFile); + } + +private: + std::unique_ptr<ProjectModules> Underlying; + std::atomic<unsigned> &Count; +}; + class MockDirectoryCompilationDatabase : public MockCompilationDatabase { public: MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS) : MockCompilationDatabase(TestDir), MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)), - TFS(TFS) { + TFS(TFS), GlobalScanningCount(0) { this->ExtraClangFlags.push_back("-std=c++20"); this->ExtraClangFlags.push_back("-c"); } @@ -40,9 +69,12 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase { void addFile(llvm::StringRef Path, llvm::StringRef Contents); std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override { - return scanningProjectModules(MockedCDBPtr, TFS); + return std::make_unique<GlobalScanningCounterProjectModules>( + scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount); } + unsigned getGlobalScanningCount() const { return GlobalScanningCount; } + private: class MockClangCompilationDatabase : public tooling::CompilationDatabase { public: @@ -68,6 +100,8 @@ class MockDirectoryCompilationDatabase : public MockCompilationDatabase { std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr; const ThreadsafeFS &TFS; + + mutable std::atomic<unsigned> GlobalScanningCount; }; // Add files to the working testing directory and the compilation database. @@ -590,6 +624,28 @@ export constexpr int M = 43; EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles); } +TEST_F(PrerequisiteModulesTests, ScanningCacheTest) { + MockDirectoryCompilationDatabase CDB(TestDir, FS); + + CDB.addFile("M.cppm", R"cpp( +export module M; + )cpp"); + CDB.addFile("A.cppm", R"cpp( +export module A; +import M; + )cpp"); + CDB.addFile("B.cppm", R"cpp( +export module B; +import M; + )cpp"); + + ModulesBuilder Builder(CDB); + + Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS); + Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS); + EXPECT_EQ(CDB.getGlobalScanningCount(), 1u); +} + } // namespace } // namespace clang::clangd _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits