https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/128841

Backport ae839b02504a68a0dfe63ac8ec314d9d7a6ce8df

Requested by: @ChuanqiXu9

>From 0c59624ff222149bd926fbbe68d3c68c01db28de Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng...@linux.alibaba.com>
Date: Wed, 26 Feb 2025 16:03:04 +0800
Subject: [PATCH] [clangd] [C++20] [Modules] Add scanning cache (#125988)

Previously, everytime we want to get a source file declaring a specific
module, we need to scan the whole projects again and again. The
performance is super bad. This patch tries to improve this by
introducing a simple cache.

(cherry picked from commit ae839b02504a68a0dfe63ac8ec314d9d7a6ce8df)
---
 clang-tools-extra/clangd/ModulesBuilder.cpp   | 100 ++++++++++++++++--
 clang-tools-extra/clangd/ProjectModules.h     |   6 +-
 .../clangd/ScanningProjectModules.cpp         |  18 +++-
 .../unittests/PrerequisiteModulesTest.cpp     |  60 ++++++++++-
 4 files changed, 164 insertions(+), 20 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp 
b/clang-tools-extra/clangd/ModulesBuilder.cpp
index bee31fe51555e..08a7b250a8119 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -357,10 +357,80 @@ void ModuleFileCache::remove(StringRef ModuleName) {
   ModuleFiles.erase(ModuleName);
 }
 
+class ModuleNameToSourceCache {
+public:
+  std::string getSourceForModuleName(llvm::StringRef ModuleName) {
+    std::lock_guard<std::mutex> Lock(CacheMutex);
+    auto Iter = ModuleNameToSourceCache.find(ModuleName);
+    if (Iter != ModuleNameToSourceCache.end())
+      return Iter->second;
+    return "";
+  }
+
+  void addEntry(llvm::StringRef ModuleName, PathRef Source) {
+    std::lock_guard<std::mutex> Lock(CacheMutex);
+    ModuleNameToSourceCache[ModuleName] = Source.str();
+  }
+
+  void eraseEntry(llvm::StringRef ModuleName) {
+    std::lock_guard<std::mutex> Lock(CacheMutex);
+    ModuleNameToSourceCache.erase(ModuleName);
+  }
+
+private:
+  std::mutex CacheMutex;
+  llvm::StringMap<std::string> ModuleNameToSourceCache;
+};
+
+class CachingProjectModules : public ProjectModules {
+public:
+  CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
+                        ModuleNameToSourceCache &Cache)
+      : MDB(std::move(MDB)), Cache(Cache) {
+    assert(this->MDB && "CachingProjectModules should only be created with a "
+                        "valid underlying ProjectModules");
+  }
+
+  std::vector<std::string> getRequiredModules(PathRef File) override {
+    return MDB->getRequiredModules(File);
+  }
+
+  std::string getModuleNameForSource(PathRef File) override {
+    return MDB->getModuleNameForSource(File);
+  }
+
+  std::string getSourceForModuleName(llvm::StringRef ModuleName,
+                                     PathRef RequiredSrcFile) override {
+    std::string CachedResult = Cache.getSourceForModuleName(ModuleName);
+
+    // Verify Cached Result by seeing if the source declaring the same module
+    // as we query.
+    if (!CachedResult.empty()) {
+      std::string ModuleNameOfCachedSource =
+          MDB->getModuleNameForSource(CachedResult);
+      if (ModuleNameOfCachedSource == ModuleName)
+        return CachedResult;
+
+      // Cached Result is invalid. Clear it.
+      Cache.eraseEntry(ModuleName);
+    }
+
+    auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
+    Cache.addEntry(ModuleName, Result);
+
+    return Result;
+  }
+
+private:
+  std::unique_ptr<ProjectModules> MDB;
+  ModuleNameToSourceCache &Cache;
+};
+
 /// Collect the directly and indirectly required module names for \param
 /// ModuleName in topological order. The \param ModuleName is guaranteed to
 /// be the last element in \param ModuleNames.
-llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
+llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
+                                                   CachingProjectModules &MDB,
                                                    StringRef ModuleName) {
   llvm::SmallVector<llvm::StringRef> ModuleNames;
   llvm::StringSet<> ModuleNamesSet;
@@ -368,8 +438,8 @@ llvm::SmallVector<StringRef> 
getAllRequiredModules(ProjectModules &MDB,
   auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
     ModuleNamesSet.insert(ModuleName);
 
-    for (StringRef RequiredModuleName :
-         MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
+    for (StringRef RequiredModuleName : MDB.getRequiredModules(
+             MDB.getSourceForModuleName(ModuleName, RequiredSource)))
       if (ModuleNamesSet.insert(RequiredModuleName).second)
         Visitor(RequiredModuleName, Visitor);
 
@@ -386,24 +456,29 @@ class ModulesBuilder::ModulesBuilderImpl {
 public:
   ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
 
+  ModuleNameToSourceCache &getProjectModulesCache() {
+    return ProjectModulesCache;
+  }
   const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
 
   llvm::Error
-  getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
-                       ProjectModules &MDB,
+  getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
+                       const ThreadsafeFS &TFS, CachingProjectModules &MDB,
                        ReusablePrerequisiteModules &BuiltModuleFiles);
 
 private:
   ModuleFileCache Cache;
+  ModuleNameToSourceCache ProjectModulesCache;
 };
 
 llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
-    StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
-    ReusablePrerequisiteModules &BuiltModuleFiles) {
+    PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
+    CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) 
{
   if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
     return llvm::Error::success();
 
-  PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
+  std::string ModuleUnitFileName =
+      MDB.getSourceForModuleName(ModuleName, RequiredSource);
   /// It is possible that we're meeting third party modules (modules whose
   /// source are not in the project. e.g, the std module may be a third-party
   /// module for most project) or something wrong with the implementation of
@@ -416,7 +491,7 @@ llvm::Error 
ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
         llvm::formatv("Don't get the module unit for module {0}", ModuleName));
 
   // Get Required modules in topological order.
-  auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
+  auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
   for (llvm::StringRef ReqModuleName : ReqModuleNames) {
     if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
       continue;
@@ -454,8 +529,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
     elog("Failed to get Project Modules information for {0}", File);
     return std::make_unique<FailedPrerequisiteModules>();
   }
+  CachingProjectModules CachedMDB(std::move(MDB),
+                                  Impl->getProjectModulesCache());
 
-  std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
+  std::vector<std::string> RequiredModuleNames =
+      CachedMDB.getRequiredModules(File);
   if (RequiredModuleNames.empty())
     return std::make_unique<ReusablePrerequisiteModules>();
 
@@ -463,7 +541,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
     if (llvm::Error Err = Impl->getOrBuildModuleFile(
-            RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
+            File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) 
{
       elog("Failed to build module {0}; due to {1}", RequiredModuleName,
            toString(std::move(Err)));
       return std::make_unique<FailedPrerequisiteModules>();
diff --git a/clang-tools-extra/clangd/ProjectModules.h 
b/clang-tools-extra/clangd/ProjectModules.h
index 48d52ac9deb89..41812674f12f4 100644
--- a/clang-tools-extra/clangd/ProjectModules.h
+++ b/clang-tools-extra/clangd/ProjectModules.h
@@ -42,9 +42,9 @@ class ProjectModules {
       llvm::unique_function<void(tooling::CompileCommand &, PathRef) const>;
 
   virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
-  virtual PathRef
-  getSourceForModuleName(llvm::StringRef ModuleName,
-                         PathRef RequiredSrcFile = PathRef()) = 0;
+  virtual std::string getModuleNameForSource(PathRef File) = 0;
+  virtual std::string getSourceForModuleName(llvm::StringRef ModuleName,
+                                             PathRef RequiredSrcFile) = 0;
 
   virtual void setCommandMangler(CommandMangler Mangler) {}
 
diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp 
b/clang-tools-extra/clangd/ScanningProjectModules.cpp
index e4dc11c1c2895..e561513fe687f 100644
--- a/clang-tools-extra/clangd/ScanningProjectModules.cpp
+++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp
@@ -134,6 +134,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
 
 void ModuleDependencyScanner::globalScan(
     const ProjectModules::CommandMangler &Mangler) {
+  if (GlobalScanned)
+    return;
+
   for (auto &File : CDB->getAllFiles())
     scan(File, Mangler);
 
@@ -189,11 +192,18 @@ class ScanningAllProjectModules : public ProjectModules {
 
   /// RequiredSourceFile is not used intentionally. See the comments of
   /// ModuleDependencyScanner for detail.
-  PathRef
-  getSourceForModuleName(llvm::StringRef ModuleName,
-                         PathRef RequiredSourceFile = PathRef()) override {
+  std::string getSourceForModuleName(llvm::StringRef ModuleName,
+                                     PathRef RequiredSourceFile) override {
     Scanner.globalScan(Mangler);
-    return Scanner.getSourceForModuleName(ModuleName);
+    return Scanner.getSourceForModuleName(ModuleName).str();
+  }
+
+  std::string getModuleNameForSource(PathRef File) override {
+    auto ScanningResult = Scanner.scan(File, Mangler);
+    if (!ScanningResult || !ScanningResult->ModuleName)
+      return {};
+
+    return *ScanningResult->ModuleName;
   }
 
 private:
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp 
b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 51723c797eabc..27f4c817a8ff3 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -27,12 +27,41 @@
 namespace clang::clangd {
 namespace {
 
+class GlobalScanningCounterProjectModules : public ProjectModules {
+public:
+  GlobalScanningCounterProjectModules(
+      std::unique_ptr<ProjectModules> Underlying, std::atomic<unsigned> &Count)
+      : Underlying(std::move(Underlying)), Count(Count) {}
+
+  std::vector<std::string> getRequiredModules(PathRef File) override {
+    return Underlying->getRequiredModules(File);
+  }
+
+  std::string getModuleNameForSource(PathRef File) override {
+    return Underlying->getModuleNameForSource(File);
+  }
+
+  void setCommandMangler(CommandMangler Mangler) override {
+    Underlying->setCommandMangler(std::move(Mangler));
+  }
+
+  std::string getSourceForModuleName(llvm::StringRef ModuleName,
+                                     PathRef RequiredSrcFile) override {
+    Count++;
+    return Underlying->getSourceForModuleName(ModuleName, RequiredSrcFile);
+  }
+
+private:
+  std::unique_ptr<ProjectModules> Underlying;
+  std::atomic<unsigned> &Count;
+};
+
 class MockDirectoryCompilationDatabase : public MockCompilationDatabase {
 public:
   MockDirectoryCompilationDatabase(StringRef TestDir, const ThreadsafeFS &TFS)
       : MockCompilationDatabase(TestDir),
         MockedCDBPtr(std::make_shared<MockClangCompilationDatabase>(*this)),
-        TFS(TFS) {
+        TFS(TFS), GlobalScanningCount(0) {
     this->ExtraClangFlags.push_back("-std=c++20");
     this->ExtraClangFlags.push_back("-c");
   }
@@ -40,9 +69,12 @@ class MockDirectoryCompilationDatabase : public 
MockCompilationDatabase {
   void addFile(llvm::StringRef Path, llvm::StringRef Contents);
 
   std::unique_ptr<ProjectModules> getProjectModules(PathRef) const override {
-    return scanningProjectModules(MockedCDBPtr, TFS);
+    return std::make_unique<GlobalScanningCounterProjectModules>(
+        scanningProjectModules(MockedCDBPtr, TFS), GlobalScanningCount);
   }
 
+  unsigned getGlobalScanningCount() const { return GlobalScanningCount; }
+
 private:
   class MockClangCompilationDatabase : public tooling::CompilationDatabase {
   public:
@@ -68,6 +100,8 @@ class MockDirectoryCompilationDatabase : public 
MockCompilationDatabase {
 
   std::shared_ptr<MockClangCompilationDatabase> MockedCDBPtr;
   const ThreadsafeFS &TFS;
+
+  mutable std::atomic<unsigned> GlobalScanningCount;
 };
 
 // Add files to the working testing directory and the compilation database.
@@ -590,6 +624,28 @@ export constexpr int M = 43;
   EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
 }
 
+TEST_F(PrerequisiteModulesTests, ScanningCacheTest) {
+  MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+  )cpp");
+  CDB.addFile("A.cppm", R"cpp(
+export module A;
+import M;
+  )cpp");
+  CDB.addFile("B.cppm", R"cpp(
+export module B;
+import M;
+  )cpp");
+
+  ModulesBuilder Builder(CDB);
+
+  Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+  Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+  EXPECT_EQ(CDB.getGlobalScanningCount(), 1u);
+}
+
 } // namespace
 } // namespace clang::clangd
 

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to