https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/113391
>From c6ff124355209de31c86096eb2ede14d598aa5cd Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Tue, 22 Oct 2024 15:46:07 -0700 Subject: [PATCH 1/2] [clang][modules] Optimize construction and usage of the submodule index This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload. --- clang/include/clang/Basic/Module.h | 3 +-- clang/include/clang/Lex/ModuleMap.h | 10 ++++----- clang/lib/Basic/Module.cpp | 12 ++++++----- clang/lib/Lex/ModuleMap.cpp | 29 ++++++++++++++++----------- clang/lib/Serialization/ASTReader.cpp | 14 ++++++++----- 5 files changed, 39 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 1ab3b5e5f81567..dd384c1d76c5fd 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -227,7 +227,7 @@ class alignas(8) Module { /// A mapping from the submodule name to the index into the /// \c SubModules vector at which that submodule resides. - llvm::StringMap<unsigned> SubModuleIndex; + mutable llvm::StringMap<unsigned> SubModuleIndex; /// The AST file if this is a top-level module which has a /// corresponding serialized AST file, or null otherwise. @@ -612,7 +612,6 @@ class alignas(8) Module { void setParent(Module *M) { assert(!Parent); Parent = M; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 75b567a347cb6c..b4a8e0e358ffbe 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -541,11 +541,11 @@ class ModuleMap { /// /// \param IsExplicit Whether this is an explicit submodule. /// - /// \returns The found or newly-created module, along with a boolean value - /// that will be true if the module is newly-created. - std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent, - bool IsFramework, - bool IsExplicit); + /// \returns The found or newly-created module. + Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); + Module *createModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); /// Create a global module fragment for a C++ module unit. /// diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index a7a3f6b37efef1..330108d5b3e47f 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name, NoUndeclaredIncludes = Parent->NoUndeclaredIncludes; ModuleMapIsPrivate = Parent->ModuleMapIsPrivate; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } } @@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) { } Module *Module::findSubmodule(StringRef Name) const { - llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name); - if (Pos == SubModuleIndex.end()) - return nullptr; + // Add new submodules into the index. + for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I) + SubModuleIndex[SubModules[I]->Name] = I; - return SubModules[Pos->getValue()]; + if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end()) + return SubModules[It->second]; + + return nullptr; } Module *Module::getGlobalModuleFragment() const { diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 201ab91cf68ca1..10774429a2177b 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(SkippedDir.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. @@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(File.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); @@ -857,15 +857,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ return Context->findSubmodule(Name); } -std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, - Module *Parent, - bool IsFramework, - bool IsExplicit) { +Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { // Try to find an existing module with this name. if (Module *Sub = lookupModuleQualified(Name, Parent)) - return std::make_pair(Sub, false); + return Sub; // Create a new module with this name. + return createModule(Name, Parent, IsFramework, IsExplicit); +} + +Module *ModuleMap::createModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + assert(lookupModuleQualified(Name, Parent) == nullptr && + "Creating duplicate submodule"); + Module *Result = new (ModulesAlloc.Allocate()) Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent, IsFramework, IsExplicit, NumCreatedModules++); @@ -875,7 +881,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, Modules[Name] = Result; ModuleScopeIDs[Result] = CurrentModuleScopeID; } - return std::make_pair(Result, true); + return Result; } Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc, @@ -2124,8 +2130,7 @@ void ModuleMapParser::parseModuleDecl() { Map.createShadowedModule(ModuleName, Framework, ShadowingModule); } else { ActiveModule = - Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit) - .first; + Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit); } ActiveModule->DefinitionLoc = ModuleNameLoc; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2419ed84e68acf..74a79ac54bb4eb 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, return Err; ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; + // If we don't know the top-level module, there's no point in doing qualified + // lookup of its submodules; it won't find anything anywhere within this tree. + // Let's skip that and avoid some string lookups. + auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule + : &ModuleMap::findOrCreateModule; + bool First = true; Module *CurrentModule = nullptr; RecordData Record; @@ -5829,11 +5836,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, if (Parent) ParentModule = getSubmodule(Parent); - // Retrieve this (sub)module from the module map, creating it if - // necessary. - CurrentModule = - ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) - .first; + CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule, + IsFramework, IsExplicit); SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; if (GlobalIndex >= SubmodulesLoaded.size() || >From 21295bc48b1f0392a71f0768f41f38d375e0c195 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Mon, 28 Oct 2024 11:28:00 -0700 Subject: [PATCH 2/2] Document new function --- clang/include/clang/Lex/ModuleMap.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index b4a8e0e358ffbe..5ee152e4213abf 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -544,6 +544,9 @@ class ModuleMap { /// \returns The found or newly-created module. Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, bool IsExplicit); + /// Create new submodule, assuming it does not exist. This function can only + /// be called when it is guaranteed that this submodule does not exist yet. + /// The parameters have same semantics as \c ModuleMap::findOrCreateModule. Module *createModule(StringRef Name, Module *Parent, bool IsFramework, bool IsExplicit); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits