https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/71117
None >From 9debc58d5135fbde51967dfb076d0ec5d954df38 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Thu, 2 Nov 2023 14:35:30 -0700 Subject: [PATCH] [clang][modules] Track included files per submodule --- clang/include/clang/Basic/Module.h | 2 + clang/include/clang/Lex/Preprocessor.h | 39 +++++++++++--- clang/lib/Serialization/ASTReader.cpp | 52 ++++++++++--------- clang/lib/Serialization/ASTReaderInternals.h | 3 +- clang/lib/Serialization/ASTWriter.cpp | 39 +++++++++++--- clang/test/Modules/per-submodule-includes.m | 53 ++++++++++++++++++++ 6 files changed, 149 insertions(+), 39 deletions(-) create mode 100644 clang/test/Modules/per-submodule-includes.m diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 6a7423938bdb8fa..41f3b4fb46423d6 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -423,6 +423,8 @@ class alignas(8) Module { /// to import but didn't because they are not direct uses. llvm::SmallSetVector<const Module *, 2> UndeclaredUses; + llvm::DenseSet<const FileEntry *> Includes; + /// A library or framework to link against when an entity from this /// module is used. struct LinkLibrary { diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4a99447e757c6ac..fef608efdee09d9 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -982,6 +982,8 @@ class Preprocessor { /// The set of modules that are visible within the submodule. VisibleModuleSet VisibleModules; + /// The files that have been included. + IncludedFilesSet IncludedFiles; // FIXME: CounterValue? // FIXME: PragmaPushMacroInfo? }; @@ -994,8 +996,8 @@ class Preprocessor { /// in a submodule. SubmoduleState *CurSubmoduleState; - /// The files that have been included. - IncludedFilesSet IncludedFiles; + /// The files that have been included outside of (sub)modules. + IncludedFilesSet Includes; /// The set of top-level modules that affected preprocessing, but were not /// imported. @@ -1484,19 +1486,40 @@ class Preprocessor { /// Mark the file as included. /// Returns true if this is the first time the file was included. bool markIncluded(FileEntryRef File) { - HeaderInfo.getFileInfo(File); - return IncludedFiles.insert(File).second; + bool AlreadyIncluded = alreadyIncluded(File); + CurSubmoduleState->IncludedFiles.insert(File); + if (!BuildingSubmoduleStack.empty()) + BuildingSubmoduleStack.back().M->Includes.insert(File); + else if (Module *M = getCurrentModule()) + M->Includes.insert(File); + else + Includes.insert(File); + return !AlreadyIncluded; } /// Return true if this header has already been included. bool alreadyIncluded(FileEntryRef File) const { HeaderInfo.getFileInfo(File); - return IncludedFiles.count(File); + if (CurSubmoduleState->IncludedFiles.contains(File)) + return true; + // TODO: Do this more efficiently. + for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules()) + if (CurSubmoduleState->VisibleModules.isVisible(M)) + if (M->Includes.contains(File)) + return true; + return false; + } + + void markIncludedOnTopLevel(const FileEntry *File) { + Includes.insert(File); + CurSubmoduleState->IncludedFiles.insert(File); + } + + void markIncludedInModule(Module *M, const FileEntry *File) { + M->Includes.insert(File); } - /// Get the set of included files. - IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } - const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } + const IncludedFilesSet &getTopLevelIncludes() const { return Includes; } /// Return the name of the macro defined before \p Loc that has /// spelling \p Tokens. If there are multiple macros with same spelling, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 42b48d230af7a97..9c32dcf3d954761 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1975,19 +1975,15 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, return LocalID + I->second; } -const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) { +OptionalFileEntryRefDegradesToFileEntryPtr +HeaderFileInfoTrait::getFile(const internal_key_type &Key) { FileManager &FileMgr = Reader.getFileManager(); - if (!Key.Imported) { - if (auto File = FileMgr.getFile(Key.Filename)) - return *File; - return nullptr; - } + if (!Key.Imported) + return FileMgr.getOptionalFileRef(Key.Filename); std::string Resolved = std::string(Key.Filename); Reader.ResolveImportedPath(M, Resolved); - if (auto File = FileMgr.getFile(Resolved)) - return *File; - return nullptr; + return FileMgr.getOptionalFileRef(Resolved); } unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) { @@ -2043,13 +2039,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HeaderFileInfo HFI; unsigned Flags = *d++; - bool Included = (Flags >> 6) & 0x01; - if (Included) - if (const FileEntry *FE = getFile(key)) - // Not using \c Preprocessor::markIncluded(), since that would attempt to - // deserialize this header file info again. - Reader.getPreprocessor().getIncludedFiles().insert(FE); - // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp. HFI.isImport |= (Flags >> 5) & 0x01; HFI.isPragmaOnce |= (Flags >> 4) & 0x01; @@ -2065,6 +2054,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HFI.Framework = HS->getUniqueFrameworkName(FrameworkName); } + OptionalFileEntryRefDegradesToFileEntryPtr FE = getFile(key); + Preprocessor &PP = Reader.getPreprocessor(); + ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + + unsigned IncludedCount = + endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d); + for (unsigned I = 0; I < IncludedCount; ++I) { + uint32_t LocalSMID = + endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d); + if (!FE) + continue; + + if (LocalSMID == 0) { + PP.markIncludedOnTopLevel(*FE); + } else { + SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID); + Module *Mod = Reader.getSubmodule(GlobalSMID); + PP.markIncludedInModule(Mod, *FE); + } + } + assert((End - d) % 4 == 0 && "Wrong data length in HeaderFileInfo deserialization"); while (d != End) { @@ -2077,14 +2087,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, // implicit module import. SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID); Module *Mod = Reader.getSubmodule(GlobalSMID); - FileManager &FileMgr = Reader.getFileManager(); - ModuleMap &ModMap = - Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap(); - - std::string Filename = std::string(key.Filename); - if (key.Imported) - Reader.ResolveImportedPath(M, Filename); - if (auto FE = FileMgr.getOptionalFileRef(Filename)) { + + if (FE) { // FIXME: NameAsWritten Module::Header H = {std::string(key.Filename), "", *FE}; ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true); diff --git a/clang/lib/Serialization/ASTReaderInternals.h b/clang/lib/Serialization/ASTReaderInternals.h index 25a46ddabcb7078..1abcaa9ca299d1d 100644 --- a/clang/lib/Serialization/ASTReaderInternals.h +++ b/clang/lib/Serialization/ASTReaderInternals.h @@ -278,7 +278,8 @@ class HeaderFileInfoTrait { data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen); private: - const FileEntry *getFile(const internal_key_type &Key); + OptionalFileEntryRefDegradesToFileEntryPtr + getFile(const internal_key_type &Key); }; /// The on-disk hash table used for known header files. diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ab70594607530f4..a7fddaf8e243b95 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1842,7 +1842,7 @@ namespace { struct data_type { const HeaderFileInfo &HFI; - bool AlreadyIncluded; + std::vector<const Module *> Includers; ArrayRef<ModuleMap::KnownHeader> KnownHeaders; UnresolvedModule Unresolved; }; @@ -1862,6 +1862,12 @@ namespace { EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) { unsigned KeyLen = key.Filename.size() + 1 + 8 + 8; unsigned DataLen = 1 + 4 + 4; + + DataLen += 4; + for (const Module *M : Data.Includers) + if (!M || Writer.getLocalOrImportedSubmoduleID(M)) + DataLen += 4; + for (auto ModInfo : Data.KnownHeaders) if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) DataLen += 4; @@ -1888,8 +1894,7 @@ namespace { endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; - unsigned char Flags = (Data.AlreadyIncluded << 6) - | (Data.HFI.isImport << 5) + unsigned char Flags = (Data.HFI.isImport << 5) | (Data.HFI.isPragmaOnce << 4) | (Data.HFI.DirInfo << 1) | Data.HFI.IndexHeaderMapHeader; @@ -1916,6 +1921,14 @@ namespace { } LE.write<uint32_t>(Offset); + LE.write<uint32_t>(Data.Includers.size()); + for (const Module *M : Data.Includers) { + if (!M) + LE.write<uint32_t>(0); + else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) + LE.write<uint32_t>(ModID); + } + auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) { if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) { uint32_t Value = (ModID << 3) | (unsigned)Role; @@ -1990,7 +2003,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { HeaderFileInfoTrait::key_type Key = { FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0}; HeaderFileInfoTrait::data_type Data = { - Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}}; + Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}}; // FIXME: Deal with cases where there are multiple unresolved header // directives in different submodules for the same header. Generator.insert(Key, Data, GeneratorTrait); @@ -2033,13 +2046,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { SavedStrings.push_back(Filename.data()); } - bool Included = PP->alreadyIncluded(*File); + std::vector<const Module *> Includers; + if (WritingModule) { + llvm::DenseSet<const Module *> Seen; + std::function<void(const Module *)> Visit = [&](const Module *M) { + if (!Seen.insert(M).second) + return; + if (M->Includes.contains(*File)) + Includers.push_back(M); + for (const Module *SubM : M->submodules()) + Visit(SubM); + }; + Visit(WritingModule); + } else if (PP->getTopLevelIncludes().contains(*File)) { + Includers.push_back(nullptr); + } HeaderFileInfoTrait::key_type Key = { Filename, File->getSize(), getTimestampForOutput(*File) }; HeaderFileInfoTrait::data_type Data = { - *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {} + *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {} }; Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; diff --git a/clang/test/Modules/per-submodule-includes.m b/clang/test/Modules/per-submodule-includes.m new file mode 100644 index 000000000000000..39fd255e8e93e92 --- /dev/null +++ b/clang/test/Modules/per-submodule-includes.m @@ -0,0 +1,53 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- frameworks/Textual.framework/Headers/Header.h +static int symbol; + +//--- frameworks/FW.framework/Modules/module.modulemap +framework module FW { + umbrella header "FW.h" + export * + module * { export * } +} +//--- frameworks/FW.framework/Headers/FW.h +#import <FW/Sub1.h> +#import <FW/Sub2.h> +//--- frameworks/FW.framework/Headers/Sub1.h +//--- frameworks/FW.framework/Headers/Sub2.h +#import <Textual/Header.h> + +//--- pch.modulemap +module __PCH { + header "pch.h" + export * +} +//--- pch.h +#import <FW/Sub1.h> + +//--- tu.m +#import <Textual/Header.h> +int fn() { return symbol; } + +// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the +// PCH is treated textually due to -fmodule-name=FW. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \ +// RUN: -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \ +// RUN: -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m + +// Compilation using the PCH as precompiled module fails. The import of FW/Sub1.h +// in the PCH is translated to an import. Nothing is preventing that now that +// -fmodule-name=FW has been replaced with -fmodule-name=__PCH. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \ +// RUN: -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o %t/pch.h.pcm +// +// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in +// FW.Sub2), so the TU does not import it again. It's contents remain invisible, +// though. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \ +// RUN: -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only %t/tu.m _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits