https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/87848
>From ee56548604be9473f33cd809c901886f37a3d8e9 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Fri, 5 Apr 2024 15:12:39 -0700 Subject: [PATCH 1/2] [clang][modules] Do not resolve `HeaderFileInfo` externally in `ASTWriter` --- clang/include/clang/Lex/HeaderSearch.h | 24 ++++---- clang/lib/Lex/HeaderSearch.cpp | 76 ++++++++++++++------------ clang/lib/Serialization/ASTWriter.cpp | 11 ++-- clang/test/Modules/foo.c | 48 ++++++++++++++++ 4 files changed, 107 insertions(+), 52 deletions(-) create mode 100644 clang/test/Modules/foo.c diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 705dcfa8aacc3f..b8724e9aa3c92e 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -529,14 +529,15 @@ class HeaderSearch { /// Return whether the specified file is a normal header, /// a system header, or a C++ friendly system header. SrcMgr::CharacteristicKind getFileDirFlavor(FileEntryRef File) { - return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo; + if (const HeaderFileInfo *HFI = getExistingFileInfo(File)) + return (SrcMgr::CharacteristicKind)HFI->DirInfo; + return (SrcMgr::CharacteristicKind)HeaderFileInfo().DirInfo; } /// Mark the specified file as a "once only" file due to /// \#pragma once. void MarkFileIncludeOnce(FileEntryRef File) { - HeaderFileInfo &FI = getFileInfo(File); - FI.isPragmaOnce = true; + getFileInfo(File).isPragmaOnce = true; } /// Mark the specified file as a system header, e.g. due to @@ -816,16 +817,17 @@ class HeaderSearch { unsigned header_file_size() const { return FileInfo.size(); } - /// Return the HeaderFileInfo structure for the specified FileEntry, - /// in preparation for updating it in some way. + /// Return the HeaderFileInfo structure for the specified FileEntry, in + /// preparation for updating it in some way. HeaderFileInfo &getFileInfo(FileEntryRef FE); - /// Return the HeaderFileInfo structure for the specified FileEntry, - /// if it has ever been filled in. - /// \param WantExternal Whether the caller wants purely-external header file - /// info (where \p External is true). - const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE, - bool WantExternal = true) const; + /// Return the HeaderFileInfo structure for the specified FileEntry, if it has + /// ever been filled in (either locally or externally). + const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const; + + /// Return the headerFileInfo structure for the specified FileEntry, if it has + /// ever been filled in locally. + const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const; SearchDirIterator search_dir_begin() { return {*this, 0}; } SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index fcc2b56df166b8..55274f32528e65 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -947,9 +947,13 @@ OptionalFileEntryRef HeaderSearch::LookupFile( // If we have no includer, that means we're processing a #include // from a module build. We should treat this as a system header if we're // building a [system] module. - bool IncluderIsSystemHeader = - Includer ? getFileInfo(*Includer).DirInfo != SrcMgr::C_User : - BuildSystemModule; + bool IncluderIsSystemHeader = [&]() { + if (!Includer) + return BuildSystemModule; + const HeaderFileInfo *HFI = getExistingFileInfo(*Includer); + assert(HFI && "includer without file info"); + return HFI->DirInfo != SrcMgr::C_User; + }(); if (OptionalFileEntryRef FE = getFileAndSuggestModule( TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader, RequestingModule, SuggestedModule)) { @@ -964,10 +968,11 @@ OptionalFileEntryRef HeaderSearch::LookupFile( // Note that we only use one of FromHFI/ToHFI at once, due to potential // reallocation of the underlying vector potentially making the first // reference binding dangling. - HeaderFileInfo &FromHFI = getFileInfo(*Includer); - unsigned DirInfo = FromHFI.DirInfo; - bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader; - StringRef Framework = FromHFI.Framework; + const HeaderFileInfo *FromHFI = getExistingFileInfo(*Includer); + assert(FromHFI && "includer without file info"); + unsigned DirInfo = FromHFI->DirInfo; + bool IndexHeaderMapHeader = FromHFI->IndexHeaderMapHeader; + StringRef Framework = FromHFI->Framework; HeaderFileInfo &ToHFI = getFileInfo(*FE); ToHFI.DirInfo = DirInfo; @@ -1154,10 +1159,12 @@ OptionalFileEntryRef HeaderSearch::LookupFile( // "Foo" is the name of the framework in which the including header was found. if (!Includers.empty() && Includers.front().first && !isAngled && !Filename.contains('/')) { - HeaderFileInfo &IncludingHFI = getFileInfo(*Includers.front().first); - if (IncludingHFI.IndexHeaderMapHeader) { + const HeaderFileInfo *IncludingHFI = + getExistingFileInfo(*Includers.front().first); + assert(IncludingHFI && "includer without file info"); + if (IncludingHFI->IndexHeaderMapHeader) { SmallString<128> ScratchFilename; - ScratchFilename += IncludingHFI.Framework; + ScratchFilename += IncludingHFI->Framework; ScratchFilename += '/'; ScratchFilename += Filename; @@ -1289,10 +1296,11 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader( // This file is a system header or C++ unfriendly if the old file is. // // Note that the temporary 'DirInfo' is required here, as either call to - // getFileInfo could resize the vector and we don't want to rely on order - // of evaluation. - unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo; - getFileInfo(*File).DirInfo = DirInfo; + // getExistingFileInfo could resize the vector and we don't want to rely on + // order of evaluation. + const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt); + assert(ContextHFI && "context file without file info"); + getFileInfo(*File).DirInfo = ContextHFI->DirInfo; FrameworkName.pop_back(); // remove the trailing '/' if (!findUsableModuleForFrameworkHeader(*File, FrameworkName, @@ -1331,8 +1339,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, HFI.Framework = OtherHFI.Framework; } -/// getFileInfo - Return the HeaderFileInfo structure for the specified -/// FileEntry. HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) { if (FE.getUID() >= FileInfo.size()) FileInfo.resize(FE.getUID() + 1); @@ -1349,27 +1355,20 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) { } HFI->IsValid = true; - // We have local information about this header file, so it's no longer - // strictly external. + // We assume the caller has local information about this header file, so it's + // no longer strictly external. HFI->External = false; return *HFI; } -const HeaderFileInfo * -HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const { - // If we have an external source, ensure we have the latest information. - // FIXME: Use a generation count to check whether this is really up to date. +const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const { HeaderFileInfo *HFI; if (ExternalSource) { - if (FE.getUID() >= FileInfo.size()) { - if (!WantExternal) - return nullptr; + if (FE.getUID() >= FileInfo.size()) FileInfo.resize(FE.getUID() + 1); - } HFI = &FileInfo[FE.getUID()]; - if (!WantExternal && (!HFI->IsValid || HFI->External)) - return nullptr; + // FIXME: Use a generation count to check whether this is really up to date. if (!HFI->Resolved) { auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE); if (ExternalHFI.IsValid) { @@ -1378,16 +1377,25 @@ HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const { mergeHeaderFileInfo(*HFI, ExternalHFI); } } - } else if (FE.getUID() >= FileInfo.size()) { - return nullptr; - } else { + } else if (FE.getUID() < FileInfo.size()) { HFI = &FileInfo[FE.getUID()]; + } else { + HFI = nullptr; } - if (!HFI->IsValid || (HFI->External && !WantExternal)) - return nullptr; + return (HFI && HFI->IsValid) ? HFI : nullptr; +} + +const HeaderFileInfo * +HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const { + HeaderFileInfo *HFI; + if (FE.getUID() < FileInfo.size()) { + HFI = &FileInfo[FE.getUID()]; + } else { + HFI = nullptr; + } - return HFI; + return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr; } bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ba6a8a5e16e4e7..a05caca1b5a6fb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -178,8 +178,7 @@ std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP, if (!File) continue; - const HeaderFileInfo *HFI = - HS.getExistingFileInfo(*File, /*WantExternal*/ false); + const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File); if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader)) continue; @@ -2045,14 +2044,12 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { if (!File) continue; - // Get the file info. This will load info from the external source if - // necessary. Skip emitting this file if we have no information on it - // as a header file (in which case HFI will be null) or if it hasn't + // Get the file info. Skip emitting this file if we have no information on + // it as a header file (in which case HFI will be null) or if it hasn't // changed since it was loaded. Also skip it if it's for a modular header // from a different module; in that case, we rely on the module(s) // containing the header to provide this information. - const HeaderFileInfo *HFI = - HS.getExistingFileInfo(*File, /*WantExternal*/!Chain); + const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File); if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader)) continue; diff --git a/clang/test/Modules/foo.c b/clang/test/Modules/foo.c new file mode 100644 index 00000000000000..8f6860be1bff27 --- /dev/null +++ b/clang/test/Modules/foo.c @@ -0,0 +1,48 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// Not to be committed, just here as a demonstration. + +//--- frameworks/A.framework/Modules/module.modulemap +framework module A { + header "A1.h" + header "A2.h" + header "A3.h" +} +//--- frameworks/A.framework/Headers/A1.h +//--- frameworks/A.framework/Headers/A2.h +//--- frameworks/A.framework/Headers/A3.h +//--- EndA +// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=A %t/frameworks/A.framework/Modules/module.modulemap -o %t/A.pcm + +//--- frameworks/B.framework/Modules/module.modulemap +framework module B { + header "B1.h" + header "B2.h" + header "B3.h" +} +//--- frameworks/B.framework/Headers/B1.h +//--- frameworks/B.framework/Headers/B2.h +//--- frameworks/B.framework/Headers/B3.h +//--- EndB +// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=B %t/frameworks/B.framework/Modules/module.modulemap -o %t/B.pcm + +//--- frameworks/X.framework/Modules/module.modulemap +framework module X { header "X.h" } +//--- frameworks/X.framework/Headers/X.h +#import <A/A1.h> +#import <B/B1.h> +// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=X %t/frameworks/X.framework/Modules/module.modulemap -o %t/X.pcm \ +// RUN: -fmodule-map-file=%t/frameworks/A.framework/Modules/module.modulemap -fmodule-file=A=%t/A.pcm \ +// RUN: -fmodule-map-file=%t/frameworks/B.framework/Modules/module.modulemap -fmodule-file=B=%t/B.pcm + +// Without this patch, ASTWriter would go looking for: +// * "X.h" in A.pcm and B.pcm and not comparing it to any of their files due to size difference +// * "A2.h" and compare it to "A1.h", "A2.h" in A.pcm +// * "A3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm +// * "B2.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h" in B.pcm +// * "B3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h", "B3.h" in B.pcm + +// With this patch, ASTWriter doesn't go looking for anything of the above. +// * Clang already knows that "X.h" belongs to the current module and needs to be serialized, +// while the other headers belong to A or B and do not need to be serialized. >From 6f564f3afb0206c58c050a4fa05f73a0618c9eec Mon Sep 17 00:00:00 2001 From: Jan Svoboda <j...@svoboda.ai> Date: Sun, 7 Apr 2024 18:20:02 -0700 Subject: [PATCH 2/2] Fix SymbolCollector.cpp --- clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 85b8fc549b016e..5c4e2150cf3123 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache { // Framework headers are spelled as <FrameworkName/Foo.h>, not // "path/FrameworkName.framework/Headers/Foo.h". auto &HS = PP->getHeaderSearchInfo(); - if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false)) + if (const auto *HFI = HS.getExistingFileInfo(*FE)) if (!HFI->Framework.empty()) if (auto Spelling = getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS)) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits