llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-modules Author: Jan Svoboda (jansvoboda11) <details> <summary>Changes</summary> This patch builds on top of https://github.com/llvm/llvm-project/pull/115237 and https://github.com/llvm/llvm-project/pull/115235, only passing the `Preprocessor` object to `ASTWriter`. This reduces the size of scanning PCM files by 1/3 and speeds up scans by 16%. --- Patch is 51.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115239.diff 10 Files Affected: - (modified) clang/include/clang/Lex/HeaderSearchOptions.h (+5) - (modified) clang/include/clang/Serialization/ASTRecordWriter.h (+4-3) - (modified) clang/include/clang/Serialization/ASTWriter.h (+21-28) - (modified) clang/lib/Frontend/ASTUnit.cpp (+1-1) - (modified) clang/lib/Serialization/ASTReader.cpp (+3) - (modified) clang/lib/Serialization/ASTWriter.cpp (+149-127) - (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+30-25) - (modified) clang/lib/Serialization/ASTWriterStmt.cpp (+8-7) - (modified) clang/lib/Serialization/GeneratePCH.cpp (+14-5) - (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp (+1) ``````````diff diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h index c85e3d27281701..7a16926c186d2c 100644 --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -255,6 +255,10 @@ class HeaderSearchOptions { LLVM_PREFERRED_TYPE(bool) unsigned ModulesHashContent : 1; + /// Whether AST files should only contain the preprocessor information. + LLVM_PREFERRED_TYPE(bool) + unsigned ModulesSerializeOnlyPreprocessor : 1; + /// Whether we should include all things that could impact the module in the /// hash. /// @@ -288,6 +292,7 @@ class HeaderSearchOptions { ModulesSkipHeaderSearchPaths(false), ModulesSkipPragmaDiagnosticMappings(false), ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false), + ModulesSerializeOnlyPreprocessor(false), ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false), AllowModuleMapSubdirectorySearch(true) {} diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index d6090ba1a6c690..67720a0aebc1ca 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -60,8 +60,9 @@ class ASTRecordWriter public: /// Construct a ASTRecordWriter that uses the default encoding scheme. - ASTRecordWriter(ASTWriter &W, ASTWriter::RecordDataImpl &Record) - : DataStreamBasicWriter(W.getASTContext()), Writer(&W), Record(&Record) {} + ASTRecordWriter(ASTContext &Context, ASTWriter &W, + ASTWriter::RecordDataImpl &Record) + : DataStreamBasicWriter(Context), Writer(&W), Record(&Record) {} /// Construct a ASTRecordWriter that uses the same encoding scheme as another /// ASTRecordWriter. @@ -208,7 +209,7 @@ class ASTRecordWriter /// Emit a reference to a type. void AddTypeRef(QualType T) { - return Writer->AddTypeRef(T, *Record); + return Writer->AddTypeRef(getASTContext(), T, *Record); } void writeQualType(QualType T) { AddTypeRef(T); diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index d0e841f367c1e0..51253ea3c63ec7 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -119,9 +119,6 @@ class ASTWriter : public ASTDeserializationListener, /// The PCM manager which manages memory buffers for pcm files. InMemoryModuleCache &ModuleCache; - /// The ASTContext we're writing. - ASTContext *Context = nullptr; - /// The preprocessor we're writing. Preprocessor *PP = nullptr; @@ -545,7 +542,7 @@ class ASTWriter : public ASTDeserializationListener, unsigned getSubmoduleID(Module *Mod); /// Write the given subexpression to the bitstream. - void WriteSubStmt(Stmt *S); + void WriteSubStmt(ASTContext &Context, Stmt *S); void WriteBlockInfoBlock(); void WriteControlBlock(Preprocessor &PP, StringRef isysroot); @@ -564,34 +561,36 @@ class ASTWriter : public ASTDeserializationListener, void WriteHeaderSearch(const HeaderSearch &HS); void WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t MacroOffsetsBase); - void WriteSubmodules(Module *WritingModule); + void WriteSubmodules(Module *WritingModule, ASTContext *Context); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule); unsigned TypeExtQualAbbrev = 0; void WriteTypeAbbrevs(); - void WriteType(QualType T); + void WriteType(ASTContext &Context, QualType T); bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); - void GenerateNameLookupTable(const DeclContext *DC, + void GenerateNameLookupTable(ASTContext &Context, const DeclContext *DC, llvm::SmallVectorImpl<char> &LookupTable); uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, const DeclContext *DC); uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC); void WriteTypeDeclOffsets(); void WriteFileDeclIDsMap(); - void WriteComments(); + void WriteComments(ASTContext &Context); void WriteSelectors(Sema &SemaRef); void WriteReferencedSelectorsPool(Sema &SemaRef); - void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver, + void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver *IdResolver, bool IsModule); void WriteDeclAndTypes(ASTContext &Context); void PrepareWritingSpecialDecls(Sema &SemaRef); void WriteSpecialDeclRecords(Sema &SemaRef); - void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord); - void WriteDeclContextVisibleUpdate(const DeclContext *DC); + void WriteDeclUpdatesBlocks(ASTContext &Context, + RecordDataImpl &OffsetsRecord); + void WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC); void WriteFPPragmaOptions(const FPOptionsOverride &Opts); void WriteOpenCLExtensions(Sema &SemaRef); void WriteCUDAPragmas(Sema &SemaRef); @@ -640,7 +639,7 @@ class ASTWriter : public ASTDeserializationListener, void WriteDeclAbbrevs(); void WriteDecl(ASTContext &Context, Decl *D); - ASTFileSignature WriteASTCore(Sema &SemaRef, StringRef isysroot, + ASTFileSignature WriteASTCore(Sema *SemaPtr, StringRef isysroot, Module *WritingModule); public: @@ -653,11 +652,6 @@ class ASTWriter : public ASTDeserializationListener, bool GeneratingReducedBMI = false); ~ASTWriter() override; - ASTContext &getASTContext() const { - assert(Context && "requested AST context when not writing AST"); - return *Context; - } - const LangOptions &getLangOpts() const; /// Get a timestamp for output into the AST file. The actual timestamp @@ -667,8 +661,8 @@ class ASTWriter : public ASTDeserializationListener, /// Write a precompiled header for the given semantic analysis. /// - /// \param SemaRef a reference to the semantic analysis object that processed - /// the AST to be written into the precompiled header. + /// \param Subject The object that processed the input to be written into the + /// AST file. /// /// \param WritingModule The module that we are writing. If null, we are /// writing a precompiled header. @@ -679,8 +673,9 @@ class ASTWriter : public ASTDeserializationListener, /// /// \return the module signature, which eventually will be a hash of /// the module but currently is merely a random 32-bit number. - ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile, - Module *WritingModule, StringRef isysroot, + ASTFileSignature WriteAST(llvm::PointerUnion<Sema *, Preprocessor*> Subject, + StringRef OutputFile, Module *WritingModule, + StringRef isysroot, bool ShouldCacheASTInMemory = false); /// Emit a token. @@ -723,10 +718,10 @@ class ASTWriter : public ASTDeserializationListener, uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. - void AddTypeRef(QualType T, RecordDataImpl &Record); + void AddTypeRef(ASTContext &Context, QualType T, RecordDataImpl &Record); /// Force a type to be emitted and get its ID. - serialization::TypeID GetOrCreateTypeID(QualType T); + serialization::TypeID GetOrCreateTypeID(ASTContext &Context, QualType T); /// Find the first local declaration of a given local redeclarable /// decl. @@ -928,9 +923,9 @@ class PCHGenerator : public SemaConsumer { void anchor() override; Preprocessor &PP; + llvm::PointerUnion<Sema *, Preprocessor *> Subject; std::string OutputFile; std::string isysroot; - Sema *SemaPtr; std::shared_ptr<PCHBuffer> Buffer; llvm::BitstreamWriter Stream; ASTWriter Writer; @@ -945,9 +940,7 @@ class PCHGenerator : public SemaConsumer { bool isComplete() const { return Buffer->IsComplete; } PCHBuffer *getBufferPtr() { return Buffer.get(); } StringRef getOutputFile() const { return OutputFile; } - DiagnosticsEngine &getDiagnostics() const { - return SemaPtr->getDiagnostics(); - } + DiagnosticsEngine &getDiagnostics() const; Preprocessor &getPreprocessor() { return PP; } virtual Module *getEmittingModule(ASTContext &Ctx); @@ -963,7 +956,7 @@ class PCHGenerator : public SemaConsumer { bool GeneratingReducedBMI = false); ~PCHGenerator() override; - void InitializeSema(Sema &S) override { SemaPtr = &S; } + void InitializeSema(Sema &S) override; void HandleTranslationUnit(ASTContext &Ctx) override; void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); } ASTMutationListener *GetASTMutationListener() override; diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 4aec928f9eb0a5..f58e27a7979d1e 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2359,7 +2359,7 @@ bool ASTUnit::Save(StringRef File) { static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer, Sema &S, raw_ostream &OS) { - Writer.WriteAST(S, std::string(), nullptr, ""); + Writer.WriteAST(&S, std::string(), nullptr, ""); // Write the generated bitstream to "Out". if (!Buffer.empty()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 79615dc3c018ea..8d2debf84f5ad8 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3529,6 +3529,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + if (Record.empty()) + break; + if (SpecialTypes.size() != Record.size()) return llvm::createStringError(std::errc::illegal_byte_sequence, "invalid special-types record"); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b95e29cbc02515..43fac29f32609c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -277,8 +277,8 @@ class ASTTypeWriter { ASTRecordWriter BasicWriter; public: - ASTTypeWriter(ASTWriter &Writer) - : Writer(Writer), BasicWriter(Writer, Record) {} + ASTTypeWriter(ASTContext &Context, ASTWriter &Writer) + : Writer(Writer), BasicWriter(Context, Writer, Record) {} uint64_t write(QualType T) { if (T.hasLocalNonFastQualifiers()) { @@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) { return ChildModules + 1; } -void ASTWriter::WriteSubmodules(Module *WritingModule) { +void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { // Enter the submodule description block. Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5); @@ -3123,12 +3123,14 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { // Emit the reachable initializers. // The initializer may only be unreachable in reduced BMI. - RecordData Inits; - for (Decl *D : Context->getModuleInitializers(Mod)) - if (wasDeclEmitted(D)) - AddDeclRef(D, Inits); - if (!Inits.empty()) - Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits); + if (Context) { + RecordData Inits; + for (Decl *D : Context->getModuleInitializers(Mod)) + if (wasDeclEmitted(D)) + AddDeclRef(D, Inits); + if (!Inits.empty()) + Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits); + } // Emit the name of the re-exported module, if any. if (!Mod->ExportAsModule.empty()) { @@ -3259,7 +3261,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, //===----------------------------------------------------------------------===// /// Write the representation of a type to the AST stream. -void ASTWriter::WriteType(QualType T) { +void ASTWriter::WriteType(ASTContext &Context, QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; if (IdxRef.getValue() == 0) // we haven't seen this type before. IdxRef = TypeIdx(0, NextTypeID++); @@ -3269,7 +3271,8 @@ void ASTWriter::WriteType(QualType T) { assert(Idx.getValue() >= FirstTypeID && "Writing predefined type"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; + uint64_t Offset = + ASTTypeWriter(Context, *this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. uint64_t Index = Idx.getValue() - FirstTypeID; @@ -3393,7 +3396,7 @@ void ASTWriter::WriteFileDeclIDsMap() { Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs)); } -void ASTWriter::WriteComments() { +void ASTWriter::WriteComments(ASTContext &Context) { Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3); auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); }); if (!PP->getPreprocessorOpts().WriteCommentListToPCH) @@ -3406,7 +3409,7 @@ void ASTWriter::WriteComments() { return; RecordData Record; - for (const auto &FO : Context->Comments.OrderedComments) { + for (const auto &FO : Context.Comments.OrderedComments) { for (const auto &OC : FO.second) { const RawComment *I = OC.second; Record.clear(); @@ -3656,7 +3659,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) { return; RecordData Record; - ASTRecordWriter Writer(*this, Record); + ASTRecordWriter Writer(SemaRef.Context, *this, Record); // Note: this writes out all references even for a dependent AST. But it is // very tricky to fix, and given that @selector shouldn't really appear in @@ -3742,7 +3745,7 @@ bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II, class ASTIdentifierTableTrait { ASTWriter &Writer; Preprocessor &PP; - IdentifierResolver &IdResolver; + IdentifierResolver *IdResolver; bool IsModule; bool NeedDecls; ASTWriter::RecordData *InterestingIdentifierOffsets; @@ -3767,7 +3770,7 @@ class ASTIdentifierTableTrait { using offset_type = unsigned; ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, - IdentifierResolver &IdResolver, bool IsModule, + IdentifierResolver *IdResolver, bool IsModule, ASTWriter::RecordData *InterestingIdentifierOffsets) : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule), NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus), @@ -3806,8 +3809,8 @@ class ASTIdentifierTableTrait { if (MacroOffset) DataLen += 4; // MacroDirectives offset. - if (NeedDecls) - DataLen += std::distance(IdResolver.begin(II), IdResolver.end()) * + if (NeedDecls && IdResolver) + DataLen += std::distance(IdResolver->begin(II), IdResolver->end()) * sizeof(DeclID); } return emitULEBKeyDataLength(KeyLen, DataLen, Out); @@ -3845,14 +3848,14 @@ class ASTIdentifierTableTrait { if (HadMacroDefinition) LE.write<uint32_t>(MacroOffset); - if (NeedDecls) { + if (NeedDecls && IdResolver) { // Emit the declaration IDs in reverse order, because the // IdentifierResolver provides the declarations as they would be // visible (e.g., the function "stat" would come before the struct // "stat"), but the ASTReader adds declarations to the end of the list // (so we need to see the struct "stat" before the function "stat"). // Only emit declarations that aren't from a chained PCH, though. - SmallVector<NamedDecl *, 16> Decls(IdResolver.decls(II)); + SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II)); for (NamedDecl *D : llvm::reverse(Decls)) LE.write<DeclID>((DeclID)Writer.getDeclID( getDeclForLocalLookup(PP.getLangOpts(), D))); @@ -3872,7 +3875,7 @@ static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); } /// (the actual identifiers themselves) and a separate "offsets" index /// that maps identifier IDs to locations within the blob. void ASTWriter::WriteIdentifierTable(Preprocessor &PP, - IdentifierResolver &IdResolver, + IdentifierResolver *IdResolver, bool IsModule) { using namespace llvm; @@ -4137,9 +4140,9 @@ static bool isLookupResultNotInteresting(ASTWriter &Writer, return true; } -void -ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, - llvm::SmallVectorImpl<char> &LookupTable) { +void ASTWriter::GenerateNameLookupTable( + ASTContext &Context, const DeclContext *ConstDC, + llvm::SmallVectorImpl<char> &LookupTable) { assert(!ConstDC->hasLazyLocalLexicalLookups() && !ConstDC->hasLazyExternalLexicalLookups() && "must call buildLookups first"); @@ -4234,8 +4237,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // another declaration in the redecl chain. Any non-implicit constructor or // conversion function which doesn't occur in all the lexical contexts // would be an ODR violation. - auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName( - Context->getCanonicalType(Context->getRecordType(D))); + auto ImplicitCtorName = Context.DeclarationNames.getCXXConstructorName( + Context.getCanonicalType(Context.getRecordType(D))); if (ConstructorNameSet.erase(ImplicitCtorName)) Names.push_back(ImplicitCtorName); @@ -4415,7 +4418,7 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // Write the lookup table RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE}; @@ -4431,14 +4434,15 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, /// DeclContext in a dependent AST file. As such, they only exist for the TU /// (in C++), for namespaces, and for classes with forward-declared unscoped /// enumeration members (in C++11). -void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) { +void ASTWriter::WriteDeclContextVisibleUpdate(ASTContext &Context, + const DeclContext *DC) { StoredDeclsMap *Map = DC->getLookupPtr(); if (!Map || Map->empty()) return; // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; - GenerateNameLookupTable(DC, LookupTable); + GenerateNameLookupTable(Context, DC, LookupTable); // If we're updating a namespace, select a key declaration as the key for the // update record; those are the only ones that will be checked on reload. @@ -4753,15 +4757,12 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) { } bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) { - assert(Context && "should have context when outputting path"); - // Leave special file names as they are. StringRef PathStr(Path.data(), Path.size()); if (PathStr == "<built-in>" || PathStr == "<command line>") return false; - bool Changed = - cleanPathForOutput(Context->getSourceManager().getFileManager(), Path); + bool Changed = cleanPathForOutput(PP->getFileManager(), Path); // Remove a prefix to make the path relative, if relevant. const char *PathBegin = Path.data(); @@ -4850,21 +4851,28 @@ ASTWriter::~ASTWriter() = default; const LangOptions &ASTWriter::getLangOpts() const { assert(WritingAST && "can't determine lang opts when not writing AST"); - return Context->getLangOpts(); + return PP->getLangOpts(); } time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const { return IncludeTimestamps ? E->getModificationTime() : 0; } -ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, - Module *WritingModule, StringRef isysroot, - bool ShouldCacheASTInMemory) { +ASTFileSignature +ASTWriter::WriteAST(llvm::PointerUn... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/115239 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits