https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/92085
>From e8f756ec7f8ea7e5bf18cc122a965fb2f258fd15 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Tue, 14 May 2024 15:33:12 +0800 Subject: [PATCH] [Serialization] No transitive identifier change --- .../clang/Lex/ExternalPreprocessorSource.h | 54 ++++++++- clang/include/clang/Lex/HeaderSearch.h | 12 +- .../include/clang/Serialization/ASTBitCodes.h | 2 +- clang/include/clang/Serialization/ASTReader.h | 19 ++- .../include/clang/Serialization/ModuleFile.h | 3 - clang/lib/Lex/HeaderSearch.cpp | 33 +++--- clang/lib/Serialization/ASTReader.cpp | 98 ++++++++-------- clang/lib/Serialization/ASTWriter.cpp | 63 ++++++---- clang/lib/Serialization/GlobalModuleIndex.cpp | 3 +- clang/lib/Serialization/ModuleFile.cpp | 1 - .../no-transitive-identifier-change.cppm | 110 ++++++++++++++++++ 11 files changed, 286 insertions(+), 112 deletions(-) create mode 100644 clang/test/Modules/no-transitive-identifier-change.cppm diff --git a/clang/include/clang/Lex/ExternalPreprocessorSource.h b/clang/include/clang/Lex/ExternalPreprocessorSource.h index 6775841860373..48429948dbffe 100644 --- a/clang/include/clang/Lex/ExternalPreprocessorSource.h +++ b/clang/include/clang/Lex/ExternalPreprocessorSource.h @@ -36,12 +36,64 @@ class ExternalPreprocessorSource { /// Return the identifier associated with the given ID number. /// /// The ID 0 is associated with the NULL identifier. - virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0; + virtual IdentifierInfo *GetIdentifier(uint64_t ID) = 0; /// Map a module ID to a module. virtual Module *getModule(unsigned ModuleID) = 0; }; +// Either a pointer to an IdentifierInfo of the controlling macro or the ID +// number of the controlling macro. +class LazyIdentifierInfoPtr { + // If the low bit is clear, a pointer to the IdentifierInfo. If the low + // bit is set, the upper 63 bits are the ID number. + mutable uint64_t Ptr = 0; + +public: + LazyIdentifierInfoPtr() = default; + + explicit LazyIdentifierInfoPtr(const IdentifierInfo *Ptr) + : Ptr(reinterpret_cast<uint64_t>(Ptr)) {} + + explicit LazyIdentifierInfoPtr(uint64_t ID) : Ptr((ID << 1) | 0x01) { + assert((ID << 1 >> 1) == ID && "ID must require < 63 bits"); + if (ID == 0) + Ptr = 0; + } + + LazyIdentifierInfoPtr &operator=(const IdentifierInfo *Ptr) { + this->Ptr = reinterpret_cast<uint64_t>(Ptr); + return *this; + } + + LazyIdentifierInfoPtr &operator=(uint64_t ID) { + assert((ID << 1 >> 1) == ID && "IDs must require < 63 bits"); + if (ID == 0) + Ptr = 0; + else + Ptr = (ID << 1) | 0x01; + + return *this; + } + + /// Whether this pointer is non-NULL. + /// + /// This operation does not require the AST node to be deserialized. + bool isValid() const { return Ptr != 0; } + + /// Whether this pointer is currently stored as ID. + bool isID() const { return Ptr & 0x01; } + + IdentifierInfo *getPtr() const { + assert(!isID()); + return reinterpret_cast<IdentifierInfo *>(Ptr); + } + + uint64_t getID() const { + assert(isID()); + return Ptr >> 1; + } +}; } #endif diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 5ac63dddd4d4e..65700b8f9dc11 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -16,6 +16,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/DirectoryLookup.h" +#include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderMap.h" #include "clang/Lex/ModuleMap.h" #include "llvm/ADT/ArrayRef.h" @@ -119,13 +120,6 @@ struct HeaderFileInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsValid : 1; - /// The ID number of the controlling macro. - /// - /// This ID number will be non-zero when there is a controlling - /// macro whose IdentifierInfo may not yet have been loaded from - /// external storage. - unsigned ControllingMacroID = 0; - /// If this file has a \#ifndef XXX (or equivalent) guard that /// protects the entire contents of the file, this is the identifier /// for the macro that controls whether or not it has any effect. @@ -134,7 +128,7 @@ struct HeaderFileInfo { /// the controlling macro of this header, since /// getControllingMacro() is able to load a controlling macro from /// external storage. - const IdentifierInfo *ControllingMacro = nullptr; + LazyIdentifierInfoPtr LazyControllingMacro; /// If this header came from a framework include, this is the name /// of the framework. @@ -580,7 +574,7 @@ class HeaderSearch { /// no-op \#includes. void SetFileControllingMacro(FileEntryRef File, const IdentifierInfo *ControllingMacro) { - getFileInfo(File).ControllingMacro = ControllingMacro; + getFileInfo(File).LazyControllingMacro = ControllingMacro; } /// Determine whether this file is intended to be safe from diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 902c4470650c5..03bac9be2bf3d 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -59,7 +59,7 @@ const unsigned VERSION_MINOR = 1; /// /// The ID numbers of identifiers are consecutive (in order of discovery) /// and start at 1. 0 is reserved for NULL. -using IdentifierID = uint32_t; +using IdentifierID = uint64_t; /// The number of predefined identifier IDs. const unsigned int NUM_PREDEF_IDENT_IDS = 1; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 0a9006223dcbd..3f38a08f0da3a 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -660,14 +660,6 @@ class ASTReader /// been loaded. std::vector<IdentifierInfo *> IdentifiersLoaded; - using GlobalIdentifierMapType = - ContinuousRangeMap<serialization::IdentifierID, ModuleFile *, 4>; - - /// Mapping from global identifier IDs to the module in which the - /// identifier resides along with the offset that should be added to the - /// global identifier ID to produce a local ID. - GlobalIdentifierMapType GlobalIdentifierMap; - /// A vector containing macros that have already been /// loaded. /// @@ -1539,6 +1531,11 @@ class ASTReader /// Translate a \param GlobalDeclID to the index of DeclsLoaded array. unsigned translateGlobalDeclIDToIndex(GlobalDeclID ID) const; + /// Translate an \param IdentifierID ID to the index of IdentifiersLoaded + /// array and the corresponding module file. + std::pair<ModuleFile *, unsigned> + translateIdentifierIDToIndex(serialization::IdentifierID ID) const; + public: /// Load the AST file and validate its contents against the given /// Preprocessor. @@ -2123,7 +2120,7 @@ class ASTReader /// Load a selector from disk, registering its ID if it exists. void LoadSelector(Selector Sel); - void SetIdentifierInfo(unsigned ID, IdentifierInfo *II); + void SetIdentifierInfo(serialization::IdentifierID ID, IdentifierInfo *II); void SetGloballyVisibleDecls(IdentifierInfo *II, const SmallVectorImpl<GlobalDeclID> &DeclIDs, SmallVectorImpl<Decl *> *Decls = nullptr); @@ -2150,10 +2147,10 @@ class ASTReader return DecodeIdentifierInfo(ID); } - IdentifierInfo *getLocalIdentifier(ModuleFile &M, unsigned LocalID); + IdentifierInfo *getLocalIdentifier(ModuleFile &M, uint64_t LocalID); serialization::IdentifierID getGlobalIdentifierID(ModuleFile &M, - unsigned LocalID); + uint64_t LocalID); void resolvePendingMacro(IdentifierInfo *II, const PendingMacroInfo &PMInfo); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 56193d44dd6f3..3787f4eeb8a8b 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -310,9 +310,6 @@ class ModuleFile { /// Base identifier ID for identifiers local to this module. serialization::IdentifierID BaseIdentifierID = 0; - /// Remapping table for identifier IDs in this module. - ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; - /// Actual data for the on-disk hash table of identifiers. /// /// This pointer points into a memory buffer, where the on-disk hash diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 574723b33866a..addcb6ce2b635 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -60,19 +60,21 @@ ALWAYS_ENABLED_STATISTIC(NumSubFrameworkLookups, const IdentifierInfo * HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) { - if (ControllingMacro) { - if (ControllingMacro->isOutOfDate()) { - assert(External && "We must have an external source if we have a " - "controlling macro that is out of date."); - External->updateOutOfDateIdentifier(*ControllingMacro); - } - return ControllingMacro; - } + if (LazyControllingMacro.isID()) { + if (!External) + return nullptr; - if (!ControllingMacroID || !External) - return nullptr; + LazyControllingMacro = + External->GetIdentifier(LazyControllingMacro.getID()); + return LazyControllingMacro.getPtr(); + } - ControllingMacro = External->GetIdentifier(ControllingMacroID); + IdentifierInfo *ControllingMacro = LazyControllingMacro.getPtr(); + if (ControllingMacro && ControllingMacro->isOutOfDate()) { + assert(External && "We must have an external source if we have a " + "controlling macro that is out of date."); + External->updateOutOfDateIdentifier(*ControllingMacro); + } return ControllingMacro; } @@ -1341,10 +1343,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader, OtherHFI.isTextualModuleHeader); - if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { - HFI.ControllingMacro = OtherHFI.ControllingMacro; - HFI.ControllingMacroID = OtherHFI.ControllingMacroID; - } + if (!HFI.LazyControllingMacro.isValid()) + HFI.LazyControllingMacro = OtherHFI.LazyControllingMacro; HFI.DirInfo = OtherHFI.DirInfo; HFI.External = (!HFI.IsValid || HFI.External); @@ -1419,8 +1419,7 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const { // once. Note that we dor't check for #import, because that's not a property // of the file itself. if (auto *HFI = getExistingFileInfo(File)) - return HFI->isPragmaOnce || HFI->ControllingMacro || - HFI->ControllingMacroID; + return HFI->isPragmaOnce || HFI->LazyControllingMacro.isValid(); return false; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a5a747f68f014..29a7fcb2cb507 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -920,7 +920,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { SelectorTable &SelTable = Reader.getContext().Selectors; unsigned N = endian::readNext<uint16_t, llvm::endianness::little>(d); const IdentifierInfo *FirstII = Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d)); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d)); if (N == 0) return SelTable.getNullarySelector(FirstII); else if (N == 1) @@ -930,7 +930,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { Args.push_back(FirstII); for (unsigned I = 1; I != N; ++I) Args.push_back(Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d))); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d))); return SelTable.getSelector(N, Args.data()); } @@ -1011,7 +1011,8 @@ static bool readBit(unsigned &Bits) { IdentifierID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) { using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); return Reader.getGlobalIdentifierID(F, RawID >> 1); } @@ -1029,9 +1030,12 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, unsigned DataLen) { using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); bool IsInteresting = RawID & 0x01; + DataLen -= sizeof(IdentifierID); + // Wipe out the "is interesting" bit. RawID = RawID >> 1; @@ -1062,7 +1066,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, bool HadMacroDefinition = readBit(Bits); assert(Bits == 0 && "Extra bits in the identifier?"); - DataLen -= 8; + DataLen -= sizeof(uint16_t) * 2; // Set or check the various bits in the IdentifierInfo structure. // Token IDs are read-only. @@ -1188,7 +1192,7 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) { case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: Data = (uint64_t)Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d)); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d)); break; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: @@ -2054,8 +2058,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HFI.isPragmaOnce |= (Flags >> 4) & 0x01; HFI.DirInfo = (Flags >> 1) & 0x07; HFI.IndexHeaderMapHeader = Flags & 0x01; - HFI.ControllingMacroID = Reader.getGlobalIdentifierID( - M, endian::readNext<uint32_t, llvm::endianness::little>(d)); + HFI.LazyControllingMacro = Reader.getGlobalIdentifierID( + M, endian::readNext<IdentifierID, llvm::endianness::little>(d)); if (unsigned FrameworkOffset = endian::readNext<uint32_t, llvm::endianness::little>(d)) { // The framework offset is 1 greater than the actual offset, @@ -3429,24 +3433,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, "duplicate IDENTIFIER_OFFSET record in AST file"); F.IdentifierOffsets = (const uint32_t *)Blob.data(); F.LocalNumIdentifiers = Record[0]; - unsigned LocalBaseIdentifierID = Record[1]; F.BaseIdentifierID = getTotalNumIdentifiers(); - if (F.LocalNumIdentifiers > 0) { - // Introduce the global -> local mapping for identifiers within this - // module. - GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1, - &F)); - - // Introduce the local -> global mapping for identifiers within this - // module. - F.IdentifierRemap.insertOrReplace( - std::make_pair(LocalBaseIdentifierID, - F.BaseIdentifierID - LocalBaseIdentifierID)); - + if (F.LocalNumIdentifiers > 0) IdentifiersLoaded.resize(IdentifiersLoaded.size() + F.LocalNumIdentifiers); - } break; } @@ -4038,7 +4029,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { F.ModuleOffsetMap = StringRef(); using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder; - RemapBuilder IdentifierRemap(F.IdentifierRemap); RemapBuilder MacroRemap(F.MacroRemap); RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); RemapBuilder SubmoduleRemap(F.SubmoduleRemap); @@ -4071,8 +4061,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { ImportedModuleVector.push_back(OM); - uint32_t IdentifierIDOffset = - endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t MacroIDOffset = endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t PreprocessedEntityIDOffset = @@ -4092,7 +4080,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { static_cast<int>(BaseOffset - Offset))); }; - mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap); mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap); mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID, PreprocessedEntityRemap); @@ -8181,7 +8168,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() { dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap); dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap); dumpModuleIDMap("Global type map", GlobalTypeMap); - dumpModuleIDMap("Global identifier map", GlobalIdentifierMap); dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); @@ -8822,8 +8808,9 @@ void ASTReader::LoadSelector(Selector Sel) { void ASTReader::SetIdentifierInfo(IdentifierID ID, IdentifierInfo *II) { assert(ID && "Non-zero identifier ID required"); - assert(ID <= IdentifiersLoaded.size() && "identifier ID out of range"); - IdentifiersLoaded[ID - 1] = II; + unsigned Index = translateIdentifierIDToIndex(ID).second; + assert(Index < IdentifiersLoaded.size() && "identifier ID out of range"); + IdentifiersLoaded[Index] = II; if (DeserializationListener) DeserializationListener->IdentifierRead(ID, II); } @@ -8876,6 +8863,22 @@ void ASTReader::SetGloballyVisibleDecls( } } +std::pair<ModuleFile *, unsigned> +ASTReader::translateIdentifierIDToIndex(IdentifierID ID) const { + if (ID == 0) + return {nullptr, 0}; + + unsigned ModuleFileIndex = ID >> 32; + unsigned LocalID = ID & llvm::maskTrailingOnes<IdentifierID>(32); + + assert(ModuleFileIndex && "not translating loaded IdentifierID?"); + assert(getModuleManager().size() > ModuleFileIndex - 1); + + ModuleFile &MF = getModuleManager()[ModuleFileIndex - 1]; + assert(LocalID < MF.LocalNumIdentifiers); + return {&MF, MF.BaseIdentifierID + LocalID}; +} + IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) { if (ID == 0) return nullptr; @@ -8885,45 +8888,48 @@ IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) { return nullptr; } - ID -= 1; - if (!IdentifiersLoaded[ID]) { - GlobalIdentifierMapType::iterator I = GlobalIdentifierMap.find(ID + 1); - assert(I != GlobalIdentifierMap.end() && "Corrupted global identifier map"); - ModuleFile *M = I->second; - unsigned Index = ID - M->BaseIdentifierID; + auto [M, Index] = translateIdentifierIDToIndex(ID); + if (!IdentifiersLoaded[Index]) { + assert(M != nullptr && "Untranslated Identifier ID?"); + assert(Index >= M->BaseIdentifierID); + unsigned LocalIndex = Index - M->BaseIdentifierID; const unsigned char *Data = - M->IdentifierTableData + M->IdentifierOffsets[Index]; + M->IdentifierTableData + M->IdentifierOffsets[LocalIndex]; ASTIdentifierLookupTrait Trait(*this, *M); auto KeyDataLen = Trait.ReadKeyDataLength(Data); auto Key = Trait.ReadKey(Data, KeyDataLen.first); auto &II = PP.getIdentifierTable().get(Key); - IdentifiersLoaded[ID] = &II; + IdentifiersLoaded[Index] = &II; markIdentifierFromAST(*this, II); if (DeserializationListener) - DeserializationListener->IdentifierRead(ID + 1, &II); + DeserializationListener->IdentifierRead(ID, &II); } - return IdentifiersLoaded[ID]; + return IdentifiersLoaded[Index]; } -IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) { +IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, uint64_t LocalID) { return DecodeIdentifierInfo(getGlobalIdentifierID(M, LocalID)); } -IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) { +IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, uint64_t LocalID) { if (LocalID < NUM_PREDEF_IDENT_IDS) return LocalID; if (!M.ModuleOffsetMap.empty()) ReadModuleOffsetMap(M); - ContinuousRangeMap<uint32_t, int, 2>::iterator I - = M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS); - assert(I != M.IdentifierRemap.end() - && "Invalid index into identifier index remap"); + unsigned ModuleFileIndex = LocalID >> 32; + LocalID &= llvm::maskTrailingOnes<IdentifierID>(32); + ModuleFile *MF = + ModuleFileIndex ? M.TransitiveImports[ModuleFileIndex - 1] : &M; + assert(MF && "malformed identifier ID encoding?"); - return LocalID + I->second; + if (!ModuleFileIndex) + LocalID -= NUM_PREDEF_IDENT_IDS; + + return ((IdentifierID)(MF->Index + 1) << 32) | LocalID; } MacroInfo *ASTReader::getMacro(MacroID ID) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b8b613db712f4..dd6c135d54b46 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1991,7 +1991,7 @@ namespace { std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) { unsigned KeyLen = key.Filename.size() + 1 + 8 + 8; - unsigned DataLen = 1 + 4 + 4; + unsigned DataLen = 1 + sizeof(IdentifierID) + 4; for (auto ModInfo : Data.KnownHeaders) if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) DataLen += 4; @@ -2026,10 +2026,11 @@ namespace { | Data.HFI.IndexHeaderMapHeader; LE.write<uint8_t>(Flags); - if (!Data.HFI.ControllingMacro) - LE.write<uint32_t>(Data.HFI.ControllingMacroID); + if (Data.HFI.LazyControllingMacro.isID()) + LE.write<IdentifierID>(Data.HFI.LazyControllingMacro.getID()); else - LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro)); + LE.write<IdentifierID>( + Writer.getIdentifierRef(Data.HFI.LazyControllingMacro.getPtr())); unsigned Offset = 0; if (!Data.HFI.Framework.empty()) { @@ -3452,7 +3453,9 @@ class ASTMethodPoolTrait { std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream& Out, Selector Sel, data_type_ref Methods) { - unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4); + unsigned KeyLen = + 2 + (Sel.getNumArgs() ? Sel.getNumArgs() * sizeof(IdentifierID) + : sizeof(IdentifierID)); unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) @@ -3477,7 +3480,7 @@ class ASTMethodPoolTrait { if (N == 0) N = 1; for (unsigned I = 0; I != N; ++I) - LE.write<uint32_t>( + LE.write<IdentifierID>( Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I))); } @@ -3788,7 +3791,7 @@ class ASTIdentifierTableTrait { InterestingIdentifierOffsets->push_back(Out.tell()); unsigned KeyLen = II->getLength() + 1; - unsigned DataLen = 4; // 4 bytes for the persistent ID << 1 + unsigned DataLen = sizeof(IdentifierID); // bytes for the persistent ID << 1 if (isInterestingIdentifier(II, MacroOffset)) { DataLen += 2; // 2 bytes for builtin ID DataLen += 2; // 2 bytes for flags @@ -3814,11 +3817,11 @@ class ASTIdentifierTableTrait { auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (!isInterestingIdentifier(II, MacroOffset)) { - LE.write<uint32_t>(ID << 1); + LE.write<IdentifierID>(ID << 1); return; } - LE.write<uint32_t>((ID << 1) | 0x01); + LE.write<IdentifierID>((ID << 1) | 0x01); uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID(); assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader."); LE.write<uint16_t>(Bits); @@ -3851,6 +3854,10 @@ class ASTIdentifierTableTrait { } // namespace +/// If the \param IdentifierID ID is a local Identifier ID. If the higher +/// bits of ID is 0, it implies that the ID doesn't come from AST files. +static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); } + /// Write the identifier table into the AST file. /// /// The identifier table consists of a blob containing string data @@ -3895,7 +3902,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, // Write out identifiers if either the ID is local or the identifier has // changed since it was loaded. - if (ID >= FirstIdentID || II->hasChangedSinceDeserialization() || + if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() || (Trait.needDecls() && II->hasFETokenInfoChangedSinceDeserialization())) Generator.insert(II, ID, Trait); @@ -3929,7 +3936,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, auto Abbrev = std::make_shared<BitCodeAbbrev>(); Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); @@ -3939,8 +3945,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, #endif RecordData::value_type Record[] = {IDENTIFIER_OFFSET, - IdentifierOffsets.size(), - FirstIdentID - NUM_PREDEF_IDENT_IDS}; + IdentifierOffsets.size()}; Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, bytes(IdentifierOffsets)); @@ -4030,11 +4035,13 @@ class ASTDeclContextNameLookupTrait { unsigned KeyLen = 1; switch (Name.getKind()) { case DeclarationName::Identifier: + case DeclarationName::CXXLiteralOperatorName: + case DeclarationName::CXXDeductionGuideName: + KeyLen += sizeof(IdentifierID); + break; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: - case DeclarationName::CXXLiteralOperatorName: - case DeclarationName::CXXDeductionGuideName: KeyLen += 4; break; case DeclarationName::CXXOperatorName: @@ -4062,7 +4069,7 @@ class ASTDeclContextNameLookupTrait { case DeclarationName::Identifier: case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: - LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier())); + LE.write<IdentifierID>(Writer.getIdentifierRef(Name.getIdentifier())); return; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: @@ -4780,8 +4787,15 @@ void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { IdentifierID ID = IdentifierIDs[II]; // Only store offsets new to this AST file. Other identifier names are looked // up earlier in the chain and thus don't need an offset. - if (ID >= FirstIdentID) - IdentifierOffsets[ID - FirstIdentID] = Offset; + if (!isLocalIdentifierID(ID)) + return; + + // For local identifiers, the module file index must be 0. + + assert(ID != 0); + ID -= NUM_PREDEF_IDENT_IDS; + assert(ID < IdentifierOffsets.size()); + IdentifierOffsets[ID] = Offset; } /// Note that the selector Sel occurs at the given offset @@ -5415,7 +5429,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. - writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, M.NumPreprocessedEntities); @@ -6614,20 +6627,26 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { // Note, this will get called multiple times, once one the reader starts up // and again each time it's done reading a PCH or module. FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes(); - FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers(); FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); NextTypeID = FirstTypeID; - NextIdentID = FirstIdentID; NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; } void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) { - // Always keep the highest ID. See \p TypeRead() for more information. IdentifierID &StoredID = IdentifierIDs[II]; + unsigned OriginalModuleFileIndex = StoredID >> 32; + + // Always keep the local identifier ID. See \p TypeRead() for more + // information. + if (OriginalModuleFileIndex == 0 && StoredID) + return; + + // Otherwise, keep the highest ID since the module file comes later has + // higher module file indexes. if (ID > StoredID) StoredID = ID; } diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index f09ceb8d31620..1163943c5dffa 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -510,7 +510,8 @@ namespace { // The first bit indicates whether this identifier is interesting. // That's all we care about. using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); bool IsInteresting = RawID & 0x01; return std::make_pair(k, IsInteresting); } diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index f64a59bd94891..7976c28b28671 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -62,7 +62,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { llvm::errs() << " Base identifier ID: " << BaseIdentifierID << '\n' << " Number of identifiers: " << LocalNumIdentifiers << '\n'; - dumpLocalRemap("Identifier ID local -> global map", IdentifierRemap); llvm::errs() << " Base macro ID: " << BaseMacroID << '\n' << " Number of macros: " << LocalNumMacros << '\n'; diff --git a/clang/test/Modules/no-transitive-identifier-change.cppm b/clang/test/Modules/no-transitive-identifier-change.cppm new file mode 100644 index 0000000000000..97e8ac4444fdd --- /dev/null +++ b/clang/test/Modules/no-transitive-identifier-change.cppm @@ -0,0 +1,110 @@ +// Testing that adding an new identifier in an unused module file won't change +// the BMI of the current module file. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \ +// RUN: %t/m-partA.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// Since useBOnly only uses partB from module M, the change in partA shouldn't affect +// useBOnly. +// RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null +// +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// useAOnly should differ +// RUN: not diff %t/useAOnly.pcm %t/useAOnly.v1.pcm &> /dev/null + +//--- m-partA.cppm +export module m:partA; + +export inline int getA() { + return 43; +} + +export class A { +public: + int getMem(); +}; + +export template <typename T> +class ATempl { +public: + T getT(); +}; + +//--- m-partA.v1.cppm +export module m:partA; + +export inline int getA() { + return 43; +} + +// Now we add a new declaration without introducing a new type. +// The consuming module which didn't use m:partA completely is expected to be +// not changed. +export inline int getA2() { + return 88; +} + +export class A { +public: + int getMem(); + // Now we add a new declaration without introducing a new type. + // The consuming module which didn't use m:partA completely is expected to be + // not changed. + int getMem2(); +}; + +export template <typename T> +class ATempl { +public: + T getT(); + // Add a new declaration without introducing a new type. + T getT2(); +}; + +//--- m-partB.cppm +export module m:partB; + +export inline int getB() { + return 430; +} + +//--- m.cppm +export module m; +export import :partA; +export import :partB; + +//--- useBOnly.cppm +export module useBOnly; +import m; + +export inline int get() { + return getB(); +} + +//--- useAOnly.cppm +export module useAOnly; +import m; + +export inline int get() { + return getA(); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits