dang created this revision. dang added a reviewer: Bigcheese. Herald added subscribers: cfe-commits, dexonsmith. Herald added a project: clang.
The new AST_SIGNATURE record is constructed by hashing the bytes of the AST block, in a similar fashion to the SIGNATURE record. This new signature is only meaningful if the AST block is fully relocatable, i.e. it does not embed absolute offsets within the PCM file. This change ensures that this does not happen by replacing these offsets with offsets relative to the start of the AST block. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D80383 Files: clang/include/clang/Serialization/ASTBitCodes.h clang/include/clang/Serialization/ASTReader.h clang/include/clang/Serialization/ASTWriter.h clang/include/clang/Serialization/ModuleFile.h clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTReaderDecl.cpp clang/lib/Serialization/ASTWriter.cpp clang/lib/Serialization/ASTWriterDecl.cpp clang/test/Modules/ASTSignature.c clang/test/Modules/Inputs/ASTHash/module.modulemap clang/test/Modules/Inputs/ASTHash/my_header_1.h clang/test/Modules/Inputs/ASTHash/my_header_2.h
Index: clang/test/Modules/Inputs/ASTHash/my_header_2.h =================================================================== --- /dev/null +++ clang/test/Modules/Inputs/ASTHash/my_header_2.h @@ -0,0 +1,3 @@ +#include "my_header_1.h" + +extern my_int var; Index: clang/test/Modules/Inputs/ASTHash/my_header_1.h =================================================================== --- /dev/null +++ clang/test/Modules/Inputs/ASTHash/my_header_1.h @@ -0,0 +1 @@ +typedef int my_int; Index: clang/test/Modules/Inputs/ASTHash/module.modulemap =================================================================== --- /dev/null +++ clang/test/Modules/Inputs/ASTHash/module.modulemap @@ -0,0 +1,8 @@ +module MyHeader1 { + header "my_header_1.h" +} + +module MyHeader2 { + header "my_header_2.h" + export * +} Index: clang/test/Modules/ASTSignature.c =================================================================== --- /dev/null +++ clang/test/Modules/ASTSignature.c @@ -0,0 +1,21 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ +// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t %s -Rmodule-build 2> %t1 +// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t %s -Rmodule-build 2> %t2 +// RUN: sed -n "s/.* building module 'MyHeader2' as '\(.*\)' .*/\1/gp" %t1 \ +// RUN: | xargs llvm-bcanalyzer --dump --disable-histogram | cat > %t1.dump +// RUN: sed -n "s/.* building module 'MyHeader2' as '\(.*\)' .*/\1/gp" %t2 \ +// RUN: | xargs llvm-bcanalyzer --dump --disable-histogram | cat > %t2.dump +// RUN: cat %t1.dump %t2.dump | FileCheck %s + +#include "my_header_2.h" + +my_int var = 42; + +// CHECK: [[SIGNATURE:<AST_SIGNATURE .*>]] +// CHECK: [[SIGNATURE]] +// The modules built by this test are designed to yield the same AST. 
If this +// test fails, it means that the AST block is has become non-relocatable. Index: clang/lib/Serialization/ASTWriterDecl.cpp =================================================================== --- clang/lib/Serialization/ASTWriterDecl.cpp +++ clang/lib/Serialization/ASTWriterDecl.cpp @@ -2428,7 +2428,7 @@ W.Visit(D); // Emit this declaration to the bitstream. - uint64_t Offset = W.Emit(D); + uint64_t Offset = W.Emit(D) - ASTBlockRange.first; // Record the offset for this declaration SourceLocation Loc = D->getLocation(); Index: clang/lib/Serialization/ASTWriter.cpp =================================================================== --- clang/lib/Serialization/ASTWriter.cpp +++ clang/lib/Serialization/ASTWriter.cpp @@ -65,6 +65,7 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Weak.h" +#include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleFile.h" @@ -960,6 +961,7 @@ BLOCK(UNHASHED_CONTROL_BLOCK); RECORD(SIGNATURE); + RECORD(AST_SIGNATURE); RECORD(DIAGNOSTIC_OPTIONS); RECORD(DIAG_PRAGMA_MAPPINGS); @@ -1025,22 +1027,38 @@ return Filename + Pos; } -ASTFileSignature ASTWriter::createSignature(StringRef Bytes) { - // Calculate the hash till start of UNHASHED_CONTROL_BLOCK. +std::pair<ASTFileSignature, ASTFileSignature> +ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { + // Calculate the hash of the AST block llvm::SHA1 Hasher; - Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size())); + Hasher.update(ASTBlockBytes); auto Hash = Hasher.result(); // Convert to an array [5*i32]. 
- ASTFileSignature Signature; auto LShift = [&](unsigned char Val, unsigned Shift) { return (uint32_t)Val << Shift; }; + + ASTFileSignature ASTSignature; + for (int I = 0; I != 5; ++I) + ASTSignature[I] = LShift(Hash[I * 4 + 0], 24) | + LShift(Hash[I * 4 + 1], 16) | LShift(Hash[I * 4 + 2], 8) | + LShift(Hash[I * 4 + 3], 0); + + // Add the remaing bytes (i.e. bytes before the unhashed control block that + // are not part of the AST block) + Hasher.update( + AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin())); + Hasher.update( + AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + + Hash = Hasher.result(); + ASTFileSignature Signature; for (int I = 0; I != 5; ++I) Signature[I] = LShift(Hash[I * 4 + 0], 24) | LShift(Hash[I * 4 + 1], 16) | LShift(Hash[I * 4 + 2], 8) | LShift(Hash[I * 4 + 3], 0); - return Signature; + return std::make_pair(ASTSignature, Signature); } ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, @@ -1054,10 +1072,18 @@ Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5); // For implicit modules, write the hash of the PCM as its signature. 
+ ASTFileSignature ASTSignature; ASTFileSignature Signature; + auto ASTBlockStartByte = ASTBlockRange.first >> 3; + auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; + std::tie(ASTSignature, Signature) = createSignature( + StringRef(Buffer.begin(), StartOfUnhashedControl), + StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl)); + Record.append(ASTSignature.begin(), ASTSignature.end()); + Stream.EmitRecord(AST_SIGNATURE, Record); + Record.clear(); Record.append(Signature.begin(), Signature.end()); Stream.EmitRecord(SIGNATURE, Record); Record.clear(); @@ -2049,7 +2075,7 @@ RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, - SLocEntryOffsetsBase}; + SLocEntryOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2328,7 +2354,7 @@ { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS, - MacroOffsetsBase}; + MacroOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } @@ -2842,7 +2868,7 @@ assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T); + uint64_t Offset = ASTTypeWriter(*this).write(T) - ASTBlockRange.first; // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; @@ -4552,6 +4578,8 @@ WriteControlBlock(PP, Context, isysroot, OutputFile); // Write the remaining AST contents. 
+ Stream.FlushToWord(); + ASTBlockRange.first = Stream.GetCurrentBitNo(); Stream.EnterSubblock(AST_BLOCK_ID, 5); // This is so that older clang versions, before the introduction @@ -4683,9 +4711,9 @@ // c++-base-specifiers-id:i32 // type-id:i32) // - // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or - // MK_ExplicitModule, then the module-name is the module name. Otherwise, - // it is the module file name. + // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule, + // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the + // module name. Otherwise, it is the module file name. auto Abbrev = std::make_shared<BitCodeAbbrev>(); Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); @@ -4698,10 +4726,11 @@ endian::Writer LE(Out, little); LE.write<uint8_t>(static_cast<uint8_t>(M.Kind)); - StringRef Name = - M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule - ? M.ModuleName - : M.FileName; + StringRef Name = M.Kind == MK_PrebuiltModule || + M.Kind == MK_ExplicitModule || + M.Kind == MK_ImplicitModule + ? M.ModuleName + : M.FileName; LE.write<uint16_t>(Name.size()); Out.write(Name.data(), Name.size()); @@ -4913,6 +4942,7 @@ NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts}; Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); + ASTBlockRange.second = Stream.GetCurrentBitNo(); // Write the module file extension blocks. 
for (const auto &ExtWriter : ModuleFileExtensionWriters) Index: clang/lib/Serialization/ASTReaderDecl.cpp =================================================================== --- clang/lib/Serialization/ASTReaderDecl.cpp +++ clang/lib/Serialization/ASTReaderDecl.cpp @@ -2869,7 +2869,7 @@ const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.getBitOffset()); + return RecordLocation(M, DOffs.getBitOffset(M->ASTBlockBitOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { Index: clang/lib/Serialization/ASTReader.cpp =================================================================== --- clang/lib/Serialization/ASTReader.cpp +++ clang/lib/Serialization/ASTReader.cpp @@ -2928,7 +2928,6 @@ ASTReader::ASTReadResult ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { BitstreamCursor &Stream = F.Stream; - if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID)) { Error(std::move(Err)); return Failure; @@ -3377,7 +3376,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; - F.SLocEntryOffsetsBase = Record[2]; + F.SLocEntryOffsetsBase = Record[2] + F.ASTBlockBitOffset; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3696,7 +3695,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2]; + F.MacroOffsetsBase = Record[2] + F.ASTBlockBitOffset; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -3837,17 +3836,18 @@ while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's - // start fixing this with prebuilt and explicit modules and see how it - // goes... 
+ // start fixing this with prebuilt, explicit and implicit modules and see + // how it goes... using namespace llvm::support; ModuleKind Kind = static_cast<ModuleKind>( endian::readNext<uint8_t, little, unaligned>(Data)); uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data); StringRef Name = StringRef((const char*)Data, Len); Data += Len; - ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule - ? ModuleMgr.lookupByModuleName(Name) - : ModuleMgr.lookupByFileName(Name)); + ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule || + Kind == MK_ImplicitModule + ? ModuleMgr.lookupByModuleName(Name) + : ModuleMgr.lookupByFileName(Name)); if (!OM) { std::string Msg = "SourceLocation remap refers to unknown module, cannot find "; @@ -4474,15 +4474,11 @@ llvm_unreachable("unknown module kind"); } -ASTReader::ASTReadResult -ASTReader::ReadASTCore(StringRef FileName, - ModuleKind Type, - SourceLocation ImportLoc, - ModuleFile *ImportedBy, - SmallVectorImpl<ImportedModule> &Loaded, - off_t ExpectedSize, time_t ExpectedModTime, - ASTFileSignature ExpectedSignature, - unsigned ClientLoadCapabilities) { +ASTReader::ASTReadResult ASTReader::ReadASTCore( + StringRef FileName, ModuleKind Type, SourceLocation ImportLoc, + ModuleFile *ImportedBy, SmallVectorImpl<ImportedModule> &Loaded, + off_t ExpectedSize, time_t ExpectedModTime, + ASTFileSignature ExpectedSignature, unsigned ClientLoadCapabilities) { ModuleFile *M; std::string ErrorStr; ModuleManager::AddModuleResult AddResult @@ -4552,6 +4548,7 @@ // This is used for compatibility with older PCH formats. bool HaveReadControlBlock = false; while (true) { + const uint64_t CurrentStreamOffset = Stream.GetCurrentBitNo(); Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance(); if (!MaybeEntry) { Error(MaybeEntry.takeError()); @@ -4606,6 +4603,7 @@ return VersionMismatch; } + F.ASTBlockBitOffset = CurrentStreamOffset; // Record that we've loaded this module. 
Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc)); ShouldFinalizePCM = true; @@ -4736,6 +4734,10 @@ if (F) std::copy(Record.begin(), Record.end(), F->Signature.data()); break; + case AST_SIGNATURE: + if (F) + std::copy(Record.begin(), Record.end(), F->ASTSignature.data()); + break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; if (Listener && ValidateDiagnosticOptions && @@ -6348,7 +6350,8 @@ assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; return RecordLocation( - M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() + + M->ASTBlockBitOffset); } static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) { Index: clang/include/clang/Serialization/ModuleFile.h =================================================================== --- clang/include/clang/Serialization/ModuleFile.h +++ clang/include/clang/Serialization/ModuleFile.h @@ -168,6 +168,10 @@ /// and modification time to identify this particular file. ASTFileSignature Signature; + /// The signature of the AST block of the module file, this can be used to + /// unique module files based on AST contents. + ASTFileSignature ASTSignature; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; @@ -185,6 +189,9 @@ /// The global bit offset (or base) of this module uint64_t GlobalBitOffset = 0; + /// The bit offset of the AST block of this module + uint64_t ASTBlockBitOffset = 0; + /// The serialized bitstream data for this file. 
StringRef Data; Index: clang/include/clang/Serialization/ASTWriter.h =================================================================== --- clang/include/clang/Serialization/ASTWriter.h +++ clang/include/clang/Serialization/ASTWriter.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -441,7 +442,9 @@ /// A list of the module file extension writers. std::vector<std::unique_ptr<ModuleFileExtensionWriter>> - ModuleFileExtensionWriters; + ModuleFileExtensionWriters; + + std::pair<uint64_t, uint64_t> ASTBlockRange; /// Retrieve or create a submodule ID for this module. unsigned getSubmoduleID(Module *Mod); @@ -458,7 +461,8 @@ ASTContext &Context); /// Calculate hash of the pcm content. - static ASTFileSignature createSignature(StringRef Bytes); + static std::pair<ASTFileSignature, ASTFileSignature> + createSignature(StringRef AllBytes, StringRef ASTBlockBytes); void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts, bool Modules); Index: clang/include/clang/Serialization/ASTReader.h =================================================================== --- clang/include/clang/Serialization/ASTReader.h +++ clang/include/clang/Serialization/ASTReader.h @@ -1290,6 +1290,7 @@ off_t ExpectedSize, time_t ExpectedModTime, ASTFileSignature ExpectedSignature, unsigned ClientLoadCapabilities); + ASTReadResult ReadControlBlock(ModuleFile &F, SmallVectorImpl<ImportedModule> &Loaded, const ModuleFile *ImportedBy, Index: clang/include/clang/Serialization/ASTBitCodes.h =================================================================== --- clang/include/clang/Serialization/ASTBitCodes.h +++ clang/include/clang/Serialization/ASTBitCodes.h @@ -264,8 +264,8 @@ BitOffset.setBitOffset(Offset); } - uint64_t getBitOffset() const { - return BitOffset.getBitOffset(); + uint64_t 
getBitOffset(const uint64_t ASTBlockBitOffset) const { + return BitOffset.getBitOffset() + ASTBlockBitOffset; } }; @@ -394,6 +394,9 @@ /// Record code for the signature that identifiers this AST file. SIGNATURE = 1, + /// Record code for the signature of the AST block. + AST_SIGNATURE, + /// Record code for the diagnostic options table. DIAGNOSTIC_OPTIONS,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits