dang updated this revision to Diff 267914.
dang added a comment.

Address some of the code review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80383/new/

https://reviews.llvm.org/D80383

Files:
  clang/include/clang/Basic/Module.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/ModuleFile.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/ASTWriterDecl.cpp
  clang/test/Modules/ASTSignature.c
  clang/test/Modules/Inputs/ASTHash/module.modulemap
  clang/test/Modules/Inputs/ASTHash/my_header_1.h
  clang/test/Modules/Inputs/ASTHash/my_header_2.h

Index: clang/test/Modules/Inputs/ASTHash/my_header_2.h
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/my_header_2.h
@@ -0,0 +1,3 @@
+#include "my_header_1.h"
+
+extern my_int var;
Index: clang/test/Modules/Inputs/ASTHash/my_header_1.h
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/my_header_1.h
@@ -0,0 +1 @@
+typedef int my_int;
Index: clang/test/Modules/Inputs/ASTHash/module.modulemap
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/module.modulemap
@@ -0,0 +1,8 @@
+module MyHeader1 {
+  header "my_header_1.h"
+}
+
+module MyHeader2 {
+  header "my_header_2.h"
+  export *
+}
Index: clang/test/Modules/ASTSignature.c
===================================================================
--- /dev/null
+++ clang/test/Modules/ASTSignature.c
@@ -0,0 +1,24 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
+// RUN:   -fimplicit-module-maps -fmodules-strict-context-hash \
+// RUN:   -fmodules-cache-path=%t -fdisable-module-hash %s
+// RUN: cp %t/MyHeader2.pcm %t1.pcm
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \
+// RUN:   -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
+// RUN:   -fmodules-cache-path=%t -fdisable-module-hash %s
+// RUN: cp %t/MyHeader2.pcm %t2.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
+// RUN: cat %t1.dump %t2.dump | FileCheck %s
+
+#include "my_header_2.h"
+
+my_int var = 42;
+
+// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
+// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
+// CHECK: [[AST_BLOCK_HASH]]
+// CHECK-NOT: [[SIGNATURE]]
+// The modules built by this test are designed to yield the same AST. If this
+// test fails, it means that the AST block is has become non-relocatable.
Index: clang/lib/Serialization/ASTWriterDecl.cpp
===================================================================
--- clang/lib/Serialization/ASTWriterDecl.cpp
+++ clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2434,12 +2434,12 @@
   SourceLocation Loc = D->getLocation();
   unsigned Index = ID - FirstDeclID;
   if (DeclOffsets.size() == Index)
-    DeclOffsets.emplace_back(Loc, Offset);
+    DeclOffsets.emplace_back(Loc, Offset, ASTBlockRange.first);
   else if (DeclOffsets.size() < Index) {
     // FIXME: Can/should this happen?
     DeclOffsets.resize(Index+1);
     DeclOffsets[Index].setLocation(Loc);
-    DeclOffsets[Index].setBitOffset(Offset);
+    DeclOffsets[Index].setBitOffset(Offset, ASTBlockRange.first);
   } else {
     llvm_unreachable("declarations should be emitted in ID order");
   }
Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -10,14 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/OpenMPClause.h"
-#include "clang/Serialization/ASTRecordWriter.h"
 #include "ASTCommon.h"
 #include "ASTReaderInternals.h"
 #include "MultiOnDiskHashTable.h"
-#include "clang/AST/AbstractTypeWriter.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/AbstractTypeWriter.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
@@ -31,6 +29,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/LambdaCapture.h"
 #include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OpenMPClause.h"
 #include "clang/AST/RawCommentList.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
@@ -65,7 +64,9 @@
 #include "clang/Sema/ObjCMethodList.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/Weak.h"
+#include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ASTReader.h"
+#include "clang/Serialization/ASTRecordWriter.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
 #include "clang/Serialization/ModuleFile.h"
 #include "clang/Serialization/ModuleFileExtension.h"
@@ -961,6 +962,7 @@
 
   BLOCK(UNHASHED_CONTROL_BLOCK);
   RECORD(SIGNATURE);
+  RECORD(AST_BLOCK_HASH);
   RECORD(DIAGNOSTIC_OPTIONS);
   RECORD(DIAG_PRAGMA_MAPPINGS);
 
@@ -1026,22 +1028,26 @@
   return Filename + Pos;
 }
 
-ASTFileSignature ASTWriter::createSignature(StringRef Bytes) {
-  // Calculate the hash till start of UNHASHED_CONTROL_BLOCK.
+std::pair<ASTFileSignature, ASTFileSignature>
+ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
+  // Calculate the hash of the AST block
   llvm::SHA1 Hasher;
-  Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size()));
-  auto Hash = Hasher.result();
+  Hasher.update(ASTBlockBytes);
 
-  // Convert to an array [5*i32].
-  ASTFileSignature Signature;
-  auto LShift = [&](unsigned char Val, unsigned Shift) {
-    return (uint32_t)Val << Shift;
-  };
-  for (int I = 0; I != 5; ++I)
-    Signature[I] = LShift(Hash[I * 4 + 0], 24) | LShift(Hash[I * 4 + 1], 16) |
-                   LShift(Hash[I * 4 + 2], 8) | LShift(Hash[I * 4 + 3], 0);
+  auto ExpectedASTBlockHash = ASTFileSignature::Create(Hasher.result());
+  assert(ExpectedASTBlockHash);
+  ASTFileSignature ASTBlockHash = ExpectedASTBlockHash.get();
+  // Add the remaing bytes (i.e. bytes before the unhashed control block that
+  // are not part of the AST block)
+  Hasher.update(
+      AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
+  Hasher.update(
+      AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
 
-  return Signature;
+  auto ExpectedSignature = ASTFileSignature::Create(Hasher.result());
+  assert(ExpectedSignature);
+  ASTFileSignature Signature = ExpectedSignature.get();
+  return std::make_pair(ASTBlockHash, Signature);
 }
 
 ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
@@ -1055,10 +1061,18 @@
   Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
 
   // For implicit modules, write the hash of the PCM as its signature.
+  ASTFileSignature ASTSignature;
   ASTFileSignature Signature;
+  auto ASTBlockStartByte = ASTBlockRange.first >> 3;
+  auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
+  std::tie(ASTSignature, Signature) = createSignature(
+      StringRef(Buffer.begin(), StartOfUnhashedControl),
+      StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
   if (WritingModule &&
       PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
-    Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl));
+    Record.append(ASTSignature.begin(), ASTSignature.end());
+    Stream.EmitRecord(AST_BLOCK_HASH, Record);
+    Record.clear();
     Record.append(Signature.begin(), Signature.end());
     Stream.EmitRecord(SIGNATURE, Record);
     Record.clear();
@@ -2050,7 +2064,7 @@
     RecordData::value_type Record[] = {
         SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
         SourceMgr.getNextLocalOffset() - 1 /* skip dummy */,
-        SLocEntryOffsetsBase};
+        SLocEntryOffsetsBase - ASTBlockRange.first};
     Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
                               bytes(SLocEntryOffsets));
   }
@@ -2329,7 +2343,7 @@
   {
     RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
                                        FirstMacroID - NUM_PREDEF_MACRO_IDS,
-                                       MacroOffsetsBase};
+                                       MacroOffsetsBase - ASTBlockRange.first};
     Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
 }
@@ -2843,7 +2857,7 @@
   assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
 
   // Emit the type's representation.
-  uint64_t Offset = ASTTypeWriter(*this).write(T);
+  uint64_t Offset = ASTTypeWriter(*this).write(T) - ASTBlockRange.first;
 
   // Record the offset for this type.
   unsigned Index = Idx.getIndex() - FirstTypeID;
@@ -4553,6 +4567,8 @@
   WriteControlBlock(PP, Context, isysroot, OutputFile);
 
   // Write the remaining AST contents.
+  Stream.FlushToWord();
+  ASTBlockRange.first = Stream.GetCurrentBitNo();
   Stream.EnterSubblock(AST_BLOCK_ID, 5);
 
   // This is so that older clang versions, before the introduction
@@ -4684,9 +4700,9 @@
     //   c++-base-specifiers-id:i32
     //   type-id:i32)
     //
-    // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or
-    // MK_ExplicitModule, then the module-name is the module name. Otherwise,
-    // it is the module file name.
+    // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule,
+    // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the
+    // module name. Otherwise, it is the module file name.
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
@@ -4699,10 +4715,11 @@
 
         endian::Writer LE(Out, little);
         LE.write<uint8_t>(static_cast<uint8_t>(M.Kind));
-        StringRef Name =
-          M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule
-          ? M.ModuleName
-          : M.FileName;
+        StringRef Name = M.Kind == MK_PrebuiltModule ||
+                                 M.Kind == MK_ExplicitModule ||
+                                 M.Kind == MK_ImplicitModule
+                             ? M.ModuleName
+                             : M.FileName;
         LE.write<uint16_t>(Name.size());
         Out.write(Name.data(), Name.size());
 
@@ -4914,6 +4931,7 @@
       NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts};
   Stream.EmitRecord(STATISTICS, Record);
   Stream.ExitBlock();
+  ASTBlockRange.second = Stream.GetCurrentBitNo();
 
   // Write the module file extension blocks.
   for (const auto &ExtWriter : ModuleFileExtensionWriters)
Index: clang/lib/Serialization/ASTReaderDecl.cpp
===================================================================
--- clang/lib/Serialization/ASTReaderDecl.cpp
+++ clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2869,7 +2869,7 @@
   const DeclOffset &DOffs =
       M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
   Loc = TranslateSourceLocation(*M, DOffs.getLocation());
-  return RecordLocation(M, DOffs.getBitOffset());
+  return RecordLocation(M, DOffs.getBitOffset(M->ASTBlockBitOffset));
 }
 
 ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -3377,7 +3377,7 @@
       F.SLocEntryOffsets = (const uint32_t *)Blob.data();
       F.LocalNumSLocEntries = Record[0];
       unsigned SLocSpaceSize = Record[1];
-      F.SLocEntryOffsetsBase = Record[2];
+      F.SLocEntryOffsetsBase = Record[2] + F.ASTBlockBitOffset;
       std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
           SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
                                               SLocSpaceSize);
@@ -3696,7 +3696,7 @@
       F.MacroOffsets = (const uint32_t *)Blob.data();
       F.LocalNumMacros = Record[0];
       unsigned LocalBaseMacroID = Record[1];
-      F.MacroOffsetsBase = Record[2];
+      F.MacroOffsetsBase = Record[2] + F.ASTBlockBitOffset;
       F.BaseMacroID = getTotalNumMacros();
 
       if (F.LocalNumMacros > 0) {
@@ -3837,17 +3837,18 @@
 
   while (Data < DataEnd) {
     // FIXME: Looking up dependency modules by filename is horrible. Let's
-    // start fixing this with prebuilt and explicit modules and see how it
-    // goes...
+    // start fixing this with prebuilt, explicit and implicit modules and see
+    // how it goes...
     using namespace llvm::support;
     ModuleKind Kind = static_cast<ModuleKind>(
       endian::readNext<uint8_t, little, unaligned>(Data));
     uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
     StringRef Name = StringRef((const char*)Data, Len);
     Data += Len;
-    ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule
-                      ? ModuleMgr.lookupByModuleName(Name)
-                      : ModuleMgr.lookupByFileName(Name));
+    ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
+                              Kind == MK_ImplicitModule
+                          ? ModuleMgr.lookupByModuleName(Name)
+                          : ModuleMgr.lookupByFileName(Name));
     if (!OM) {
       std::string Msg =
           "SourceLocation remap refers to unknown module, cannot find ";
@@ -4552,6 +4553,7 @@
   // This is used for compatibility with older PCH formats.
   bool HaveReadControlBlock = false;
   while (true) {
+    const uint64_t CurrentStreamOffset = Stream.GetCurrentBitNo();
     Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
     if (!MaybeEntry) {
       Error(MaybeEntry.takeError());
@@ -4606,6 +4608,7 @@
         return VersionMismatch;
       }
 
+      F.ASTBlockBitOffset = CurrentStreamOffset;
       // Record that we've loaded this module.
       Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc));
       ShouldFinalizePCM = true;
@@ -4736,6 +4739,10 @@
       if (F)
         std::copy(Record.begin(), Record.end(), F->Signature.data());
       break;
+    case AST_BLOCK_HASH:
+      if (F)
+        std::copy(Record.begin(), Record.end(), F->ASTBlockHash.data());
+      break;
     case DIAGNOSTIC_OPTIONS: {
       bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
       if (Listener && ValidateDiagnosticOptions &&
@@ -6348,7 +6355,8 @@
   assert(I != GlobalTypeMap.end() && "Corrupted global type map");
   ModuleFile *M = I->second;
   return RecordLocation(
-      M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset());
+      M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
+             M->ASTBlockBitOffset);
 }
 
 static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
Index: clang/include/clang/Serialization/ModuleFile.h
===================================================================
--- clang/include/clang/Serialization/ModuleFile.h
+++ clang/include/clang/Serialization/ModuleFile.h
@@ -168,6 +168,10 @@
   /// and modification time to identify this particular file.
   ASTFileSignature Signature;
 
+  /// The signature of the AST block of the module file, this can be used to
+  /// unique module files based on AST contents.
+  ASTFileSignature ASTBlockHash;
+
   /// Whether this module has been directly imported by the
   /// user.
   bool DirectlyImported = false;
@@ -185,6 +189,9 @@
   /// The global bit offset (or base) of this module
   uint64_t GlobalBitOffset = 0;
 
+  /// The bit offset of the AST block of this module
+  uint64_t ASTBlockBitOffset = 0;
+
   /// The serialized bitstream data for this file.
   StringRef Data;
 
Index: clang/include/clang/Serialization/ASTWriter.h
===================================================================
--- clang/include/clang/Serialization/ASTWriter.h
+++ clang/include/clang/Serialization/ASTWriter.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -441,7 +442,9 @@
 
   /// A list of the module file extension writers.
   std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
-    ModuleFileExtensionWriters;
+      ModuleFileExtensionWriters;
+
+  std::pair<uint64_t, uint64_t> ASTBlockRange;
 
   /// Retrieve or create a submodule ID for this module.
   unsigned getSubmoduleID(Module *Mod);
@@ -458,7 +461,8 @@
                                              ASTContext &Context);
 
   /// Calculate hash of the pcm content.
-  static ASTFileSignature createSignature(StringRef Bytes);
+  static std::pair<ASTFileSignature, ASTFileSignature>
+  createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
 
   void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
                        bool Modules);
Index: clang/include/clang/Serialization/ASTBitCodes.h
===================================================================
--- clang/include/clang/Serialization/ASTBitCodes.h
+++ clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@
     /// Version 4 of AST files also requires that the version control branch and
     /// revision match exactly, since there is no backward compatibility of
     /// AST files at this time.
-    const unsigned VERSION_MAJOR = 10;
+    const unsigned VERSION_MAJOR = 11;
 
     /// AST file minor version number supported by this version of
     /// Clang.
@@ -242,14 +242,16 @@
       /// Raw source location.
       unsigned Loc = 0;
 
-      /// Offset in the AST file. Keep structure alignment 32-bit and avoid
-      /// padding gap because undefined value in the padding affects AST hash.
+      /// Offset relative to the start of the AST block. Keep structure
+      /// alignment 32-bit and avoid padding gap because undefined value in the
+      /// padding affects AST hash.
       UnderalignedInt64 BitOffset;
 
       DeclOffset() = default;
-      DeclOffset(SourceLocation Loc, uint64_t BitOffset) {
+      DeclOffset(SourceLocation Loc, uint64_t BitOffset,
+                 uint64_t ASTBlockBitOffset) {
         setLocation(Loc);
-        setBitOffset(BitOffset);
+        setBitOffset(BitOffset, ASTBlockBitOffset);
       }
 
       void setLocation(SourceLocation L) {
@@ -260,12 +262,12 @@
         return SourceLocation::getFromRawEncoding(Loc);
       }
 
-      void setBitOffset(uint64_t Offset) {
-        BitOffset.setBitOffset(Offset);
+      void setBitOffset(uint64_t Offset, const uint64_t ASTBlockBitOffset) {
+        BitOffset.setBitOffset(Offset - ASTBlockBitOffset);
       }
 
-      uint64_t getBitOffset() const {
-        return BitOffset.getBitOffset();
+      uint64_t getBitOffset(const uint64_t ASTBlockBitOffset) const {
+        return BitOffset.getBitOffset() + ASTBlockBitOffset;
       }
     };
 
@@ -394,6 +396,9 @@
       /// Record code for the signature that identifiers this AST file.
       SIGNATURE = 1,
 
+      /// Record code for the content hash of the AST block.
+      AST_BLOCK_HASH,
+
       /// Record code for the diagnostic options table.
       DIAGNOSTIC_OPTIONS,
 
Index: clang/include/clang/Basic/Module.h
===================================================================
--- clang/include/clang/Basic/Module.h
+++ clang/include/clang/Basic/Module.h
@@ -26,6 +26,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Error.h"
 #include <array>
 #include <cassert>
 #include <cstdint>
@@ -56,6 +57,22 @@
   ASTFileSignature(std::array<uint32_t, 5> S = {{0}})
       : std::array<uint32_t, 5>(std::move(S)) {}
 
+  static Expected<ASTFileSignature> Create(StringRef Bytes) {
+    if (Bytes.size() != 20)
+      return llvm::make_error<llvm::StringError>(
+          "Wrong amount of bytes given to create an ASTFileSignature",
+          llvm::inconvertibleErrorCode());
+    ASTFileSignature Signature;
+    for (unsigned I = 0; I != 5; ++I) {
+      const unsigned BitOff = I * 4;
+      Signature[I] = ((uint32_t)Bytes[BitOff + 0] << 24) |
+                     ((uint32_t)Bytes[BitOff + 1] << 16) |
+                     ((uint32_t)Bytes[BitOff + 2] << 8) |
+                     ((uint32_t)Bytes[BitOff + 3]);
+    }
+    return Signature;
+  }
+
   explicit operator bool() const {
     return *this != std::array<uint32_t, 5>({{0}});
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to