DmitryPolukhin created this revision.
DmitryPolukhin added a project: clang.
Herald added subscribers: usaxena95, kadircet, ilya-biryukov.

This diff is for discussion how to address problem with large preamble file.

32 bit offsets can be used for PCH/preamble files below 512M. This diff fixes
crashes and asserts on clangd when preamble file exceeds 512M limit. The asserts
usually look like "LLVM ERROR: Invalid abbrev number". On about 700M preamble
files this patch increases file size on about 4%. I tested this diff on
Clang 8, 9 and master.

Test Plan:
Tested on clangd with 700M preamble file. 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76295

Files:
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/ModuleFile.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp

Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1889,7 +1889,7 @@
 
   // Write out the source location entry table. We skip the first
   // entry, which is always the same dummy entry.
-  std::vector<uint32_t> SLocEntryOffsets;
+  std::vector<uint64_t> SLocEntryOffsets;
   RecordData PreloadSLocs;
   SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1);
   for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size();
@@ -2212,7 +2212,7 @@
   /// For each identifier that is associated with a macro, this map
   /// provides the offset into the bitstream where that macro is
   /// defined.
-  std::vector<uint32_t> MacroOffsets;
+  std::vector<uint64_t> MacroOffsets;
 
   for (unsigned I = 0, N = MacroInfosToEmit.size(); I != N; ++I) {
     const IdentifierInfo *Name = MacroInfosToEmit[I].Name;
@@ -3269,7 +3269,7 @@
       DataLen += 2; // 2 bytes for builtin ID
       DataLen += 2; // 2 bytes for flags
       if (MacroOffset)
-        DataLen += 4; // MacroDirectives offset.
+        DataLen += 8; // MacroDirectives offset.
 
       if (NeedDecls) {
         for (IdentifierResolver::iterator D = IdResolver.begin(II),
@@ -3333,7 +3333,7 @@
     LE.write<uint16_t>(Bits);
 
     if (HadMacroDefinition)
-      LE.write<uint32_t>(MacroOffset);
+      LE.write<uint64_t>(MacroOffset);
 
     if (NeedDecls) {
       // Emit the declaration IDs in reverse order, because the
@@ -4241,7 +4241,7 @@
 
 /// Note that the identifier II occurs at the given offset
 /// within the identifier table.
-void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
+void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint64_t Offset) {
   IdentID ID = IdentifierIDs[II];
   // Only store offsets new to this AST file. Other identifier names are looked
   // up earlier in the chain and thus don't need an offset.
@@ -4251,7 +4251,7 @@
 
 /// Note that the selector Sel occurs at the given offset
 /// within the method pool/selector table.
-void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) {
+void ASTWriter::SetSelectorOffset(Selector Sel, uint64_t Offset) {
   unsigned ID = SelectorIDs[Sel];
   assert(ID && "Unknown selector");
   // Don't record offsets for selectors that are also available in a different
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -1000,9 +1000,9 @@
   // If this identifier is a macro, deserialize the macro
   // definition.
   if (HadMacroDefinition) {
-    uint32_t MacroDirectivesOffset =
-        endian::readNext<uint32_t, little, unaligned>(d);
-    DataLen -= 4;
+    uint64_t MacroDirectivesOffset =
+        endian::readNext<uint64_t, little, unaligned>(d);
+    DataLen -= 8;
 
     Reader.addPendingMacro(II, &F, MacroDirectivesOffset);
   }
@@ -3187,7 +3187,7 @@
         Error("duplicate IDENTIFIER_OFFSET record in AST file");
         return Failure;
       }
-      F.IdentifierOffsets = (const uint32_t *)Blob.data();
+      F.IdentifierOffsets = (const uint64_t *)Blob.data();
       F.LocalNumIdentifiers = Record[0];
       unsigned LocalBaseIdentifierID = Record[1];
       F.BaseIdentifierID = getTotalNumIdentifiers();
@@ -3290,7 +3290,7 @@
       break;
 
     case SELECTOR_OFFSETS: {
-      F.SelectorOffsets = (const uint32_t *)Blob.data();
+      F.SelectorOffsets = (const uint64_t *)Blob.data();
       F.LocalNumSelectors = Record[0];
       unsigned LocalBaseSelectorID = Record[1];
       F.BaseSelectorID = getTotalNumSelectors();
@@ -3371,7 +3371,7 @@
       break;
 
     case SOURCE_LOCATION_OFFSETS: {
-      F.SLocEntryOffsets = (const uint32_t *)Blob.data();
+      F.SLocEntryOffsets = (const uint64_t *)Blob.data();
       F.LocalNumSLocEntries = Record[0];
       unsigned SLocSpaceSize = Record[1];
       std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
@@ -3689,7 +3689,7 @@
         Error("duplicate MACRO_OFFSET record in AST file");
         return Failure;
       }
-      F.MacroOffsets = (const uint32_t *)Blob.data();
+      F.MacroOffsets = (const uint64_t *)Blob.data();
       F.LocalNumMacros = Record[0];
       unsigned LocalBaseMacroID = Record[1];
       F.BaseMacroID = getTotalNumMacros();
Index: clang/include/clang/Serialization/ModuleFile.h
===================================================================
--- clang/include/clang/Serialization/ModuleFile.h
+++ clang/include/clang/Serialization/ModuleFile.h
@@ -253,7 +253,7 @@
 
   /// Offsets for all of the source location entries in the
   /// AST file.
-  const uint32_t *SLocEntryOffsets = nullptr;
+  const uint64_t *SLocEntryOffsets = nullptr;
 
   /// SLocEntries that we're going to preload.
   SmallVector<uint64_t, 4> PreloadSLocEntries;
@@ -271,7 +271,7 @@
   /// This array is indexed by the identifier ID (-1), and provides
   /// the offset into IdentifierTableData where the string data is
   /// stored.
-  const uint32_t *IdentifierOffsets = nullptr;
+  const uint64_t *IdentifierOffsets = nullptr;
 
   /// Base identifier ID for identifiers local to this module.
   serialization::IdentID BaseIdentifierID = 0;
@@ -307,7 +307,7 @@
   /// This array is indexed by the macro ID (-1), and provides
   /// the offset into the preprocessor block where macro definitions are
   /// stored.
-  const uint32_t *MacroOffsets = nullptr;
+  const uint64_t *MacroOffsets = nullptr;
 
   /// Base macro ID for macros local to this module.
   serialization::MacroID BaseMacroID = 0;
@@ -379,7 +379,7 @@
 
   /// Offsets into the selector lookup table's data array
   /// where each selector resides.
-  const uint32_t *SelectorOffsets = nullptr;
+  const uint64_t *SelectorOffsets = nullptr;
 
   /// Base selector ID for selectors local to this module.
   serialization::SelectorID BaseSelectorID = 0;
Index: clang/include/clang/Serialization/ASTWriter.h
===================================================================
--- clang/include/clang/Serialization/ASTWriter.h
+++ clang/include/clang/Serialization/ASTWriter.h
@@ -293,7 +293,7 @@
 
   /// Offsets of each of the identifier IDs into the identifier
   /// table.
-  std::vector<uint32_t> IdentifierOffsets;
+  std::vector<uint64_t> IdentifierOffsets;
 
   /// The first ID number we can use for our own submodules.
   serialization::SubmoduleID FirstSubmoduleID =
@@ -314,7 +314,7 @@
 
   /// Offset of each selector within the method pool/selector
   /// table, indexed by the Selector ID (-1).
-  std::vector<uint32_t> SelectorOffsets;
+  std::vector<uint64_t> SelectorOffsets;
 
   /// Mapping from macro definitions (as they occur in the preprocessing
   /// record) to the macro IDs.
@@ -650,11 +650,11 @@
 
   /// Note that the identifier II occurs at the given offset
   /// within the identifier table.
-  void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset);
+  void SetIdentifierOffset(const IdentifierInfo *II, uint64_t Offset);
 
   /// Note that the selector Sel occurs at the given offset
   /// within the method pool/selector table.
-  void SetSelectorOffset(Selector Sel, uint32_t Offset);
+  void SetSelectorOffset(Selector Sel, uint64_t Offset);
 
   /// Record an ID for the given switch-case statement.
   unsigned RecordSwitchCaseID(SwitchCase *S);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to