Author: Juan Manuel Martinez Caamaño
Date: 2025-03-19T09:10:40+01:00
New Revision: 614d8557dcd41aae31bd50e93fed3487f235ef11

URL: 
https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11
DIFF: 
https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11.diff

LOG: [OffloadBundler] Expose function to parse compressed bundle headers 
(#130284)

In COMGR we hash the header of compressed bundles. For this we take the
first bytes of the buffer (according to the maximum header size) and
hash them.

To have a more stable API, and to be able to pick only the hash field of the
header (which is the only one we are actually interested in), we propose
a version-independent header representation that is common to all versions.

Added: 
    

Modified: 
    clang/include/clang/Driver/OffloadBundler.h
    clang/lib/Driver/OffloadBundler.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index cbe507c000601..667156a524b79 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -107,50 +107,20 @@ struct OffloadTargetInfo {
 // - Compressed Data (variable length).
 class CompressedOffloadBundle {
 private:
-  static inline const size_t MagicSize = 4;
-  static inline const size_t VersionFieldSize = sizeof(uint16_t);
-  static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  // Legacy size fields for V1/V2
-  static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
-  static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
-  // New size fields for V3
-  static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
-  static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
-  static inline const size_t HashFieldSize = sizeof(uint64_t);
-
-  // Keep V1 header size for backward compatibility
-  static inline const size_t V1HeaderSize =
-      MagicSize + VersionFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSizeV2 + HashFieldSize;
-
-  // Keep V2 header size for backward compatibility
-  static inline const size_t V2HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
-      UncompressedSizeFieldSizeV2 + HashFieldSize;
-
-  // Add V3 header size with 64-bit fields
-  static inline const size_t V3HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
-      UncompressedSizeFieldSizeV3 + HashFieldSize;
-
   static inline const llvm::StringRef MagicNumber = "CCOB";
 
 public:
-  static inline const uint16_t DefaultVersion = 2;
+  struct CompressedBundleHeader {
+    unsigned Version;
+    llvm::compression::Format CompressionFormat;
+    std::optional<size_t> FileSize;
+    size_t UncompressedFileSize;
+    uint64_t Hash;
 
-  // Helper method to get header size based on version
-  static size_t getHeaderSize(uint16_t Version) {
-    switch (Version) {
-    case 1:
-      return V1HeaderSize;
-    case 2:
-      return V2HeaderSize;
-    case 3:
-      return V3HeaderSize;
-    default:
-      llvm_unreachable("Unsupported version");
-    }
-  }
+    static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+  };
+
+  static inline const uint16_t DefaultVersion = 2;
 
   static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
   compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,

diff  --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 6dfb72fb223ce..859e44fb9bdb2 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
@@ -1127,13 +1128,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
       llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
 }
 
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+  struct CommonFields {
+    uint32_t Magic;
+    uint16_t Version;
+    uint16_t Method;
+  };
+
+  struct V1Header {
+    CommonFields Common;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V2Header {
+    CommonFields Common;
+    uint32_t FileSize;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V3Header {
+    CommonFields Common;
+    uint64_t FileSize;
+    uint64_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  CommonFields Common;
+  V1Header V1;
+  V2Header V2;
+  V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version
+static size_t getHeaderSize(uint16_t Version) {
+  switch (Version) {
+  case 1:
+    return sizeof(RawCompressedBundleHeader::V1Header);
+  case 2:
+    return sizeof(RawCompressedBundleHeader::V2Header);
+  case 3:
+    return sizeof(RawCompressedBundleHeader::V3Header);
+  default:
+    llvm_unreachable("Unsupported version");
+  }
+}
+
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
+  assert(llvm::identify_magic(Blob) ==
+         llvm::file_magic::offload_bundle_compressed);
+
+  RawCompressedBundleHeader Header;
+  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+  CompressedBundleHeader Normalized;
+  Normalized.Version = Header.Common.Version;
+
+  size_t RequiredSize = getHeaderSize(Normalized.Version);
+  if (Blob.size() < RequiredSize)
+    return createStringError(inconvertibleErrorCode(),
+                             "Compressed bundle header size too small");
+
+  switch (Normalized.Version) {
+  case 1:
+    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+    Normalized.Hash = Header.V1.Hash;
+    break;
+  case 2:
+    Normalized.FileSize = Header.V2.FileSize;
+    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+    Normalized.Hash = Header.V2.Hash;
+    break;
+  case 3:
+    Normalized.FileSize = Header.V3.FileSize;
+    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+    Normalized.Hash = Header.V3.Hash;
+    break;
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressed bundle version");
+  }
+
+  // Determine compression format
+  switch (Header.Common.Method) {
+  case static_cast<uint16_t>(compression::Format::Zlib):
+  case static_cast<uint16_t>(compression::Format::Zstd):
+    Normalized.CompressionFormat =
+        static_cast<compression::Format>(Header.Common.Method);
+    break;
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressing method");
+  }
+
+  return Normalized;
+}
+
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
 CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
                                     bool Verbose) {
   StringRef Blob = Input.getBuffer();
 
   // Check minimum header size (using V1 as it's the smallest)
-  if (Blob.size() < V1HeaderSize)
+  if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
 
   if (llvm::identify_magic(Blob) !=
@@ -1143,68 +1247,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
   }
 
-  size_t CurrentOffset = MagicSize;
-
-  // Read version
-  uint16_t ThisVersion;
-  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += VersionFieldSize;
-
-  // Verify header size based on version
-  if (ThisVersion >= 2 && ThisVersion <= 3) {
-    size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
-    if (Blob.size() < RequiredSize)
-      return createStringError(inconvertibleErrorCode(),
-                               "Compressed bundle header size too small");
-  }
-
-  // Read compression method
-  uint16_t CompressionMethod;
-  memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += MethodFieldSize;
-
-  // Read total file size (version 2+)
-  uint64_t TotalFileSize = 0;
-  if (ThisVersion >= 2) {
-    if (ThisVersion == 2) {
-      uint32_t TotalFileSize32;
-      memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
-      TotalFileSize = TotalFileSize32;
-      CurrentOffset += FileSizeFieldSizeV2;
-    } else { // Version 3
-      memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
-      CurrentOffset += FileSizeFieldSizeV3;
-    }
-  }
+  Expected<CompressedBundleHeader> HeaderOrErr =
+      CompressedBundleHeader::tryParse(Blob);
+  if (!HeaderOrErr)
+    return HeaderOrErr.takeError();
 
-  // Read uncompressed size
-  uint64_t UncompressedSize = 0;
-  if (ThisVersion <= 2) {
-    uint32_t UncompressedSize32;
-    memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
-    UncompressedSize = UncompressedSize32;
-    CurrentOffset += UncompressedSizeFieldSizeV2;
-  } else { // Version 3
-    memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
-    CurrentOffset += UncompressedSizeFieldSizeV3;
-  }
+  const CompressedBundleHeader &Normalized = *HeaderOrErr;
+  unsigned ThisVersion = Normalized.Version;
+  size_t HeaderSize = getHeaderSize(ThisVersion);
 
-  // Read hash
-  uint64_t StoredHash;
-  memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
-  CurrentOffset += HashFieldSize;
+  llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
 
-  // Determine compression format
-  llvm::compression::Format CompressionFormat;
-  if (CompressionMethod ==
-      static_cast<uint16_t>(llvm::compression::Format::Zlib))
-    CompressionFormat = llvm::compression::Format::Zlib;
-  else if (CompressionMethod ==
-           static_cast<uint16_t>(llvm::compression::Format::Zstd))
-    CompressionFormat = llvm::compression::Format::Zstd;
-  else
-    return createStringError(inconvertibleErrorCode(),
-                             "Unknown compressing method");
+  size_t TotalFileSize = Normalized.FileSize.value_or(0);
+  size_t UncompressedSize = Normalized.UncompressedFileSize;
+  auto StoredHash = Normalized.Hash;
 
   llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
                               *ClangOffloadBundlerTimerGroup);
@@ -1212,7 +1268,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     DecompressTimer.startTimer();
 
   SmallVector<uint8_t, 0> DecompressedData;
-  StringRef CompressedData = Blob.substr(CurrentOffset);
+  StringRef CompressedData = Blob.substr(HeaderSize);
   if (llvm::Error DecompressionError = llvm::compression::decompress(
           CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
           DecompressedData, UncompressedSize))


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to