Author: Juan Manuel Martinez Caamaño Date: 2025-03-19T09:10:40+01:00 New Revision: 614d8557dcd41aae31bd50e93fed3487f235ef11
URL: https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11 DIFF: https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11.diff LOG: [OffloadBundler] Expose function to parse compressed bundle headers (#130284) In COMGR we hash the header of compressed bundles. For this we take the first bytes of the buffer (according to the maximum header size) and hash them. To have a more stable API, and to be able to pick only the hash field (which is the only one we are actually interested in) of the header, we propose a version independent header version that is common to all versions. Added: Modified: clang/include/clang/Driver/OffloadBundler.h clang/lib/Driver/OffloadBundler.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h index cbe507c000601..667156a524b79 100644 --- a/clang/include/clang/Driver/OffloadBundler.h +++ b/clang/include/clang/Driver/OffloadBundler.h @@ -107,50 +107,20 @@ struct OffloadTargetInfo { // - Compressed Data (variable length). 
class CompressedOffloadBundle { private: - static inline const size_t MagicSize = 4; - static inline const size_t VersionFieldSize = sizeof(uint16_t); - static inline const size_t MethodFieldSize = sizeof(uint16_t); - // Legacy size fields for V1/V2 - static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t); - static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t); - // New size fields for V3 - static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t); - static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t); - static inline const size_t HashFieldSize = sizeof(uint64_t); - - // Keep V1 header size for backward compatibility - static inline const size_t V1HeaderSize = - MagicSize + VersionFieldSize + MethodFieldSize + - UncompressedSizeFieldSizeV2 + HashFieldSize; - - // Keep V2 header size for backward compatibility - static inline const size_t V2HeaderSize = - MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize + - UncompressedSizeFieldSizeV2 + HashFieldSize; - - // Add V3 header size with 64-bit fields - static inline const size_t V3HeaderSize = - MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize + - UncompressedSizeFieldSizeV3 + HashFieldSize; - static inline const llvm::StringRef MagicNumber = "CCOB"; public: - static inline const uint16_t DefaultVersion = 2; + struct CompressedBundleHeader { + unsigned Version; + llvm::compression::Format CompressionFormat; + std::optional<size_t> FileSize; + size_t UncompressedFileSize; + uint64_t Hash; - // Helper method to get header size based on version - static size_t getHeaderSize(uint16_t Version) { - switch (Version) { - case 1: - return V1HeaderSize; - case 2: - return V2HeaderSize; - case 3: - return V3HeaderSize; - default: - llvm_unreachable("Unsupported version"); - } - } + static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef); + }; + + static inline const uint16_t DefaultVersion = 2; static 
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input, diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index 6dfb72fb223ce..859e44fb9bdb2 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -29,6 +29,7 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/EndianStream.h" @@ -1127,13 +1128,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P, llvm::StringRef(FinalBuffer.data(), FinalBuffer.size())); } +// Use packed structs to avoid padding, such that the structs map the serialized +// format. +LLVM_PACKED_START +union RawCompressedBundleHeader { + struct CommonFields { + uint32_t Magic; + uint16_t Version; + uint16_t Method; + }; + + struct V1Header { + CommonFields Common; + uint32_t UncompressedFileSize; + uint64_t Hash; + }; + + struct V2Header { + CommonFields Common; + uint32_t FileSize; + uint32_t UncompressedFileSize; + uint64_t Hash; + }; + + struct V3Header { + CommonFields Common; + uint64_t FileSize; + uint64_t UncompressedFileSize; + uint64_t Hash; + }; + + CommonFields Common; + V1Header V1; + V2Header V2; + V3Header V3; +}; +LLVM_PACKED_END + +// Helper method to get header size based on version +static size_t getHeaderSize(uint16_t Version) { + switch (Version) { + case 1: + return sizeof(RawCompressedBundleHeader::V1Header); + case 2: + return sizeof(RawCompressedBundleHeader::V2Header); + case 3: + return sizeof(RawCompressedBundleHeader::V3Header); + default: + llvm_unreachable("Unsupported version"); + } +} + +Expected<CompressedOffloadBundle::CompressedBundleHeader> +CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) { + assert(Blob.size() >= 
sizeof(RawCompressedBundleHeader::CommonFields)); + assert(llvm::identify_magic(Blob) == + llvm::file_magic::offload_bundle_compressed); + + RawCompressedBundleHeader Header; + memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header))); + + CompressedBundleHeader Normalized; + Normalized.Version = Header.Common.Version; + + size_t RequiredSize = getHeaderSize(Normalized.Version); + if (Blob.size() < RequiredSize) + return createStringError(inconvertibleErrorCode(), + "Compressed bundle header size too small"); + + switch (Normalized.Version) { + case 1: + Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize; + Normalized.Hash = Header.V1.Hash; + break; + case 2: + Normalized.FileSize = Header.V2.FileSize; + Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize; + Normalized.Hash = Header.V2.Hash; + break; + case 3: + Normalized.FileSize = Header.V3.FileSize; + Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize; + Normalized.Hash = Header.V3.Hash; + break; + default: + return createStringError(inconvertibleErrorCode(), + "Unknown compressed bundle version"); + } + + // Determine compression format + switch (Header.Common.Method) { + case static_cast<uint16_t>(compression::Format::Zlib): + case static_cast<uint16_t>(compression::Format::Zstd): + Normalized.CompressionFormat = + static_cast<compression::Format>(Header.Common.Method); + break; + default: + return createStringError(inconvertibleErrorCode(), + "Unknown compressing method"); + } + + return Normalized; +} + llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, bool Verbose) { StringRef Blob = Input.getBuffer(); // Check minimum header size (using V1 as it's the smallest) - if (Blob.size() < V1HeaderSize) + if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields)) return llvm::MemoryBuffer::getMemBufferCopy(Blob); if (llvm::identify_magic(Blob) != @@ -1143,68 +1247,20 @@ 
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, return llvm::MemoryBuffer::getMemBufferCopy(Blob); } - size_t CurrentOffset = MagicSize; - - // Read version - uint16_t ThisVersion; - memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t)); - CurrentOffset += VersionFieldSize; - - // Verify header size based on version - if (ThisVersion >= 2 && ThisVersion <= 3) { - size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize; - if (Blob.size() < RequiredSize) - return createStringError(inconvertibleErrorCode(), - "Compressed bundle header size too small"); - } - - // Read compression method - uint16_t CompressionMethod; - memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t)); - CurrentOffset += MethodFieldSize; - - // Read total file size (version 2+) - uint64_t TotalFileSize = 0; - if (ThisVersion >= 2) { - if (ThisVersion == 2) { - uint32_t TotalFileSize32; - memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t)); - TotalFileSize = TotalFileSize32; - CurrentOffset += FileSizeFieldSizeV2; - } else { // Version 3 - memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t)); - CurrentOffset += FileSizeFieldSizeV3; - } - } + Expected<CompressedBundleHeader> HeaderOrErr = + CompressedBundleHeader::tryParse(Blob); + if (!HeaderOrErr) + return HeaderOrErr.takeError(); - // Read uncompressed size - uint64_t UncompressedSize = 0; - if (ThisVersion <= 2) { - uint32_t UncompressedSize32; - memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t)); - UncompressedSize = UncompressedSize32; - CurrentOffset += UncompressedSizeFieldSizeV2; - } else { // Version 3 - memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t)); - CurrentOffset += UncompressedSizeFieldSizeV3; - } + const CompressedBundleHeader &Normalized = *HeaderOrErr; + unsigned ThisVersion = Normalized.Version; + size_t HeaderSize = getHeaderSize(ThisVersion); - // Read hash - uint64_t 
StoredHash; - memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t)); - CurrentOffset += HashFieldSize; + llvm::compression::Format CompressionFormat = Normalized.CompressionFormat; - // Determine compression format - llvm::compression::Format CompressionFormat; - if (CompressionMethod == - static_cast<uint16_t>(llvm::compression::Format::Zlib)) - CompressionFormat = llvm::compression::Format::Zlib; - else if (CompressionMethod == - static_cast<uint16_t>(llvm::compression::Format::Zstd)) - CompressionFormat = llvm::compression::Format::Zstd; - else - return createStringError(inconvertibleErrorCode(), - "Unknown compressing method"); + size_t TotalFileSize = Normalized.FileSize.value_or(0); + size_t UncompressedSize = Normalized.UncompressedFileSize; + auto StoredHash = Normalized.Hash; llvm::Timer DecompressTimer("Decompression Timer", "Decompression time", *ClangOffloadBundlerTimerGroup); @@ -1212,7 +1268,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, DecompressTimer.startTimer(); SmallVector<uint8_t, 0> DecompressedData; - StringRef CompressedData = Blob.substr(CurrentOffset); + StringRef CompressedData = Blob.substr(HeaderSize); if (llvm::Error DecompressionError = llvm::compression::decompress( CompressionFormat, llvm::arrayRefFromStringRef(CompressedData), DecompressedData, UncompressedSize)) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits