https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/83605
>From 60faf7f657fdcc00edfa0a1813d1e2746c341ef1 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <yaxun....@amd.com> Date: Fri, 1 Mar 2024 13:16:45 -0500 Subject: [PATCH] [HIP] add --offload-compression-level= option Added the --offload-compression-level= option to clang and the -compression-level= option to clang-offload-bundler and clang-linker-wrapper for controlling the compression level. Added support for long distance matching (LDM) to llvm::zstd, which is off by default. Enabled it by default for clang-offload-bundler since it generally improves the compression rate. Changed the default zstd compression level for clang-offload-bundler to 3, since it works well for bundle entry sizes from 1KB to 32MB, which should cover most clang-offload-bundler usage. Users can still specify the compression level with the -compression-level= option if necessary. Change-Id: I5e2d7abcaa11a2a37a5e798476e3f572bba11cab --- clang/include/clang/Driver/OffloadBundler.h | 6 +- clang/include/clang/Driver/Options.td | 4 + clang/lib/Driver/OffloadBundler.cpp | 113 ++++++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 11 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 15 +++ clang/lib/Driver/ToolChains/CommonArgs.h | 2 + clang/lib/Driver/ToolChains/HIPUtility.cpp | 7 +- .../test/Driver/clang-offload-bundler-zlib.c | 21 +++- .../test/Driver/clang-offload-bundler-zstd.c | 19 ++- .../test/Driver/hip-offload-compress-zlib.hip | 7 +- .../test/Driver/hip-offload-compress-zstd.hip | 5 +- clang/test/Driver/linker-wrapper.c | 5 +- .../ClangLinkerWrapper.cpp | 5 + .../clang-linker-wrapper/LinkerWrapperOpts.td | 2 + .../ClangOffloadBundler.cpp | 5 + llvm/include/llvm/Support/Compression.h | 5 +- llvm/lib/Support/Compression.cpp | 40 +++++-- 17 files changed, 209 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h index 84349abe185fa4..65d33bfbd2825f 100644 --- a/clang/include/clang/Driver/OffloadBundler.h +++ 
b/clang/include/clang/Driver/OffloadBundler.h @@ -17,6 +17,7 @@ #ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H #define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H +#include "llvm/Support/Compression.h" #include "llvm/Support/Error.h" #include "llvm/TargetParser/Triple.h" #include <llvm/Support/MemoryBuffer.h> @@ -36,6 +37,8 @@ class OffloadBundlerConfig { bool HipOpenmpCompatible = false; bool Compress = false; bool Verbose = false; + llvm::compression::Format CompressionFormat; + int CompressionLevel; unsigned BundleAlignment = 1; unsigned HostInputIndex = ~0u; @@ -116,7 +119,8 @@ class CompressedOffloadBundle { public: static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> - compress(const llvm::MemoryBuffer &Input, bool Verbose = false); + compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input, + bool Verbose = false); static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> decompress(const llvm::MemoryBuffer &Input, bool Verbose = false); }; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5b3d366dbcf91b..2d26a7983f397b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>; def offload_compress : Flag<["--"], "offload-compress">, HelpText<"Compress offload device binaries (HIP only)">; def no_offload_compress : Flag<["--"], "no-offload-compress">; + +def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">, + Flags<[HelpHidden]>, + HelpText<"Compression level for offload device binaries (HIP only)">; } // CUDA options diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index f9eadfaec88dec..a077a9648b0e9b 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput, } OffloadBundlerConfig::OffloadBundlerConfig() { + if 
(llvm::compression::zstd::isAvailable()) { + CompressionFormat = llvm::compression::Format::Zstd; + // Compression level 3 is usually sufficient for zstd since long distance + // matching is enabled. + CompressionLevel = 3; + } else if (llvm::compression::zlib::isAvailable()) { + CompressionFormat = llvm::compression::Format::Zlib; + // Use default level for zlib since higher level does not have significant + // improvement. + CompressionLevel = llvm::compression::zlib::DefaultCompression; + } auto IgnoreEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR"); if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1") @@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() { llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS"); if (CompressEnvVarOpt.has_value()) Compress = CompressEnvVarOpt.value() == "1"; + + auto CompressionLevelEnvVarOpt = + llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL"); + if (CompressionLevelEnvVarOpt.has_value()) { + llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value(); + int Level; + if (!CompressionLevelStr.getAsInteger(10, Level)) + CompressionLevel = Level; + else + llvm::errs() + << "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: " + << CompressionLevelStr.str() << ". 
Ignoring it.\n"; + } +} + +// Utility function to format numbers with commas +static std::string formatWithCommas(unsigned long long Value) { + std::string Num = std::to_string(Value); + int InsertPosition = Num.length() - 3; + while (InsertPosition > 0) { + Num.insert(InsertPosition, ","); + InsertPosition -= 3; + } + return Num; } llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> -CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input, +CompressedOffloadBundle::compress(llvm::compression::Params P, + const llvm::MemoryBuffer &Input, bool Verbose) { + if (!llvm::compression::zstd::isAvailable() && + !llvm::compression::zlib::isAvailable()) + return createStringError(llvm::inconvertibleErrorCode(), + "Compression not supported"); + llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time", ClangOffloadBundlerTimerGroup); if (Verbose) @@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input, reinterpret_cast<const uint8_t *>(Input.getBuffer().data()), Input.getBuffer().size()); - llvm::compression::Format CompressionFormat; - - if (llvm::compression::zstd::isAvailable()) - CompressionFormat = llvm::compression::Format::Zstd; - else if (llvm::compression::zlib::isAvailable()) - CompressionFormat = llvm::compression::Format::Zlib; - else - return createStringError(llvm::inconvertibleErrorCode(), - "Compression not supported"); - llvm::Timer CompressTimer("Compression Timer", "Compression time", ClangOffloadBundlerTimerGroup); if (Verbose) CompressTimer.startTimer(); - llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer); + llvm::compression::compress(P, BufferUint8, CompressedBuffer); if (Verbose) CompressTimer.stopTimer(); - uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat); + uint16_t CompressionMethod = static_cast<uint16_t>(P.format); uint32_t UncompressedSize = Input.getBuffer().size(); SmallVector<char, 0> FinalBuffer; @@ -995,17 +1026,31 @@ 
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input, if (Verbose) { auto MethodUsed = - CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib"; + P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib"; + double CompressionRate = + static_cast<double>(UncompressedSize) / CompressedBuffer.size(); + double CompressionTimeSeconds = + CompressTimer.getTotalTime() + .getWallTime(); + double CompressionSpeedMBs = + (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds; + llvm::errs() << "Compressed bundle format version: " << Version << "\n" << "Compression method used: " << MethodUsed << "\n" - << "Binary size before compression: " << UncompressedSize - << " bytes\n" - << "Binary size after compression: " << CompressedBuffer.size() - << " bytes\n" + << "Compression level: " << P.level << "\n" + << "Binary size before compression: " + << formatWithCommas(UncompressedSize) << " bytes\n" + << "Binary size after compression: " + << formatWithCommas(CompressedBuffer.size()) << " bytes\n" + << "Compression rate: " + << llvm::format("%.2lf", CompressionRate) << "\n" + << "Compression ratio: " + << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n" + << "Compression speed: " + << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n" << "Truncated MD5 hash: " << llvm::format_hex(TruncatedHash, 16) << "\n"; } - return llvm::MemoryBuffer::getMemBufferCopy( llvm::StringRef(FinalBuffer.data(), FinalBuffer.size())); } @@ -1070,7 +1115,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, if (Verbose) { DecompressTimer.stopTimer(); - // Recalculate MD5 hash + double DecompressionTimeSeconds = + DecompressTimer.getTotalTime().getWallTime(); + + // Recalculate MD5 hash for integrity check llvm::Timer HashRecalcTimer("Hash Recalculation Timer", "Hash recalculation time", ClangOffloadBundlerTimerGroup); @@ -1084,16 +1132,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, 
HashRecalcTimer.stopTimer(); bool HashMatch = (StoredHash == RecalculatedHash); + double CompressionRate = + static_cast<double>(UncompressedSize) / CompressedData.size(); + double DecompressionSpeedMBs = + (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds; + llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n" << "Decompression method: " << (CompressionFormat == llvm::compression::Format::Zlib ? "zlib" : "zstd") << "\n" - << "Size before decompression: " << CompressedData.size() - << " bytes\n" - << "Size after decompression: " << UncompressedSize - << " bytes\n" + << "Size before decompression: " + << formatWithCommas(CompressedData.size()) << " bytes\n" + << "Size after decompression: " + << formatWithCommas(UncompressedSize) << " bytes\n" + << "Compression rate: " + << llvm::format("%.2lf", CompressionRate) << "\n" + << "Compression ratio: " + << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n" + << "Decompression speed: " + << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n" << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n" << "Recalculated hash: " << llvm::format_hex(RecalculatedHash, 16) << "\n" @@ -1287,8 +1346,10 @@ Error OffloadBundler::BundleFiles() { std::unique_ptr<llvm::MemoryBuffer> BufferMemory = llvm::MemoryBuffer::getMemBufferCopy( llvm::StringRef(Buffer.data(), Buffer.size())); - auto CompressionResult = - CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose); + auto CompressionResult = CompressedOffloadBundle::compress( + {BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel, + /*zstdEnableLdm=*/true}, + *BufferMemory, BundlerConfig.Verbose); if (auto Error = CompressionResult.takeError()) return Error; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fa17f6295d6ea7..7fe784ca076f6c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8524,7 +8524,6 @@ void 
ClangAs::ConstructJob(Compilation &C, const JobAction &JA, } // Begin OffloadBundler - void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -8622,11 +8621,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, } CmdArgs.push_back(TCArgs.MakeArgString(UB)); } - if (TCArgs.hasFlag(options::OPT_offload_compress, - options::OPT_no_offload_compress, false)) - CmdArgs.push_back("-compress"); - if (TCArgs.hasArg(options::OPT_v)) - CmdArgs.push_back("-verbose"); + addOffloadCompressArgs(TCArgs, CmdArgs); // All the inputs are encoded as commands. C.addCommand(std::make_unique<Command>( JA, *this, ResponseFileSupport::None(), @@ -8895,9 +8890,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, for (const char *LinkArg : LinkCommand->getArguments()) CmdArgs.push_back(LinkArg); - if (Args.hasFlag(options::OPT_offload_compress, - options::OPT_no_offload_compress, false)) - CmdArgs.push_back("--compress"); + addOffloadCompressArgs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper")); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 7f0f78b41e79ed..97d3f9f058229d 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2863,3 +2863,18 @@ void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC, CmdArgs.push_back("+outline-atomics"); } } + +void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs, + llvm::opt::ArgStringList &CmdArgs) { + if (TCArgs.hasFlag(options::OPT_offload_compress, + options::OPT_no_offload_compress, false)) + CmdArgs.push_back("-compress"); + if (TCArgs.hasArg(options::OPT_v)) + CmdArgs.push_back("-verbose"); + if (auto *Arg = + TCArgs.getLastArg(options::OPT_offload_compression_level_EQ)) { + std::string CompressionLevelArg = + 
std::string("-compression-level=") + Arg->getValue(); + CmdArgs.push_back(TCArgs.MakeArgString(CompressionLevelArg)); + } +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index b8f649aab4bdd2..bb37be4bd6ea05 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -221,6 +221,8 @@ void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const llvm::Triple &Triple); +void addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs, + llvm::opt::ArgStringList &CmdArgs); } // end namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index fcecf2e1313bb5..08c647dfcb6f72 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "HIPUtility.h" +#include "Clang.h" #include "CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Options.h" @@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, Args.MakeArgString(std::string("-output=").append(Output)); BundlerArgs.push_back(BundlerOutputArg); - if (Args.hasFlag(options::OPT_offload_compress, - options::OPT_no_offload_compress, false)) - BundlerArgs.push_back("-compress"); - if (Args.hasArg(options::OPT_v)) - BundlerArgs.push_back("-verbose"); + addOffloadCompressArgs(Args, BundlerArgs); const char *Bundler = Args.MakeArgString( T.getToolChain().GetProgramPath("clang-offload-bundler")); diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c index a57ee6da9a86a6..15b60341a8dbde 100644 --- a/clang/test/Driver/clang-offload-bundler-zlib.c +++ b/clang/test/Driver/clang-offload-bundler-zlib.c @@ -1,4 +1,4 @@ -// 
REQUIRES: zlib +// REQUIRES: zlib && !zstd // REQUIRES: x86-registered-target // UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} @@ -34,13 +34,28 @@ // RUN: diff %t.tgt2 %t.res.tgt2 // -// COMPRESS: Compression method used: -// DECOMPRESS: Decompression method: +// COMPRESS: Compression method used: zlib +// COMPRESS: Compression level: 6 +// DECOMPRESS: Decompression method: zlib +// DECOMPRESS: Hashes match: Yes // NOHOST-NOT: host- // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 // +// Check -compression-level= option + +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ +// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \ +// RUN: FileCheck -check-prefix=LEVEL %s +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ +// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle +// RUN: diff %t.tgt1 %t.res.tgt1 +// RUN: diff %t.tgt2 %t.res.tgt2 +// +// LEVEL: Compression method used: zlib +// LEVEL: Compression level: 9 + // // Check -bundle-align option. 
// diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c index 3b577d4d166a3f..c1eb3f6e7ebd07 100644 --- a/clang/test/Driver/clang-offload-bundler-zstd.c +++ b/clang/test/Driver/clang-offload-bundler-zstd.c @@ -31,13 +31,28 @@ // RUN: diff %t.tgt1 %t.res.tgt1 // RUN: diff %t.tgt2 %t.res.tgt2 // -// COMPRESS: Compression method used -// DECOMPRESS: Decompression method +// COMPRESS: Compression method used: zstd +// COMPRESS: Compression level: 20 +// DECOMPRESS: Decompression method: zstd +// DECOMPRESS: Hashes match: Yes // NOHOST-NOT: host- // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 // +// Check -compression-level= option + +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ +// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \ +// RUN: FileCheck -check-prefix=LEVEL %s +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ +// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle +// RUN: diff %t.tgt1 %t.res.tgt1 +// RUN: diff %t.tgt2 %t.res.tgt2 +// +// LEVEL: Compression method used: zstd +// LEVEL: Compression level: 9 + // // Check -bundle-align option. 
// diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip index 7a269c566bb93c..c1566c5f192c2d 100644 --- a/clang/test/Driver/hip-offload-compress-zlib.hip +++ b/clang/test/Driver/hip-offload-compress-zlib.hip @@ -1,4 +1,4 @@ -// REQUIRES: zlib +// REQUIRES: zlib && !zstd // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target @@ -9,13 +9,14 @@ // RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ // RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ +// RUN: --offload-compress --offload-compression-level=9 \ +// RUN: --offload-device-only --gpu-bundle-output \ // RUN: -o %t.bc \ // RUN: 2>&1 | FileCheck %s // CHECK: clang-offload-bundler{{.*}} -type=bc // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -// CHECK-SAME: -compress -verbose +// CHECK-SAME: -compress -verbose -compression-level=9 // CHECK: Compressed bundle format // Test uncompress of bundled bitcode. 
diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip index fa7fb3b6d5b5cb..ede7d59f113c86 100644 --- a/clang/test/Driver/hip-offload-compress-zstd.hip +++ b/clang/test/Driver/hip-offload-compress-zstd.hip @@ -9,13 +9,14 @@ // RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ // RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ -// RUN: --offload-compress --offload-device-only --gpu-bundle-output \ +// RUN: --offload-compress --offload-compression-level=9 \ +// RUN: --offload-device-only --gpu-bundle-output \ // RUN: -o %t.bc \ // RUN: 2>&1 | FileCheck %s // CHECK: clang-offload-bundler{{.*}} -type=bc // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101 -// CHECK-SAME: -compress -verbose +// CHECK-SAME: -compress -verbose -compression-level=9 // CHECK: Compressed bundle format // Test uncompress of bundled bitcode. diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 0e6fd80b429846..cbf24d4ce3a824 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -114,12 +114,13 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --compress \ +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ +// RUN: --compress --compression-level=6 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908 // HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a -// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 
-compress -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb +// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -compression-level=6 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx908 -input=/dev/null -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 7e6e289c50d872..c9d32b5b4f9db2 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -407,6 +407,11 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, if (Args.hasArg(OPT_compress)) CmdArgs.push_back("-compress"); + if (auto *Arg = Args.getLastArg(OPT_compression_level_eq)) { + std::string CompressionLevelArg = + std::string("-compression-level=") + Arg->getValue(); + CmdArgs.push_back(Args.MakeArgString(CompressionLevelArg)); + } SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"}; for (const auto &[File, Arch] : InputFiles) diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 473fb19d922385..0a8bd541c45242 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -60,6 +60,8 @@ def save_temps : Flag<["--"], "save-temps">, Flags<[WrapperOnlyOption]>, HelpText<"Save intermediate results">; def compress : Flag<["--"], "compress">, Flags<[WrapperOnlyOption]>, HelpText<"Compress bundled files">; +def compression_level_eq : Joined<["--"], "compression-level=">, + Flags<[WrapperOnlyOption]>, 
HelpText<"Specify the compression level (integer)">; def wrapper_time_trace_eq : Joined<["--"], "wrapper-time-trace=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<file>">, diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index ec67e24552e9c9..e336417586f70b 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -145,6 +145,9 @@ int main(int argc, const char **argv) { cl::init(false), cl::cat(ClangOffloadBundlerCategory)); cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"), cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + cl::opt<int> CompressionLevel( + "compression-level", cl::desc("Specify the compression level (integer)"), + cl::value_desc("n"), cl::Optional, cl::cat(ClangOffloadBundlerCategory)); // Process commandline options and report errors sys::PrintStackTraceOnErrorSignal(argv[0]); @@ -178,6 +181,8 @@ int main(int argc, const char **argv) { BundlerConfig.Compress = Compress; if (Verbose.getNumOccurrences() > 0) BundlerConfig.Verbose = Verbose; + if (CompressionLevel.getNumOccurrences() > 0) + BundlerConfig.CompressionLevel = CompressionLevel; BundlerConfig.TargetNames = TargetNames; BundlerConfig.InputFileNames = InputFileNames; diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index c3ba3274d6ed87..2a8da9e96d356f 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -63,7 +63,7 @@ bool isAvailable(); void compress(ArrayRef<uint8_t> Input, SmallVectorImpl<uint8_t> &CompressedBuffer, - int Level = DefaultCompression); + int Level = DefaultCompression, bool EnableLdm = false); Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output, size_t &UncompressedSize); @@ -94,10 +94,13 @@ struct Params { constexpr Params(Format F) : format(F), level(F == Format::Zlib ? 
zlib::DefaultCompression : zstd::DefaultCompression) {} + constexpr Params(Format F, int L, bool Ldm = false) + : format(F), level(L), zstdEnableLdm(Ldm) {} Params(DebugCompressionType Type) : Params(formatFor(Type)) {} Format format; int level; + bool zstdEnableLdm = false; // Enable zstd long distance matching // This may support multi-threading for zstd in the future. Note that // different threads may produce different output, so be careful if certain // output determinism is desired. diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 8e57ba798f5207..1294366e761807 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -50,7 +50,7 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input, zlib::compress(Input, Output, P.level); break; case compression::Format::Zstd: - zstd::compress(Input, Output, P.level); + zstd::compress(Input, Output, P.level, P.zstdEnableLdm); break; } } @@ -163,17 +163,39 @@ Error zlib::decompress(ArrayRef<uint8_t> Input, bool zstd::isAvailable() { return true; } +#include <zstd.h> // Ensure ZSTD library is included + void zstd::compress(ArrayRef<uint8_t> Input, - SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { - unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size()); + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level, + bool EnableLdm) { + ZSTD_CCtx *Cctx = ZSTD_createCCtx(); + if (!Cctx) + report_bad_alloc_error("Failed to create ZSTD_CCtx"); + + if (ZSTD_isError(ZSTD_CCtx_setParameter( + Cctx, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 
1 : 0))) { + ZSTD_freeCCtx(Cctx); + report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching"); + } + + if (ZSTD_isError( + ZSTD_CCtx_setParameter(Cctx, ZSTD_c_compressionLevel, Level))) { + ZSTD_freeCCtx(Cctx); + report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel"); + } + + unsigned long CompressedBufferSize = ZSTD_compressBound(Input.size()); CompressedBuffer.resize_for_overwrite(CompressedBufferSize); - unsigned long CompressedSize = - ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize, - (const char *)Input.data(), Input.size(), Level); + + size_t const CompressedSize = + ZSTD_compress2(Cctx, CompressedBuffer.data(), CompressedBufferSize, + Input.data(), Input.size()); + + ZSTD_freeCCtx(Cctx); + if (ZSTD_isError(CompressedSize)) - report_bad_alloc_error("Allocation failed"); - // Tell MemorySanitizer that zstd output buffer is fully initialized. - // This avoids a false report when running LLVM with uninstrumented ZLib. + report_bad_alloc_error("Compression failed"); + __msan_unpoison(CompressedBuffer.data(), CompressedSize); if (CompressedSize < CompressedBuffer.size()) CompressedBuffer.truncate(CompressedSize); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits