Author: Joseph Huber Date: 2022-09-14T09:42:06-05:00 New Revision: 194ec844f5c67306f505a3418038c5e75859bad8
URL: https://github.com/llvm/llvm-project/commit/194ec844f5c67306f505a3418038c5e75859bad8 DIFF: https://github.com/llvm/llvm-project/commit/194ec844f5c67306f505a3418038c5e75859bad8.diff LOG: [OpenMP][AMDGPU] Link bitcode ROCm device libraries per-TU Previously, we linked in the ROCm device libraries which provide math and other utility functions late. This is not stricly correct as this library contains several flags that are only set per-TU, such as fast math or denormalization. This patch changes this to pass the bitcode libraries per-TU using the same method we use for the CUDA libraries. This has the advantage that we correctly propagate attributes making this implementation more correct. Additionally, many annoying unused functions were not being fully removed during LTO. This lead to erroneous warning messages and remarks on unused functions. I am not sure if not finding these libraries should be a hard error. let me know if it should be demoted to a warning saying that some device utilities will not work without them. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D133726 Added: Modified: clang/include/clang/Driver/ToolChain.h clang/lib/Driver/ToolChain.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/HIPAMD.cpp clang/lib/Driver/ToolChains/HIPAMD.h clang/lib/Driver/ToolChains/HIPSPV.cpp clang/lib/Driver/ToolChains/HIPSPV.h clang/test/Driver/amdgpu-openmp-toolchain.c Removed: ################################################################################ diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 59d8dafc079f..28137e36e2af 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -714,9 +714,9 @@ class ToolChain { virtual VersionTuple computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const; - /// Get paths of HIP device libraries. + /// Get paths for device libraries. virtual llvm::SmallVector<BitCodeLibraryInfo, 12> - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const; + getDeviceLibs(const llvm::opt::ArgList &Args) const; /// Add the system specific linker arguments to use /// for the given HIP runtime library type. diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 7f16469155bd..26c5087b4ac2 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1099,7 +1099,7 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const { +ToolChain::getDeviceLibs(const ArgList &DriverArgs) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 4866982a8dfa..8ab79e1af532 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -75,6 +75,12 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + for (auto BCFile : getDeviceLibs(DriverArgs)) { + CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" + : "-mlink-bitcode-file"); + CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); + } + // Link the bitcode library late if we're using device LTO. if (getDriver().isUsingLTO(/* IsOffload */ true)) return; @@ -158,3 +164,24 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, const ArgList &Args) const { return HostTC.computeMSVCVersion(D, Args); } + +llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> +AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { + if (Args.hasArg(options::OPT_nogpulib)) + return {}; + + if (!RocmInstallation.hasDeviceLibrary()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; + return {}; + } + + StringRef GpuArch = getProcessorFromTargetID( + getTriple(), Args.getLastArgValue(options::OPT_march_EQ)); + + SmallVector<BitCodeLibraryInfo, 12> BCLibs; + for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(), + /*IsOpenMP=*/true)) + BCLibs.emplace_back(BCLib); + + return BCLibs; +} diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index 51a1c4696754..2be444a42c55 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -54,6 +54,9 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const override; + llvm::SmallVector<BitCodeLibraryInfo, 12> + getDeviceLibs(const llvm::opt::ArgList &Args) const override; + const ToolChain &HostTC; }; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 73172e6a009d..689dedbe49df 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8361,7 +8361,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); const llvm::Triple TheTriple = getToolChain().getTriple(); - auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); ArgStringList CmdArgs; // Pass the CUDA path to the linker wrapper tool. @@ -8379,30 +8378,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, } } - // Get the AMDGPU math libraries. - // FIXME: This method is bad, remove once AMDGPU has a proper math library - // (see AMDGCN::OpenMPLinker::constructLLVMLinkCommand). - for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { - const ToolChain *TC = I.second; - - if (!TC->getTriple().isAMDGPU() || Args.hasArg(options::OPT_nogpulib)) - continue; - - const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); - StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); - const toolchains::ROCMToolChain RocmTC(TC->getDriver(), TC->getTriple(), - TCArgs); - - SmallVector<std::string, 12> BCLibs = - RocmTC.getCommonDeviceLibNames(TCArgs, Arch.str()); - - for (StringRef LibName : BCLibs) - CmdArgs.push_back(Args.MakeArgString( - "--bitcode-library=" + - Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" + - TC->getTripleString() + "-" + Arch + "=" + LibName)); - } - if (D.isUsingLTO(/* IsOffload */ true)) { // Pass in the optimization level to use for LTO. if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 7791074d7c92..1951ac12515d 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -246,7 +246,7 @@ void HIPAMDToolChain::addClangTargetOptions( CC1Args.push_back("-fapply-global-visibility-to-externs"); } - for (auto BCFile : getHIPDeviceLibs(DriverArgs)) { + for (auto BCFile : getDeviceLibs(DriverArgs)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -332,7 +332,7 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; if (DriverArgs.hasArg(options::OPT_nogpulib)) return {}; diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index 25d4a998e500..7275b1a137ac 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -76,7 +76,7 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index 16956e6971ca..78566ca9a652 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -154,7 +154,7 @@ void HIPSPVToolChain::addClangTargetOptions( CC1Args.append( {"-fvisibility=hidden", "-fapply-global-visibility-to-externs"}); - llvm::for_each(getHIPDeviceLibs(DriverArgs), + llvm::for_each(getDeviceLibs(DriverArgs), [&](const BitCodeLibraryInfo &BCFile) { CC1Args.append({"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); @@ -206,7 +206,7 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -HIPSPVToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; if (DriverArgs.hasArg(options::OPT_nogpulib)) return {}; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index 79520f77c742..1c4c474cdf69 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -69,7 +69,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 50ce8e5d1b1f..96caf57f33bf 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -49,5 +49,12 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR // CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" -// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW -// CHECK-LIB-DEVICE-NEW: {{.*}}clang-linker-wrapper{{.*}}--bitcode-library=openmp-amdgcn-amd-amdhsa-gfx803={{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" +// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \ +// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE +// CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" + +// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \ +// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB +// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits