https://github.com/fmayer created https://github.com/llvm/llvm-project/pull/126610
Reverts llvm/llvm-project#126544 This broke the build on sanitizer buildbots: https://lab.llvm.org/buildbot/#/builders/66/builds/9811 >From 7d3737f2374f489256bc3fce9cb96283e8307f75 Mon Sep 17 00:00:00 2001 From: Florian Mayer <florian.ma...@bitsrc.org> Date: Mon, 10 Feb 2025 14:19:59 -0800 Subject: [PATCH] Revert "[NVPTX] Make ctor/dtor lowering always enabled in NVPTX (#126544)" This reverts commit 3d9409f5bc413b12acf95b4a6c2a5c8860d95d7c. --- .../clang/Basic/DiagnosticSemaKinds.td | 2 -- clang/lib/Driver/ToolChains/Cuda.cpp | 19 +++++++++++++---- clang/lib/Driver/ToolChains/Cuda.h | 7 +++++-- clang/lib/Sema/SemaDeclAttr.cpp | 9 -------- clang/test/Driver/cuda-cross-compiling.c | 13 ++++++++++++ clang/test/SemaCUDA/device-var-init.cu | 9 -------- libc/cmake/modules/LLVMLibCTestRules.cmake | 4 +++- .../test/configs/nvptx-libc++-shared.cfg.in | 2 ++ llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 21 +++++++++++++++++++ llvm/test/CodeGen/NVPTX/global-ctor.ll | 9 ++++++++ llvm/test/CodeGen/NVPTX/global-dtor.ll | 9 ++++++++ llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll | 2 +- 12 files changed, 78 insertions(+), 28 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/global-ctor.ll create mode 100644 llvm/test/CodeGen/NVPTX/global-dtor.ll diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index cf390724b07a484..bcae9e9f3009387 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9193,8 +9193,6 @@ def err_cuda_device_exceptions : Error< def err_dynamic_var_init : Error< "dynamic initialization is not supported for " "__device__, __constant__, __shared__, and __managed__ variables">; -def err_cuda_ctor_dtor_attrs - : Error<"CUDA does not support global %0 for __device__ functions">; def err_shared_var_init : Error< "initialization is not supported for __shared__ variables">; def err_cuda_vla : Error< diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index d6487d4bc274de4..c7d5893085080fb 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -639,6 +639,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back( Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ","))); + // Enable ctor / dtor lowering for the direct / freestanding NVPTX target. + CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"}); + // Add paths for the default clang library path. SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(TC.getDriver().Dir); @@ -723,8 +726,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, /// toolchain. NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::Triple &HostTriple, - const ArgList &Args) - : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) { + const ArgList &Args, bool Freestanding = false) + : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args), + Freestanding(Freestanding) { if (CudaInstallation.isValid()) getProgramPaths().push_back(std::string(CudaInstallation.getBinPath())); // Lookup binaries into the driver directory, this is used to @@ -736,7 +740,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, /// system's default triple if not provided. NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {} + : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args, + /*Freestanding=*/true) {} llvm::opt::DerivedArgList * NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, @@ -777,7 +782,13 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, void NVPTXToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - Action::OffloadKind DeviceOffloadingKind) const {} + Action::OffloadKind DeviceOffloadingKind) const { + // If we are compiling with a standalone NVPTX toolchain we want to try to + // mimic a standard environment as much as possible. So we enable lowering + // ctor / dtor functions to global symbols that can be registered. + if (Freestanding && !getDriver().isUsingLTO()) + CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"}); +} bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const { const Option &O = A->getOption(); diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 259eda6ebcadfb4..c2219ec47cfa979 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -132,8 +132,8 @@ namespace toolchains { class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain { public: NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, - const llvm::Triple &HostTriple, - const llvm::opt::ArgList &Args); + const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args, + bool Freestanding); NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); @@ -179,6 +179,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain { protected: Tool *buildAssembler() const override; // ptxas. Tool *buildLinker() const override; // nvlink. + +private: + bool Freestanding = false; }; class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 527db176cf8dd60..f351663c6824e36 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7484,15 +7484,6 @@ void Sema::ProcessDeclAttributeList( } } - // Do not permit 'constructor' or 'destructor' attributes on __device__ code. - if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() && - (D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) && - !getLangOpts().GPUAllowDeviceInit) { - Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs) - << (D->hasAttr<ConstructorAttr>() ? "constructors" : "destructors"); - D->setInvalidDecl(); - } - // Do this check after processing D's attributes because the attribute // objc_method_family can change whether the given method is in the init // family, and it can be applied after objc_designated_initializer. This is a diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index 1df231ecb447946..7817e462c47be91 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -57,6 +57,19 @@ // LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" "sm_61"{{.*}}[[CUBIN:.+]].o +// +// Test to ensure that we enable handling global constructors in a freestanding +// Nvidia compilation. +// +// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=LOWERING %s +// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=LOWERING-LTO %s + +// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor" +// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor" +// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor" + // // Test passing arguments directly to nvlink. // diff --git a/clang/test/SemaCUDA/device-var-init.cu b/clang/test/SemaCUDA/device-var-init.cu index a9e3557c20ebf1a..1555d151c2590af 100644 --- a/clang/test/SemaCUDA/device-var-init.cu +++ b/clang/test/SemaCUDA/device-var-init.cu @@ -485,12 +485,3 @@ void instantiate() { bar<NontrivialInitializer><<<1, 1>>>(); // expected-note@-1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}} } - -__device__ void *ptr1 = nullptr; -__device__ void *ptr2 = ptr1; -// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} - -__device__ [[gnu::constructor(101)]] void ctor() {} -// expected-error@-1 {{CUDA does not support global constructors for __device__ functions}} -__device__ [[gnu::destructor(101)]] void dtor() {} -// expected-error@-1 {{CUDA does not support global destructors for __device__ functions}} diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index f33db5826537bd2..ffbdb40cd5091fa 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -560,12 +560,14 @@ function(add_integration_test test_name) if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) target_link_options(${fq_build_target_name} PRIVATE ${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS} - -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib -static + -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto + "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}") elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) target_link_options(${fq_build_target_name} PRIVATE ${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS} "-Wl,--suppress-stack-size-warning" -Wno-multi-gpu + "-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1" "-Wl,-mllvm,-nvptx-emit-init-fini-kernel" -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static "--cuda-path=${LIBC_CUDA_ROOT}") diff --git a/libcxx/test/configs/nvptx-libc++-shared.cfg.in b/libcxx/test/configs/nvptx-libc++-shared.cfg.in index e07ed35da4d5ad5..9a3ca9c8da95093 100644 --- a/libcxx/test/configs/nvptx-libc++-shared.cfg.in +++ b/libcxx/test/configs/nvptx-libc++-shared.cfg.in @@ -10,6 +10,8 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -startfiles -stdlib ' '-L %{lib-dir} -lc++ -lc++abi ' '-Wl,--suppress-stack-size-warning ' + '-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 ' + '-Wl,-mllvm,-nvptx-emit-init-fini-kernel' )) config.substitutions.append(('%{exec}', '%{executor} --no-parallelism' diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 68a0f4cb0ade9e9..ad1433821036be6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -91,6 +91,11 @@ using namespace llvm; +static cl::opt<bool> + LowerCtorDtor("nvptx-lower-global-ctor-dtor", + cl::desc("Lower GPU ctor / dtors to globals on the device."), + cl::init(false), cl::Hidden); + #define DEPOTNAME "__local_depot" /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V @@ -789,6 +794,22 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30)) report_fatal_error(".alias requires PTX version >= 6.3 and sm_30"); + // OpenMP supports NVPTX global constructors and destructors. + bool IsOpenMP = M.getModuleFlag("openmp") != nullptr; + + if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) && + !LowerCtorDtor && !IsOpenMP) { + report_fatal_error( + "Module has a nontrivial global ctor, which NVPTX does not support."); + return true; // error + } + if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) && + !LowerCtorDtor && !IsOpenMP) { + report_fatal_error( + "Module has a nontrivial global dtor, which NVPTX does not support."); + return true; // error + } + // We need to call the parent's one explicitly. bool Result = AsmPrinter::doInitialization(M); diff --git a/llvm/test/CodeGen/NVPTX/global-ctor.ll b/llvm/test/CodeGen/NVPTX/global-ctor.ll new file mode 100644 index 000000000000000..6a833128206cec3 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/global-ctor.ll @@ -0,0 +1,9 @@ +; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s + +; Check that llc dies when given a nonempty global ctor. +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }] + +; CHECK: ERROR: Module has a nontrivial global ctor +define internal void @foo() { + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/global-dtor.ll b/llvm/test/CodeGen/NVPTX/global-dtor.ll new file mode 100644 index 000000000000000..f385d620bba3607 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/global-dtor.ll @@ -0,0 +1,9 @@ +; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s + +; Check that llc dies when given a nonempty global dtor. +@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }] + +; CHECK: ERROR: Module has a nontrivial global dtor +define internal void @foo() { + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll index 60b3d70840af591..4ee1ca3ad4b1f0a 100644 --- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll @@ -8,7 +8,7 @@ ; Make sure we get the same result if we run multiple times ; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s -; RUN: llc -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY +; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits