https://github.com/jhuber6 created 
https://github.com/llvm/llvm-project/pull/126544

Summary:
Currently we conditionally enable NVPTX lowering depending on the
language (C/C++/OpenMP). Unfortunately this causes problems because this
option is only present if the backend was enabled, which causes this to
error if you try to make LLVM-IR.

This patch instead makes it the only accepted lowering. The reason we
had it as opt-in before is because it is not handled by CUDA. So, this
pach also introduces diagnostics to prevent *all* creation of
device-side global constructors and destructors. We already did this for
variables, now we do it for attributes as well.

This inverts the responsibility of blocking this from the backend to the
langauage like it should be given that support for this is language
dependent.


>From 253c82967bf0a6d0a4eb5234880147ac1180ad70 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Mon, 10 Feb 2025 10:49:02 -0600
Subject: [PATCH] [NVPTX] Make ctor/dtor lowering always enabled in NVPTX

Summary:
Currently we conditionally enable NVPTX lowering depending on the
language (C/C++/OpenMP). Unfortunately this causes problems because this
option is only present if the backend was enabled, which causes this to
error if you try to make LLVM-IR.

This patch instead makes it the only accepted lowering. The reason we
had it as opt-in before is because it is not handled by CUDA. So, this
pach also introduces diagnostics to prevent *all* creation of
device-side global constructors and destructors. We already did this for
variables, now we do it for attributes as well.

This inverts the responsibility of blocking this from the backend to the
langauage like it should be given that support for this is language
dependent.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 ++
 clang/lib/Driver/ToolChains/Cuda.cpp          | 19 ++++-------------
 clang/lib/Driver/ToolChains/Cuda.h            |  7 ++-----
 clang/lib/Sema/SemaDeclAttr.cpp               | 11 ++++++++++
 clang/test/Driver/cuda-cross-compiling.c      | 13 ------------
 clang/test/SemaCUDA/device-var-init.cu        |  9 ++++++++
 libc/cmake/modules/LLVMLibCTestRules.cmake    |  4 +---
 .../test/configs/nvptx-libc++-shared.cfg.in   |  2 --
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp     | 21 -------------------
 llvm/test/CodeGen/NVPTX/global-ctor.ll        |  9 --------
 llvm/test/CodeGen/NVPTX/global-dtor.ll        |  9 --------
 llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll    |  2 +-
 12 files changed, 30 insertions(+), 78 deletions(-)
 delete mode 100644 llvm/test/CodeGen/NVPTX/global-ctor.ll
 delete mode 100644 llvm/test/CodeGen/NVPTX/global-dtor.ll

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index bcae9e9f3009387..cf390724b07a484 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9193,6 +9193,8 @@ def err_cuda_device_exceptions : Error<
 def err_dynamic_var_init : Error<
     "dynamic initialization is not supported for "
     "__device__, __constant__, __shared__, and __managed__ variables">;
+def err_cuda_ctor_dtor_attrs
+    : Error<"CUDA does not support global %0 for __device__ functions">;
 def err_shared_var_init : Error<
     "initialization is not supported for __shared__ variables">;
 def err_cuda_vla : Error<
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index c7d5893085080fb..d6487d4bc274de4 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -639,9 +639,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   CmdArgs.push_back(
       Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));
 
-  // Enable ctor / dtor lowering for the direct / freestanding NVPTX target.
-  CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
-
   // Add paths for the default clang library path.
   SmallString<256> DefaultLibPath =
       llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -726,9 +723,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const 
llvm::Triple &Triple,
 /// toolchain.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                                const llvm::Triple &HostTriple,
-                               const ArgList &Args, bool Freestanding = false)
-    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
-      Freestanding(Freestanding) {
+                               const ArgList &Args)
+    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
   if (CudaInstallation.isValid())
     getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
   // Lookup binaries into the driver directory, this is used to
@@ -740,8 +736,7 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const 
llvm::Triple &Triple,
 /// system's default triple if not provided.
 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                                const ArgList &Args)
-    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
-                     /*Freestanding=*/true) {}
+    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}
 
 llvm::opt::DerivedArgList *
 NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
@@ -782,13 +777,7 @@ NVPTXToolChain::TranslateArgs(const 
llvm::opt::DerivedArgList &Args,
 
 void NVPTXToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
-    Action::OffloadKind DeviceOffloadingKind) const {
-  // If we are compiling with a standalone NVPTX toolchain we want to try to
-  // mimic a standard environment as much as possible. So we enable lowering
-  // ctor / dtor functions to global symbols that can be registered.
-  if (Freestanding && !getDriver().isUsingLTO())
-    CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
-}
+    Action::OffloadKind DeviceOffloadingKind) const {}
 
 bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
   const Option &O = A->getOption();
diff --git a/clang/lib/Driver/ToolChains/Cuda.h 
b/clang/lib/Driver/ToolChains/Cuda.h
index c2219ec47cfa979..259eda6ebcadfb4 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -132,8 +132,8 @@ namespace toolchains {
 class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
 public:
   NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
-                 const llvm::Triple &HostTriple, const llvm::opt::ArgList 
&Args,
-                 bool Freestanding);
+                 const llvm::Triple &HostTriple,
+                 const llvm::opt::ArgList &Args);
 
   NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                  const llvm::opt::ArgList &Args);
@@ -179,9 +179,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public 
ToolChain {
 protected:
   Tool *buildAssembler() const override; // ptxas.
   Tool *buildLinker() const override;    // nvlink.
-
-private:
-  bool Freestanding = false;
 };
 
 class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index f351663c6824e36..42e1f7a412d3144 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -7484,6 +7484,17 @@ void Sema::ProcessDeclAttributeList(
     }
   }
 
+  // Do not permit 'constructor' or 'destructor' attributes on __device__ code.
+  if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit) {
+    if (D->hasAttr<ConstructorAttr>() && D->hasAttr<CUDADeviceAttr>()) {
+      Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs) << "constructors";
+      D->setInvalidDecl();
+    } else if (D->hasAttr<DestructorAttr>() && D->hasAttr<CUDADeviceAttr>()) {
+      Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs) << "destructors";
+      D->setInvalidDecl();
+    }
+  }
+
   // Do this check after processing D's attributes because the attribute
   // objc_method_family can change whether the given method is in the init
   // family, and it can be applied after objc_designated_initializer. This is a
diff --git a/clang/test/Driver/cuda-cross-compiling.c 
b/clang/test/Driver/cuda-cross-compiling.c
index 7817e462c47be91..1df231ecb447946 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -57,19 +57,6 @@
 
 // LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" 
"sm_61"{{.*}}[[CUBIN:.+]].o
 
-//
-// Test to ensure that we enable handling global constructors in a freestanding
-// Nvidia compilation.
-//
-// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
-// RUN:   | FileCheck -check-prefix=LOWERING %s
-// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \
-// RUN:   | FileCheck -check-prefix=LOWERING-LTO %s
-
-// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" 
"--nvptx-lower-global-ctor-dtor"
-// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" 
"--nvptx-lower-global-ctor-dtor"
-// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor"
-
 //
 // Test passing arguments directly to nvlink.
 //
diff --git a/clang/test/SemaCUDA/device-var-init.cu 
b/clang/test/SemaCUDA/device-var-init.cu
index 1555d151c2590af..a9e3557c20ebf1a 100644
--- a/clang/test/SemaCUDA/device-var-init.cu
+++ b/clang/test/SemaCUDA/device-var-init.cu
@@ -485,3 +485,12 @@ void instantiate() {
   bar<NontrivialInitializer><<<1, 1>>>();
 // expected-note@-1 {{in instantiation of function template specialization 
'bar<NontrivialInitializer>' requested here}}
 }
+
+__device__ void *ptr1 = nullptr;
+__device__ void *ptr2 = ptr1;
+// expected-error@-1 {{dynamic initialization is not supported for __device__, 
__constant__, __shared__, and __managed__ variables}}
+
+__device__ [[gnu::constructor(101)]] void ctor() {}
+// expected-error@-1 {{CUDA does not support global constructors for 
__device__ functions}}
+__device__ [[gnu::destructor(101)]] void dtor() {}
+// expected-error@-1 {{CUDA does not support global destructors for __device__ 
functions}}
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake 
b/libc/cmake/modules/LLVMLibCTestRules.cmake
index ffbdb40cd5091fa..f33db5826537bd2 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -560,14 +560,12 @@ function(add_integration_test test_name)
   if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
     target_link_options(${fq_build_target_name} PRIVATE
       ${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
-      -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
-      "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
+      -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib 
-static
       "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
   elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
     target_link_options(${fq_build_target_name} PRIVATE
       ${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
       "-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
-      "-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
       "-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
       -march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
       "--cuda-path=${LIBC_CUDA_ROOT}")
diff --git a/libcxx/test/configs/nvptx-libc++-shared.cfg.in 
b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
index 9a3ca9c8da95093..e07ed35da4d5ad5 100644
--- a/libcxx/test/configs/nvptx-libc++-shared.cfg.in
+++ b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
@@ -10,8 +10,6 @@ config.substitutions.append(('%{link_flags}',
    '-nostdlib++ -startfiles -stdlib '
    '-L %{lib-dir} -lc++ -lc++abi '
    '-Wl,--suppress-stack-size-warning '
-   '-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
-   '-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
 ))
 config.substitutions.append(('%{exec}',
     '%{executor} --no-parallelism'
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp 
b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ad1433821036be6..68a0f4cb0ade9e9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -91,11 +91,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool>
-    LowerCtorDtor("nvptx-lower-global-ctor-dtor",
-                  cl::desc("Lower GPU ctor / dtors to globals on the device."),
-                  cl::init(false), cl::Hidden);
-
 #define DEPOTNAME "__local_depot"
 
 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -794,22 +789,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
     report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
 
-  // OpenMP supports NVPTX global constructors and destructors.
-  bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
-
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
-      !LowerCtorDtor && !IsOpenMP) {
-    report_fatal_error(
-        "Module has a nontrivial global ctor, which NVPTX does not support.");
-    return true;  // error
-  }
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
-      !LowerCtorDtor && !IsOpenMP) {
-    report_fatal_error(
-        "Module has a nontrivial global dtor, which NVPTX does not support.");
-    return true;  // error
-  }
-
   // We need to call the parent's one explicitly.
   bool Result = AsmPrinter::doInitialization(M);
 
diff --git a/llvm/test/CodeGen/NVPTX/global-ctor.ll 
b/llvm/test/CodeGen/NVPTX/global-ctor.ll
deleted file mode 100644
index 6a833128206cec3..000000000000000
--- a/llvm/test/CodeGen/NVPTX/global-ctor.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
-
-; Check that llc dies when given a nonempty global ctor.
-@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr 
} { i32 65535, ptr @foo, ptr null }]
-
-; CHECK: ERROR: Module has a nontrivial global ctor
-define internal void @foo() {
-  ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/global-dtor.ll 
b/llvm/test/CodeGen/NVPTX/global-dtor.ll
deleted file mode 100644
index f385d620bba3607..000000000000000
--- a/llvm/test/CodeGen/NVPTX/global-dtor.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
-
-; Check that llc dies when given a nonempty global dtor.
-@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr 
} { i32 65535, ptr @foo, ptr null }]
-
-; CHECK: ERROR: Module has a nontrivial global dtor
-define internal void @foo() {
-  ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll 
b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
index 4ee1ca3ad4b1f0a..60b3d70840af591 100644
--- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -8,7 +8,7 @@
 
 ; Make sure we get the same result if we run multiple times
 ; RUN: opt -S -mtriple=nvptx64-- 
-passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
-; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa 
-mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+; RUN: llc -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | 
FileCheck %s -check-prefix=VISIBILITY
 
 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ 
i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ 
i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to