llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang
            
<details>
<summary>Changes</summary>
Currently, clang emits LLVM IR that fails verifier for the following code:

```
template<typename T>
__global__ void foo(T x);

void bar() {
  foo<<<1, 1>>>(0);
}
```
This is due to clang putting the kernel handle for foo into comdat, which is 
not allowed, since the kernel handle is a declaration.

The siutation is similar to calling a declaration-only template function. The 
callee will be a declaration in LLVM IR and won't be put into comdat. This is 
in contrast to calling a template function with body, which will be put into 
comdat.

Fixes: SWDEV-419769
--
Full diff: https://github.com/llvm/llvm-project/pull/66283.diff

2 Files Affected:

- (modified) clang/lib/CodeGen/CGCUDANV.cpp (+4-1) 
- (modified) clang/test/CodeGenCUDA/kernel-stub-name.cu (+12-1) 


<pre>
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 08769c98dc298a0..0efe7e8db0183fe 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1234,7 +1234,10 @@ llvm::GlobalValue 
*CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
   Var-&gt;setAlignment(CGM.getPointerAlign().getAsAlign());
   Var-&gt;setDSOLocal(F-&gt;isDSOLocal());
   Var-&gt;setVisibility(F-&gt;getVisibility());
-  CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
+  auto *FD = cast&lt;FunctionDecl&gt;(GD.getDecl());
+  auto *FT = FD-&gt;getPrimaryTemplate();
+  if (!FT || FT-&gt;isThisDeclarationADefinition())
+    CGM.maybeSetTrivialComdat(*FD, *Var);
   KernelHandles[F-&gt;getName()] = Var;
   KernelStubs[Var] = F;
   return Var;
diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu 
b/clang/test/CodeGenCUDA/kernel-stub-name.cu
index 9884046fcd0fd0c..008d66bd590b759 100644
--- a/clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -26,12 +26,13 @@
 // GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr 
@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
 // GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr 
@[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
 // GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
+// GNU: @[[HTDKERN:_Z16temp_kernel_declIiEvT_]] = external constant ptr, align 
8
 
 // MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr 
@[[CSTUB:__device_stub__ckernel]], align 8
 // MSVC: @[[HNSKERN:&quot;\?nskernel@ns@@YAXXZ.*&quot;]] = dso_local constant 
ptr @[[NSSTUB:&quot;\?__device_stub__nskernel@ns@@YAXXZ&quot;]], align 8
 // MSVC: @[[HTKERN:&quot;\?\?\$kernelfunc@H@@YAXXZ.*&quot;]] = linkonce_odr 
dso_local constant ptr 
@[[TSTUB:&quot;\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*&quot;]], comdat, 
align 8
 // MSVC: @[[HDKERN:&quot;\?kernel_decl@@YAXXZ.*&quot;]] = external dso_local 
constant ptr, align 8
-
+// MSVC: @[[HTDKERN:&quot;\?\?\$temp_kernel_decl@H@@YAXH.*&quot;]] = external 
dso_local constant ptr, align 8
 extern &quot;C&quot; __global__ void ckernel() {}
 
 namespace ns {
@@ -43,6 +44,9 @@ __global__ void kernelfunc() {}
 
 __global__ void kernel_decl();
 
+template&lt;class T&gt;
+__global__ void temp_kernel_decl(T x);
+
 extern &quot;C&quot; void (*kernel_ptr)();
 extern &quot;C&quot; void *void_ptr;
 
@@ -69,13 +73,16 @@ extern &quot;C&quot; void launch(void *kern);
 // CHECK: call void @[[NSSTUB]]()
 // CHECK: call void @[[TSTUB]]()
 // GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
+// GNU: call void @[[TDSTUB:_Z31__device_stub__temp_kernel_declIiEvT_]](
 // MSVC: call void @[[DSTUB:&quot;\?__device_stub__kernel_decl@@YAXXZ&quot;]]()
+// MSVC: call void 
@[[TDSTUB:&quot;\?\?\$__device_stub__temp_kernel_decl@H@@YAXH@Z&quot;]](
 
 extern &quot;C&quot; void fun1(void) {
   ckernel&lt;&lt;&lt;1, 1&gt;&gt;&gt;();
   ns::nskernel&lt;&lt;&lt;1, 1&gt;&gt;&gt;();
   kernelfunc&lt;int&gt;&lt;&lt;&lt;1, 1&gt;&gt;&gt;();
   kernel_decl&lt;&lt;&lt;1, 1&gt;&gt;&gt;();
+  temp_kernel_decl&lt;&lt;&lt;1, 1&gt;&gt;&gt;(1);
 }
 
 // Template kernel stub functions
@@ -86,6 +93,7 @@ extern &quot;C&quot; void fun1(void) {
 // Check declaration of stub function for external kernel.
 
 // CHECK: declare{{.*}}@[[DSTUB]]
+// CHECK: declare{{.*}}@[[TDSTUB]]
 
 // Check kernel handle is used for passing the kernel as a function pointer.
 
@@ -94,11 +102,13 @@ extern &quot;C&quot; void fun1(void) {
 // CHECK: call void @launch({{.*}}[[HNSKERN]]
 // CHECK: call void @launch({{.*}}[[HTKERN]]
 // CHECK: call void @launch({{.*}}[[HDKERN]]
+// CHECK: call void @launch({{.*}}[[HTDKERN]]
 extern &quot;C&quot; void fun2() {
   launch((void *)ckernel);
   launch((void *)ns::nskernel);
   launch((void *)kernelfunc&lt;int&gt;);
   launch((void *)kernel_decl);
+  launch((void *)temp_kernel_decl&lt;int&gt;);
 }
 
 // Check kernel handle is used for assigning a kernel to a function pointer.
@@ -148,3 +158,4 @@ extern &quot;C&quot; void fun5() {
 // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
 // NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub
 // NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl
+// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}temp_kernel_decl
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66283
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to