yaxunl updated this revision to Diff 385812.
yaxunl retitled this revision from "[HIP] Do not use kernel handle for MSVC 
target" to "[CUDA][HIP] Allow comdat for kernels".
yaxunl edited the summary of this revision.
yaxunl added a comment.

fix comdat instead


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112492/new/

https://reviews.llvm.org/D112492

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/kernel-stub-name.cu
  clang/test/CodeGenCUDA/usual-deallocators.cu

Index: clang/test/CodeGenCUDA/usual-deallocators.cu
===================================================================
--- clang/test/CodeGenCUDA/usual-deallocators.cu
+++ clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -109,7 +109,7 @@
 }
 
 // Make sure that we've generated the kernel used by A::~A.
-// DEVICE-LABEL: define dso_local void @_Z1fIiEvT_
+// DEVICE-LABEL: define void @_Z1fIiEvT_
 
 // Make sure we've picked deallocator for the correct side of compilation.
 
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -2,16 +2,35 @@
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -fcuda-include-gpubinary %t -o - -x hip\
-// RUN:   | FileCheck %s
+// RUN:   | FileCheck -check-prefixes=CHECK,GNU %s
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -fcuda-include-gpubinary %t -o - -x hip\
+// RUN:   | FileCheck -check-prefix=NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
+// RUN:     -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
+// RUN:     %t -o - -x hip\
+// RUN:   | FileCheck -check-prefixes=CHECK,MSVC %s
+
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
+// RUN:     -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
+// RUN:     %t -o - -x hip\
+// RUN:   | FileCheck -check-prefix=NEG %s
 
 #include "Inputs/cuda.h"
 
-// Kernel handles
+// Check kernel handles are emitted for non-MSVC target but not for MSVC target.
 
-// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8
-// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8
-// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8
-// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+// GNU: @[[HCKERN:ckernel]] = constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8
+// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
+// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
+// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+
+// MSVC: @[[HCKERN:ckernel]] = dso_local constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8
+// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant void ()* @[[NSSTUB:"\?nskernel@ns@@YAXXZ"]], align 8
+// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant void ()* @[[TSTUB:"\?\?\$kernelfunc@H@@YAXXZ.*"]], comdat, align 8
+// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant void ()*, align 8
 
 extern "C" __global__ void ckernel() {}
 
@@ -24,10 +43,10 @@
 
 __global__ void kernel_decl();
 
-void (*kernel_ptr)();
-void *void_ptr;
+extern "C" void (*kernel_ptr)();
+extern "C" void *void_ptr;
 
-void launch(void *kern);
+extern "C" void launch(void *kern);
 
 // Device side kernel names
 
@@ -37,21 +56,22 @@
 
 // Non-template kernel stub functions
 
-// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
+// CHECK: define{{.*}}@[[CSTUB]]
 // CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
-// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
 
+// CHECK: define{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
 
-// Check kernel stub is used for triple chevron
+// Check kernel stub is called for triple chevron.
 
-// CHECK-LABEL: define{{.*}}@_Z4fun1v()
+// CHECK-LABEL: define{{.*}}@fun1()
 // CHECK: call void @[[CSTUB]]()
 // CHECK: call void @[[NSSTUB]]()
-// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]()
-// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
+// CHECK: call void @[[TSTUB]]()
+// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
+// MSVC: call void @[[DSTUB:"\?kernel_decl@@YAXXZ"]]()
 
-void fun1(void) {
+extern "C" void fun1(void) {
   ckernel<<<1, 1>>>();
   ns::nskernel<<<1, 1>>>();
   kernelfunc<int><<<1, 1>>>();
@@ -67,28 +87,28 @@
 
 // CHECK: declare{{.*}}@[[DSTUB]]
 
-// Check kernel handle is used for passing the kernel as a function pointer
+// Check kernel handle is used for passing the kernel as a function pointer.
 
-// CHECK-LABEL: define{{.*}}@_Z4fun2v()
-// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]]
-// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]]
-// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]]
-// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]]
-void fun2() {
+// CHECK-LABEL: define{{.*}}@fun2()
+// CHECK: call void @launch({{.*}}[[HCKERN]]
+// CHECK: call void @launch({{.*}}[[HNSKERN]]
+// CHECK: call void @launch({{.*}}[[HTKERN]]
+// CHECK: call void @launch({{.*}}[[HDKERN]]
+extern "C" void fun2() {
   launch((void *)ckernel);
   launch((void *)ns::nskernel);
   launch((void *)kernelfunc<int>);
   launch((void *)kernel_decl);
 }
 
-// Check kernel handle is used for assigning a kernel to a function pointer
+// Check kernel handle is used for assigning a kernel to a function pointer.
 
-// CHECK-LABEL: define{{.*}}@_Z4fun3v()
+// CHECK-LABEL: define{{.*}}@fun3()
 // CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
 // CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
 // CHECK:  store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
 // CHECK:  store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
-void fun3() {
+extern "C" void fun3() {
   kernel_ptr = ckernel;
   kernel_ptr = &ckernel;
   void_ptr = (void *)ckernel;
@@ -96,34 +116,37 @@
 }
 
 // Check kernel stub is loaded from kernel handle when function pointer is
-// used with triple chevron
+// used with triple chevron.
 
-// CHECK-LABEL: define{{.*}}@_Z4fun4v()
+// CHECK-LABEL: define{{.*}}@fun4()
 // CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
-// CHECK:  call i32 @_Z16hipConfigureCall4dim3S_mP9hipStream
+// CHECK:  call i32 @{{.*hipConfigureCall}}
 // CHECK:  %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
 // CHECK:  %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()**
 // CHECK:  %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8
 // CHECK:  call void %[[STUB]]()
-void fun4() {
+extern "C" void fun4() {
   kernel_ptr = ckernel;
   kernel_ptr<<<1,1>>>();
 }
 
-// Check kernel handle is passed to a function
+// Check kernel handle is passed to a function.
 
-// CHECK-LABEL: define{{.*}}@_Z4fun5v()
+// CHECK-LABEL: define{{.*}}@fun5()
 // CHECK:  store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
 // CHECK:  %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
 // CHECK:  %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8*
-// CHECK:  call void @_Z6launchPv(i8* %[[CAST]])
-void fun5() {
+// CHECK:  call void @launch(i8* %[[CAST]])
+extern "C" void fun5() {
   kernel_ptr = ckernel;
   launch((void *)kernel_ptr);
 }
 
+// Check kernel handle is registered.
+
 // CHECK-LABEL: define{{.*}}@__hip_register_globals
 // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
 // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
 // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
-// CHECK-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@[[HDKERN]]{{.*}}@{{[0-9]*}}
+// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub
+// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -4287,11 +4287,6 @@
   if (!CGM.supportsCOMDAT())
     return false;
 
-  // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent
-  // them being "merged" by the COMDAT Folding linker optimization.
-  if (D.hasAttr<CUDAGlobalAttr>())
-    return false;
-
   if (D.hasAttr<SelectAnyAttr>())
     return true;
 
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -1147,6 +1147,7 @@
   Var->setAlignment(CGM.getPointerAlign().getAsAlign());
   Var->setDSOLocal(F->isDSOLocal());
   Var->setVisibility(F->getVisibility());
+  CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
   KernelHandles[F] = Var;
   KernelStubs[Var] = F;
   return Var;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to