https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/66283:
Currently, clang emits LLVM IR that fails verifier for the following code: ``` template<typename T> __global__ void foo(T x); void bar() { foo<<<1, 1>>>(0); } ``` This is due to clang putting the kernel handle for foo into comdat, which is not allowed, since the kernel handle is a declaration. The siutation is similar to calling a declaration-only template function. The callee will be a declaration in LLVM IR and won't be put into comdat. This is in contrast to calling a template function with body, which will be put into comdat. Fixes: SWDEV-419769 >From d7a57a2bfa70af6fc4d021e8a06fc668342d36dc Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <yaxun....@amd.com> Date: Wed, 13 Sep 2023 12:44:09 -0400 Subject: [PATCH] [HIP] Fix comdat of template kernel handle Currently, clang emits LLVM IR that fails verifier for the following code: ``` template<typename T> __global__ void foo(T x); void bar() { foo<<<1, 1>>>(0); } ``` This is due to clang putting the kernel handle for foo into comdat, which is not allowed, since the kernel handle is a declaration. The siutation is similar to calling a declaration-only template function. The callee will be a declaration in LLVM IR and won't be put into comdat. This is in contrast to calling a template function with body, which will be put into comdat. Fixes: SWDEV-419769 --- clang/lib/CodeGen/CGCUDANV.cpp | 5 ++++- clang/test/CodeGenCUDA/kernel-stub-name.cu | 13 ++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 08769c98dc298a0..0efe7e8db0183fe 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1234,7 +1234,10 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setAlignment(CGM.getPointerAlign().getAsAlign()); Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); - CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); + auto *FD = cast<FunctionDecl>(GD.getDecl()); + auto *FT = FD->getPrimaryTemplate(); + if (!FT || FT->isThisDeclarationADefinition()) + CGM.maybeSetTrivialComdat(*FD, *Var); KernelHandles[F->getName()] = Var; KernelStubs[Var] = F; return Var; diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu index 9884046fcd0fd0c..008d66bd590b759 100644 --- a/clang/test/CodeGenCUDA/kernel-stub-name.cu +++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -26,12 +26,13 @@ // GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8 // GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8 // GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8 +// GNU: @[[HTDKERN:_Z16temp_kernel_declIiEvT_]] = external constant ptr, align 8 // MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8 // MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8 // MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8 // MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8 - +// MSVC: @[[HTDKERN:"\?\?\$temp_kernel_decl@H@@YAXH.*"]] = external dso_local constant ptr, align 8 extern "C" __global__ void ckernel() {} namespace ns { @@ -43,6 +44,9 @@ __global__ void kernelfunc() {} __global__ void kernel_decl(); +template<class T> +__global__ void temp_kernel_decl(T x); + extern "C" void (*kernel_ptr)(); extern "C" void *void_ptr; @@ -69,13 +73,16 @@ extern "C" void launch(void *kern); // CHECK: call void @[[NSSTUB]]() // CHECK: call void @[[TSTUB]]() // GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +// GNU: call void @[[TDSTUB:_Z31__device_stub__temp_kernel_declIiEvT_]]( // MSVC: call void @[[DSTUB:"\?__device_stub__kernel_decl@@YAXXZ"]]() +// MSVC: call void @[[TDSTUB:"\?\?\$__device_stub__temp_kernel_decl@H@@YAXH@Z"]]( extern "C" void fun1(void) { ckernel<<<1, 1>>>(); ns::nskernel<<<1, 1>>>(); kernelfunc<int><<<1, 1>>>(); kernel_decl<<<1, 1>>>(); + temp_kernel_decl<<<1, 1>>>(1); } // Template kernel stub functions @@ -86,6 +93,7 @@ extern "C" void fun1(void) { // Check declaration of stub function for external kernel. // CHECK: declare{{.*}}@[[DSTUB]] +// CHECK: declare{{.*}}@[[TDSTUB]] // Check kernel handle is used for passing the kernel as a function pointer. @@ -94,11 +102,13 @@ extern "C" void fun1(void) { // CHECK: call void @launch({{.*}}[[HNSKERN]] // CHECK: call void @launch({{.*}}[[HTKERN]] // CHECK: call void @launch({{.*}}[[HDKERN]] +// CHECK: call void @launch({{.*}}[[HTDKERN]] extern "C" void fun2() { launch((void *)ckernel); launch((void *)ns::nskernel); launch((void *)kernelfunc<int>); launch((void *)kernel_decl); + launch((void *)temp_kernel_decl<int>); } // Check kernel handle is used for assigning a kernel to a function pointer. @@ -148,3 +158,4 @@ extern "C" void fun5() { // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]] // NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub // NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl +// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}temp_kernel_decl _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits