Author: Yaxun (Sam) Liu Date: 2023-06-06T12:03:48-04:00 New Revision: f2677afe91592673663d24a63706c3218c477c1c
URL: https://github.com/llvm/llvm-project/commit/f2677afe91592673663d24a63706c3218c477c1c DIFF: https://github.com/llvm/llvm-project/commit/f2677afe91592673663d24a63706c3218c477c1c.diff LOG: [CUDA][HIP] Externalize device var in anonymous namespace Device variables in an anonymous namespace may be referenced by host code, therefore they need to be externalized in a similar way as a static device variables or kernels in an anonymous namespace. Fixes: https://github.com/ROCm-Developer-Tools/HIP/issues/3246 Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D152164 Added: clang/test/CodeGenCUDA/anon-ns.cu Modified: clang/lib/AST/ASTContext.cpp clang/test/CodeGenCUDA/host-used-device-var.cu Removed: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu ################################################################################ diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 9af6fa67db1ef..b7d9c3cc46e53 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -13581,16 +13581,17 @@ operator<<(const StreamingDiagnostic &DB, } bool ASTContext::mayExternalize(const Decl *D) const { - bool IsStaticVar = - isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static; + bool IsInternalVar = + isa<VarDecl>(D) && + basicGVALinkageForVariable(*this, cast<VarDecl>(D)) == GVA_Internal; bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() && !D->getAttr<CUDADeviceAttr>()->isImplicit()) || (D->hasAttr<CUDAConstantAttr>() && !D->getAttr<CUDAConstantAttr>()->isImplicit()); - // CUDA/HIP: static managed variables need to be externalized since it is + // CUDA/HIP: managed variables need to be externalized since it is // a declaration in IR, therefore cannot have internal linkage. Kernels in // anonymous name space needs to be externalized to avoid duplicate symbols. - return (IsStaticVar && + return (IsInternalVar && (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) || (D->hasAttr<CUDAGlobalAttr>() && basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) == diff --git a/clang/test/CodeGenCUDA/anon-ns.cu b/clang/test/CodeGenCUDA/anon-ns.cu new file mode 100644 index 0000000000000..3c55e9907dd6c --- /dev/null +++ b/clang/test/CodeGenCUDA/anon-ns.cu @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++17 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s + +// RUN: echo "GPU binary" > %t.fatbin + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \ +// RUN: -emit-llvm -o - %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple nvptx -std=c++17 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ +// RUN: -emit-llvm -o - %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s + +#include "Inputs/cuda.h" + +// HIP-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global +// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global +// HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global + +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global +// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global + +// COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global +// COMNEG-NOT: @{{.*}} = {{.*}} c"_ZN12_GLOBAL__N_12vdE{{.*}}\00" + +// HIP-DAG: @llvm.compiler.used = {{.*}}@[[VM]]{{.*}}@[[VT]]{{.*}}@[[VC]] +// CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VT]]{{.*}}@[[VC]] + +// COMMON-DAG: @[[KERNSTR:.*]] = {{.*}} c"[[KERN]]\00" +// COMMON-DAG: @[[KTXSTR:.*]] = {{.*}} c"[[KTX]]\00" +// COMMON-DAG: @[[KTLSTR:.*]] = {{.*}} c"[[KTL]]\00" +// HIP-DAG: @[[VMSTR:.*]] = {{.*}} c"[[VM]]\00" +// COMMON-DAG: @[[VCSTR:.*]] = {{.*}} c"[[VC]]\00" +// COMMON-DAG: @[[VTSTR:.*]] = {{.*}} c"[[VT]]\00" + +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KERNSTR]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTXSTR]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTLSTR]] +// HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[VMSTR]] +// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VCSTR]] +// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VTSTR]] + +template <typename T> +__global__ void kt(T x) {} + +template <typename T> +__device__ T vt; + +namespace { + struct X {}; + X x; + auto lambda = [](){}; +#if __HIP__ + __managed__ int vm = 1; +#endif + __constant__ int vc = 2; + + // C should not be externalized since it is used by device code only. + __device__ int vd = 3; + __global__ void kernel() { vd = 4; } +} + +template<typename T> +void getSymbol(T *x) {} + +void test() { + kernel<<<1, 1>>>(); + + kt<<<1, 1>>>(x); + + kt<<<1, 1>>>(lambda); + + // A, B, and tempVar<X> should be externalized since they are + // used by host code. +#if __HIP__ + getSymbol(&vm); +#endif + getSymbol(&vc); + getSymbol(&vt<X>); +} diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu index 2c0d06d07c6f2..7cb31aff84264 100644 --- a/clang/test/CodeGenCUDA/host-used-device-var.cu +++ b/clang/test/CodeGenCUDA/host-used-device-var.cu @@ -73,9 +73,8 @@ constexpr int constexpr_var1a = 1; inline constexpr int constexpr_var1b = 1; // Check constant constexpr variables ODR-used by host code only. -// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept. -// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept. -// DEV-NEG-NOT: constexpr_var2a +// Device-side constexpr variables accessed by host code should be externalized and kept. +// DEV-DAG: @_ZL15constexpr_var2a = addrspace(4) externally_initialized constant i32 2 // DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2 __constant__ constexpr int constexpr_var2a = 2; inline __constant__ constexpr int constexpr_var2b = 2; @@ -184,6 +183,7 @@ public: // Check the exact list of variables to ensure @_ZL2u4 is not among them. // DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE +// DEV-SAME: {{^[^@]*}} @_ZL15constexpr_var2a // DEV-SAME: {{^[^@]*}} @_ZL2u3 // DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1 // DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu deleted file mode 100644 index bc753d76e5c11..0000000000000 --- a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ -// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ -// RUN: -emit-llvm -o - -x hip %s > %t.dev - -// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ -// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ -// RUN: -emit-llvm -o - -x hip %s > %t.host - -// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s - -// RUN: echo "GPU binary" > %t.fatbin - -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ -// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ -// RUN: -emit-llvm -o - %s > %t.dev - -// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ -// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ -// RUN: -emit-llvm -o - %s > %t.host - -// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s - -#include "Inputs/cuda.h" - -// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( -// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( -// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( - -// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( -// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( -// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( - -// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" -// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" -// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" - -// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]] -// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]] -// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]] - - -template <typename T> -__global__ void tempKern(T x) {} - -namespace { - __global__ void kernel() {} - struct X {}; - X x; - auto lambda = [](){}; -} - -void test() { - kernel<<<1, 1>>>(); - - tempKern<<<1, 1>>>(x); - - tempKern<<<1, 1>>>(lambda); -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits