yaxunl updated this revision to Diff 528447.
yaxunl added a comment.

add a test to make sure device var in an anonymous namespace is not 
externalized if used by device code only.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152164/new/

https://reviews.llvm.org/D152164

Files:
  clang/lib/AST/ASTContext.cpp
  clang/test/CodeGenCUDA/anon-ns.cu
  clang/test/CodeGenCUDA/kernel-in-anon-ns.cu

Index: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
+++ /dev/null
@@ -1,58 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
-// RUN:   -emit-llvm -o - -x hip %s > %t.dev
-
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN:   -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
-// RUN:   -emit-llvm -o - -x hip %s > %t.host
-
-// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
-
-// RUN: echo "GPU binary" > %t.fatbin
-
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
-// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
-// RUN:   -emit-llvm -o - %s > %t.dev
-
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN:   -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
-// RUN:   -emit-llvm -o - %s > %t.host
-
-// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
-
-#include "Inputs/cuda.h"
-
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
-
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
-
-// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
-// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
-// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
-
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]
-
-
-template <typename T>
-__global__ void tempKern(T x) {}
-
-namespace {
-  __global__ void kernel() {}
-  struct X {};
-  X x;
-  auto lambda = [](){};
-}
-
-void test() {
-  kernel<<<1, 1>>>();
-
-  tempKern<<<1, 1>>>(x);
-
-  tempKern<<<1, 1>>>(lambda);
-}
Index: clang/test/CodeGenCUDA/anon-ns.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/anon-ns.cu
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
+// RUN:   -emit-llvm -o - -x hip %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN:   -aux-triple amdgcn-amd-amdhsa -std=c++17 -fgpu-rdc \
+// RUN:   -emit-llvm -o - -x hip %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
+
+// RUN: echo "GPU binary" > %t.fatbin
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
+// RUN:   -emit-llvm -o - %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN:   -aux-triple nvptx -std=c++17 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
+// RUN:   -emit-llvm -o - %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
+
+#include "Inputs/cuda.h"
+
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: @[[VAR1:_ZN12_GLOBAL__N_11AE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
+// HIP-DAG: @[[VAR2:_ZN12_GLOBAL__N_11BE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
+// HIP-DAG: @[[VAR3:_Z7tempVarIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
+
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: @[[VAR2:_ZN12_GLOBAL__N_11BE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
+// CUDA-DAG: @[[VAR3:_Z7tempVarIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
+
+// COMMON-DAG: @[[VAR4:_ZN12_GLOBAL__N_11CE]] = internal addrspace(1) global
+// COMNEG-NOT: @{{.*}} = {{.*}} c"_ZN12_GLOBAL__N_11CE{{.*}}\00"
+
+// HIP-DAG: @llvm.compiler.used = {{.*}}@[[VAR1]]{{.*}}@[[VAR3]]{{.*}}@[[VAR2]]
+// CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VAR3]]{{.*}}@[[VAR2]]
+
+// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
+// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
+// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
+// HIP-DAG: @[[STR4:.*]] = {{.*}} c"[[VAR1]]\00"
+// COMMON-DAG: @[[STR5:.*]] = {{.*}} c"[[VAR2]]\00"
+// COMMON-DAG: @[[STR6:.*]] = {{.*}} c"[[VAR3]]\00"
+
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]
+// HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[STR4]]
+// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[STR5]]
+// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[STR6]]
+
+template <typename T>
+__global__ void tempKern(T x) {}
+
+template <typename T>
+__device__ T tempVar;
+
+namespace {
+  struct X {};
+  X x;
+  auto lambda = [](){};
+#if __HIP__
+  __managed__ int A = 1;
+#endif
+  __constant__ int B = 2;
+
+  // C should not be externalized since it is used by device code only.
+  __device__ int C = 3;
+  __global__ void kernel() { C = 4; }
+}
+
+template<typename T>
+void getSymbol(T *x) {}
+
+void test() {
+  kernel<<<1, 1>>>();
+
+  tempKern<<<1, 1>>>(x);
+
+  tempKern<<<1, 1>>>(lambda);
+
+  // A, B, and tempVar<X> should be externalized since they are
+  // used by host code.
+#if __HIP__
+  getSymbol(&A);
+#endif
+  getSymbol(&B);
+  getSymbol(&tempVar<X>);
+}
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -13602,16 +13602,17 @@
 }
 
 bool ASTContext::mayExternalize(const Decl *D) const {
-  bool IsStaticVar =
-      isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
+  bool IsInternalVar =
+      isa<VarDecl>(D) &&
+      basicGVALinkageForVariable(*this, cast<VarDecl>(D)) == GVA_Internal;
   bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
                               !D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
                              (D->hasAttr<CUDAConstantAttr>() &&
                               !D->getAttr<CUDAConstantAttr>()->isImplicit());
-  // CUDA/HIP: static managed variables need to be externalized since it is
+  // CUDA/HIP: managed variables need to be externalized since it is
   // a declaration in IR, therefore cannot have internal linkage. Kernels in
   // anonymous name space needs to be externalized to avoid duplicate symbols.
-  return (IsStaticVar &&
+  return (IsInternalVar &&
           (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
          (D->hasAttr<CUDAGlobalAttr>() &&
           basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to