This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
yaxunl marked 3 inline comments as done.
Closed by commit rG04fb81674ed7: [CUDA][HIP] Externalize kernels with internal 
linkage (authored by yaxunl).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D124189?vs=424364&id=424600#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124189/new/

https://reviews.llvm.org/D124189

Files:
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/device-var-linkage.cu
  clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu

Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -40,6 +40,11 @@
 // RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
 // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s
 
+// Check postfix for CUDA.
+
+// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \
+// RUN:   -check-prefixes=CUDA %s
 
 #include "Inputs/cuda.h"
 
@@ -55,11 +60,12 @@
 // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
 
 // Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00"
+// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00"
+// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 
-// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00"
+// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"
 
 static __device__ int x;
 
@@ -73,8 +79,8 @@
 // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
 
 // Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00"
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
 
 static __constant__ int y;
 
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -1,5 +1,3 @@
-// REQUIRES: x86-registered-target, amdgpu-registered-target
-
 // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
 // RUN:   -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=COMMON,DEV,NORDC-D %s
@@ -52,15 +50,15 @@
 
 // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4
 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
-// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
+// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL2sx.managed = internal global i32 1
 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
 // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
-// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00"
+// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00"
 
-// POSTFIX:  @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00"
+// POSTFIX:  @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00"
 static __managed__ int sx = 1;
 
 // DEV-DAG: @llvm.compiler.used
Index: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
+++ clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
@@ -6,19 +6,53 @@
 // RUN:   -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
 // RUN:   -emit-llvm -o - -x hip %s > %t.host
 
-// RUN: cat %t.dev %t.host | FileCheck %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
+
+// RUN: echo "GPU binary" > %t.fatbin
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
+// RUN:   -emit-llvm -o - %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN:   -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
+// RUN:   -emit-llvm -o - %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
 
 #include "Inputs/cuda.h"
 
-// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
-// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
-// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
+
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
+
+// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
+// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
+// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
+
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]
+
+
+template <typename T>
+__global__ void tempKern(T x) {}
 
 namespace {
-__global__ void kernel() {
-}
+  __global__ void kernel() {}
+  struct X {};
+  X x;
+  auto lambda = [](){};
 }
 
 void test() {
   kernel<<<1, 1>>>();
+
+  tempKern<<<1, 1>>>(x);
+
+  tempKern<<<1, 1>>>(lambda);
 }
Index: clang/test/CodeGenCUDA/device-var-linkage.cu
===================================================================
--- clang/test/CodeGenCUDA/device-var-linkage.cu
+++ clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -1,15 +1,18 @@
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \
+// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \
 // RUN:   -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=DEV,NORDC %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \
+// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \
 // RUN:   -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=DEV,RDC %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx \
+// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \
 // RUN:   -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=HOST,NORDC-H %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx \
+// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \
 // RUN:   -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=HOST,RDC-H %s
+// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \
+// RUN:   -fgpu-rdc -cuid=abc -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefixes=CUDA %s
 
 #include "Inputs/cuda.h"
 
@@ -24,7 +27,9 @@
 // DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
 // NORDC-H-DAG: @v3 = internal externally_initialized global i32* null
 // RDC-H-DAG: @v3 = externally_initialized global i32* null
+#if __HIP__
 __managed__ int v3;
+#endif
 
 // DEV-DAG: @ev1 = external addrspace(1) global i32
 // HOST-DAG: @ev1 = external global i32
@@ -34,25 +39,35 @@
 extern __constant__ int ev2;
 // DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)*
 // HOST-DAG: @ev3 = external externally_initialized global i32*
+#if __HIP__
 extern __managed__ int ev3;
+#endif
 
 // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv1 = internal global i32 undef
+// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 static __device__ int sv1;
 // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv2 = internal global i32 undef
+// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
 static __constant__ int sv2;
 // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
+#if __HIP__
 static __managed__ int sv3;
+#endif
 
 __device__ __host__ int work(int *x);
 
 __device__ __host__ int fun1() {
-  return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3);
+  return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2)
+#if __HIP__
+    + work(&ev3) + work(&sv3)
+#endif
+    ;
 }
 
 // HOST: hipRegisterVar({{.*}}@v1
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -6809,6 +6809,12 @@
 
 void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
                                                     const Decl *D) const {
-  OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
-     << getContext().getCUIDHash();
+  StringRef Tag;
+  // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
+  // postfix beginning with '.' since the symbol name can be demangled.
+  if (LangOpts.HIP)
+    Tag = (isa<VarDecl>(D) ? ".static." : ".intern.");
+  else
+    Tag = (isa<VarDecl>(D) ? "__static__" : "__intern__");
+  OS << Tag << getContext().getCUIDHash();
 }
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -12298,7 +12298,9 @@
   // anonymous name space needs to be externalized to avoid duplicate symbols.
   return (IsStaticVar &&
           (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
-         (D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
+         (D->hasAttr<CUDAGlobalAttr>() &&
+          basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
+              GVA_Internal);
 }
 
 bool ASTContext::shouldExternalize(const Decl *D) const {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to