yaxunl updated this revision to Diff 322021.
yaxunl marked 3 inline comments as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Revised by Artem's comments. Use CUID hash as postfix for static variable name.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85223/new/

https://reviews.llvm.org/D85223

Files:
  clang/include/clang/AST/ASTContext.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/CodeGenCUDA/device-var-linkage.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu
  clang/test/SemaCUDA/static-device-var.cu

Index: clang/test/SemaCUDA/static-device-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/static-device-var.cu
@@ -0,0 +1,37 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+__device__ void f1() {
+  const static int b = 123;
+  static int a;
+}
+
+__global__ void k1() {
+  const static int b = 123;
+  static int a;
+}
+
+static __device__ int x;
+static __constant__ int y;
+
+__global__ void kernel(int *a) {
+  a[0] = x;
+  a[1] = y;
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+  getDeviceSymbol(&x);
+  getDeviceSymbol(&y);
+}
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -0,0 +1,89 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,INT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST,INT-HOST %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,EXT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST,EXT-HOST %s
+
+#include "Inputs/cuda.h"
+
+// Test function scope static device variable, which should not be externalized.
+// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
+
+
+// HOST-DAG: @_ZL1x = internal global i32 undef
+// HOST-DAG: @_ZL1y = internal global i32 undef
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1x = dso_local addrspace(1) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1x.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:b04fd23c98500190]]\00"
+
+static __device__ int x;
+
+// Test static device variables not used by host code should not be externalized
+// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0
+
+static __device__ int x2;
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1y = dso_local addrspace(4) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
+
+static __constant__ int y;
+
+// Test static host variable, which should not be externalized nor registered.
+// HOST-DAG: @_ZL1z = internal global i32 0
+// DEV-NOT: @_ZL1z
+static int z;
+
+// Test static device variable in inline function, which should not be
+// externalized nor registered.
+// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
+
+inline __device__ void devfun(const int ** b) {
+  const static int p = 2;
+  b[0] = &p;
+}
+
+__global__ void kernel(int *a, const int **b) {
+  const static int w = 1;
+  a[0] = x;
+  a[1] = y;
+  b[0] = &w;
+  b[1] = &x2;
+  devfun(b);
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+  getDeviceSymbol(&x);
+  getDeviceSymbol(&y);
+  z = 123;
+}
+
+// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -2,18 +2,18 @@
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
 // RUN:   -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=COMMON,DEV %s
+// RUN:   -check-prefixes=COMMON,DEV,NORDC-D %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=COMMON,DEV %s
+// RUN:   -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=COMMON,DEV,RDC-D %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
 // RUN:   -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=COMMON,HOST,NORDC %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
-// RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
+// RUN:   -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=COMMON,HOST,RDC %s
 
 #include "Inputs/cuda.h"
@@ -38,9 +38,12 @@
 // HOST-DAG: @ex = external externally_initialized global i32*
 extern __managed__ int ex;
 
-// DEV-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-D-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-D-DAG: @_ZL2sx.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL2sx.init = internal global i32 1
 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
+// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
+// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:b04fd23c98500190]]\00"
 static __managed__ int sx = 1;
 
 // Force ex and sx mitted in device compilation.
@@ -139,6 +142,6 @@
 }
 
 // HOST-DAG: __hipRegisterManagedVar({{.*}}@x {{.*}}@x.init {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4)
-// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.init
+// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.init {{.*}}@[[DEVNAMESX]]
 // HOST-NOT: __hipRegisterManagedVar({{.*}}@ex {{.*}}@ex.init
 // HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32)
Index: clang/test/CodeGenCUDA/device-var-linkage.cu
===================================================================
--- clang/test/CodeGenCUDA/device-var-linkage.cu
+++ clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -2,13 +2,13 @@
 // RUN:   -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=DEV,NORDC %s
 // RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s \
+// RUN:   -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=DEV,RDC %s
 // RUN: %clang_cc1 -triple nvptx \
 // RUN:   -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=HOST,NORDC-H %s
 // RUN: %clang_cc1 -triple nvptx \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s \
+// RUN:   -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
 // RUN:   | FileCheck -check-prefixes=HOST,RDC-H %s
 
 #include "Inputs/cuda.h"
@@ -37,14 +37,15 @@
 extern __managed__ int ev3;
 
 // NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1 = internal addrspace(1) global i32 0
+// RDC-DAG: @_ZL3sv1.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv1 = internal global i32 undef
 static __device__ int sv1;
 // NORDC-DAG: @_ZL3sv2 = dso_local addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv2 = internal global i32 undef
 static __constant__ int sv2;
-// DEV-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
 static __managed__ int sv3;
 
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1423,6 +1423,10 @@
                                            TBAAAccessInfo *TBAAInfo = nullptr);
   bool stopAutoInit();
 
+  /// Print the postfix for externalized static variable for single source
+  /// offloading languages CUDA and HIP.
+  void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
+
 private:
   llvm::Constant *GetOrCreateLLVMFunction(
       StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1184,6 +1184,11 @@
       }
     }
 
+  // Make unique name for device side static file-scope variable for HIP.
+  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+      CGM.getLangOpts().GPURelocatableDeviceCode &&
+      CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
+    CGM.printPostfixForExternalizedStaticVar(Out);
   return std::string(Out.str());
 }
 
@@ -1241,9 +1246,16 @@
     }
   }
 
-  auto FoundName = MangledDeclNames.find(CanonicalGD);
-  if (FoundName != MangledDeclNames.end())
-    return FoundName->second;
+  // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a
+  // static device variable depends on whether the variable is referenced by
+  // a host or device host function. Therefore the mangled name cannot be
+  // cached.
+  if (!LangOpts.CUDAIsDevice ||
+      !getContext().mayExternalizeStaticVar(GD.getDecl())) {
+    auto FoundName = MangledDeclNames.find(CanonicalGD);
+    if (FoundName != MangledDeclNames.end())
+      return FoundName->second;
+  }
 
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
@@ -6276,3 +6288,8 @@
   }
   return false;
 }
+
+void CodeGenModule::printPostfixForExternalizedStaticVar(
+    llvm::raw_ostream &OS) const {
+  OS << ".static." << getContext().getCUIDHash();
+}
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -253,6 +253,17 @@
     DeviceSideName = std::string(Out.str());
   } else
     DeviceSideName = std::string(ND->getIdentifier()->getName());
+
+  // Make unique name for device side static file-scope variable for HIP.
+  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+      CGM.getLangOpts().GPURelocatableDeviceCode &&
+      !CGM.getLangOpts().CUID.empty()) {
+    SmallString<256> Buffer;
+    llvm::raw_svector_ostream Out(Buffer);
+    Out << DeviceSideName;
+    CGM.printPostfixForExternalizedStaticVar(Out);
+    DeviceSideName = std::string(Out.str());
+  }
   return DeviceSideName;
 }
 
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -84,6 +84,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -10555,7 +10556,10 @@
       return GVA_StrongODR;
     // Single source offloading languages like CUDA/HIP need to be able to
     // access static device variables from host code of the same compilation
-    // unit. This is done by externalizing the static variable.
+    // unit. This is done by externalizing the static variable with a shared
+    // name between the host and device compilation which is the same for the
+    // same compilation unit whereas different among different compilation
+    // units.
     if (Context.shouldExternalizeStaticVar(D))
       return GVA_StrongExternal;
   }
@@ -11443,10 +11447,8 @@
                               !D->getAttr<CUDAConstantAttr>()->isImplicit());
   // CUDA/HIP: static managed variables need to be externalized since it is
   // a declaration in IR, therefore cannot have internal linkage.
-  // ToDo: externalize static variables for -fgpu-rdc.
   return IsStaticVar &&
-         (D->hasAttr<HIPManagedAttr>() ||
-          (!getLangOpts().GPURelocatableDeviceCode && IsExplicitDeviceVar));
+         (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
 }
 
 bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
@@ -11454,3 +11456,12 @@
          (D->hasAttr<HIPManagedAttr>() ||
           CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)));
 }
+
+StringRef ASTContext::getCUIDHash() const {
+  if (!CUIDHash.empty())
+    return CUIDHash;
+  if (LangOpts.CUID.empty())
+    return StringRef();
+  CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
+  return CUIDHash;
+}
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -299,6 +299,10 @@
   /// This is lazily created.  This is intentionally not serialized.
   mutable llvm::StringMap<StringLiteral *> StringLiteralCache;
 
+  /// MD5 hash of CUID. It is calculated when first used and cached by this
+  /// data member.
+  mutable std::string CUIDHash;
+
   /// Representation of a "canonical" template template parameter that
   /// is used in canonical template names.
   class CanonicalTemplateTemplateParm : public llvm::FoldingSetNode {
@@ -3116,6 +3120,8 @@
   /// Whether a C++ static variable should be externalized.
   bool shouldExternalizeStaticVar(const Decl *D) const;
 
+  StringRef getCUIDHash() const;
+
 private:
   /// All OMPTraitInfo objects live in this collection, one per
   /// `pragma omp [begin] declare variant` directive.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to