This revision was automatically updated to reflect the committed changes.
yaxunl marked 6 inline comments as done.
Closed by commit rG47acdec1dd5d: [CUDA][HIP] Support accessing static device
variable in host code for -fgpu-rdc (authored by yaxunl).
Herald added a project: clang.
Changed prior to commit:
https://reviews.llvm.org/D85223?vs=322021&id=326223#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D85223/new/
https://reviews.llvm.org/D85223
Files:
clang/include/clang/AST/ASTContext.h
clang/lib/AST/ASTContext.cpp
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/test/CodeGenCUDA/device-var-linkage.cu
clang/test/CodeGenCUDA/managed-var.cu
clang/test/CodeGenCUDA/static-device-var-rdc.cu
clang/test/SemaCUDA/static-device-var.cu
Index: clang/test/SemaCUDA/static-device-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/static-device-var.cu
@@ -0,0 +1,50 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host
+
+// Checks allowed usage of file-scope and function-scope static variables.
+
+// host-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+// Checks static variables are allowed in device functions.
+
+__device__ void f1() {
+ const static int b = 123;
+ static int a;
+}
+
+// Checks static variables are allowd in global functions.
+
+__global__ void k1() {
+ const static int b = 123;
+ static int a;
+}
+
+// Checks static device and constant variables are allowed in device and
+// host functions, and static host variables are not allowed in device
+// functions.
+
+static __device__ int x;
+static __constant__ int y;
+static int z;
+
+__global__ void kernel(int *a) {
+ a[0] = x;
+ a[1] = y;
+ a[2] = z;
+ // dev-error@-1 {{reference to __host__ variable 'z' in __global__ function}}
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+ getDeviceSymbol(&x);
+ getDeviceSymbol(&y);
+}
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -0,0 +1,97 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=DEV,INT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=HOST,INT-HOST %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s
+
+// Check host and device compilations use the same postfixes for static
+// variable names.
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
+
+#include "Inputs/cuda.h"
+
+// Test function scope static device variable, which should not be externalized.
+// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
+
+
+// HOST-DAG: @_ZL1x = internal global i32 undef
+// HOST-DAG: @_ZL1y = internal global i32 undef
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1x = dso_local addrspace(1) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00"
+
+// POSTFIX: @_ZL1x.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0
+// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"
+
+static __device__ int x;
+
+// Test static device variables not used by host code should not be externalized
+// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0
+
+static __device__ int x2;
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1y = dso_local addrspace(4) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
+
+static __constant__ int y;
+
+// Test static host variable, which should not be externalized nor registered.
+// HOST-DAG: @_ZL1z = internal global i32 0
+// DEV-NOT: @_ZL1z
+static int z;
+
+// Test static device variable in inline function, which should not be
+// externalized nor registered.
+// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
+
+inline __device__ void devfun(const int ** b) {
+ const static int p = 2;
+ b[0] = &p;
+}
+
+__global__ void kernel(int *a, const int **b) {
+ const static int w = 1;
+ a[0] = x;
+ a[1] = y;
+ b[0] = &w;
+ b[1] = &x2;
+ devfun(b);
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+ getDeviceSymbol(&x);
+ getDeviceSymbol(&y);
+ z = 123;
+}
+
+// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -2,19 +2,24 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=COMMON,DEV %s
+// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=COMMON,DEV %s
+// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
+// RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
// RUN: -check-prefixes=COMMON,HOST,NORDC %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
-// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=COMMON,HOST,RDC %s
+// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.host
+// RUN: cat %t.host | FileCheck -check-prefixes=COMMON,HOST,RDC %s
+
+// Check device and host compilation use the same postfix for static
+// variable name.
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
#include "Inputs/cuda.h"
@@ -45,10 +50,17 @@
// HOST-DAG: @ex = external externally_initialized global i32*
extern __managed__ int ex;
-// DEV-DAG: @_ZL2sx.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4
-// DEV-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-D-DAG: @_ZL2sx.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4
+// NORDC-D-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = dso_local addrspace(1) externally_initialized global i32 1, align 4
+// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL2sx.managed = internal global i32 1
// HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
+// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
+// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00"
+
+// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00"
static __managed__ int sx = 1;
// DEV-DAG: @llvm.compiler.used
@@ -154,6 +166,6 @@
}
// HOST-DAG: __hipRegisterManagedVar({{.*}}@x {{.*}}@x.managed {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4)
-// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.managed
+// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.managed {{.*}}@[[DEVNAMESX]]
// HOST-NOT: __hipRegisterManagedVar({{.*}}@ex {{.*}}@ex.managed
// HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32)
Index: clang/test/CodeGenCUDA/device-var-linkage.cu
===================================================================
--- clang/test/CodeGenCUDA/device-var-linkage.cu
+++ clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -2,13 +2,13 @@
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \
+// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,RDC %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
// RUN: %clang_cc1 -triple nvptx \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \
+// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
#include "Inputs/cuda.h"
@@ -37,14 +37,15 @@
extern __managed__ int ev3;
// NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1 = internal addrspace(1) global i32 0
+// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
static __device__ int sv1;
// NORDC-DAG: @_ZL3sv2 = dso_local addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
static __constant__ int sv2;
-// DEV-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
static __managed__ int sv3;
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1422,6 +1422,10 @@
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();
+ /// Print the postfix for externalized static variable for single source
+ /// offloading languages CUDA and HIP.
+ void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
+
private:
llvm::Constant *GetOrCreateLLVMFunction(
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1184,6 +1184,11 @@
}
}
+ // Make unique name for device side static file-scope variable for HIP.
+ if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+ CGM.getLangOpts().GPURelocatableDeviceCode &&
+ CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
+ CGM.printPostfixForExternalizedStaticVar(Out);
return std::string(Out.str());
}
@@ -1241,9 +1246,16 @@
}
}
- auto FoundName = MangledDeclNames.find(CanonicalGD);
- if (FoundName != MangledDeclNames.end())
- return FoundName->second;
+ // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a
+ // static device variable depends on whether the variable is referenced by
+ // a host or device host function. Therefore the mangled name cannot be
+ // cached.
+ if (!LangOpts.CUDAIsDevice ||
+ !getContext().mayExternalizeStaticVar(GD.getDecl())) {
+ auto FoundName = MangledDeclNames.find(CanonicalGD);
+ if (FoundName != MangledDeclNames.end())
+ return FoundName->second;
+ }
// Keep the first result in the case of a mangling collision.
const auto *ND = cast<NamedDecl>(GD.getDecl());
@@ -6249,3 +6261,8 @@
}
return false;
}
+
+void CodeGenModule::printPostfixForExternalizedStaticVar(
+ llvm::raw_ostream &OS) const {
+ OS << ".static." << getContext().getCUIDHash();
+}
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -255,6 +255,17 @@
DeviceSideName = std::string(Out.str());
} else
DeviceSideName = std::string(ND->getIdentifier()->getName());
+
+ // Make unique name for device side static file-scope variable for HIP.
+ if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+ CGM.getLangOpts().GPURelocatableDeviceCode &&
+ !CGM.getLangOpts().CUID.empty()) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << DeviceSideName;
+ CGM.printPostfixForExternalizedStaticVar(Out);
+ DeviceSideName = std::string(Out.str());
+ }
return DeviceSideName;
}
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -84,6 +84,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -10645,7 +10646,10 @@
return GVA_StrongODR;
// Single source offloading languages like CUDA/HIP need to be able to
// access static device variables from host code of the same compilation
- // unit. This is done by externalizing the static variable.
+ // unit. This is done by externalizing the static variable with a shared
+ // name between the host and device compilation which is the same for the
+ // same compilation unit whereas different among different compilation
+ // units.
if (Context.shouldExternalizeStaticVar(D))
return GVA_StrongExternal;
}
@@ -11533,10 +11537,8 @@
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since it is
// a declaration in IR, therefore cannot have internal linkage.
- // ToDo: externalize static variables for -fgpu-rdc.
return IsStaticVar &&
- (D->hasAttr<HIPManagedAttr>() ||
- (!getLangOpts().GPURelocatableDeviceCode && IsExplicitDeviceVar));
+ (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
}
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
@@ -11544,3 +11546,12 @@
(D->hasAttr<HIPManagedAttr>() ||
CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)));
}
+
+StringRef ASTContext::getCUIDHash() const {
+ if (!CUIDHash.empty())
+ return CUIDHash;
+ if (LangOpts.CUID.empty())
+ return StringRef();
+ CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
+ return CUIDHash;
+}
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -299,6 +299,10 @@
/// This is lazily created. This is intentionally not serialized.
mutable llvm::StringMap<StringLiteral *> StringLiteralCache;
+ /// MD5 hash of CUID. It is calculated when first used and cached by this
+ /// data member.
+ mutable std::string CUIDHash;
+
/// Representation of a "canonical" template template parameter that
/// is used in canonical template names.
class CanonicalTemplateTemplateParm : public llvm::FoldingSetNode {
@@ -3117,6 +3121,8 @@
/// Whether a C++ static variable should be externalized.
bool shouldExternalizeStaticVar(const Decl *D) const;
+ StringRef getCUIDHash() const;
+
private:
/// All OMPTraitInfo objects live in this collection, one per
/// `pragma omp [begin] declare variant` directive.
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits