yaxunl updated this revision to Diff 322021. yaxunl marked 3 inline comments as done. yaxunl edited the summary of this revision. yaxunl added a comment.
Revised by Artem's comments. Use CUID hash as postfix for static variable name. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D85223/new/ https://reviews.llvm.org/D85223 Files: clang/include/clang/AST/ASTContext.h clang/lib/AST/ASTContext.cpp clang/lib/CodeGen/CGCUDANV.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/CodeGenModule.h clang/test/CodeGenCUDA/device-var-linkage.cu clang/test/CodeGenCUDA/managed-var.cu clang/test/CodeGenCUDA/static-device-var-rdc.cu clang/test/SemaCUDA/static-device-var.cu
Index: clang/test/SemaCUDA/static-device-var.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/static-device-var.cu @@ -0,0 +1,37 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +__device__ void f1() { + const static int b = 123; + static int a; +} + +__global__ void k1() { + const static int b = 123; + static int a; +} + +static __device__ int x; +static __constant__ int y; + +__global__ void kernel(int *a) { + a[0] = x; + a[1] = y; +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); +} Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -0,0 +1,89 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,INT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,INT-HOST %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,EXT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,EXT-HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + + +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @_ZL1y = internal global i32 undef + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1x = dso_local addrspace(1) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:b04fd23c98500190]]\00" + +static __device__ int x; + +// Test static device variables not used by host code should not be externalized +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 + +static __device__ int x2; + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1y = dso_local addrspace(4) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + b[0] = &w; + b[1] = &x2; + devfun(b); +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2 +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/test/CodeGenCUDA/managed-var.cu =================================================================== --- clang/test/CodeGenCUDA/managed-var.cu +++ clang/test/CodeGenCUDA/managed-var.cu @@ -2,18 +2,18 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=COMMON,DEV %s +// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=COMMON,DEV %s +// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=COMMON,DEV,RDC-D %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,HOST,NORDC %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ +// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,HOST,RDC %s #include "Inputs/cuda.h" @@ -38,9 +38,12 @@ // HOST-DAG: @ex = external externally_initialized global i32* extern __managed__ int ex; -// DEV-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// NORDC-D-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.init = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null +// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:b04fd23c98500190]]\00" static __managed__ int sx = 1; // Force ex and sx mitted in device compilation. @@ -139,6 +142,6 @@ } // HOST-DAG: __hipRegisterManagedVar({{.*}}@x {{.*}}@x.init {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4) -// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.init +// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.init {{.*}}@[[DEVNAMESX]] // HOST-NOT: __hipRegisterManagedVar({{.*}}@ex {{.*}}@ex.init // HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32) Index: clang/test/CodeGenCUDA/device-var-linkage.cu =================================================================== --- clang/test/CodeGenCUDA/device-var-linkage.cu +++ clang/test/CodeGenCUDA/device-var-linkage.cu @@ -2,13 +2,13 @@ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s // RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s // RUN: %clang_cc1 -triple nvptx \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s // RUN: %clang_cc1 -triple nvptx \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s #include "Inputs/cuda.h" @@ -37,14 +37,15 @@ extern __managed__ int ev3; // NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1 = internal addrspace(1) global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:b04fd23c98500190]] = dso_local addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = dso_local addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef static __constant__ int sv2; -// DEV-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// NORDC-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null static __managed__ int sv3; Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1423,6 +1423,10 @@ TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); + /// Print the postfix for externalized static variable for single source + /// offloading languages CUDA and HIP. + void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1184,6 +1184,11 @@ } } + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode && + CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) + CGM.printPostfixForExternalizedStaticVar(Out); return std::string(Out.str()); } @@ -1241,9 +1246,16 @@ } } - auto FoundName = MangledDeclNames.find(CanonicalGD); - if (FoundName != MangledDeclNames.end()) - return FoundName->second; + // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a + // static device variable depends on whether the variable is referenced by + // a host or device host function. Therefore the mangled name cannot be + // cached. + if (!LangOpts.CUDAIsDevice || + !getContext().mayExternalizeStaticVar(GD.getDecl())) { + auto FoundName = MangledDeclNames.find(CanonicalGD); + if (FoundName != MangledDeclNames.end()) + return FoundName->second; + } // Keep the first result in the case of a mangling collision. const auto *ND = cast<NamedDecl>(GD.getDecl()); @@ -6276,3 +6288,8 @@ } return false; } + +void CodeGenModule::printPostfixForExternalizedStaticVar( + llvm::raw_ostream &OS) const { + OS << ".static." << getContext().getCUIDHash(); +} Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -253,6 +253,17 @@ DeviceSideName = std::string(Out.str()); } else DeviceSideName = std::string(ND->getIdentifier()->getName()); + + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode && + !CGM.getLangOpts().CUID.empty()) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << DeviceSideName; + CGM.printPostfixForExternalizedStaticVar(Out); + DeviceSideName = std::string(Out.str()); + } return DeviceSideName; } Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -84,6 +84,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -10555,7 +10556,10 @@ return GVA_StrongODR; // Single source offloading languages like CUDA/HIP need to be able to // access static device variables from host code of the same compilation - // unit. This is done by externalizing the static variable. + // unit. This is done by externalizing the static variable with a shared + // name between the host and device compilation which is the same for the + // same compilation unit whereas different among different compilation + // units. if (Context.shouldExternalizeStaticVar(D)) return GVA_StrongExternal; } @@ -11443,10 +11447,8 @@ !D->getAttr<CUDAConstantAttr>()->isImplicit()); // CUDA/HIP: static managed variables need to be externalized since it is // a declaration in IR, therefore cannot have internal linkage. - // ToDo: externalize static variables for -fgpu-rdc. return IsStaticVar && - (D->hasAttr<HIPManagedAttr>() || - (!getLangOpts().GPURelocatableDeviceCode && IsExplicitDeviceVar)); + (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar); } bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { @@ -11454,3 +11456,12 @@ (D->hasAttr<HIPManagedAttr>() || CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D))); } + +StringRef ASTContext::getCUIDHash() const { + if (!CUIDHash.empty()) + return CUIDHash; + if (LangOpts.CUID.empty()) + return StringRef(); + CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true); + return CUIDHash; +} Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -299,6 +299,10 @@ /// This is lazily created. This is intentionally not serialized. mutable llvm::StringMap<StringLiteral *> StringLiteralCache; + /// MD5 hash of CUID. It is calculated when first used and cached by this + /// data member. + mutable std::string CUIDHash; + /// Representation of a "canonical" template template parameter that /// is used in canonical template names. class CanonicalTemplateTemplateParm : public llvm::FoldingSetNode { @@ -3116,6 +3120,8 @@ /// Whether a C++ static variable should be externalized. bool shouldExternalizeStaticVar(const Decl *D) const; + StringRef getCUIDHash() const; + private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits