yaxunl created this revision. yaxunl added a reviewer: tra. yaxunl requested review of this revision.
Currently static variables are allowed in device, global, and host device functions. A static variable in device and global functions is supposed to have implicit device attribute. Currently it does not. This causes incorrect diagnostics about host variables accessed by device functions. Another issue is static device variables are allowed in host functions since host functions could pass them to kernels for useful computations. Currently they are not emitted in device compilation, which should be fixed. This patch also handles static variables in host device functions and function scope static managed variables, and externalization of such variables for fno-gpu-rdc case. https://reviews.llvm.org/D95560 Files: clang/lib/AST/ASTContext.cpp clang/lib/CodeGen/CGDecl.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDecl.cpp clang/test/AST/ast-dump-func-scope-static-var.cu clang/test/CodeGenCUDA/func-scope-static-var.cu clang/test/SemaCUDA/func-scope-static-var.cu
Index: clang/test/SemaCUDA/func-scope-static-var.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/func-scope-static-var.cu @@ -0,0 +1,115 @@ +// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=host,com -x hip %s +// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fcuda-is-device -verify=dev,com -x hip %s +// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fgpu-rdc -verify=host,com -x hip %s +// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fgpu-rdc -fcuda-is-device -verify=dev,com -x hip %s + +#include "Inputs/cuda.h" + +struct A { + static int a; + static __device__ int fun(); +}; + +int A::a; +__device__ int A::fun() { + return a; + // dev-error@-1 {{reference to __host__ variable 'a' in __device__ function}} +} + +// Assuming this function accepts a pointer to a device variable and calculate some result. +__device__ __host__ int work(const int *x); + +int fun1(int x) { + static __device__ int a = sizeof(a); + static __device__ int b = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const __device__ int c = sizeof(a); + static constexpr __device__ int d = sizeof(a); + static __constant__ __device__ int e = sizeof(a); + static __managed__ __device__ int f = sizeof(a); + static int a2 = sizeof(a); + static int b2 = x; + static const int c2 = sizeof(a); + static constexpr int d2 = sizeof(a); + static __constant__ int e2 = sizeof(a); + static __managed__ int f2 = sizeof(a); + return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2; +} + +__device__ int fun2(int x) { + static __device__ int a = sizeof(a); + static __device__ int b = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const __device__ int c = sizeof(a); + static constexpr __device__ int d = sizeof(a); + static __constant__ __device__ int e = sizeof(a); + static __managed__ __device__ int f = sizeof(a); + static int a2 = sizeof(a); + static int b2 = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const int c2 = sizeof(a); + static constexpr int d2 = sizeof(a); + static __constant__ int e2 = sizeof(a); + static __managed__ int f2 = sizeof(a); + return a + b + c + d + e + f + a2 + b2 + c2 + d2 + e2 + f2; +} + +__device__ __host__ int fun3(int x) { + static __device__ int a = sizeof(a); + static __device__ int b = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const __device__ int c = sizeof(a); + static constexpr __device__ int d = sizeof(a); + static __constant__ __device__ int e = sizeof(a); + static __managed__ __device__ int f = sizeof(a); + static int a2 = sizeof(a); + static int b2 = x; + // dev-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const int c2 = sizeof(a); + static constexpr int d2 = sizeof(a); + static __constant__ int e2 = sizeof(a); + static __managed__ int f2 = sizeof(a); + return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2; +} + +template<typename T> +__device__ __host__ int fun4(T x) { + static __device__ int a = sizeof(x); + static __device__ int b = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const __device__ int c = sizeof(x); + static constexpr __device__ int d = sizeof(x); + static __constant__ __device__ int e = sizeof(a); + static __managed__ __device__ int f = sizeof(a); + static int a2 = sizeof(x); + static int b2 = x; + // dev-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const int c2 = sizeof(x); + static constexpr int d2 = sizeof(x); + static __constant__ int e2 = sizeof(a); + static __managed__ int f2 = sizeof(a); + return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2; +} + +__device__ __host__ int fun4_caller() { + return fun4(1); + // com-note@-1 {{in instantiation of function template specialization 'fun4<int>' requested here}} +} + +__global__ void fun5(int x, int *y) { + static __device__ int a = sizeof(a); + static __device__ int b = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const __device__ int c = sizeof(a); + static constexpr __device__ int d = sizeof(a); + static __constant__ __device__ int e = sizeof(a); + static __managed__ __device__ int f = sizeof(a); + static int a2 = sizeof(a); + static int b2 = x; + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + static const int c2 = sizeof(a); + static constexpr int d2 = sizeof(a); + static __constant__ int e2 = sizeof(a); + static __managed__ int f2 = sizeof(a); + *y = a + b + c + d + e + f + a2 + b2 + c2 + d2 + e2 + f2; +} Index: clang/test/CodeGenCUDA/func-scope-static-var.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/func-scope-static-var.cu @@ -0,0 +1,166 @@ +// REQUIRES: x86-registered-target, amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,NORDC %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,RDC %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ +// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST %s + +#include "Inputs/cuda.h" + +// In device functions, static device variables are not externalized nor shadowed. +// Static managed variable behaves like a normal static device variable. + +// DEV: @_ZZ4fun1vE1a = internal addrspace(1) global i32 1 +// HOST-NOT: @_ZZ4fun1vE1a +// DEV: @_ZZ4fun1vE1b = internal addrspace(1) global i32 2 +// HOST-NOT: @_ZZ4fun1vE1b +// DEV: @_ZZ4fun1vE1c = internal addrspace(4) constant i32 3 +// HOST-NOT: @_ZZ4fun1vE1c +// DEV: @_ZZ4fun1vE1d = internal addrspace(4) constant i32 4 +// HOST-NOT: @_ZZ4fun1vE1d +// DEV: @_ZZ4fun1vE1e = internal addrspace(4) global i32 5 +// HOST-NOT: @_ZZ4fun1vE1e +// DEV: @_ZZ4fun1vE1f = internal addrspace(1) global i32 6 +// HOST-NOT: @_ZZ4fun1vE1f +__device__ int fun1() { + static int a = 1; + static __device__ int b = 2; + static const int c = 3; + static constexpr int d = 4; + static __constant__ int e = 5; + static __managed__ int f = 6; + return a + b + c + d + e + f; +} + +// Assuming this function accepts a device pointer and does some work. +__host__ __device__ int work(int *x); + +// In host function, static device variables are externalized if used and shadowed. + +// DEV-NOT: @_ZZ4fun2vE1a +// HOST: @_ZZ4fun2vE1a = internal global i32 1 +// NORDC: @_ZZ4fun2vE1b = dso_local addrspace(1) global i32 2 +// RDC: @_ZZ4fun2vE1b = internal addrspace(1) global i32 2 +// HOST: @_ZZ4fun2vE1b = internal global i32 2 +// DEV-NOT: @_ZZ4fun2vE1c +// HOST: @_ZZ4fun2vE1c = internal constant i32 3 +// DEV-NOT: @_ZZ4fun2vE1d +// HOST: @_ZZ4fun2vE1d = internal constant i32 4 +// NORDC: @_ZZ4fun2vE1e = dso_local addrspace(4) global i32 5 +// RDC: @_ZZ4fun2vE1e = internal addrspace(4) global i32 5 +// HOST: @_ZZ4fun2vE1e = internal global i32 5 +// DEV: @_ZZ4fun2vE1f = internal addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @_ZZ4fun2vE1b to i32*) +// HOST: @_ZZ4fun2vE1f = internal global i32* @_ZZ4fun2vE1b +// NORDC: @_ZZ4fun2vE1b_0 = dso_local addrspace(1) global i32 6 +// RDC: @_ZZ4fun2vE1b_0 = internal addrspace(1) global i32 6 +// HOST: @_ZZ4fun2vE1b_0 = internal global i32 6 +// NORDC: @_ZZ4fun2vE1g = dso_local addrspace(1) externally_initialized global i32 undef +// RDC: @_ZZ4fun2vE1g = external dso_local addrspace(1) global i32 +// HOST: @_ZZ4fun2vE1g = internal global i32 7 +int fun2() { + static int a = 1; + static __device__ int b = 2; + static const int c = 3; + static constexpr int d = 4; + static __constant__ int e = 5; + static __device__ int *f = &b; + for (int i = 0; i < 10; i++) { + static __device__ int b = 6; + work(&b); + } + static __managed__ int g = 7; + return a + c + d + work(&e) + g; +} + +// In host device function, explicit static device variables are externalized +// if used and registered. Default static variables are implicit device +// variables in device compilation and host variables in host compilation, +// which are independent, and the device variables are not shadowed. + +// DEV: @_ZZ4fun3vE1a = internal addrspace(1) global i32 1 +// HOST: @_ZZ4fun3vE1a = internal global i32 1 +// NORDC: @_ZZ4fun3vE1b = dso_local addrspace(1) global i32 2 +// RDC: @_ZZ4fun3vE1b = internal addrspace(1) global i32 2 +// HOST: @_ZZ4fun3vE1b = internal global i32 2 +// DEV: @_ZZ4fun3vE1c = internal addrspace(4) constant i32 3 +// HOST: @_ZZ4fun3vE1c = internal constant i32 3 +// DEV: @_ZZ4fun3vE1d = internal addrspace(4) constant i32 4 +// HOST: @_ZZ4fun3vE1d = internal constant i32 4 +// NORDC: @_ZZ4fun3vE1e = dso_local addrspace(4) global i32 5 +// RDC: @_ZZ4fun3vE1e = internal addrspace(4) global i32 5 +// HOST: @_ZZ4fun3vE1e = internal global i32 5 +// DEV: @_ZZ4fun3vE1f = internal addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @_ZZ4fun3vE1b to i32*) +// HOST: @_ZZ4fun3vE1f = internal global i32* @_ZZ4fun3vE1b +// NORDC: @_ZZ4fun3vE1b_0 = dso_local addrspace(1) global i32 6 +// RDC: @_ZZ4fun3vE1b_0 = internal addrspace(1) global i32 6 +// HOST: @_ZZ4fun3vE1b_0 = internal global i32 6 +// NORDC: @_ZZ4fun3vE1g = dso_local addrspace(1) externally_initialized global i32 undef +// RDC: @_ZZ4fun3vE1g = external dso_local addrspace(1) global i32 +// HOST: @_ZZ4fun3vE1g = internal global i32 7 +__host__ __device__ int fun3() { + static int a = 1; + static __device__ int b = 2; + static const int c = 3; + static constexpr int d = 4; + static __constant__ int e = 5; + static __device__ int *f = &b; + for (int i = 0; i < 10; i++) { + static __device__ int b = 6; + work(&b); + } + static __managed__ int g = 7; + return a + c + d + work(&e) + g; +} + +// In kernels, static device variables are not externalized nor shadowed. +// Static managed variable behaves like a normal static device variable. + +// DEV: @_ZZ4fun4vE1a = internal addrspace(1) global i32 1 +// HOST-NOT: @_ZZ4fun4vE1a +// DEV: @_ZZ4fun4vE1b = internal addrspace(1) global i32 2 +// HOST-NOT: @_ZZ4fun4vE1b +// DEV: @_ZZ4fun4vE1c = internal addrspace(4) constant i32 3 +// HOST-NOT: @_ZZ4fun4vE1c +// DEV: @_ZZ4fun4vE1d = internal addrspace(4) constant i32 4 +// HOST-NOT: @_ZZ4fun4vE1d +// DEV: @_ZZ4fun4vE1e = internal addrspace(4) global i32 5 +// HOST-NOT: @_ZZ4fun4vE1e +// DEV: @_ZZ4fun4vE1f = internal addrspace(1) global i32 6 +// HOST-NOT: @_ZZ4fun4vE1f +__global__ void fun4() { + static int a = 1; + static __device__ int b = 2; + static const int c = 3; + static constexpr int d = 4; + static __constant__ int e = 5; + static __managed__ int f = 6; +} + +// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun1vE1f +// HOST-NOT: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun1vE1f +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1b +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1e +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1f +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1b_0 +// HOST: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun2vE1g +// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1a +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1b +// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1c +// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1d +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1e +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1f +// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1b_0 +// HOST: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun3vE1g +// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun4vE1f +// HOST-NOT: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun4vE1f Index: clang/test/AST/ast-dump-func-scope-static-var.cu =================================================================== --- /dev/null +++ clang/test/AST/ast-dump-func-scope-static-var.cu @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -std=c++11 -ast-dump -x hip %s | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -ast-dump -fcuda-is-device -x hip %s | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK-LABEL: FunctionDecl {{.*}} fun1 +// CHECK: VarDecl {{.*}} a 'int' static +// CHECK-NOT: CUDADeviceAttr +// CHECK: VarDecl {{.*}} b 'int' static +// CHECK-NEXT: CUDADeviceAttr {{.*}}cuda.h +// CHECK: VarDecl {{.*}} c 'const int' static cinit +// CHECK-NOT: CUDADeviceAttr +// CHECK: VarDecl {{.*}} d 'const int' static constexpr cinit +// CHECK-NOT: CUDADeviceAttr +// CHECK: VarDecl {{.*}} e 'int' static cinit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK: VarDecl {{.*}} f 'int' static cinit +// CHECK: HIPManagedAttr {{.*}}cuda.h +// CHECK: CUDADeviceAttr {{.*}}Implicit +// CHECK-NOT: CUDADeviceAttr +void fun1() { + static int a; + static __device__ int b; + static const int c = 1; + static constexpr int d = 1; + static __constant__ int e = 1; + static __managed__ int f = 1; +} + +// CHECK-LABEL: FunctionDecl {{.*}} fun2 +// CHECK: VarDecl {{.*}} a 'int' static +// CHECK-NEXT: CUDADeviceAttr {{.*}}Implicit +// CHECK: VarDecl {{.*}} b 'int' static +// CHECK-NEXT: CUDADeviceAttr {{.*}}cuda.h +// CHECK: VarDecl {{.*}} c 'const int' static cinit +// CHECK: CUDAConstantAttr {{.*}}Implicit +// CHECK-NOT: CUDADeviceAttr +// CHECK: VarDecl {{.*}} d 'const int' static constexpr cinit +// CHECK: CUDAConstantAttr {{.*}}Implicit +// CHECK-NOT: CUDADeviceAttr +// CHECK: VarDecl {{.*}} e 'int' static cinit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK: VarDecl {{.*}} f 'int' static cinit +// CHECK: HIPManagedAttr {{.*}}cuda.h +// CHECK: CUDADeviceAttr {{.*}}Implicit +__device__ void fun2() { + static int a; + static __device__ int b; + static const int c = 1; + static constexpr int d = 1; + static __constant__ int e = 1; + static __managed__ int f = 1; +} Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -7244,6 +7244,25 @@ // Handle attributes prior to checking for duplicates in MergeVarDecl ProcessDeclAttributes(S, NewVD, D); + // CUDA/HIP: Function-scope static variables in device or global functions + // have implicit device or constant attribute. Function-scope static variables + // in host device functions have implicit device or constant attribute in + // device compilation only. + if (getLangOpts().CUDA && SC == SC_Static) { + FunctionDecl *CurFD = getCurFunctionDecl(); + if (CurFD && + (CurFD->hasAttr<CUDADeviceAttr>() || + CurFD->hasAttr<CUDAGlobalAttr>()) && + (getLangOpts().CUDAIsDevice || !CurFD->hasAttr<CUDAHostAttr>()) && + !NewVD->hasAttr<CUDASharedAttr>() && + !NewVD->hasAttr<CUDAConstantAttr>()) { + if (NewVD->isConstexpr() || NewVD->getType().getQualifiers().hasConst()) + NewVD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); + else if (!NewVD->hasAttr<CUDADeviceAttr>()) + NewVD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); + } + } + if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice || getLangOpts().SYCLIsDevice) { if (EmitTLSUnsupportedError && Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -530,9 +530,12 @@ if (!AllowedInit && (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) { auto *Init = VD->getInit(); + // isConstantInitializer cannot be called with dependent value, therefore + // we skip checking dependent value here. This is OK since + // checkAllowedCUDAInitializer is called again when the template is + // instantiated. AllowedInit = - ((VD->getType()->isDependentType() || Init->isValueDependent()) && - VD->isConstexpr()) || + (VD->getType()->isDependentType() || Init->isValueDependent()) || Init->isConstantInitializer(Context, VD->getType()->isReferenceType()); } Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -94,6 +94,42 @@ llvm_unreachable("invalid C++ ABI kind"); } +// CUDA/HIP: A host function may contain static device or constant variable +// declarations. In device compilation, definition of such variables need +// to be emitted even though the host function is not emitted. This class +// does that. +class CUDAStaticDeviceVarEmitter + : public StmtVisitor<CUDAStaticDeviceVarEmitter> { +public: + CodeGenFunction CGF; + CUDAStaticDeviceVarEmitter(CodeGenModule &CGM) : CGF(CGM) {} + void Visit(Stmt *S) { + if (!S) + return; + if (auto *DS = dyn_cast<DeclStmt>(S)) { + for (auto &&D : DS->decls()) { + if (auto *VD = dyn_cast<VarDecl>(D)) { + if (VD->hasAttr<CUDADeviceAttr>() || + VD->hasAttr<CUDAConstantAttr>()) { + llvm::GlobalValue::LinkageTypes Linkage = + CGF.CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGF.EmitStaticVarDecl(*VD, Linkage); + } + } + } + } + for (auto &&SS : S->children()) + Visit(SS); + } + void runOn(const FunctionDecl *FD) { + assert(CGF.getLangOpts().CUDAIsDevice); + assert(!FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAGlobalAttr>()); + assert(FD->hasBody()); + CGF.CurFuncDecl = FD; + Visit(FD->getBody()); + } +}; + CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::Module &M, @@ -2748,8 +2784,16 @@ !Global->hasAttr<CUDAConstantAttr>() && !Global->hasAttr<CUDASharedAttr>() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType()) + !Global->getType()->isCUDADeviceBuiltinTextureType()) { + if (auto *FD = dyn_cast<FunctionDecl>(Global)) { + if (FD->hasBody()) { + // Emit static device or constant variables for host functions. + CUDAStaticDeviceVarEmitter E(*this); + E.runOn(FD); + } + } return; + } } else { // We need to emit host-side 'shadows' for all global // device-side variables because the CUDA runtime needs their Index: clang/lib/CodeGen/CGDecl.cpp =================================================================== --- clang/lib/CodeGen/CGDecl.cpp +++ clang/lib/CodeGen/CGDecl.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CGBlocks.h" +#include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGDebugInfo.h" @@ -414,15 +415,41 @@ llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + // CUDA/HIP: need to register static device variable declared in host + // or host device functions. + if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice && CurFuncDecl) { + if (auto *FD = dyn_cast<FunctionDecl>(CurFuncDecl)) { + if (!FD->hasAttr<CUDAGlobalAttr>() && + (!FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAHostAttr>())) + CGM.getCUDARuntime().mayRegisterDeviceVar(&D, *var); + } + } + // CUDA's local and local static __shared__ variables should not // have any non-empty initializers. This is ensured by Sema. // Whatever initializer such variable may have when it gets here is // a no-op and should not be emitted. bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && D.hasAttr<CUDASharedAttr>(); - // If this value has an initializer, emit it. - if (D.getInit() && !isCudaSharedVar) + // HIP static managed variables need to be emitted as declarations in device + // compilation in host or host device functions. + bool isUndefManagedVar = false; + if (getLangOpts().CUDAIsDevice && D.hasAttr<HIPManagedAttr>() && + CurFuncDecl) { + if (auto *FD = dyn_cast<FunctionDecl>(CurFuncDecl)) { + if (!FD->hasAttr<CUDAGlobalAttr>() && + (!FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAHostAttr>())) { + isUndefManagedVar = true; + } + } + } + if (isUndefManagedVar) { + var->setInitializer(nullptr); + var->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else if (D.getInit() && !isCudaSharedVar) { + // If this value has an initializer, emit it. var = AddInitializerToStaticVarDecl(D, var); + } var->setAlignment(alignment.getAsAlign()); Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -11439,9 +11439,9 @@ ((D->hasAttr<CUDADeviceAttr>() && !D->getAttr<CUDADeviceAttr>()->isImplicit()) || (D->hasAttr<CUDAConstantAttr>() && - !D->getAttr<CUDAConstantAttr>()->isImplicit())) && - isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() && - cast<VarDecl>(D)->getStorageClass() == SC_Static; + !D->getAttr<CUDAConstantAttr>()->isImplicit()) || + D->hasAttr<HIPManagedAttr>()) && + isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static; } bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits