Author: jingyue Date: Sat Aug 22 00:49:28 2015 New Revision: 245786 URL: http://llvm.org/viewvc/llvm-project?rev=245786&view=rev Log: [CUDA] Change initializer for CUDA device code based on CUDA documentation.
Summary: According to CUDA documentation, global variables declared with __device__, __constant__ can be initialized from host code, so mark them as externally initialized. Because __shared__ variables cannot have an initialization as part of their declaration and since the value maybe kept across different kernel invocation, the value of __shared__ is effectively undefined instead of zero initialized. Wrongly using zero initializer may cause illegitimate optimization, e.g. removing unused __constant__ variable because it's not updated in the device code and the value is initialized with zero. Test Plan: test/CodeGenCUDA/address-spaces.cu Patch by Xuetian Weng Reviewers: jholewinski, eliben, tra, jingyue Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D12241 Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/test/CodeGenCUDA/address-spaces.cu cfe/trunk/test/CodeGenCUDA/filter-decl.cu Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=245786&r1=245785&r2=245786&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Sat Aug 22 00:49:28 2015 @@ -1990,7 +1990,16 @@ void CodeGenModule::EmitGlobalVarDefinit const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); - if (!InitExpr) { + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part + // of their declaration." + if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice + && D->hasAttr<CUDASharedAttr>()) { + if (InitExpr) { + Error(D->getLocation(), + "__shared__ variable cannot have an initialization."); + } + Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); + } else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // @@ -2076,6 +2085,17 @@ void CodeGenModule::EmitGlobalVarDefinit if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, GV); + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on + // the device. [...]" + // CUDA B.2.2 "The __constant__ qualifier, optionally used together with + // __device__, declares a variable that: [...] + // Is accessible from all the threads within the grid and from the host + // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() + // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." + if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice && + (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) { + GV->setExternallyInitialized(true); + } GV->setInitializer(Init); // If it is safe to mark the global 'constant', do so now. Modified: cfe/trunk/test/CodeGenCUDA/address-spaces.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/address-spaces.cu?rev=245786&r1=245785&r2=245786&view=diff ============================================================================== --- cfe/trunk/test/CodeGenCUDA/address-spaces.cu (original) +++ cfe/trunk/test/CodeGenCUDA/address-spaces.cu Sat Aug 22 00:49:28 2015 @@ -5,10 +5,10 @@ #include "Inputs/cuda.h" -// CHECK: @i = addrspace(1) global +// CHECK: @i = addrspace(1) externally_initialized global __device__ int i; -// CHECK: @j = addrspace(4) global +// CHECK: @j = addrspace(4) externally_initialized global __constant__ int j; // CHECK: @k = addrspace(3) global @@ -24,7 +24,7 @@ struct MyStruct { // CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer // CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00 // CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00 -// CHECK: @b = addrspace(3) global float 0.000000e+00 +// CHECK: @b = addrspace(3) global float undef __device__ void foo() { // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) Modified: cfe/trunk/test/CodeGenCUDA/filter-decl.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/filter-decl.cu?rev=245786&r1=245785&r2=245786&view=diff ============================================================================== --- cfe/trunk/test/CodeGenCUDA/filter-decl.cu (original) +++ cfe/trunk/test/CodeGenCUDA/filter-decl.cu Sat Aug 22 00:49:28 2015 @@ -9,12 +9,12 @@ // CHECK-DEVICE-NOT: module asm "file scope asm is host only" __asm__("file scope asm is host only"); -// CHECK-HOST-NOT: constantdata = global -// CHECK-DEVICE: constantdata = global +// CHECK-HOST-NOT: constantdata = externally_initialized global +// CHECK-DEVICE: constantdata = externally_initialized global __constant__ char constantdata[256]; -// CHECK-HOST-NOT: devicedata = global -// CHECK-DEVICE: devicedata = global +// CHECK-HOST-NOT: devicedata = externally_initialized global +// CHECK-DEVICE: devicedata = externally_initialized global __device__ char devicedata[256]; // CHECK-HOST-NOT: shareddata = global _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits