Author: Yaxun (Sam) Liu Date: 2024-07-11T21:52:04-04:00 New Revision: 90abdf83e273586a43e1270e5f0a11de5cc35383
URL: https://github.com/llvm/llvm-project/commit/90abdf83e273586a43e1270e5f0a11de5cc35383 DIFF: https://github.com/llvm/llvm-project/commit/90abdf83e273586a43e1270e5f0a11de5cc35383.diff LOG: [CUDA][HIP][NFC] add CodeGenModule::shouldEmitCUDAGlobalVar (#98543) Extract the logic that decides whether to emit a global variable, based on its CUDA/HIP host/device-related attributes, into CodeGenModule::shouldEmitCUDAGlobalVar so that it can be reused elsewhere. Added: Modified: clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/CodeGenModule.h Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5c810cd332185..6c10b4a2edef8 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) { return D->isImplicit(); } +bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const { + assert(LangOpts.CUDA && "Should not be called by non-CUDA languages"); + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + return !LangOpts.CUDAIsDevice || Global->hasAttr<CUDADeviceAttr>() || + Global->hasAttr<CUDAConstantAttr>() || + Global->hasAttr<CUDASharedAttr>() || + Global->getType()->isCUDADeviceBuiltinSurfaceType() || + Global->getType()->isCUDADeviceBuiltinTextureType(); +} + void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast<ValueDecl>(GD.getDecl()); @@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Non-constexpr non-lambda implicit host device functions are not emitted // unless they are used on device side. 
if (LangOpts.CUDA) { - if (LangOpts.CUDAIsDevice) { + assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) && + "Expected Variable or Function"); + if (const auto *VD = dyn_cast<VarDecl>(Global)) { + if (!shouldEmitCUDAGlobalVar(VD)) + return; + } else if (LangOpts.CUDAIsDevice) { const auto *FD = dyn_cast<FunctionDecl>(Global); if ((!Global->hasAttr<CUDADeviceAttr>() || - (LangOpts.OffloadImplicitHostDeviceTemplates && FD && + (LangOpts.OffloadImplicitHostDeviceTemplates && hasImplicitAttr<CUDAHostAttr>(FD) && hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() && !isLambdaCallOperator(FD) && !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) && !Global->hasAttr<CUDAGlobalAttr>() && - !Global->hasAttr<CUDAConstantAttr>() && - !Global->hasAttr<CUDASharedAttr>() && - !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType() && !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>())) return; - } else { - // We need to emit host-side 'shadows' for all global - // device-side variables because the CUDA runtime needs their - // size and host-side address in order to provide access to - // their device-side incarnations. - - // So device-only functions are the only things we skip. - if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() && - Global->hasAttr<CUDADeviceAttr>()) - return; - - assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) && - "Expected Variable or Function"); - } + // Device-only functions are the only things we skip. 
+ } else if (!Global->hasAttr<CUDAHostAttr>() && + Global->hasAttr<CUDADeviceAttr>()) + return; } if (LangOpts.OpenMP) { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 585c4ea697fea..caa3786c033b5 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -563,6 +563,9 @@ class CodeGenModule : public CodeGenTypeCache { bool isTriviallyRecursive(const FunctionDecl *F); bool shouldEmitFunction(GlobalDecl GD); + // Whether a global variable should be emitted by CUDA/HIP host/device + // related attributes. + bool shouldEmitCUDAGlobalVar(const VarDecl *VD) const; bool shouldOpportunisticallyEmitVTables(); /// Map used to be sure we don't emit the same CompoundLiteral twice. llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits