================ @@ -245,6 +247,41 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, return DefaultGlobalAS; } +void SPIRVTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (!M.getLangOpts().HIP || + M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) + return; + if (GV->isDeclaration()) + return; + + auto F = dyn_cast<llvm::Function>(GV); + if (!F) + return; + + auto FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + if (!FD->hasAttr<CUDAGlobalAttr>()) + return; + + unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock; + if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) + N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue(); + + // We encode the maximum flat WG size in the first component of the 3D + // max_work_group_size attribute, which will get reverse translated into the + // original AMDGPU attribute when targeting AMDGPU. ---------------- arsenm wrote:
I'm still confused about why this is supposed to be "OK" https://github.com/llvm/llvm-project/pull/116820 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits