https://github.com/pasaulais updated https://github.com/llvm/llvm-project/pull/69229
>From 24b0e5204f9eac6efaf502b5aa254f5ff7e11e69 Mon Sep 17 00:00:00 2001 From: Pierre-Andre Saulais <pierre-an...@codeplay.com> Date: Thu, 23 Nov 2023 17:33:18 +0000 Subject: [PATCH] [AMDGPU] Add an option to disable unsafe uses of atomic xor --- clang/include/clang/Basic/TargetInfo.h | 8 +++ clang/include/clang/Basic/TargetOptions.h | 3 + clang/include/clang/Driver/Options.td | 10 +++ clang/lib/Basic/TargetInfo.cpp | 1 + clang/lib/Basic/Targets/AMDGPU.cpp | 1 + clang/lib/CodeGen/Targets/AMDGPU.cpp | 23 +++++++ clang/lib/Driver/ToolChains/Clang.cpp | 2 + .../amdgpu-unsafe-int-atomics.cl | 62 +++++++++++++++++++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 26 ++++++++ llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 24 +++++-- 10 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/amdgpu-unsafe-int-atomics.cl diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 41f3c2e403cbe..c222e370442df 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -269,6 +269,9 @@ class TargetInfo : public TransferrableTargetInfo, LLVM_PREFERRED_TYPE(bool) unsigned ARMCDECoprocMask : 8; + LLVM_PREFERRED_TYPE(bool) + unsigned AllowAMDGPUFineGrainedMem : 1; + unsigned MaxOpenCLWorkGroupSize; std::optional<unsigned> MaxBitIntWidth; @@ -1009,6 +1012,11 @@ class TargetInfo : public TransferrableTargetInfo, /// allowed. bool allowAMDGPUUnsafeFPAtomics() const { return AllowAMDGPUUnsafeFPAtomics; } + /// Returns whether or not fine-grained memory access is allowed on AMDGPU. + bool allowAMDGPUFineGrainedMem() const { + return AllowAMDGPUFineGrainedMem; + } + /// For ARM targets returns a mask defining which coprocessors are configured /// as Custom Datapath. uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; } diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 2049f03b28893..90cfe6d71c6d0 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -78,6 +78,9 @@ class TargetOptions { /// \brief If enabled, allow AMDGPU unsafe floating point atomics. bool AllowAMDGPUUnsafeFPAtomics = false; + /// \brief If enabled, allow fine-grained memory access on AMDGPU. + bool AllowAMDGPUFineGrainedMem = false; + /// \brief Code object version for AMDGPU. llvm::CodeObjectVersionKind CodeObjectVersion = llvm::CodeObjectVersionKind::COV_None; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1d04e4f6e7e6d..71cc5aeb5c4e6 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4749,6 +4749,16 @@ defm unsafe_fp_atomics : BoolOption<"m", "unsafe-fp-atomics", def faltivec : Flag<["-"], "faltivec">, Group<f_Group>; def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>; + +defm amdgpu_fine_grained_mem : BoolOption<"m", "amdgpu-fine-grained-mem", + TargetOpts<"AllowAMDGPUFineGrainedMem">, DefaultFalse, + PosFlag<SetTrue, [], [ClangOption, CC1Option], + "Indicates that fine-grained memory allocations may be accessed in the " + "kernel. This may result in certain atomic operations being replaced " + "in order to guarantee correct operation when fine-grained memory " + "allocations are used. (AMDGPU only)">, + NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>; + let Flags = [TargetSpecific] in { def maltivec : Flag<["-"], "maltivec">, Group<m_ppc_Features_Group>, HelpText<"Enable AltiVec vector initializer syntax">; diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 6cd5d618a4aca..8e623b59365e5 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -157,6 +157,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { HasAArch64SVETypes = false; HasRISCVVTypes = false; AllowAMDGPUUnsafeFPAtomics = false; + AllowAMDGPUFineGrainedMem = false; ARMCDECoprocMask = 0; // Default to no types using fpret. diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 409ae32ab4242..cf70d2e267d3e 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -232,6 +232,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, HasFloat16 = true; WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; + AllowAMDGPUFineGrainedMem = Opts.AllowAMDGPUFineGrainedMem; // Set pointer width and alignment for the generic address space. PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index b654e3f12af8d..13bec674db8a4 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -392,6 +392,26 @@ void AMDGPUTargetCodeGenInfo::emitTargetGlobals( } } +// Add metadata to specific atomic instructions, to mark them as potentially +// accessing fine-grained memory locations. +static void addFineGrainedAtomicMD(llvm::Function *F) { + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDHelper(Ctx); + auto *Int32Ty = llvm::IntegerType::getInt32Ty(Ctx); + for (llvm::BasicBlock &BB : *F) { + for (llvm::Instruction &I : BB) { + llvm::AtomicRMWInst *ARI = llvm::dyn_cast<llvm::AtomicRMWInst>(&I); + if (!ARI || ARI->getOperation() != llvm::AtomicRMWInst::Xor) + continue; + auto *Key = MDHelper.createString("fine_grained"); + auto *One = MDHelper.createConstant(llvm::ConstantInt::get(Int32Ty, 1)); + llvm::MDNode *MD = llvm::MDNode::get(Ctx, {Key, One}); + auto *Tuple = llvm::MDNode::get(Ctx, {MD}); + ARI->setMetadata("amdgpu.atomic", Tuple); + } + } +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { @@ -413,6 +433,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); + if (M.getContext().getTargetInfo().allowAMDGPUFineGrainedMem()) + addFineGrainedAtomicMD(F); + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) F->addFnAttr("amdgpu-ieee", "false"); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f02f7c841b91f..532c925520c85 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7346,6 +7346,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.addOptInFlag(CmdArgs, options::OPT_munsafe_fp_atomics, options::OPT_mno_unsafe_fp_atomics); + Args.addOptInFlag(CmdArgs, options::OPT_mamdgpu_fine_grained_mem, + options::OPT_mno_amdgpu_fine_grained_mem); Args.addOptOutFlag(CmdArgs, options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee); } diff --git a/clang/test/CodeGenOpenCL/amdgpu-unsafe-int-atomics.cl b/clang/test/CodeGenOpenCL/amdgpu-unsafe-int-atomics.cl new file mode 100644 index 0000000000000..0c81e0797d68b --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-unsafe-int-atomics.cl @@ -0,0 +1,62 @@ +// REQUIRES: amdgpu-registered-target +// +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -emit-llvm \ +// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-DEFAULT %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -emit-llvm -mamdgpu-fine-grained-mem \ +// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-ON %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -emit-llvm -mno-amdgpu-fine-grained-mem \ +// RUN: | FileCheck -check-prefixes=COMMON,UNSAFE-INT-OFF %s + +// Check AMDGCN ISA generation. + +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-DEFAULT %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -mamdgpu-fine-grained-mem \ +// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-ON %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -mno-amdgpu-fine-grained-mem \ +// RUN: | FileCheck -check-prefixes=COMMON-ISA,ISA-OFF %s + +#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable + +typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + +// COMMON-ISA: kern: +// ISA-ON: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc +// ISA-OFF: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc +// ISA-DEFAULT: flat_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc +kernel void kern(global atomic_int *x, int y, global int *z) { + *z = __opencl_atomic_fetch_xor(x, y, memory_order_seq_cst, memory_scope_work_group); +} + +// COMMON: define{{.*}} amdgpu_kernel void @kern +// COMMON: atomicrmw xor ptr addrspace(1) %x, i32 %y syncscope("workgroup") seq_cst, align 4 + +// UNSAFE-INT-ON-SAME: !amdgpu.atomic ![[REF:[0-9]+]] +// UNSAFE-INT-ON: ![[REF]] = !{![[REF2:[0-9]+]]} +// UNSAFE-INT-ON: ![[REF2]] = !{!"fine_grained", i32 1} + +// UNSAFE-INT-OFF-NOT: !amdgpu.atomic + +// UNSAFE-INT-DEFAULT-NOT: !amdgpu.atomic diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 53ab5da013539..e71a874a59af8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -15107,6 +15107,26 @@ bool unsafeFPAtomicsDisabled(Function *F) { "true"; } +// Inspect the instruction's metadata to determine whether or not it can access +// fine-grained memory allocations. Some atomic instructions may fail on certain +// systems when accessing fine-grained memory. +static bool canAccessFineGrainedMem(AtomicRMWInst &RMW) { + if (MDNode *MD = RMW.getMetadata("amdgpu.atomic")) { + for (const MDOperand &Op : MD->operands()) { + MDNode *OpMD = dyn_cast<MDNode>(&*Op); + if (!OpMD || OpMD->getNumOperands() < 2) + continue; + const MDString *NameOp = dyn_cast<MDString>(OpMD->getOperand(0)); + const MDOperand &ValOp = OpMD->getOperand(1); + if (NameOp->getString().equals("fine_grained") && + mdconst::extract<ConstantInt>(ValOp)->getZExtValue() != 0) { + return true; + } + } + } + return false; +} + TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { unsigned AS = RMW->getPointerAddressSpace(); @@ -15229,6 +15249,12 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { } break; } + case AtomicRMWInst::Xor: { + if (AMDGPU::isFlatGlobalAddrSpace(AS) && canAccessFineGrainedMem(*RMW)) { + return AtomicExpansionKind::CmpXChg; + } + break; + } default: break; } diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index 92b70ad797b88..75f2bd99175a7 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX940 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1100 %s define float @syncscope_system(ptr %addr, float %val) #0 { ; GFX908-LABEL: syncscope_system: @@ -391,4 +391,20 @@ define float @no_unsafe(ptr %addr, float %val) { ret float %res } +define i32 @default_xor(ptr %addr, i32 %val) { + ; GCN-LABEL: default_xor: + ; GCN: flat_atomic_xor + %res = atomicrmw xor ptr %addr, i32 %val seq_cst + ret i32 %res +} + +define i32 @no_unsafe_xor(ptr %addr, i32 %val) { + ; GCN-LABEL: no_unsafe_xor: + ; GCN: flat_atomic_cmpswap + %res = atomicrmw xor ptr %addr, i32 %val seq_cst, !amdgpu.atomic !0 + ret i32 %res +} + attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" } +!0 = !{!1} +!1 = !{!"fine_grained", i32 1} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits