https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443
>From 637bb436aa8472c2380364e573219c2a7524fdb1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Sun, 23 Jun 2024 16:44:08 +0200 Subject: [PATCH 1/3] AMDGPU: Add subtarget feature for global atomic fadd denormal support Not sure what the behavior for gfx90a is. The SPG says it always flushes. The instruction documentation says it does not. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++++++++++++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 3f35db8883716..51c077598df74 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureMemoryAtomicFaddF32DenormalSupport + : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", + "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "true", + "global/flat/buffer atomic fadd for float supports denormal handling" +>; + def FeatureAgentScopeFineGrainedRemoteMemoryAtomics : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", "HasAgentScopeFineGrainedRemoteMemoryAtomics", @@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureAgentScopeFineGrainedRemoteMemoryAtomics + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1631,7 +1639,9 @@ def FeatureISAVersion12 : FeatureSet< FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, - Feature1_5xVGPRs]>; + Feature1_5xVGPRs, + FeatureMemoryAtomicFaddF32DenormalSupport]>; + ]>; def FeatureISAVersion12_Generic: FeatureSet< !listconcat(FeatureISAVersion12.Features, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 9e2a316a9ed28..db0b2b67a0388 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target's flat, global, and buffer atomic fadd for + /// float supports denormal handling. + bool hasMemoryAtomicFaddF32DenormalSupport() const { + return HasAtomicMemoryAtomicFaddF32DenormalSupport; + } + /// \return true if atomic operations targeting fine-grained memory work /// correctly at device scope, in allocations in host or peer PCIe device /// memory. >From d954785fffda502d8325cca1ffb6a0adc15dc54a Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 24 Jun 2024 12:10:37 +0200 Subject: [PATCH 2/3] Add to gfx11. RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm" though I'm not sure I trust it. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 51c077598df74..370992eb81ff3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1547,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard]>; + FeatureVcmpxPermlaneHazard, + FeatureMemoryAtomicFaddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen @@ -1640,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, Feature1_5xVGPRs, - FeatureMemoryAtomicFaddF32DenormalSupport]>; + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion12_Generic: FeatureSet< >From deebca23726296fff2892f9c780e3049db64749a Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 26 Jun 2024 11:30:51 +0200 Subject: [PATCH 3/3] Rename --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +++++----- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 370992eb81ff3..bea233bfb27bd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,9 +788,9 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; -def FeatureMemoryAtomicFaddF32DenormalSupport +def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", - "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "HasMemoryAtomicFaddF32DenormalSupport", "true", "global/flat/buffer atomic fadd for float supports denormal handling" >; @@ -1435,7 +1435,7 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, 
FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1548,7 +1548,7 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureImageInsts, FeaturePackedTID, FeatureVcmpxPermlaneHazard, - FeatureMemoryAtomicFaddF32DenormalSupport]>; + FeatureMemoryAtomicFAddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen @@ -1641,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, Feature1_5xVGPRs, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport ]>; def FeatureISAVersion12_Generic: FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index db0b2b67a0388..11894174bdffe 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,7 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; - bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; + bool HasMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -876,7 +876,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// \return true if the target's flat, global, and buffer atomic fadd for /// float supports denormal handling. 
bool hasMemoryAtomicFaddF32DenormalSupport() const { - return HasAtomicMemoryAtomicFaddF32DenormalSupport; + return HasMemoryAtomicFaddF32DenormalSupport; } /// \return true if atomic operations targeting fine-grained memory work _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits