https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/160129

Mostly NFC, and adds an assertion for gfx12 to ensure that no atomic scratch
instructions are present in the case of GloballyAddressableScratch. This
should always hold because of #154710.

>From 8e61a9cceb44edcec4a7e2bf7d5d732753899ac6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.rit...@amd.com>
Date: Mon, 22 Sep 2025 11:20:16 -0400
Subject: [PATCH] [AMDGPU] SIMemoryLegalizer: Factor out check if memory
 operations can affect the global AS

Mostly NFC, and adds an assertion for gfx12 to ensure that no atomic
scratch instructions are present in the case of GloballyAddressableScratch.
This should always hold because of #154710.
---
 llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 53 +++++++++++++-------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index dc9c961e7de36..f807764e367f7 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -299,6 +299,10 @@ class SICacheControl {
   bool enableNamedBit(const MachineBasicBlock::iterator MI,
                       AMDGPU::CPol::CPol Bit) const;
 
+  /// Check if any atomic operation on AS can affect memory accessible via the
+  /// global address space.
+  virtual bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const = 0;
+
 public:
 
   /// Create a cache control for the subtarget \p ST.
@@ -403,6 +407,10 @@ class SIGfx6CacheControl : public SICacheControl {
     return enableNamedBit(MI, AMDGPU::CPol::SLC);
   }
 
+  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
+    return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
+  }
+
 public:
 
   SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
@@ -609,6 +617,15 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
   bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                       SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;
 
+  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
+    assert((!ST.hasGloballyAddressableScratch() ||
+            ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) ||
+            (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
+           "scratch instructions should already be replaced by flat "
+           "instructions if GloballyAddressableScratch is enabled");
+    return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
+  }
+
 public:
   SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
     // GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
@@ -1016,7 +1033,7 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
   assert(MI->mayLoad() && !MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -1239,7 +1256,7 @@ bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
@@ -1299,7 +1316,7 @@ bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -1336,7 +1353,7 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
   assert(MI->mayLoad() && !MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -1378,7 +1395,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
   assert(MI->mayLoad() && MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -1487,7 +1504,7 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Ensures that following loads will not see stale remote VMEM data or
@@ -1551,7 +1568,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1594,7 +1611,7 @@ bool SIGfx940CacheControl::enableLoadCacheBypass(
   assert(MI->mayLoad() && !MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Set SC bits to indicate system scope.
@@ -1638,7 +1655,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
   assert(!MI->mayLoad() && MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Set SC bits to indicate system scope.
@@ -1678,7 +1695,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
   assert(MI->mayLoad() && MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Set SC1 bit to indicate system scope.
@@ -1756,7 +1773,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Ensures that following loads will not see stale remote VMEM data or
@@ -1840,7 +1857,7 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1897,7 +1914,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
   assert(MI->mayLoad() && !MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -2129,7 +2146,7 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   if (Pos == Position::AFTER)
     ++MI;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -2194,7 +2211,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
   assert(MI->mayLoad() && !MI->mayStore());
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
@@ -2462,7 +2479,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
   /// memory.
 
   /// Other address spaces do not have a cache.
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
+  if (!canAffectGlobalAddrSpace(AddrSpace))
     return false;
 
   AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
@@ -2521,7 +2538,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
   // writeback as all memory operations by the same thread are
   // sequentially consistent, and no other thread can access scratch
   // memory.
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     if (Pos == Position::AFTER)
       ++MI;
 
@@ -2651,7 +2668,7 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                          SIAtomicAddrSpace AddrSpace) const {
   bool Changed = false;
 
-  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+  if (canAffectGlobalAddrSpace(AddrSpace)) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
       Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
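
For readers who want the shape of the change without the surrounding legalizer
code, here is a minimal, self-contained C++ sketch of the pattern the patch
introduces. It is not the LLVM code: AddrSpace, CacheControl,
Gfx12LikeCacheControl, needsCacheBypass, and the GloballyAddressableScratch
flag below are simplified stand-ins for SIAtomicAddrSpace, the
SICacheControl/SIGfx12CacheControl hierarchy, and the GCNSubtarget query, kept
only to show how the open-coded "(AS & GLOBAL) != NONE" checks collapse into
one virtual hook that a single subtarget can strengthen with an assertion.

// Sketch only: simplified stand-ins for SIAtomicAddrSpace and the
// SICacheControl hierarchy in SIMemoryLegalizer.cpp.
#include <cassert>
#include <cstdint>
#include <iostream>

enum class AddrSpace : uint8_t {
  NONE = 0,
  GLOBAL = 1 << 0,
  SCRATCH = 1 << 1,
  LDS = 1 << 2,
};

inline AddrSpace operator&(AddrSpace A, AddrSpace B) {
  return static_cast<AddrSpace>(static_cast<uint8_t>(A) &
                                static_cast<uint8_t>(B));
}

struct CacheControl {
  virtual ~CacheControl() = default;

  // The factored-out check: can an atomic on AS affect memory that is
  // visible through the global address space? (Default mirrors the plain
  // GLOBAL test that the older cache controls use.)
  virtual bool canAffectGlobalAddrSpace(AddrSpace AS) const {
    return (AS & AddrSpace::GLOBAL) != AddrSpace::NONE;
  }

  // A caller that previously open-coded the GLOBAL test now asks the hook.
  bool needsCacheBypass(AddrSpace AS) const {
    return canAffectGlobalAddrSpace(AS);
  }
};

// Hypothetical gfx12-like target where scratch can be globally addressable:
// atomic scratch accesses are expected to have been rewritten earlier, so the
// hook can assert that it never sees a pure-scratch atomic.
struct Gfx12LikeCacheControl : CacheControl {
  bool GloballyAddressableScratch = true;

  bool canAffectGlobalAddrSpace(AddrSpace AS) const override {
    assert((!GloballyAddressableScratch ||
            (AS & AddrSpace::GLOBAL) != AddrSpace::NONE ||
            (AS & AddrSpace::SCRATCH) == AddrSpace::NONE) &&
           "scratch atomics should already have been replaced");
    return (AS & AddrSpace::GLOBAL) != AddrSpace::NONE;
  }
};

int main() {
  Gfx12LikeCacheControl CC;
  std::cout << CC.needsCacheBypass(AddrSpace::GLOBAL) << '\n'; // 1
  std::cout << CC.needsCacheBypass(AddrSpace::LDS) << '\n';    // 0
  // CC.needsCacheBypass(AddrSpace::SCRATCH) would trip the assertion while
  // GloballyAddressableScratch is true, mirroring the gfx12 check above.
  return 0;
}

Keeping the GLOBAL test behind one virtual hook means the gfx12 assertion
lives in a single place instead of being repeated at every call site, which is
the point of the otherwise NFC refactoring.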