llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) <details> <summary>Changes</summary> Mostly NFC, and adds an assertion for gfx12 to ensure that no atomic scratch instructions are present in the case of GloballyAddressableScratch. This should always hold because of #<!-- -->154710. --- Full diff: https://github.com/llvm/llvm-project/pull/160129.diff 1 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+35-18) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index dc9c961e7de36..f807764e367f7 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -299,6 +299,10 @@ class SICacheControl { bool enableNamedBit(const MachineBasicBlock::iterator MI, AMDGPU::CPol::CPol Bit) const; + /// Check if any atomic operation on AS can affect memory accessible via the + /// global address space. + virtual bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const = 0; + public: /// Create a cache control for the subtarget \p ST. @@ -403,6 +407,10 @@ class SIGfx6CacheControl : public SICacheControl { return enableNamedBit(MI, AMDGPU::CPol::SLC); } + bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override { + return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE; + } + public: SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {} @@ -609,6 +617,15 @@ class SIGfx12CacheControl : public SIGfx11CacheControl { bool setAtomicScope(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const; + bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override { + assert((!ST.hasGloballyAddressableScratch() || + ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) || + (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) && + "scratch instructions should already be replaced by flat " + "instructions if GloballyAddressableScratch is enabled"); + return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE; + } + public: SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) { // GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases @@ -1016,7 +1033,7 @@ bool SIGfx6CacheControl::enableLoadCacheBypass( assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1239,7 +1256,7 @@ bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1299,7 +1316,7 @@ bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1336,7 +1353,7 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass( assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1378,7 +1395,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass( assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1487,7 +1504,7 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Ensures that following loads will not see stale remote VMEM data or @@ -1551,7 +1568,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the @@ -1594,7 +1611,7 @@ bool SIGfx940CacheControl::enableLoadCacheBypass( assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Set SC bits to indicate system scope. @@ -1638,7 +1655,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass( assert(!MI->mayLoad() && MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Set SC bits to indicate system scope. @@ -1678,7 +1695,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass( assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Set SC1 bit to indicate system scope. @@ -1756,7 +1773,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Ensures that following loads will not see stale remote VMEM data or @@ -1840,7 +1857,7 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the @@ -1897,7 +1914,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass( assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -2129,7 +2146,7 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, if (Pos == Position::AFTER) ++MI; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -2194,7 +2211,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass( assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -2462,7 +2479,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, /// memory. /// Other address spaces do not have a cache. - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE) + if (!canAffectGlobalAddrSpace(AddrSpace)) return false; AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV; @@ -2521,7 +2538,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI, // writeback as all memory operations by the same thread are // sequentially consistent, and no other thread can access scratch // memory. - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { if (Pos == Position::AFTER) ++MI; @@ -2651,7 +2668,7 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace) const { bool Changed = false; - if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + if (canAffectGlobalAddrSpace(AddrSpace)) { switch (Scope) { case SIAtomicScope::SYSTEM: Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS); `````````` </details> https://github.com/llvm/llvm-project/pull/160129 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits