================
@@ -605,12 +606,197 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
                 bool IsNonTemporal) const override;
 };
 
+class SIPreciseMemorySupport {
+protected:
+  const GCNSubtarget &ST;
+  const SIInstrInfo *TII = nullptr;
+
+  IsaVersion IV;
+
+  SIPreciseMemorySupport(const GCNSubtarget &ST) : ST(ST) {
+    TII = ST.getInstrInfo();
+    IV = getIsaVersion(ST.getCPU());
+  }
+
+public:
+  static std::unique_ptr<SIPreciseMemorySupport> create(const GCNSubtarget &ST);
+
+  virtual bool handleNonAtomic(MachineBasicBlock::iterator &MI) = 0;
+  /// Handles atomic instruction \p MI with \p ret indicating whether \p MI
+  /// returns a result.
+  virtual bool handleAtomic(MachineBasicBlock::iterator &MI, bool ret) = 0;
+};
+
+class SIGfx9PreciseMemorySupport : public SIPreciseMemorySupport {
+public:
+  SIGfx9PreciseMemorySupport(const GCNSubtarget &ST)
+      : SIPreciseMemorySupport(ST) {}
+  bool handleNonAtomic(MachineBasicBlock::iterator &MI) override;
+  bool handleAtomic(MachineBasicBlock::iterator &MI, bool ret) override;
+};
+
+class SIGfx10And11PreciseMemorySupport : public SIPreciseMemorySupport {
+public:
+  SIGfx10And11PreciseMemorySupport(const GCNSubtarget &ST)
+      : SIPreciseMemorySupport(ST) {}
+  bool handleNonAtomic(MachineBasicBlock::iterator &MI) override;
+  bool handleAtomic(MachineBasicBlock::iterator &MI, bool ret) override;
+};
+
+std::unique_ptr<SIPreciseMemorySupport>
+SIPreciseMemorySupport::create(const GCNSubtarget &ST) {
+  GCNSubtarget::Generation Generation = ST.getGeneration();
+  if (Generation < AMDGPUSubtarget::GFX10)
+    return std::make_unique<SIGfx9PreciseMemorySupport>(ST);
+  return std::make_unique<SIGfx10And11PreciseMemorySupport>(ST);
+}
+
+bool SIGfx9PreciseMemorySupport ::handleNonAtomic(
+    MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoadOrStore());
+
+  MachineInstr &Inst = *MI;
+  AMDGPU::Waitcnt Wait;
+
+  if (TII->isSMRD(Inst)) { // scalar
+    if (Inst.mayStore())
+      return false;
+    Wait.DsCnt = 0; // LgkmCnt
+  } else { // vector
+    if (Inst.mayLoad()) { // vector load
+      if (TII->isVMEM(Inst)) { // VMEM load
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat load
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0;   // LgkmCnt
+      } else { // LDS load
+        Wait.DsCnt = 0; // LgkmCnt
+      }
+    } else { // vector store
+      if (TII->isVMEM(Inst)) { // VMEM store
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat store
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0;   // LgkmCnt
+      } else {
+        Wait.DsCnt = 0; // LDS store; LgkmCnt
+      }
+    }
+  }
+
+  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+  --MI;
+  return true;
+}
+
+bool SIGfx9PreciseMemorySupport ::handleAtomic(MachineBasicBlock::iterator &MI,
+                                               bool ret) {
+  assert(MI->mayLoadOrStore());
+
+  AMDGPU::Waitcnt Wait;
+
+  Wait.LoadCnt = 0; // VmCnt
+  Wait.DsCnt = 0;   // LgkmCnt
+
+  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+  --MI;
+  return true;
+}
+
+bool SIGfx10And11PreciseMemorySupport ::handleNonAtomic(
+    MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoadOrStore());
+
+  MachineInstr &Inst = *MI;
+  AMDGPU::Waitcnt Wait;
+
+  bool BuildWaitCnt = true;
+  bool BuildVsCnt = false;
+
+  if (TII->isSMRD(Inst)) { // scalar
+    if (Inst.mayStore())
+      return false;
+    Wait.DsCnt = 0; // LgkmCnt
+  } else { // vector
+    if (Inst.mayLoad()) { // vector load
+      if (TII->isVMEM(Inst)) { // VMEM load
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat load
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0;   // LgkmCnt
+      } else { // LDS load
+        Wait.DsCnt = 0; // LgkmCnt
+      }
+    }
+
+    // For some instructions, mayLoad() and mayStore() can be both true.
+    if (Inst.mayStore()) { // vector store; an instruction can be both
+                           // load/store
+      if (TII->isVMEM(Inst)) { // VMEM store
+        if (!Inst.mayLoad())
+          BuildWaitCnt = false;
+        BuildVsCnt = true;
+      } else if (TII->isFLAT(Inst)) { // Flat store
+        Wait.DsCnt = 0; // LgkmCnt
+        BuildVsCnt = true;
+      } else {
+        Wait.DsCnt = 0; // LDS store; LgkmCnt
+      }
+    }
+  }
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  if (BuildWaitCnt) {
+    unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+    --MI;
+  }
+
+  if (BuildVsCnt) {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+        .addImm(0);
+    --MI;
+  }
+  return true;
+}
+
+bool SIGfx10And11PreciseMemorySupport ::handleAtomic(
+    MachineBasicBlock::iterator &MI, bool ret) {
----------------
jwanggit86 wrote:
Like the existing functions, the return value indicates whether there's any change to the basic block, e.g., inserting a new instruction.

https://github.com/llvm/llvm-project/pull/79236
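
For reference, a minimal sketch of how a caller could consume that boolean and fold it into the pass's overall "changed" status. The wrapper name emitPreciseWaitcnts, the PM parameter, and the use of SIInstrInfo::isAtomicRet / isAtomicNoRet to classify atomics are illustrative assumptions, not code from this patch:

  // Minimal sketch, not from the patch: drive the handlers over a function
  // and accumulate each boolean result into an overall "changed" flag. The
  // name emitPreciseWaitcnts and the atomic classification below are
  // assumptions for illustration.
  static bool emitPreciseWaitcnts(MachineFunction &MF,
                                  SIPreciseMemorySupport &PM) {
    bool Changed = false;
    for (MachineBasicBlock &MBB : MF) {
      for (MachineBasicBlock::iterator MI = MBB.begin(), E = MBB.end();
           MI != E; ++MI) {
        if (!MI->mayLoadOrStore())
          continue;
        // Each handler inserts the wait after MI but leaves MI pointing at
        // the original instruction, so the loop advances normally.
        if (SIInstrInfo::isAtomicRet(*MI))
          Changed |= PM.handleAtomic(MI, /*ret=*/true);
        else if (SIInstrInfo::isAtomicNoRet(*MI))
          Changed |= PM.handleAtomic(MI, /*ret=*/false);
        else
          Changed |= PM.handleNonAtomic(MI);
      }
    }
    return Changed; // true iff any wait instruction was inserted
  }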