================
@@ -2378,6 +2456,221 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
   return Changed;
 }
 
+bool SIGfx6CacheControl ::handleNonAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoadOrStore());
+
+  MachineInstr &Inst = *MI;
+  AMDGPU::Waitcnt Wait;
+
+  if (TII->isSMRD(Inst)) { // scalar
+    if (Inst.mayStore())
+      return false;
+    Wait.DsCnt = 0; // LgkmCnt
+  } else { // vector
+    if (Inst.mayLoad()) { // vector load
+      if (TII->isVMEM(Inst)) { // VMEM load
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat load
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0; // LgkmCnt
+      } else { // LDS load
+        Wait.DsCnt = 0; // LgkmCnt
+      }
+    } else { // vector store
+      if (TII->isVMEM(Inst)) { // VMEM store
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat store
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0; // LgkmCnt
+      } else {
+        Wait.DsCnt = 0; // LDS store; LgkmCnt
+      }
+    }
+  }
+
+  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+  --MI;
+  return true;
+}
+
+bool SIGfx6CacheControl ::handleAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI, bool ret) {
+  assert(MI->mayLoadOrStore());
+
+  AMDGPU::Waitcnt Wait;
+
+  Wait.LoadCnt = 0; // VmCnt
+  Wait.DsCnt = 0; // LgkmCnt
+
+  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+  --MI;
+  return true;
+}
+
+bool SIGfx10CacheControl ::handleNonAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoadOrStore());
+
+  MachineInstr &Inst = *MI;
+  AMDGPU::Waitcnt Wait;
+
+  bool BuildWaitCnt = true;
+  bool BuildVsCnt = false;
+
+  if (TII->isSMRD(Inst)) { // scalar
+    if (Inst.mayStore())
+      return false;
+    Wait.DsCnt = 0; // LgkmCnt
+  } else { // vector
+    if (Inst.mayLoad()) { // vector load
+      if (TII->isVMEM(Inst)) { // VMEM load
+        Wait.LoadCnt = 0; // VmCnt
+      } else if (TII->isFLAT(Inst)) { // Flat load
+        Wait.LoadCnt = 0; // VmCnt
+        Wait.DsCnt = 0; // LgkmCnt
+      } else { // LDS load
+        Wait.DsCnt = 0; // LgkmCnt
+      }
+    }
+
+    // For some vector instructions, mayLoad() and mayStore() can be both true.
+    if (Inst.mayStore()) { // vector store; an instruction can be both
+                           // load/store
+      if (TII->isVMEM(Inst)) { // VMEM store
+        if (!Inst.mayLoad())
+          BuildWaitCnt = false;
+        BuildVsCnt = true;
+      } else if (TII->isFLAT(Inst)) { // Flat store
+        Wait.DsCnt = 0; // LgkmCnt
+        BuildVsCnt = true;
+      } else {
+        Wait.DsCnt = 0; // LDS store; LgkmCnt
+      }
+    }
+  }
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  if (BuildWaitCnt) {
+    unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+    --MI;
+  }
+
+  if (BuildVsCnt) {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+        .addImm(0);
+    --MI;
+  }
+  return true;
+}
+
+bool SIGfx10CacheControl ::handleAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI, bool ret) {
+  assert(MI->mayLoadOrStore());
+
+  AMDGPU::Waitcnt Wait;
+
+  Wait.DsCnt = 0; // LgkmCnt
+  if (ret)
+    Wait.LoadCnt = 0; // VmCnt
+
+  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+  MachineBasicBlock &MBB = *MI->getParent();
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
+  --MI;
+  if (!ret) {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+        .addImm(0);
+    --MI;
+  }
+  return true;
+}
+
+bool SIGfx12CacheControl ::handleNonAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoadOrStore());
+
+  MachineInstr &Inst = *MI;
+  unsigned WaitType = 0;
+  // For some vector instructions, mayLoad() and mayStore() can be both true.
+  bool LoadAndStore = false;
+
+  if (TII->isSMRD(Inst)) { // scalar
+    if (Inst.mayStore())
+      return false;
+
+    WaitType = AMDGPU::S_WAIT_KMCNT;
+  } else { // vector
+    if (Inst.mayLoad() && Inst.mayStore()) {
+      WaitType = AMDGPU::S_WAIT_LOADCNT;
+      LoadAndStore = true;
+    } else if (Inst.mayLoad()) { // vector load
+      if (TII->isVMEM(Inst)) { // VMEM load
+        WaitType = AMDGPU::S_WAIT_LOADCNT;
+      } else if (TII->isFLAT(Inst)) { // Flat load
+        WaitType = AMDGPU::S_WAIT_LOADCNT_DSCNT;
+      } else { // LDS load
+        WaitType = AMDGPU::S_WAIT_DSCNT;
+      }
+    } else { // vector store
+      if (TII->isVMEM(Inst)) { // VMEM store
+        WaitType = AMDGPU::S_WAIT_STORECNT;
+      } else if (TII->isFLAT(Inst)) { // Flat store
+        WaitType = AMDGPU::S_WAIT_STORECNT_DSCNT;
+      } else {
+        WaitType = AMDGPU::S_WAIT_DSCNT;
+      }
+    }
+  }
+
+  assert(WaitType != 0);
+
+  MachineBasicBlock &MBB = *MI->getParent();
+
+  unsigned Enc = 0;
+  if (WaitType == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
+    AMDGPU::Waitcnt Wait;
+    Wait.DsCnt = 0;
+    Wait.LoadCnt = 0;
+    Enc = AMDGPU::encodeLoadcntDscnt(IV, Wait);
+  } else if (WaitType == AMDGPU::S_WAIT_STORECNT_DSCNT) {
+    AMDGPU::Waitcnt Wait;
+    Wait.DsCnt = 0;
+    Wait.StoreCnt = 0;
+    Enc = AMDGPU::encodeStorecntDscnt(IV, Wait);
+  }
+
+  BuildMI(MBB, ++MI, DebugLoc(), TII->get(WaitType)).addImm(Enc);
+  --MI;
+  if (LoadAndStore) {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAIT_STORECNT))
+        .addImm(Enc);
+    --MI;
+  }
+  return true;
+}
+
+bool SIGfx12CacheControl ::handleAtomicForPreciseMemory(
+    MachineBasicBlock::iterator &MI, bool ret) {
+  assert(MI->mayLoadOrStore());
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  if (ret) {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAIT_LOADCNT)).addImm(0);
+  } else {
+    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAIT_STORECNT)).addImm(0);
+  }
----------------
Pierre-vh wrote:
can drop `{}`

https://github.com/llvm/llvm-project/pull/79236
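For illustration, applying that suggestion to the quoted GFX12 handleAtomicForPreciseMemory would look roughly like the sketch below. Only the quoted if/else is shown, with the single-statement braces dropped as the LLVM coding standard prefers; the rest of the function is assumed unchanged from the hunk above.

  MachineBasicBlock &MBB = *MI->getParent();
  // Single-statement bodies: braces omitted, per the review suggestion.
  if (ret)
    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAIT_LOADCNT)).addImm(0);
  else
    BuildMI(MBB, ++MI, DebugLoc(), TII->get(AMDGPU::S_WAIT_STORECNT)).addImm(0);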