This patch should work when checking than no OQAP is used before beeing queued, assuming that a value in OQAP is consumed and cannot be read twice. However I'm not sure I cover all LDS instructions that queues a value, I only use LDS_RET_READ in switch case.
Vincent ----- Mail original ----- > De : Tom Stellard <t...@stellard.net> > À : Vincent Lejeune <v...@ovi.com> > Cc : "llvm-comm...@cs.uiuc.edu" <llvm-comm...@cs.uiuc.edu>; > "mesa-dev@lists.freedesktop.org" <mesa-dev@lists.freedesktop.org>; Tom > Stellard <thomas.stell...@amd.com> > Envoyé le : Mardi 22 octobre 2013 23h20 > Objet : Re: [PATCH] R600: Make sure OQAP defs and uses happen in the same > clause > > Hi Vincent, > > Here is an updated patch. I wasn't sure where to put the assertion to > check that UnscheduledNoLiveOut{Defs,Uses} is empty when switching to a > new clause. I tried adding it to R600SchedStartegy::schedNode() behind > the if (NextInstKind != CurInstKind) condition, but it always failed. > Any suggestions on where I should but it? > > -Tom > > > On Mon, Oct 21, 2013 at 12:40:28PM -0700, Vincent Lejeune wrote: >> >> >> >> >> ----- Mail original ----- >> > De : Tom Stellard <t...@stellard.net> >> > À : llvm-comm...@cs.uiuc.edu >> > Cc : mesa-dev@lists.freedesktop.org; Tom Stellard > <thomas.stell...@amd.com> >> > Envoyé le : Vendredi 11 octobre 2013 20h10 >> > Objet : [PATCH] R600: Make sure OQAP defs and uses happen in the same > clause >> > >> > From: Tom Stellard <thomas.stell...@amd.com> >> > >> > Reading the special OQAP register pops the top value off the LDS >> > input queue and returns it to the instruction. This queue is >> > invalidated at the end of an ALU clause and leaving values in the > queue >> > can lead to GPU hangs. This means that if we load a value into the > queue, >> > we must use it before the end of the clause. >> > >> > This fixes some hangs in the OpenCV test suite. >> > --- >> > lib/Target/R600/R600MachineScheduler.cpp | 25 > +++++++++++++------------ >> > lib/Target/R600/R600MachineScheduler.h | 4 ++-- >> > test/CodeGen/R600/lds-input-queue.ll | 26 > ++++++++++++++++++++++++++ >> > 3 files changed, 41 insertions(+), 14 deletions(-) >> > create mode 100644 test/CodeGen/R600/lds-input-queue.ll >> > >> > diff --git a/lib/Target/R600/R600MachineScheduler.cpp >> > b/lib/Target/R600/R600MachineScheduler.cpp >> > index 6c26d9e..611b7f4 100644 >> > --- a/lib/Target/R600/R600MachineScheduler.cpp >> > +++ b/lib/Target/R600/R600MachineScheduler.cpp >> > @@ -93,11 +93,12 @@ SUnit* R600SchedStrategy::pickNode(bool > &IsTopNode) >> > { >> > } >> > >> > >> > - // We want to scheduled AR defs as soon as possible to make sure > they >> > aren't >> > - // put in a different ALU clause from their uses. >> > - if (!SU && !UnscheduledARDefs.empty()) { >> > - SU = UnscheduledARDefs[0]; >> > - UnscheduledARDefs.erase(UnscheduledARDefs.begin()); >> > + // We want to scheduled defs that cannot be live outside of this > clause >> > + // as soon as possible to make sure they aren't put in a > different >> > + // ALU clause from their uses. >> > + if (!SU && !UnscheduledNoLiveOutDefs.empty()) { >> > + SU = UnscheduledNoLiveOutDefs[0]; >> > + > UnscheduledNoLiveOutDefs.erase(UnscheduledNoLiveOutDefs.begin()); >> > NextInstKind = IDAlu; >> > } >> > >> > @@ -132,9 +133,9 @@ SUnit* R600SchedStrategy::pickNode(bool > &IsTopNode) >> > { >> > >> > // We want to schedule the AR uses as late as possible to make sure > that >> > // the AR defs have been released. >> > - if (!SU && !UnscheduledARUses.empty()) { >> > - SU = UnscheduledARUses[0]; >> > - UnscheduledARUses.erase(UnscheduledARUses.begin()); >> > + if (!SU && !UnscheduledNoLiveOutUses.empty()) { >> > + SU = UnscheduledNoLiveOutUses[0]; >> > + > UnscheduledNoLiveOutUses.erase(UnscheduledNoLiveOutUses.begin()); >> >> Can we use std::queue<SUnit*> instead of a std::vector for > UnscheduledNoLiveOutUses ? >> I had to use a vector because I needed to be able to pop non topmost SUnit > in some case >> (to fit Instruction Group const read limitation) but I would rather avoid > erase(iterator) call >> when possible. >> >> >> > NextInstKind = IDAlu; >> > } >> > >> > @@ -217,15 +218,15 @@ void R600SchedStrategy::releaseBottomNode(SUnit > *SU) >> > { >> > >> > int IK = getInstKind(SU); >> > >> > - // Check for AR register defines >> > + // Check for registers that do not live across ALU clauses. >> > for (MachineInstr::const_mop_iterator I = >> > SU->getInstr()->operands_begin(), >> > E = >> > SU->getInstr()->operands_end(); >> > I != E; ++I) { >> > - if (I->isReg() && I->getReg() == AMDGPU::AR_X) > { >> > + if (I->isReg() && (I->getReg() == AMDGPU::AR_X || >> > I->getReg() == AMDGPU::OQAP)) { >> > if (I->isDef()) { >> > - UnscheduledARDefs.push_back(SU); >> > + UnscheduledNoLiveOutDefs.push_back(SU); >> > } else { >> > - UnscheduledARUses.push_back(SU); >> > + UnscheduledNoLiveOutUses.push_back(SU); >> > } >> > return; >> > } >> > diff --git a/lib/Target/R600/R600MachineScheduler.h >> > b/lib/Target/R600/R600MachineScheduler.h >> > index 0a6f120..db2e188 100644 >> > --- a/lib/Target/R600/R600MachineScheduler.h >> > +++ b/lib/Target/R600/R600MachineScheduler.h >> > @@ -53,8 +53,8 @@ class R600SchedStrategy : public > MachineSchedStrategy { >> > >> > std::vector<SUnit *> Available[IDLast], Pending[IDLast]; >> > std::vector<SUnit *> AvailableAlus[AluLast]; >> > - std::vector<SUnit *> UnscheduledARDefs; >> > - std::vector<SUnit *> UnscheduledARUses; >> > + std::vector<SUnit *> UnscheduledNoLiveOutDefs; >> > + std::vector<SUnit *> UnscheduledNoLiveOutUses; >> > std::vector<SUnit *> PhysicalRegCopy; >> > >> > InstKind CurInstKind; >> > diff --git a/test/CodeGen/R600/lds-input-queue.ll >> > b/test/CodeGen/R600/lds-input-queue.ll >> > new file mode 100644 >> > index 0000000..548b41c >> > --- /dev/null >> > +++ b/test/CodeGen/R600/lds-input-queue.ll >> > @@ -0,0 +1,26 @@ >> > +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s >> >> Does the test work with -verify-machineinstrs flag set ? >> >> > +; >> > +; This test checks that the lds input queue will is empty at the end > of >> > +; the ALU clause. >> > + >> > +; CHECK-LABEL: @lds_input_queue >> > +; CHECK: LDS_READ_RET * OQAP >> > +; CHECK-NOT: ALU clause >> > +; CHECK: MOV T{{[0-9]\.[XYZW]}}, OQAP >> > + >> > +@local_mem = internal addrspace(3) unnamed_addr global [2 x i32] [i32 > 1, i32 >> > 2], align 4 >> > + >> > +define void @lds_input_queue(i32 addrspace(1)* %out, i32 > addrspace(1)* %in, i32 >> > %index) { >> > +entry: >> > + %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 > 0, i32 >> > %index >> > + %1 = load i32 addrspace(3)* %0 >> > + call void @llvm.AMDGPU.barrier.local() >> > + >> > + ; This will start a new clause for the vertex fetch >> > + %2 = load i32 addrspace(1)* %in >> > + %3 = add i32 %1, %2 >> > + store i32 %3, i32 addrspace(1)* %out >> > + ret void >> > +} >> > + >> > +declare void @llvm.AMDGPU.barrier.local() >> > -- >> > 1.7.11.4 >> > >> > _______________________________________________ >> > llvm-commits mailing list >> > llvm-comm...@cs.uiuc.edu >> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > >
From 956c9986fcda84c4a4b0a555432319bbdfd98cfc Mon Sep 17 00:00:00 2001 From: Vincent Lejeune <v...@ovi.com> Date: Fri, 25 Oct 2013 14:32:35 +0200 Subject: [PATCH] R600: Uses Assert for OQAP --- lib/Target/R600/R600EmitClauseMarkers.cpp | 31 +++++++++++++++++++++++++++++++ lib/Target/R600/R600ISelLowering.cpp | 2 ++ 2 files changed, 33 insertions(+) diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index 928c0e3..9951b15 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -33,6 +33,8 @@ private: static char ID; const R600InstrInfo *TII; int Address; + int OQAPLevel; + bool ArDefined; unsigned OccupiedDwords(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -160,8 +162,36 @@ private: return true; } + void AssertARAndOQAPCorrectness(MachineInstr *MI) { + for (MachineInstr::mop_iterator MOp = MI->operands_begin(), + E = MI->operands_end(); MOp != E; MOp++) { + MachineOperand &MO = *MOp; + if (!MO.isReg() || MO.isDef()) + continue; + switch (MO.getReg()) { + default: break; + case AMDGPU::OQAP: + OQAPLevel--; + assert (OQAPLevel >= 0 && "OQAP not defined in this clause !"); + break; + } + } + switch (MI->getOpcode()) { + default: break; + case AMDGPU::MOVA_INT_eg: + ArDefined = true; + break; + case AMDGPU::LDS_READ_RET: + OQAPLevel++; + break; + } + } + MachineBasicBlock::iterator MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { + // LDS queue/ AR registers get resetted in new clause. + OQAPLevel = 0; + ArDefined = false; MachineBasicBlock::iterator ClauseHead = I; std::vector<std::pair<unsigned, unsigned> > KCacheBanks; bool PushBeforeModifier = false; @@ -205,6 +235,7 @@ private: !SubstituteKCacheBank(I, KCacheBanks)) break; AluInstCount += OccupiedDwords(I); + AssertARAndOQAPCorrectness(I); } unsigned Opcode = PushBeforeModifier ? AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 3c2e388..d8cbcae 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -215,6 +215,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::TXD: { + abort(); unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); MachineOperand &RID = MI->getOperand(4); @@ -316,6 +317,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::TXD_SHADOW: { + abort(); unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); MachineOperand &RID = MI->getOperand(4); -- 1.8.3.1
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev