A few comments below, otherwise : reviewed-by: Vincent Lejeune<vljn at ovi.com>
----- Mail original ----- > De : Tom Stellard <t...@stellard.net> > À : llvm-comm...@cs.uiuc.edu > Cc : mesa-dev@lists.freedesktop.org; Tom Stellard <thomas.stell...@amd.com> > Envoyé le : Vendredi 6 septembre 2013 0h23 > Objet : [PATCH] R600: Don't use trans slot for instructions that read LDS > source registers > > From: Tom Stellard <thomas.stell...@amd.com> > > This fixes some regressions in the piglit local memory store tests > introduced by recent commits which made the scheduler aware of the trans > slot. > > It's not possible to test this using lit, because there is no way to > determine from the assembly dumps whether or not an instruction is in > the trans slot. > > Even if this were possible, the test would be highly sensitive to > changes in the scheduler and might generate confusing false negatives. > --- > lib/Target/R600/R600InstrInfo.cpp | 17 +++++++++++++++++ > lib/Target/R600/R600InstrInfo.h | 1 + > lib/Target/R600/R600MachineScheduler.cpp | 5 +++++ > lib/Target/R600/R600Packetizer.cpp | 5 +++++ > lib/Target/R600/R600RegisterInfo.td | 10 +++++++++- > 5 files changed, 37 insertions(+), 1 deletion(-) > > diff --git a/lib/Target/R600/R600InstrInfo.cpp > b/lib/Target/R600/R600InstrInfo.cpp > index 0e7cfb4..60a3f7d 100644 > --- a/lib/Target/R600/R600InstrInfo.cpp > +++ b/lib/Target/R600/R600InstrInfo.cpp > @@ -204,6 +204,23 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) > const { > } > } > > +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { > + if (!isALUInstr(MI->getOpcode())) { > + return false; > + } > + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), > + E = MI->operands_end(); I != E; ++I) > { > + if (!I->isReg() || !I->isUse() || > + TargetRegisterInfo::isVirtualRegister(I->getReg())) { > + continue; > + } > + if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) { > + return true; > + } The bracket in this if statements and in the previous one are unneeded. > + } > + return false; > +} > + > int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { > static const unsigned OpTable[] = { > AMDGPU::OpName::src0, > diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h > index 24cc43d..0d1ffc8 100644 > --- a/lib/Target/R600/R600InstrInfo.h > +++ b/lib/Target/R600/R600InstrInfo.h > @@ -78,6 +78,7 @@ namespace llvm { > bool usesTextureCache(const MachineInstr *MI) const; > > bool mustBeLastInClause(unsigned Opcode) const; > + bool readsLDSSrcReg(const MachineInstr *MI) const; > > /// \returns The operand index for the given source number. Legal values > /// for SrcNum are 0, 1, and 2. > diff --git a/lib/Target/R600/R600MachineScheduler.cpp > b/lib/Target/R600/R600MachineScheduler.cpp > index 0499dd5..f67ba89 100644 > --- a/lib/Target/R600/R600MachineScheduler.cpp > +++ b/lib/Target/R600/R600MachineScheduler.cpp > @@ -314,6 +314,11 @@ R600SchedStrategy::AluKind > R600SchedStrategy::getAluKind(SUnit *SU) const { > if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) > return AluT_XYZW; > > + // LDS src registers cannot be used in the Trans slot. > + if (TII->readsLDSSrcReg(MI)) { > + return AluT_XYZW; > + } Here too > + > return AluAny; > > } > diff --git a/lib/Target/R600/R600Packetizer.cpp > b/lib/Target/R600/R600Packetizer.cpp > index 6c70052..ee256d5 100644 > --- a/lib/Target/R600/R600Packetizer.cpp > +++ b/lib/Target/R600/R600Packetizer.cpp > @@ -272,6 +272,11 @@ public: > return false; > } > > + // We cannot read LDS source registrs from the Trans slot. > + if (isTransSlot && TII->readsLDSSrcReg(MI)) { > + return false; > + } And here too > + > CurrentPacketMIs.pop_back(); > return true; > } > diff --git a/lib/Target/R600/R600RegisterInfo.td > b/lib/Target/R600/R600RegisterInfo.td > index fa987cf..514427e 100644 > --- a/lib/Target/R600/R600RegisterInfo.td > +++ b/lib/Target/R600/R600RegisterInfo.td > @@ -95,6 +95,12 @@ foreach Index = 448-480 in { > > // Special Registers > > +def OQA : R600Reg<"OQA", 219>; > +def OQB : R600Reg<"OQB", 220>; > +def OQAP : R600Reg<"OQAP", 221>; > +def OQBP : R600Reg<"OQAP", 222>; > +def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>; > +def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>; > def ZERO : R600Reg<"0.0", 248>; > def ONE : R600Reg<"1.0", 249>; > def NEG_ONE : R600Reg<"-1.0", 249>; > @@ -115,7 +121,6 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", > 0>; > def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; > def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; > def AR_X : R600Reg<"AR.x", 0>; > -def OQAP : R600Reg<"OQAP", 221>; > > def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, > (add (sequence "ArrayBase%u", 448, > 480))>; > @@ -130,6 +135,9 @@ let isAllocatable = 0 in { > // XXX: Only use the X channel, until we support wider stack widths > def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence > "Addr%u_X", 0, 127))>; > > +def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32, > + (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>; > + > } // End isAllocatable = 0 > > def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32, > -- > 1.7.11.4 > > _______________________________________________ > llvm-commits mailing list > llvm-comm...@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev