On 22 February 2013 08:02, Michel Dänzer <mic...@daenzer.net> wrote:
> From: Michel Dänzer <michel.daen...@amd.com> > > Use LDS for calculating the deltas between neighbouring pixels. Not sure > the > sign of the delta is correct for both dimensions (the sign doesn't seem to > matter for the relevant piglit tests), but it'll be easy to fix that up if > not. > Is this the back-end code that implements the dFdx() and dFdy() GLSL functions? If so, then there is a piglit test that verifies that the sign of the delta is correct for both dimensions. It's in tests/fbo/fbo-deriv.c. > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> > --- > lib/Target/R600/SIISelLowering.cpp | 84 > ++++++++++++++++++++++++++++++++++++++ > lib/Target/R600/SIISelLowering.h | 2 + > lib/Target/R600/SIInstructions.td | 29 ++++++++++++- > 3 files changed, 113 insertions(+), 2 deletions(-) > > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index 212e3f2..b21a7e8 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -75,6 +75,9 @@ MachineBasicBlock * > SITargetLowering::EmitInstrWithCustomInserter( > MI->eraseFromParent(); > break; > > + case AMDGPU::SI_DD: > + LowerSI_DD(MI, *BB, I, MRI); > + break; > case AMDGPU::SI_INTERP: > LowerSI_INTERP(MI, *BB, I, MRI); > break; > @@ -93,6 +96,87 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI, > MachineBasicBlock &BB, > MI->eraseFromParent(); > } > > +void SITargetLowering::LowerSI_DD(MachineInstr *MI, MachineBasicBlock &BB, > + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { > + unsigned mbcnt_lo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned mbcnt = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned tid = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned tid0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned tid1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned coord0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + unsigned coord1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > + MachineOperand dst = MI->getOperand(0); > + MachineOperand coord = MI->getOperand(1); > + MachineOperand incr = MI->getOperand(2); > + > + // Get this thread's ID > + BuildMI(BB, I, BB.findDebugLoc(I), > TII->get(AMDGPU::V_MBCNT_LO_U32_B32_e64), mbcnt_lo) > + .addImm(0xffffffff) > + .addImm(0x80) // Inline constant 0 > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0); > + BuildMI(BB, I, BB.findDebugLoc(I), > TII->get(AMDGPU::V_MBCNT_HI_U32_B32_e32), mbcnt) > + .addImm(0xffffffff) > + .addReg(mbcnt_lo); > + > + // Multiply by 4 to get a DWORD offset > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_LSHL_B32_e64), > tid) > + .addReg(mbcnt) > + .addImm(0x82) // Inline constant 2 > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0); > + > + // Write this thread's coordinate to LDS > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_WRITE_B32)) > + .addOperand(coord) > + .addImm(0) // LDS > + .addReg(tid) > + .addOperand(coord) > + .addOperand(coord) > + .addImm(0) > + .addImm(0); > + > + // Get bottom left thread ID * 4 > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_AND_B32_e32), > tid0) > + .addImm(0xfffffff0) > + .addReg(tid); > + > + // Read bottom left thread's coordinate from LDS > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_READ_B32), > coord0) > + .addImm(0) // LDS > + .addReg(tid0) > + .addReg(tid0) > + .addReg(tid0) > + .addImm(0) > + .addImm(0); > + > + // Get bottom right / top left thread ID * 4 > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_ADD_I32_e32), > tid1) > + .addOperand(incr) > + .addReg(tid0); > + > + // Read bottom right / top left thread's coordinate from LDS > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::DS_READ_B32), > coord1) > + .addImm(0) // LDS > + .addReg(tid1) > + .addReg(tid1) > + .addReg(tid1) > + .addImm(0) > + .addImm(0); > + > + // Subtract bottom left coordinate from bottom right / top left > coordinate > + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_SUB_F32_e32)) > + .addOperand(dst) > + .addReg(coord1) > + .addReg(coord0); > + > + MI->eraseFromParent(); > +} > + > void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock > &BB, > MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { > unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); > diff --git a/lib/Target/R600/SIISelLowering.h > b/lib/Target/R600/SIISelLowering.h > index 5d048f8..905a43e 100644 > --- a/lib/Target/R600/SIISelLowering.h > +++ b/lib/Target/R600/SIISelLowering.h > @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering { > > void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, > MachineBasicBlock::iterator I, unsigned Opocde) const; > + void LowerSI_DD(MachineInstr *MI, MachineBasicBlock &BB, > + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) > const; > void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, > MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) > const; > void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 7152c49..490fb99 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -806,8 +806,8 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; > defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; > defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; > //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; > -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", > []>; > -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", > []>; > +defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; > +defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; > let Defs = [VCC] in { // Carry-out goes to VCC > defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", > [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] > @@ -996,6 +996,13 @@ def LOAD_CONST : AMDGPUShaderInst < > > let usesCustomInserter = 1 in { > > +def SI_DD : InstSI < > + (outs VReg_32:$dst), > + (ins VReg_32:$src, i32imm:$incr), > + "SI_DD $src, $incr", > + [] > +>; > + > def SI_INTERP : InstSI < > (outs VReg_32:$dst), > (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, > SReg_32:$params), > @@ -1396,6 +1403,24 @@ def : Pat < > (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) > >; > > +def : Pat < > + (int_AMDGPU_ddx VReg_128:$src, imm, imm, imm), > + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 > (IMPLICIT_DEF)), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub0), 4), sub0), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub1), 4), sub1), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub2), 4), sub2), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub3), 4), sub3) > +>; > + > +def : Pat < > + (int_AMDGPU_ddy VReg_128:$src, imm, imm, imm), > + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 > (IMPLICIT_DEF)), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub0), 8), sub0), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub1), 8), sub1), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub2), 8), sub2), > + (SI_DD (EXTRACT_SUBREG VReg_128:$src, sub3), 8), sub3) > +>; > + > /********** ================== **********/ > /********** VOP3 Patterns **********/ > /********** ================== **********/ > -- > 1.8.1.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev