On Mon, Dec 17, 2012 at 03:32:23PM +0100, Christian König wrote: > We shouldn't insert KILL optimization if we don't have a > kill instruction at all. >
For both patches: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > Signed-off-by: Christian König <deathsim...@vodafone.de> > --- > lib/Target/AMDGPU/SIISelLowering.cpp | 14 ---- > lib/Target/AMDGPU/SIISelLowering.h | 2 - > lib/Target/AMDGPU/SIInstructions.td | 24 +++--- > lib/Target/AMDGPU/SILowerControlFlow.cpp | 125 > ++++++++++++++++++++---------- > 4 files changed, 94 insertions(+), 71 deletions(-) > > diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp > b/lib/Target/AMDGPU/SIISelLowering.cpp > index cd6e0e9..435b0b3 100644 > --- a/lib/Target/AMDGPU/SIISelLowering.cpp > +++ b/lib/Target/AMDGPU/SIISelLowering.cpp > @@ -131,9 +131,6 @@ MachineBasicBlock * > SITargetLowering::EmitInstrWithCustomInserter( > case AMDGPU::SI_INTERP_CONST: > LowerSI_INTERP_CONST(MI, *BB, I, MRI); > break; > - case AMDGPU::SI_KIL: > - LowerSI_KIL(MI, *BB, I, MRI); > - break; > case AMDGPU::SI_WQM: > LowerSI_WQM(MI, *BB, I, MRI); > break; > @@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr > *MI, > MI->eraseFromParent(); > } > > -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, > - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { > - // Clear this pixel from the exec mask if the operand is negative > - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), > - AMDGPU::VCC) > - .addReg(AMDGPU::SREG_LIT_0) > - .addOperand(MI->getOperand(0)); > - > - MI->eraseFromParent(); > -} > - > void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock > &BB, > MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { > unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); > diff --git a/lib/Target/AMDGPU/SIISelLowering.h > b/lib/Target/AMDGPU/SIISelLowering.h > index c088112..db36eef 100644 > --- a/lib/Target/AMDGPU/SIISelLowering.h > +++ b/lib/Target/AMDGPU/SIISelLowering.h > @@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering { > MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) > const; > void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, > MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; > - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, > - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) > const; > void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, > MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) > const; > void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, > diff --git a/lib/Target/AMDGPU/SIInstructions.td > b/lib/Target/AMDGPU/SIInstructions.td > index 005be96..cac42da 100644 > --- a/lib/Target/AMDGPU/SIInstructions.td > +++ b/lib/Target/AMDGPU/SIInstructions.td > @@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI < > imm:$attr, > SReg_32:$params))] > >; > > -def SI_KIL : InstSI < > - (outs), > - (ins VReg_32:$src), > - "SI_KIL $src", > - [(int_AMDGPU_kill VReg_32:$src)] > ->; > - > def SI_WQM : InstSI < > (outs), > (ins), > @@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI < > [(int_SI_end_cf SReg_64:$saved)] > >; > > +def SI_KILL : InstSI < > + (outs), > + (ins VReg_32:$src), > + "SI_KIL $src", > + [(int_AMDGPU_kill VReg_32:$src)] > +>; > + > } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 > // Uses = [EXEC], Defs = [EXEC] > > } // end IsCodeGenOnly, isPseudo > > +def : Pat < > + (int_AMDGPU_kilp), > + (SI_KILL (V_MOV_IMM_I32 0xbf800000)) > +>; > + > /* int_SI_vs_load_input */ > def : Pat< > (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, > @@ -1315,11 +1320,6 @@ def : Pat< > >; > > def : Pat < > - (int_AMDGPU_kilp), > - (SI_KIL (V_MOV_IMM_I32 0xbf800000)) > ->; > - > -def : Pat < > (int_AMDGPU_cube VReg_128:$src), > (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 > (IMPLICIT_DEF)), > (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), > diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp > b/lib/Target/AMDGPU/SILowerControlFlow.cpp > index 507cb54..34e5f17 100644 > --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp > +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp > @@ -68,7 +68,10 @@ private: > static char ID; > const TargetInstrInfo *TII; > > - void Skip(MachineInstr &MI, MachineOperand &To); > + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); > + > + void Skip(MachineInstr &From, MachineOperand &To); > + void SkipIfDead(MachineInstr &MI); > > void If(MachineInstr &MI); > void Else(MachineInstr &MI); > @@ -78,6 +81,7 @@ private: > void Loop(MachineInstr &MI); > void EndCf(MachineInstr &MI); > > + void Kill(MachineInstr &MI); > void Branch(MachineInstr &MI); > > public: > @@ -100,23 +104,29 @@ FunctionPass > *llvm::createSILowerControlFlowPass(TargetMachine &tm) { > return new SILowerControlFlowPass(tm); > } > > -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { > +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, > + MachineBasicBlock *To) { > > unsigned NumInstr = 0; > > - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); > - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); > + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); > MBB = *MBB->succ_begin()) { > > for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); > NumInstr < SkipThreshold && I != E; ++I) { > > if (I->isBundle() || !I->isBundled()) > - ++NumInstr; > + if (++NumInstr >= SkipThreshold) > + return true; > } > } > > - if (NumInstr < SkipThreshold) > + return false; > +} > + > +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { > + > + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) > return; > > DebugLoc DL = From.getDebugLoc(); > @@ -125,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, > MachineOperand &To) { > .addReg(AMDGPU::EXEC); > } > > +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { > + > + MachineBasicBlock &MBB = *MI.getParent(); > + DebugLoc DL = MI.getDebugLoc(); > + > + if (!shouldSkip(&MBB, &MBB.getParent()->back())) > + return; > + > + MachineBasicBlock::iterator Insert = &MI; > + ++Insert; > + > + // If the exec mask is non-zero, skip the next two instructions > + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) > + .addImm(3) > + .addReg(AMDGPU::EXEC); > + > + // Exec mask is zero: Export to NULL target... > + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) > + .addImm(0) > + .addImm(0x09) // V_008DFC_SQ_EXP_NULL > + .addImm(0) > + .addImm(1) > + .addImm(1) > + .addReg(AMDGPU::SREG_LIT_0) > + .addReg(AMDGPU::SREG_LIT_0) > + .addReg(AMDGPU::SREG_LIT_0) > + .addReg(AMDGPU::SREG_LIT_0); > + > + // ... and terminate wavefront > + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); > +} > + > void SILowerControlFlowPass::If(MachineInstr &MI) { > > MachineBasicBlock &MBB = *MI.getParent(); > @@ -251,9 +293,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { > assert(0); > } > > +void SILowerControlFlowPass::Kill(MachineInstr &MI) { > + > + MachineBasicBlock &MBB = *MI.getParent(); > + DebugLoc DL = MI.getDebugLoc(); > + > + // Kill is only allowed in pixel shaders > + MachineFunction &MF = *MBB.getParent(); > + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); > + assert(Info->ShaderType == ShaderType::PIXEL); > + > + // Clear this pixel from the exec mask if the operand is negative > + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) > + .addReg(AMDGPU::SREG_LIT_0) > + .addOperand(MI.getOperand(0)); > + > + MI.eraseFromParent(); > +} > + > bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { > > - bool HaveCf = false; > + bool HaveKill = false; > + unsigned Depth = 0; > > for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); > BI != BE; ++BI) { > @@ -267,6 +328,7 @@ bool > SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { > switch (MI.getOpcode()) { > default: break; > case AMDGPU::SI_IF: > + ++Depth; > If(MI); > break; > > @@ -287,14 +349,26 @@ bool > SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { > break; > > case AMDGPU::SI_LOOP: > + ++Depth; > Loop(MI); > break; > > case AMDGPU::SI_END_CF: > - HaveCf = true; > + if (--Depth == 0 && HaveKill) { > + SkipIfDead(MI); > + HaveKill = false; > + } > EndCf(MI); > break; > > + case AMDGPU::SI_KILL: > + if (Depth == 0) > + SkipIfDead(MI); > + else > + HaveKill = true; > + Kill(MI); > + break; > + > case AMDGPU::S_BRANCH: > Branch(MI); > break; > @@ -302,40 +376,5 @@ bool > SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { > } > } > > - // TODO: What is this good for? > - unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; > - if (HaveCf && ShaderType == ShaderType::PIXEL) { > - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); > - BI != BE; ++BI) { > - > - MachineBasicBlock &MBB = *BI; > - if (MBB.succ_empty()) { > - > - MachineInstr &MI = *MBB.getFirstNonPHI(); > - DebugLoc DL = MI.getDebugLoc(); > - > - // If the exec mask is non-zero, skip the next two instructions > - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) > - .addImm(3) > - .addReg(AMDGPU::EXEC); > - > - // Exec mask is zero: Export to NULL target... > - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) > - .addImm(0) > - .addImm(0x09) // V_008DFC_SQ_EXP_NULL > - .addImm(0) > - .addImm(1) > - .addImm(1) > - .addReg(AMDGPU::SREG_LIT_0) > - .addReg(AMDGPU::SREG_LIT_0) > - .addReg(AMDGPU::SREG_LIT_0) > - .addReg(AMDGPU::SREG_LIT_0); > - > - // ... and terminate wavefront > - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); > - } > - } > - } > - > return true; > } > -- > 1.7.9.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev