On Tue, Feb 25, 2014 at 07:36:47PM +0900, Michel Dänzer wrote: > From: Michel Dänzer <michel.daen...@amd.com> > > If the SI_KILL operand is constant, we can either clear the exec mask if > the operand is negative, or do nothing otherwise. > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> > --- > > v2: > * Use just 'SI' as the lit test prefix. > > lib/Target/R600/SIInstructions.td | 4 ++-- > lib/Target/R600/SILowerControlFlow.cpp | 25 ++++++++++++++++++------- > test/CodeGen/R600/llvm.AMDGPU.kill.ll | 10 +++++++--- > 3 files changed, 27 insertions(+), 12 deletions(-) > > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index b45da5c..b501645 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -1333,7 +1333,7 @@ def SI_END_CF : InstSI < > > def SI_KILL : InstSI < > (outs), > - (ins VReg_32:$src), > + (ins SSrc_32:$src),
Shouldn't this be VSrc_32? I think we need to be able to store kill conditions in VGPRs, since the condition is per-thread.

-Tom

> "SI_KILL $src", > [(int_AMDGPU_kill f32:$src)] > >; > @@ -1425,7 +1425,7 @@ def : Pat< > > def : Pat < > (int_AMDGPU_kilp), > - (SI_KILL (V_MOV_B32_e32 0xbf800000)) > + (SI_KILL 0xbf800000) > >; > > /* int_SI_vs_load_input */ > diff --git a/lib/Target/R600/SILowerControlFlow.cpp > b/lib/Target/R600/SILowerControlFlow.cpp > index fa5ee16..5eda3e9 100644 > --- a/lib/Target/R600/SILowerControlFlow.cpp > +++ b/lib/Target/R600/SILowerControlFlow.cpp > @@ -55,6 +55,7 @@ > #include "llvm/CodeGen/MachineFunctionPass.h" > #include "llvm/CodeGen/MachineInstrBuilder.h" > #include "llvm/CodeGen/MachineRegisterInfo.h" > +#include "llvm/IR/Constants.h" > > using namespace llvm; > > @@ -82,7 +83,7 @@ private: > void Loop(MachineInstr &MI); > void EndCf(MachineInstr &MI); > > - void Kill(MachineInstr &MI); > + void Kill(MachineInstr &MI, unsigned Depth); > void Branch(MachineInstr &MI); > > void LoadM0(MachineInstr &MI, MachineInstr *MovRel); > @@ -291,9 +292,10 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { > // If these aren't equal, this is probably an infinite loop. 
> } > > -void SILowerControlFlowPass::Kill(MachineInstr &MI) { > +void SILowerControlFlowPass::Kill(MachineInstr &MI, unsigned Depth) { > MachineBasicBlock &MBB = *MI.getParent(); > DebugLoc DL = MI.getDebugLoc(); > + const MachineOperand &Op = MI.getOperand(0); > > // Kill is only allowed in pixel / geometry shaders > assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == > @@ -301,10 +303,19 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { > MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == > ShaderType::GEOMETRY); > > - // Clear this pixel from the exec mask if the operand is negative > - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) > - .addImm(0) > - .addOperand(MI.getOperand(0)); > + // Clear this thread from the exec mask if the operand is negative > + if ((Op.isImm() || Op.isFPImm())) { > + // Constant operand: Set exec mask to 0 or do nothing > + if (Op.isImm() ? (Op.getImm() & 0x80000000) : > + Op.getFPImm()->isNegative()) { > + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) > + .addImm(0); > + } > + } else { > + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) > + .addImm(0) > + .addOperand(MI.getOperand(0)); > + } > > MI.eraseFromParent(); > } > @@ -478,7 +489,7 @@ bool > SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { > SkipIfDead(MI); > else > HaveKill = true; > - Kill(MI); > + Kill(MI, Depth); > break; > > case AMDGPU::S_BRANCH: > diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll > b/test/CodeGen/R600/llvm.AMDGPU.kill.ll > index bec5cdf..4ab6a8a 100644 > --- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll > +++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll > @@ -1,13 +1,17 @@ > ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck > --check-prefix=SI %s > > -; SI-LABEL: @kill_gs > -; SI: V_CMPX_LE_F32 > +; SI-LABEL: @kill_gs_const > +; SI-NOT: V_CMPX_LE_F32 > +; SI: S_MOV_B64 exec, 0 > > -define void @kill_gs() #0 { > 
+define void @kill_gs_const() #0 { > main_body: > %0 = icmp ule i32 0, 3 > %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00 > call void @llvm.AMDGPU.kill(float %1) > + %2 = icmp ule i32 3, 0 > + %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00 > + call void @llvm.AMDGPU.kill(float %3) > ret void > } > > -- > 1.9.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev