On Tue, Sep 18, 2012 at 10:57:22PM +0200, Vincent Lejeune wrote: Reviewed-by: Tom Stellard <thomas.stell...@amd.com>
This patch is OK to commit if there are no regressions with the LLVM compiler. > --- > src/gallium/drivers/radeon/AMDGPUISelLowering.cpp | 2 + > src/gallium/drivers/radeon/AMDGPUISelLowering.h | 2 + > .../drivers/radeon/R600ExpandSpecialInstrs.cpp | 129 > +++++++++++++++++++++ > src/gallium/drivers/radeon/R600ISelLowering.cpp | 88 +++++++++++++- > src/gallium/drivers/radeon/R600ISelLowering.h | 1 + > src/gallium/drivers/radeon/R600Instructions.td | 54 +++++++++ > .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 10 ++ > src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td | 10 ++ > .../drivers/radeon/R600MachineFunctionInfo.cpp | 19 ++- > .../drivers/radeon/R600MachineFunctionInfo.h | 5 + > 10 files changed, 318 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > index 0a70164..72cd099 100644 > --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp > @@ -349,5 +349,7 @@ const char* > AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const > NODE_NAME_CASE(SMIN) > NODE_NAME_CASE(UMIN) > NODE_NAME_CASE(URECIP) > + NODE_NAME_CASE(INTERP) > + NODE_NAME_CASE(INTERP_P0) > } > } > diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h > b/src/gallium/drivers/radeon/AMDGPUISelLowering.h > index 4c100da..0d79786 100644 > --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h > +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h > @@ -121,6 +121,8 @@ enum > SMIN, > UMIN, > URECIP, > + INTERP, > + INTERP_P0, > LAST_AMDGPU_ISD_NUMBER > }; > > diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp > b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp > index 69ab0ff..d6184e5 100644 > --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp > +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp > @@ -15,6 +15,7 @@ > #include "R600Defines.h" > #include "R600InstrInfo.h" > #include "R600RegisterInfo.h" > +#include "R600MachineFunctionInfo.h" > #include "llvm/CodeGen/MachineFunctionPass.h" > #include "llvm/CodeGen/MachineInstrBuilder.h" > #include "llvm/CodeGen/MachineRegisterInfo.h" > @@ -29,6 +30,9 @@ private: > static char ID; > const R600InstrInfo *TII; > > + bool ExpandInputPerspective(MachineInstr& MI); > + bool ExpandInputConstant(MachineInstr& MI); > + > public: > R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), > TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } > @@ -48,6 +52,126 @@ FunctionPass > *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { > return new R600ExpandSpecialInstrsPass(TM); > } > > +bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) > +{ > + const R600RegisterInfo &TRI = TII->getRegisterInfo(); > + if (MI.getOpcode() != AMDGPU::input_perspective) > + return false; > + > + MachineBasicBlock::iterator I = &MI; > + unsigned DstReg = MI.getOperand(0).getReg(); > + R600MachineFunctionInfo *MFI = MI.getParent()->getParent() > + ->getInfo<R600MachineFunctionInfo>(); > + unsigned IJIndexBase; > + > + // In Evergreen ISA doc section 8.3.2 : > + // We need to interpolate XY and ZW in two different instruction groups. > + // An INTERP_* must occupy all 4 slots of an instruction group. > + // Output of INTERP_XY is written in X,Y slots > + // Output of INTERP_ZW is written in Z,W slots > + // > + // Thus interpolation requires the following sequences : > + // > + // AnyGPR.x = INTERP_ZW; (Write Masked Out) > + // AnyGPR.y = INTERP_ZW; (Write Masked Out) > + // DstGPR.z = INTERP_ZW; > + // DstGPR.w = INTERP_ZW; (End of first IG) > + // DstGPR.x = INTERP_XY; > + // DstGPR.y = INTERP_XY; > + // AnyGPR.z = INTERP_XY; (Write Masked Out) > + // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) > + // > + switch (MI.getOperand(1).getImm()) { > + case 0: > + IJIndexBase = MFI->GetIJPerspectiveIndex(); > + break; > + case 1: > + IJIndexBase = MFI->GetIJLinearIndex(); > + break; > + default: > + assert(0 && "Unknow ij index"); > + } > + > + for (unsigned i = 0; i < 8; i++) { > + unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( > + 2 * IJIndexBase + ((i + 1) % 2)); > + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( > + 4 * MI.getOperand(2).getImm()); > + > + unsigned Sel; > + switch (i % 4) { > + case 0:Sel = AMDGPU::sel_x;break; > + case 1:Sel = AMDGPU::sel_y;break; > + case 2:Sel = AMDGPU::sel_z;break; > + case 3:Sel = AMDGPU::sel_w;break; > + default:break; > + } > + > + unsigned Res = TRI.getSubReg(DstReg, Sel); > + > + const MCInstrDesc &Opcode = (i < 4)? > + TII->get(AMDGPU::INTERP_ZW): > + TII->get(AMDGPU::INTERP_XY); > + > + MachineInstr *NewMI = BuildMI(*(MI.getParent()), > + I, MI.getParent()->findDebugLoc(I), > + Opcode, Res) > + .addReg(IJIndex) > + .addReg(ReadReg) > + .addImm(0); > + > + if (!(i> 1 && i < 6)) { > + TII->addFlag(NewMI, 0, MO_FLAG_MASK); > + } > + > + if (i % 4 != 3) > + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); > + } > + > + MI.eraseFromParent(); > + > + return true; > +} > + > +bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) > +{ > + const R600RegisterInfo &TRI = TII->getRegisterInfo(); > + if (MI.getOpcode() != AMDGPU::input_constant) > + return false; > + > + MachineBasicBlock::iterator I = &MI; > + unsigned DstReg = MI.getOperand(0).getReg(); > + > + for (unsigned i = 0; i < 4; i++) { > + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister( > + 4 * MI.getOperand(1).getImm() + i); > + > + unsigned Sel; > + switch (i % 4) { > + case 0:Sel = AMDGPU::sel_x;break; > + case 1:Sel = AMDGPU::sel_y;break; > + case 2:Sel = AMDGPU::sel_z;break; > + case 3:Sel = AMDGPU::sel_w;break; > + default:break; > + } > + > + unsigned Res = TRI.getSubReg(DstReg, Sel); > + > + MachineInstr *NewMI = BuildMI(*(MI.getParent()), > + I, MI.getParent()->findDebugLoc(I), > + TII->get(AMDGPU::INTERP_LOAD_P0), Res) > + .addReg(ReadReg) > + .addImm(0); > + > + if (i % 4 != 3) > + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); > + } > + > + MI.eraseFromParent(); > + > + return true; > +} > + > bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { > > const R600RegisterInfo &TRI = TII->getRegisterInfo(); > @@ -59,6 +183,11 @@ bool > R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { > while (I != MBB.end()) { > MachineInstr &MI = *I; > I = llvm::next(I); > + > + if (ExpandInputPerspective(MI)) > + continue; > + if (ExpandInputConstant(MI)) > + continue; > > bool IsReduction = TII->isReductionOp(MI.getOpcode()); > bool IsVector = TII->isVector(MI); > diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp > b/src/gallium/drivers/radeon/R600ISelLowering.cpp > index 5642ee8..379bd48 100644 > --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp > +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp > @@ -40,6 +40,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : > > setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); > setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); > + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); > > setOperationAction(ISD::ROTL, MVT::i32, Custom); > > @@ -231,6 +232,29 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); > break; > } > + case AMDGPU::input_perspective: > + { > + R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); > + > + // XXX Be more fine about register reservation > + for (unsigned i = 0; i < 4; i ++) { > + unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); > + MFI->ReservedRegs.push_back(ReservedReg); > + } > + > + switch (MI->getOperand(1).getImm()) { > + case 0:// Perspective > + MFI->HasPerspectiveInterpolation = true; > + break; > + case 1:// Linear > + MFI->HasLinearInterpolation = true; > + break; > + default: > + assert(0 && "Unknow ij index"); > + } > + > + return BB; > + } > } > > MI->eraseFromParent(); > @@ -285,7 +309,48 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const > unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); > return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, > VT); > } > - > + case AMDGPUIntrinsic::R600_load_input_perspective: { > + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + SDValue FullVector = DAG.getNode( > + AMDGPUISD::INTERP, > + DL, MVT::v4f32, > + DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , > MVT::i32)); > + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, > + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); > + } > + case AMDGPUIntrinsic::R600_load_input_linear: { > + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + SDValue FullVector = DAG.getNode( > + AMDGPUISD::INTERP, > + DL, MVT::v4f32, > + DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); > + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, > + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); > + } > + case AMDGPUIntrinsic::R600_load_input_constant: { > + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + SDValue FullVector = DAG.getNode( > + AMDGPUISD::INTERP_P0, > + DL, MVT::v4f32, > + DAG.getConstant(slot / 4 , MVT::i32)); > + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, > + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); > + } > + case AMDGPUIntrinsic::R600_load_input_position: { > + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); > + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, > + RegIndex, MVT::f32); > + if ((slot % 4) == 3) { > + return DAG.getNode(ISD::FDIV, > + DL, VT, > + DAG.getConstantFP(1.0f, MVT::f32), > + Reg); > + } else { > + return Reg; > + } > + } > + > case r600_read_ngroups_x: > return LowerImplicitParameter(DAG, VT, DL, 0); > case r600_read_ngroups_y: > @@ -338,9 +403,30 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, > switch (N->getOpcode()) { > default: return; > case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), > DAG)); > + case ISD::INTRINSIC_WO_CHAIN: > + { > + unsigned IntrinsicID = > + cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); > + if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) { > + Results.push_back(LowerInputFace(N, DAG)); > + } else { > + return; > + } > + } > } > } > > +SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) > const > +{ > + unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); > + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot); > + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, > + RegIndex, MVT::f32); > + return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, > + Reg, DAG.getConstantFP(0.0f, MVT::f32), > + DAG.getCondCode(ISD::SETUGT)); > +} > + > SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) > const > { > return DAG.getNode( > diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h > b/src/gallium/drivers/radeon/R600ISelLowering.h > index 49ea272..7c81f5f 100644 > --- a/src/gallium/drivers/radeon/R600ISelLowering.h > +++ b/src/gallium/drivers/radeon/R600ISelLowering.h > @@ -50,6 +50,7 @@ private: > > SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; > SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; > + SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; > SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; > }; > > diff --git a/src/gallium/drivers/radeon/R600Instructions.td > b/src/gallium/drivers/radeon/R600Instructions.td > index 75c6825..92a3e38 100644 > --- a/src/gallium/drivers/radeon/R600Instructions.td > +++ b/src/gallium/drivers/radeon/R600Instructions.td > @@ -233,6 +233,60 @@ def isEGorCayman : Predicate<"Subtarget.device()" > def isR600toCayman : Predicate< > "Subtarget.device()->getGeneration() <= > AMDGPUDeviceInfo::HD6XXX">; > > +//===----------------------------------------------------------------------===// > +// Interpolation Instructions > +//===----------------------------------------------------------------------===// > + > +def INTERP: SDNode<"AMDGPUISD::INTERP", > + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> > + >; > + > +def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", > + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> > + >; > + > +let usesCustomInserter = 1 in { > +def input_perspective : AMDGPUShaderInst < > + (outs R600_Reg128:$dst), > + (ins i32imm:$src0, i32imm:$src1), > + "input_perspective $src0 $src1 : dst", > + [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; > +} // End usesCustomInserter = 1 > + > +def input_constant : AMDGPUShaderInst < > + (outs R600_Reg128:$dst), > + (ins i32imm:$src), > + "input_perspective $src : dst", > + [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; > + > + > + > +def INTERP_XY : InstR600 <0xD6, > + (outs R600_Reg32:$dst), > + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), > + "INTERP_XY dst", > + [], AnyALU> > +{ > + let FlagOperandIdx = 3; > +} > + > +def INTERP_ZW : InstR600 <0xD7, > + (outs R600_Reg32:$dst), > + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), > + "INTERP_ZW dst", > + [], AnyALU> > +{ > + let FlagOperandIdx = 3; > +} > + > +def INTERP_LOAD_P0 : InstR600 <0xE0, > + (outs R600_Reg32:$dst), > + (ins R600_Reg32:$src, i32imm:$flags), > + "INTERP_LOAD_P0 dst", > + [], AnyALU> > +{ > + let FlagOperandIdx = 2; > +} > > let Predicates = [isR600toCayman] in { > > diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > index 98af358..3b62f0a 100644 > --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td > @@ -13,6 +13,16 @@ > > let TargetPrefix = "R600", isTarget = 1 in { > def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], > [IntrNoMem]>; > + def int_R600_load_input_perspective : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_constant : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_linear : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_position : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_face : > + Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; > } > > let TargetPrefix = "r600", isTarget = 1 in { > diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > index 8efa29b..00877ca 100644 > --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td > @@ -13,4 +13,14 @@ > > let TargetPrefix = "R600", isTarget = 1 in { > def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], > [IntrNoMem]>; > + def int_R600_load_input_perspective : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_constant : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_linear : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_position : > + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; > + def int_R600_load_input_face : > + Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>; > } > diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > index 48443fb..a31848e 100644 > --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp > @@ -12,5 +12,22 @@ > using namespace llvm; > > R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) > - : MachineFunctionInfo() > + : MachineFunctionInfo(), > + HasLinearInterpolation(false), > + HasPerspectiveInterpolation(false) > { } > + > +unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const > +{ > + assert(HasPerspectiveInterpolation); > + return 0; > +} > + > +unsigned R600MachineFunctionInfo::GetIJLinearIndex() const > +{ > + assert(HasLinearInterpolation); > + if (HasPerspectiveInterpolation) > + return 1; > + else > + return 0; > +} > diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > index 948e192..68211b2 100644 > --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h > @@ -25,6 +25,11 @@ class R600MachineFunctionInfo : public MachineFunctionInfo > { > public: > R600MachineFunctionInfo(const MachineFunction &MF); > std::vector<unsigned> ReservedRegs; > + bool HasLinearInterpolation; > + bool HasPerspectiveInterpolation; > + > + unsigned GetIJLinearIndex() const; > + unsigned GetIJPerspectiveIndex() const; > > }; > > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev