On Tue, Sep 25, 2012 at 11:45:37PM +0200, Vincent Lejeune wrote: > --- > src/gallium/drivers/radeon/R600ISelLowering.cpp | 90 > +++++++++++++--------- > src/gallium/drivers/radeon/R600ISelLowering.h | 3 + > src/gallium/drivers/radeon/R600Instructions.td | 38 +++++++-- > .../drivers/radeon/radeon_setup_tgsi_llvm.c | 17 +++- > 4 files changed, 104 insertions(+), 44 deletions(-) > > diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp > b/src/gallium/drivers/radeon/R600ISelLowering.cpp > index 379bd48..d52762b 100644 > --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp > +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp > @@ -507,6 +507,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, > SelectionDAG &DAG) const > Op.getOperand(1))); > } > > +bool R600TargetLowering::isZero(SDValue Op) const > +{ > + if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { > + return !(Cst->getZExtValue()); > + } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ > + return CstFP->isExactlyValue(0.0f); > + } else { > + return false; > + } > +} > +
You can simplify this function by using ConstantSDNode::isNullValue(). > SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) > const > { > DebugLoc DL = Op.getDebugLoc(); > @@ -517,7 +528,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, > SelectionDAG &DAG) const > SDValue True = Op.getOperand(2); > SDValue False = Op.getOperand(3); > SDValue CC = Op.getOperand(4); > - ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); > SDValue Temp; > > // LHS and RHS are guaranteed to be the same value type > @@ -560,47 +570,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, > SelectionDAG &DAG) const > if (isHWTrueValue(False) && isHWFalseValue(True)) { > } > > - // XXX Check if we can lower this to a SELECT or if it is supported by a > native > - // operation. (The code below does this but we don't have the Instruction > - // selection patterns to do this yet. > -#if 0 > + // Check if we can lower this to a native operation. > + // CND* instructions requires all operands to have the same type, > + // and RHS to be zero. > + > if (isZero(LHS) || isZero(RHS)) { > SDValue Cond = (isZero(LHS) ? RHS : LHS); > - bool SwapTF = false; > + SDValue Zero = (isZero(LHS) ? LHS : RHS); > + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); > + if (CompareVT != VT) { > + True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); > + False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); > + } > + if (isZero(LHS)) { > + CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); > + } > + > switch (CCOpcode) { > - case ISD::SETOEQ: - case ISD::SETUEQ: > - case ISD::SETEQ: > - SwapTF = true; > - // Fall through > case ISD::SETONE: > case ISD::SETUNE: > case ISD::SETNE: > - // We can lower to select > - if (SwapTF) { > - Temp = True; > - True = False; > - False = Temp; > - } > - // CNDE > - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); > + case ISD::SETULE: > + case ISD::SETULT: > + case ISD::SETOLE: > + case ISD::SETOLT: > + case ISD::SETLE: > + case ISD::SETLT: > + CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); > + Temp = True; > + True = False; > + False = Temp; > + break; > default: > - // Supported by a native operation (CNDGE, CNDGT) > - return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); > + break; > } > + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, > + Cond, Zero, > + True, False, > + DAG.getCondCode(CCOpcode)); > + return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); > } > -#endif > + > > // If we make it this for it means we have no native instructions to handle > // this SELECT_CC, so we must lower it. > SDValue HWTrue, HWFalse; > > - if (VT == MVT::f32) { > - HWTrue = DAG.getConstantFP(1.0f, VT); > - HWFalse = DAG.getConstantFP(0.0f, VT); > - } else if (VT == MVT::i32) { > - HWTrue = DAG.getConstant(-1, VT); > - HWFalse = DAG.getConstant(0, VT); > + if (CompareVT == MVT::f32) { > + HWTrue = DAG.getConstantFP(1.0f, CompareVT); > + HWFalse = DAG.getConstantFP(0.0f, CompareVT); > + } else if (CompareVT == MVT::i32) { > + HWTrue = DAG.getConstant(-1, CompareVT); > + HWFalse = DAG.getConstant(0, CompareVT); > } > else { > assert(!"Unhandled value type in LowerSELECT_CC"); > @@ -608,15 +629,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, > SelectionDAG &DAG) const > > // Lower this unsupported SELECT_CC into a combination of two supported > // SELECT_CC operations. > - SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, > HWFalse, CC); > - > - // Convert floating point condition to i1 > - if (VT == MVT::f32) { > - Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, > - DAG.getNode(ISD::FNEG, DL, VT, Cond)); > - } > + SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, > HWTrue, HWFalse, CC); > > - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); > + return DAG.getNode(ISD::SELECT_CC, DL, VT, > + Cond, HWFalse, > + True, False, > + DAG.getCondCode(ISD::SETNE)); > } > > SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const > diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h > b/src/gallium/drivers/radeon/R600ISelLowering.h > index 7c81f5f..0117ca6 100644 > --- a/src/gallium/drivers/radeon/R600ISelLowering.h > +++ b/src/gallium/drivers/radeon/R600ISelLowering.h > @@ -52,6 +52,9 @@ private: > SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; > SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const; > SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; > + > + bool isZero(SDValue Op) const; > + SDValue reverseCondOp(unsigned CCOpcode) const; This function does not appear to be implemented. > }; > > } // End namespace llvm; > diff --git a/src/gallium/drivers/radeon/R600Instructions.td > b/src/gallium/drivers/radeon/R600Instructions.td > index 92a3e38..ba52721 100644 > --- a/src/gallium/drivers/radeon/R600Instructions.td > +++ b/src/gallium/drivers/radeon/R600Instructions.td > @@ -500,7 +500,25 @@ def SETGE_UINT : R600_2OP < > def CNDE_INT : R600_3OP < > 0x1C, "CNDE_INT", > [(set (i32 R600_Reg32:$dst), > - (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] > + (selectcc (i32 R600_Reg32:$src0), 0, > + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), > + COND_EQ))] > +>; > + > +def CNDGE_INT : R600_3OP < > + 0x1E, "CNDGE_INT", > + [(set (i32 R600_Reg32:$dst), > + (selectcc (i32 R600_Reg32:$src0), 0, > + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), > + COND_GE))] > +>; > + > +def CNDGT_INT : R600_3OP < > + 0x1D, "CNDGT_INT", > + [(set (i32 R600_Reg32:$dst), > + (selectcc (i32 R600_Reg32:$src0), 0, > + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), > + COND_GT))] > >; > > > //===----------------------------------------------------------------------===// > @@ -597,18 +615,26 @@ class MULADD_Common <bits<32> inst> : R600_3OP < > > class CNDE_Common <bits<32> inst> : R600_3OP < > inst, "CNDE", > - [(set (f32 R600_Reg32:$dst), > - (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 > R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] > + [(set R600_Reg32:$dst, > + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, > + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), > + COND_EQ))] > >; > > class CNDGT_Common <bits<32> inst> : R600_3OP < > inst, "CNDGT", > - [] > + [(set R600_Reg32:$dst, > + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, > + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), > + COND_GT))] > >; > - > + > class CNDGE_Common <bits<32> inst> : R600_3OP < > inst, "CNDGE", > - [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, > R600_Reg32:$src2, R600_Reg32:$src1))] > + [(set R600_Reg32:$dst, > + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, > + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), > + COND_GE))] > >; > > class DOT4_Common <bits<32> inst> : R600_REDUCTION < > diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > index 04469e2..bb37154 100644 > --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > @@ -934,6 +934,20 @@ static void emit_u2f( > emit_data->args[0], bld_base->base.elem_type, ""); > } > > +static void emit_cndlt( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMValueRef float_zero = lp_build_const_float( > + bld_base->base.gallivm, 0.0f); > + LLVMValueRef cmp = LLVMBuildFCmp( > + builder, LLVMRealULT, emit_data->args[0], float_zero, ""); > + emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, > + cmp, emit_data->args[1], emit_data->args[2], ""); > +} > + > static void emit_immediate(struct lp_build_tgsi_context * bld_base, > const struct tgsi_full_immediate *imm) > { > @@ -1115,8 +1129,7 @@ void radeon_llvm_context_init(struct > radeon_llvm_context * ctx) > bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; > bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp."; > - bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem; > - bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; > + bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt; > bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos"; > bld_base->op_actions[TGSI_OPCODE_DIV].emit = build_tgsi_intrinsic_nomem; It would be nice if you could also remove the llvm.AMDGPU.cndlt intrinsic and also delete the SI pseudo instruction that uses it. I can test on SI for you. -Tom > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev