On Tue, Aug 13, 2013 at 07:04:56PM +0200, srol...@vmware.com wrote:
> From: Roland Scheidegger <srol...@vmware.com>
>
> Also use ordered comparisons for old cmp instructions. Untested.
This patch looks good to me, but I would like to do a piglit run on radeonsi before you commit. I will try to do this tomorrow. -Tom > --- > src/gallium/drivers/r600/r600_shader.c | 18 ++++--- > .../drivers/radeon/radeon_setup_tgsi_llvm.c | 49 > ++++++++++++++++---- > 2 files changed, 48 insertions(+), 19 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 37298cc..fb766c4 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -5743,11 +5743,10 @@ static struct r600_shader_tgsi_instruction > r600_shader_tgsi_instruction[] = { > {105, 0, ALU_OP0_NOP, tgsi_unsupported}, > {106, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, > - /* gap */ > - {108, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {109, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {110, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {111, 0, ALU_OP0_NOP, tgsi_unsupported}, > + {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, > + {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, > + {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, > + {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, > {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, > /* gap */ > @@ -5936,11 +5935,10 @@ static struct r600_shader_tgsi_instruction > eg_shader_tgsi_instruction[] = { > {105, 0, ALU_OP0_NOP, tgsi_unsupported}, > {106, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, > - /* gap */ > - {108, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {109, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {110, 0, ALU_OP0_NOP, tgsi_unsupported}, > - {111, 0, ALU_OP0_NOP, tgsi_unsupported}, > + {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, > + {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, > + {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, > + {TGSI_OPCODE_FSNE, 0, 
ALU_OP2_SETNE_DX10, tgsi_op2_swap}, > {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, > /* gap */ > diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > index 7a47746..8ff9abd 100644 > --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c > @@ -850,18 +850,16 @@ static void emit_cmp( > LLVMRealPredicate pred; > LLVMValueRef cond; > > - /* XXX I'm not sure whether to do unordered or ordered comparisons, > - * but llvmpipe uses unordered comparisons, so for consistency we use > - * unordered. (The authors of llvmpipe aren't sure about using > - * unordered vs ordered comparisons either. > + /* Use ordered for everything but NE (which is usual for > + * float comparisons) > */ > switch (emit_data->inst->Instruction.Opcode) { > - case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break; > - case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break; > - case TGSI_OPCODE_SLE: pred = LLVMRealULE; break; > - case TGSI_OPCODE_SLT: pred = LLVMRealULT; break; > + case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break; > + case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break; > + case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break; > + case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break; > case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; > - case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break; > + case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break; > default: assert(!"unknown instruction"); pred = 0; break; > } > > @@ -872,6 +870,35 @@ static void emit_cmp( > cond, bld_base->base.one, bld_base->base.zero, ""); > } > > +static void emit_fcmp( > + const struct lp_build_tgsi_action *action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMContextRef context = bld_base->base.gallivm->context; > + LLVMRealPredicate pred; 
> + > + /* Use ordered for everything but NE (which is usual for > + * float comparisons) > + */ > + switch (emit_data->inst->Instruction.Opcode) { > + case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break; > + case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break; > + case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break; > + case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break; > + default: assert(!"unknown instruction"); pred = 0; break; > + } > + > + LLVMValueRef v = LLVMBuildFCmp(builder, pred, > + emit_data->args[0], emit_data->args[1],""); > + > + v = LLVMBuildSExtOrBitCast(builder, v, > + LLVMInt32TypeInContext(context), ""); > + > + emit_data->output[emit_data->chan] = v; > +} > + > static void emit_not( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > @@ -1236,6 +1263,10 @@ void radeon_llvm_context_init(struct > radeon_llvm_context * ctx) > bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = > "llvm.AMDIL.fraction."; > bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; > bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; > + bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp; > + bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp; > + bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; > + bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; > bld_base->op_actions[TGSI_OPCODE_IABS].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; > bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; > -- > 1.7.9.5 > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev