On Tue, Aug 13, 2013 at 07:04:56PM +0200, srol...@vmware.com wrote:
> From: Roland Scheidegger <srol...@vmware.com>
> 
> Also use ordered comparisons for old cmp instructions. Untested.

This patch looks good to me, but I would like to do a piglit run on
radeonsi before you commit.  I will try to do this tomorrow.

-Tom

> ---
>  src/gallium/drivers/r600/r600_shader.c             |   18 ++++---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        |   49 ++++++++++++++++----
>  2 files changed, 48 insertions(+), 19 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 37298cc..fb766c4 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -5743,11 +5743,10 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
>       {105,                   0, ALU_OP0_NOP, tgsi_unsupported},
>       {106,                   0, ALU_OP0_NOP, tgsi_unsupported},
>       {TGSI_OPCODE_NOP,       0, ALU_OP0_NOP, tgsi_unsupported},
> -     /* gap */
> -     {108,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {109,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {110,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
> +     {TGSI_OPCODE_FSEQ,      0, ALU_OP2_SETE_DX10, tgsi_op2},
> +     {TGSI_OPCODE_FSGE,      0, ALU_OP2_SETGE_DX10, tgsi_op2},
> +     {TGSI_OPCODE_FSLT,      0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
> +     {TGSI_OPCODE_FSNE,      0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
>       {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
>       {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
>       /* gap */
> @@ -5936,11 +5935,10 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
>       {105,                   0, ALU_OP0_NOP, tgsi_unsupported},
>       {106,                   0, ALU_OP0_NOP, tgsi_unsupported},
>       {TGSI_OPCODE_NOP,       0, ALU_OP0_NOP, tgsi_unsupported},
> -     /* gap */
> -     {108,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {109,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {110,                   0, ALU_OP0_NOP, tgsi_unsupported},
> -     {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
> +     {TGSI_OPCODE_FSEQ,      0, ALU_OP2_SETE_DX10, tgsi_op2},
> +     {TGSI_OPCODE_FSGE,      0, ALU_OP2_SETGE_DX10, tgsi_op2},
> +     {TGSI_OPCODE_FSLT,      0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
> +     {TGSI_OPCODE_FSNE,      0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
>       {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
>       {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
>       /* gap */
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 7a47746..8ff9abd 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -850,18 +850,16 @@ static void emit_cmp(
>       LLVMRealPredicate pred;
>       LLVMValueRef cond;
>  
> -     /* XXX I'm not sure whether to do unordered or ordered comparisons,
> -      * but llvmpipe uses unordered comparisons, so for consistency we use
> -      * unordered.  (The authors of llvmpipe aren't sure about using
> -      * unordered vs ordered comparisons either.
> +     /* Use ordered for everything but NE (which is usual for
> +      * float comparisons)
>        */
>       switch (emit_data->inst->Instruction.Opcode) {
> -     case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break;
> -     case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break;
> -     case TGSI_OPCODE_SLE: pred = LLVMRealULE; break;
> -     case TGSI_OPCODE_SLT: pred = LLVMRealULT; break;
> +     case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
> +     case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
> +     case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
> +     case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
>       case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
> -     case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break;
> +     case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
>       default: assert(!"unknown instruction"); pred = 0; break;
>       }
>  
> @@ -872,6 +870,35 @@ static void emit_cmp(
>               cond, bld_base->base.one, bld_base->base.zero, "");
>  }
>  
> +static void emit_fcmp(
> +             const struct lp_build_tgsi_action *action,
> +             struct lp_build_tgsi_context * bld_base,
> +             struct lp_build_emit_data * emit_data)
> +{
> +     LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +     LLVMContextRef context = bld_base->base.gallivm->context;
> +     LLVMRealPredicate pred;
> +
> +     /* Use ordered for everything but NE (which is usual for
> +      * float comparisons)
> +      */
> +     switch (emit_data->inst->Instruction.Opcode) {
> +     case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
> +     case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
> +     case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
> +     case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
> +     default: assert(!"unknown instruction"); pred = 0; break;
> +     }
> +
> +     LLVMValueRef v = LLVMBuildFCmp(builder, pred,
> +                     emit_data->args[0], emit_data->args[1],"");
> +
> +     v = LLVMBuildSExtOrBitCast(builder, v,
> +                     LLVMInt32TypeInContext(context), "");
> +
> +     emit_data->output[emit_data->chan] = v;
> +}
> +
>  static void emit_not(
>               const struct lp_build_tgsi_action * action,
>               struct lp_build_tgsi_context * bld_base,
> @@ -1236,6 +1263,10 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>       bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
>       bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
>       bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
> +     bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
> +     bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
> +     bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
> +     bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
>       bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
>       bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
>       bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
> -- 
> 1.7.9.5
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to