From: Marek Olšák <marek.ol...@amd.com> It's redundant with the source modifier. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 20 ++++++------ src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 38 +++------------------- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 6 ---- src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 - src/gallium/auxiliary/tgsi/tgsi_aa_point.c | 20 ++++++------ src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 --- src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 22 ++++++++----- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 - src/gallium/auxiliary/tgsi/tgsi_point_sprite.c | 12 +++---- src/gallium/auxiliary/tgsi/tgsi_transform.h | 8 +++-- src/gallium/auxiliary/tgsi/tgsi_util.c | 1 - src/gallium/auxiliary/util/u_pstipple.c | 2 +- src/gallium/auxiliary/vl/vl_bicubic_filter.c | 4 +-- src/gallium/auxiliary/vl/vl_compositor.c | 4 +-- src/gallium/auxiliary/vl/vl_deint_filter.c | 8 ++--- src/gallium/drivers/i915/i915_fpc_optimize.c | 1 - src/gallium/drivers/i915/i915_fpc_translate.c | 11 ------- src/gallium/drivers/ilo/shader/toy_tgsi.c | 6 ---- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 -- src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c | 3 -- src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c | 3 -- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 - src/gallium/drivers/r600/r600_shader.c | 14 -------- src/gallium/drivers/svga/svga_tgsi_insn.c | 27 --------------- src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 25 -------------- src/gallium/include/pipe/p_shader_tokens.h | 2 +- src/gallium/state_trackers/xa/xa_tgsi.c | 4 +-- src/mesa/state_tracker/st_atifs_to_tgsi.c | 18 +++++----- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 ++-- src/mesa/state_tracker/st_tgsi_lower_yuv.c | 3 +- 33 files changed, 82 insertions(+), 202 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index c236caa..57ca12e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -278,21 +278,21 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_XYZ, TGSI_FILE_TEMPORARY, aactx->colorTemp); /* MUL alpha */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, aactx->colorTemp, - TGSI_FILE_TEMPORARY, aactx->texTemp); + TGSI_FILE_TEMPORARY, aactx->texTemp, false); } } /** * TGSI instruction transform callback. * Replace writes to result.color w/ a temp reg. */ static void aa_transform_inst(struct tgsi_transform_context *ctx, diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 33ef8ec..2b96b8a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -206,88 +206,88 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) * t0.x = distance of fragment from center point * t0.y = boolean, is t0.x > 1.0, also misc temp usage * t0.z = temporary for computing 1/(1-k) value * t0.w = final coverage value */ /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY, TGSI_FILE_INPUT, texInput, - TGSI_FILE_INPUT, texInput); + TGSI_FILE_INPUT, texInput, false); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false); #if NORMALIZE /* OPTIONAL normalization of length */ /* RSQ t0.x, t0.x; */ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0); /* RCP t0.x, t0.x; */ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0); #endif /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false); /* KILL_IF -tmp0.yyyy; # if -tmp0.y < 0, KILL */ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, TRUE); /* compute coverage factor = (1-d)/(1-k) */ /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z, TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true); /* RCP t0.z, t0.z; # t0.z = 1 / m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; newInst.Src[0].Register.Index = tmp0; newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false); /* CMP t0.w, -t0.y, tex.w, t0.w; * # if -t0.y < 0 then * t0.w = 1 * else * t0.w = t0.w */ tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1, @@ -311,21 +311,21 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_XYZ, TGSI_FILE_TEMPORARY, aactx->colorTemp); /* MUL result.color.w, colorTemp, tmp0.w; */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, aactx->colorTemp, - TGSI_FILE_TEMPORARY, aactx->tmp0); + TGSI_FILE_TEMPORARY, aactx->tmp0, false); } /** * TGSI transform callback. * Called per instruction. * Replace writes to result.color w/ a temp reg. */ static void aa_transform_inst(struct tgsi_transform_context *ctx, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 7d939e8..91e959f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -361,22 +361,22 @@ exp_emit( /* floor( src0.x ) */ floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]); /* 2 ^ floor( src0.x ) */ emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, floor_x); /* src0.x - floor( src0.x ) */ - emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x); + emit_data->output[TGSI_CHAN_Y] = + lp_build_sub(&bld_base->base, emit_data->args[0] /* src0.x */, floor_x); /* 2 ^ src0.x */ emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */); emit_data->output[TGSI_CHAN_W] = bld_base->base.one; } const struct lp_build_tgsi_action exp_action = { scalar_unary_fetch_args, /* fetch_args */ @@ -387,22 +387,22 @@ const struct lp_build_tgsi_action exp_action = { static void frc_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { LLVMValueRef tmp; tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]); - emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_SUB, emit_data->args[0], tmp); + emit_data->output[emit_data->chan] = + lp_build_sub(&bld_base->base, emit_data->args[0], tmp); } /* TGSI_OPCODE_KILL_IF */ static void kil_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { /* src0.x */ @@ -763,33 +763,20 @@ scs_emit( /* dst.w */ emit_data->output[TGSI_CHAN_W] = bld_base->base.one; } const struct lp_build_tgsi_action scs_action = { scalar_unary_fetch_args, /* fetch_args */ scs_emit /* emit */ }; -/* TGSI_OPCODE_SUB */ -static void -sub_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data->output[emit_data->chan] = - LLVMBuildFSub(bld_base->base.gallivm->builder, - emit_data->args[0], - emit_data->args[1], ""); -} - /* TGSI_OPCODE_F2U */ static void f2u_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { emit_data->output[emit_data->chan] = LLVMBuildFPToUI(bld_base->base.gallivm->builder, emit_data->args[0], @@ -949,21 +936,21 @@ xpd_helper( LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d) { LLVMValueRef tmp0, tmp1; tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b); tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d); - return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1); + return lp_build_sub(&bld_base->base, tmp0, tmp1); } static void xpd_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base, emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */, @@ -1345,21 +1332,20 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit; bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit; bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit; bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit; bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit; bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit; bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit; bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit; bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit; bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit; - bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit; bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit; bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit; bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit; bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit; bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit; bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit; bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit; bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit; @@ -2064,33 +2050,20 @@ sne_emit_cpu( static void ssg_emit_cpu( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base, emit_data->args[0]); } -/* TGSI_OPCODE_SUB (CPU Only) */ - -static void -sub_emit_cpu( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base, - emit_data->args[0], - emit_data->args[1]); -} - /* TGSI_OPCODE_TRUNC (CPU Only) */ static void trunc_emit_cpu( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base, emit_data->args[0]); @@ -2617,21 +2590,20 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu; - bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu; bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu; bld_base->rsq_action.emit = recip_sqrt_emit_cpu; bld_base->sqrt_action.emit = sqrt_emit_cpu; bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu; bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu; bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu; bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu; bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index a5e439f..6c177b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -584,26 +584,20 @@ lp_emit_instruction_aos( case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; - case TGSI_OPCODE_SUB: - src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); - src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); - dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); - break; - case TGSI_OPCODE_LRP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_DP2A: diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index af4a6e0..f3e8700 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1528,21 +1528,20 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_MUL] = nir_op_fmul, [TGSI_OPCODE_ADD] = nir_op_fadd, [TGSI_OPCODE_DP3] = 0, [TGSI_OPCODE_DP4] = 0, [TGSI_OPCODE_DST] = 0, [TGSI_OPCODE_MIN] = nir_op_fmin, [TGSI_OPCODE_MAX] = nir_op_fmax, [TGSI_OPCODE_SLT] = nir_op_slt, [TGSI_OPCODE_SGE] = nir_op_sge, [TGSI_OPCODE_MAD] = nir_op_ffma, - [TGSI_OPCODE_SUB] = nir_op_fsub, [TGSI_OPCODE_LRP] = 0, [TGSI_OPCODE_SQRT] = nir_op_fsqrt, [TGSI_OPCODE_DP2A] = 0, [TGSI_OPCODE_FRC] = nir_op_ffract, [TGSI_OPCODE_CLAMP] = 0, [TGSI_OPCODE_FLR] = nir_op_ffloor, [TGSI_OPCODE_ROUND] = nir_op_fround_even, [TGSI_OPCODE_EX2] = nir_op_fexp2, [TGSI_OPCODE_LG2] = nir_op_flog2, [TGSI_OPCODE_POW] = nir_op_fpow, diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c index 9016eff..4b14a2f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c +++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c @@ -141,71 +141,71 @@ aa_prolog(struct tgsi_transform_context *ctx) * Temp reg (t0) usage: * t0.x = distance of fragment from center point * t0.y = boolean, is t0.x > 0.5, also misc temp usage * t0.z = temporary for computing 1/(0.5-k) value * t0.w = final coverage value */ tmp0 = ts->tmp; /* SUB t0.xy, texIn, (0.5, 0,5) */ - tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY, TGSI_FILE_INPUT, texIn, - TGSI_FILE_IMMEDIATE, imm); + TGSI_FILE_IMMEDIATE, imm, true); /* DP2 t0.x, t0.xy, t0.xy; # t0.x = x^2 + y^2 */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0, - TGSI_FILE_TEMPORARY, tmp0); + TGSI_FILE_TEMPORARY, tmp0, false); /* SQRT t0.x, t0.x */ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0); /* compute coverage factor = (0.5-d)/(0.5-k) */ /* SUB t0.w, 0.5, texIn.z; # t0.w = 0.5-k */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z, true); /* SUB t0.y, 0.5, t0.x; # t0.y = 0.5-d */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true); /* DIV t0.w, t0.y, t0.w; # coverage = (0.5-d)/(0.5-k) */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, false); /* If the coverage value is negative, it means the fragment is outside * the point's circular boundary. Kill it. */ /* KILL_IF tmp0.w; # if tmp0.w < 0 KILL */ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, FALSE); /* If the distance is less than the threshold, the coverage/alpha value * will be greater than one. Clamp to one here. */ /* MIN tmp0.w, tmp0.w, 1.0 */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, - TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W); + TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W, false); } /** * TGSI instruction transform callback. */ static void aa_inst(struct tgsi_transform_context *ctx, struct tgsi_full_instruction *inst) { struct aa_transform_context *ts = aa_transform_context(ctx); @@ -242,21 +242,21 @@ aa_epilog(struct tgsi_transform_context *ctx) tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, ts->color_out, TGSI_WRITEMASK_XYZ, TGSI_FILE_TEMPORARY, ts->color_tmp); /* MUL output.color.w colorTmp.w tmp0.w */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, ts->color_out, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, ts->color_tmp, - TGSI_FILE_TEMPORARY, ts->tmp); + TGSI_FILE_TEMPORARY, ts->tmp, false); } /** * TGSI utility to transform a fragment shader to support antialiasing point. * * This utility accepts two inputs: *\param tokens_in -- the original token string of the shader *\param aa_point_coord_index -- the semantic index of the generic register * that contains the point sprite texture coord * diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 2f89de6..915cd10 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -5201,24 +5201,20 @@ exec_instruction( break; case TGSI_OPCODE_SGE: exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_SUB: - exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_LRP: exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SQRT: exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2A: exec_dp2a(mach, inst); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 9b2431f..a339ec2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -47,21 +47,21 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL }, { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD }, { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 }, { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 }, { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "", 17 }, /* removed */ { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index b0a28f2..bf6cbb3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -461,26 +461,27 @@ transform_frc(struct tgsi_transform_context *tctx, new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); tctx->emit_instruction(tctx, &new_inst); /* SUB dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } /* POW - Power * dst.x = src0.x^{src1.x} * dst.y = src0.x^{src1.x} * dst.z = src0.x^{src1.x} * dst.w = src0.x^{src1.x} * @@ -682,26 +683,27 @@ transform_exp(struct tgsi_transform_context *tctx, new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* SUB tmpA.x, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.x, src.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); @@ -715,26 +717,27 @@ transform_exp(struct tgsi_transform_context *tctx, new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { /* SUB dst.y, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { /* EX2 dst.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; @@ -815,26 +818,27 @@ transform_log(struct tgsi_transform_context *tctx, new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); tctx->emit_instruction(tctx, &new_inst); /* SUB tmpA.y, tmpA.x, tmpA.y */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.y, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); tctx->emit_instruction(tctx, &new_inst); @@ -1065,29 +1069,28 @@ transform_flr_ceil(struct tgsi_transform_context *tctx, reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); if (opcode == TGSI_OPCODE_CEIL) new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; tctx->emit_instruction(tctx, &new_inst); /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - if (opcode == TGSI_OPCODE_CEIL) - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - else - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + if (opcode == TGSI_OPCODE_FLR) + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } /* TRUNC - truncate off fractional part * dst.x = trunc(src.x) * dst.y = trunc(src.y) * dst.z = trunc(src.z) * dst.w = trunc(src.w) * @@ -1117,28 +1120,29 @@ transform_trunc(struct tgsi_transform_context *tctx, new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); new_inst.Src[0].Register.Absolute = true; new_inst.Src[0].Register.Negate = false; tctx->emit_instruction(tctx, &new_inst); new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); new_inst.Src[0].Register.Absolute = true; new_inst.Src[0].Register.Negate = false; reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); new_inst.Src[0].Register.Absolute = true; new_inst.Src[0].Register.Negate = false; diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index d78dd66..13c443f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -66,21 +66,20 @@ OP11(LOG) OP12(MUL) OP12(ADD) OP12(DP3) OP12(DP4) OP12(DST) OP12(MIN) OP12(MAX) OP12(SLT) OP12(SGE) OP13(MAD) -OP12(SUB) OP13(LRP) OP11(SQRT) OP13(DP2A) OP11(FRC) OP13(CLAMP) OP11(FLR) OP11(ROUND) OP11(EX2) OP11(LG2) OP12(POW) diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c index 713bd60..f60a17c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c +++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c @@ -288,21 +288,21 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, /** * Set up the point scale vector * scale = pointSize * pos.w * inverseViewport */ /* MUL point_scale.x, point_size.x, point_pos.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W, false); /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MUL; inst.Instruction.NumDstRegs = 1; tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_XY); inst.Instruction.NumSrcRegs = 2; tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_SWIZZLE_X, @@ -316,29 +316,29 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, * Set up the point coord threshold distance * k = 0.5 - 1 / pointsize */ if (ts->aa_point) { tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, TGSI_FILE_TEMPORARY, ts->point_size_tmp, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, false); - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Z, TGSI_FILE_TEMPORARY, ts->point_coord_k, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, true); } for (i = 0; i < 4; i++) { unsigned point_dir_swz = ts->point_dir_swz[i]; unsigned point_coord_swz = ts->point_coord_swz[i]; /* All outputs need to be emitted for each vertex */ for (j = 0; j < ts->num_orig_out; j++) { if (ts->out_tmp_index[j] != INVALID_INDEX) { @@ -435,27 +435,27 @@ psprite_inst(struct tgsi_transform_context *ctx, */ inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY; inst->Dst[0].Register.Index = ts->point_size_tmp; ctx->emit_instruction(ctx, inst); /* Clamp the point size */ /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y); + TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, false); /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W); + TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W, false); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Dst[0].Register.Index == ts->point_pos_out) { /** * Replace point pos output reg with tmp reg. */ inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY; inst->Dst[0].Register.Index = ts->point_pos_tmp; ctx->emit_instruction(ctx, inst); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index c21ff95..7ea8206 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -274,35 +274,37 @@ tgsi_transform_op1_inst(struct tgsi_transform_context *ctx, static inline void tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, unsigned opcode, unsigned dst_file, unsigned dst_index, unsigned dst_writemask, unsigned src0_file, unsigned src0_index, unsigned src1_file, - unsigned src1_index) + unsigned src1_index, + bool src1_negate) { struct tgsi_full_instruction inst; inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.Dst[0].Register.File = dst_file, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 2; inst.Src[0].Register.File = src0_file; inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; inst.Src[1].Register.Index = src1_index; + inst.Src[1].Register.Negate = src1_negate; ctx->emit_instruction(ctx, &inst); } static inline void tgsi_transform_op3_inst(struct tgsi_transform_context *ctx, unsigned opcode, unsigned dst_file, unsigned dst_index, @@ -381,35 +383,37 @@ static inline void tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, unsigned opcode, unsigned dst_file, unsigned dst_index, unsigned dst_writemask, unsigned src0_file, unsigned src0_index, unsigned src0_swizzle, unsigned src1_file, unsigned src1_index, - unsigned src1_swizzle) + unsigned src1_swizzle, + bool src1_negate) { struct tgsi_full_instruction inst; inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.Dst[0].Register.File = dst_file, inst.Dst[0].Register.Index = dst_index; inst.Dst[0].Register.WriteMask = dst_writemask; inst.Instruction.NumSrcRegs = 2; inst.Src[0].Register.File = src0_file; inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; inst.Src[1].Register.Index = src1_index; + inst.Src[1].Register.Negate = src1_negate; switch (dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; inst.Src[1].Register.SwizzleX = src1_swizzle; break; case TGSI_WRITEMASK_Y: inst.Src[0].Register.SwizzleY = src0_swizzle; inst.Src[1].Register.SwizzleY = src1_swizzle; break; case TGSI_WRITEMASK_Z: diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 4f02829..4a6a2ae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -185,21 +185,20 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_ARR: case TGSI_OPCODE_RCP: case TGSI_OPCODE_MUL: case TGSI_OPCODE_DIV: case TGSI_OPCODE_ADD: case TGSI_OPCODE_MIN: case TGSI_OPCODE_MAX: case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_MAD: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: case TGSI_OPCODE_FMA: case TGSI_OPCODE_FRC: case TGSI_OPCODE_CEIL: case TGSI_OPCODE_CLAMP: case TGSI_OPCODE_FLR: case TGSI_OPCODE_ROUND: case TGSI_OPCODE_POW: case TGSI_OPCODE_COS: case TGSI_OPCODE_SIN: diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index f6ea535..ae4cfa1 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -337,21 +337,21 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) * texcoords. Darn. */ /* XXX invert wincoord if origin isn't lower-left... */ /* MUL texTemp, INPUT[wincoord], 1/32; */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, texTemp, TGSI_WRITEMASK_XYZW, pctx->wincoordFile, wincoordInput, - TGSI_FILE_IMMEDIATE, pctx->numImmed); + TGSI_FILE_IMMEDIATE, pctx->numImmed, false); /* TEX texTemp, texTemp, sampler, 2D; */ tgsi_transform_tex_inst(ctx, TGSI_FILE_TEMPORARY, texTemp, TGSI_FILE_TEMPORARY, texTemp, TGSI_TEXTURE_2D, sampIdx); /* KILL_IF -texTemp; # if -texTemp < 0, kill fragment */ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, texTemp, diff --git a/src/gallium/auxiliary/vl/vl_bicubic_filter.c b/src/gallium/auxiliary/vl/vl_bicubic_filter.c index 0364d43..774702c 100644 --- a/src/gallium/auxiliary/vl/vl_bicubic_filter.c +++ b/src/gallium/auxiliary/vl/vl_bicubic_filter.c @@ -179,22 +179,22 @@ create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width, t = ureg_DECL_temporary(shader); half_pixel = ureg_DECL_constant(shader, 0); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * temp = (i_vtex - (0.5/dst_size)) * i_size) * t = frac(temp) * vtex = floor(i_vtex)/i_size */ - ureg_SUB(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY), - i_vtex, half_pixel); + ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY), + i_vtex, ureg_negate(half_pixel)); ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height)); ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY), ureg_src(t_array[22])); ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), ureg_src(t_array[22])); ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), ureg_src(t_array[22]), ureg_imm2f(shader, video_width, video_height)); ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 03a0a64..e22e389 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -144,22 +144,22 @@ create_frag_shader_weave(struct ureg_program *shader, struct ureg_dst fragment) t_tc[i] = ureg_DECL_temporary(shader); t_texel[i] = ureg_DECL_temporary(shader); } /* calculate the texture offsets * t_tc.x = i_tc.x * t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2 */ for (i = 0; i < 2; ++i) { ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); - ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), - i_tc[i], ureg_imm1f(shader, 0.5f)); + ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), + i_tc[i], ureg_imm1f(shader, -0.5f)); ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i])); ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), ureg_imm1f(shader, i ? 1.0f : 0.0f)); ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y), ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z), ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W)); } diff --git a/src/gallium/auxiliary/vl/vl_deint_filter.c b/src/gallium/auxiliary/vl/vl_deint_filter.c index 3ca3b49..2eec5cb 100644 --- a/src/gallium/auxiliary/vl/vl_deint_filter.c +++ b/src/gallium/auxiliary/vl/vl_deint_filter.c @@ -166,35 +166,35 @@ create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field, ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0)); ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0)); if (field == 0) { /* interpolating top field -> current field is a bottom field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } else { /* interpolating bottom field -> current field is a top field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } // absolute maximum of differences ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)), ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y)); if (field == 0) { /* weave with prev top field */ ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev); /* get linear interpolation from current bottom field */ diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 7c3b9a9..5f2a876 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -108,21 +108,20 @@ static const struct { [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 }, [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 }, [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 }, [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, - [ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 }, [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 }, }; static boolean op_has_dst(unsigned opcode) { return (op_table[opcode].num_dst > 0); diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 80caf31..241c92d 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -1015,31 +1015,20 @@ i915_translate_instruction(struct i915_fp_compile *p, src0, 0); i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_ALL, 0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0); break; - case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->Src[0], fs); - src1 = src_vector(p, &inst->Src[1], fs); - - i915_emit_arith(p, - A0_ADD, - get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, - src0, negate(src1, 1, 1, 1, 1), 0); - break; - case TGSI_OPCODE_TEX: emit_tex(p, inst, T0_TEXLD, fs); break; case TGSI_OPCODE_TRUNC: emit_simple_arith(p, inst, A0_TRC, 1, fs); break; case TGSI_OPCODE_TXB: emit_tex(p, inst, T0_TEXLDB, fs); diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index a88f189..4d813f0 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -43,21 +43,20 @@ static const struct { [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 }, [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 }, [TGSI_OPCODE_MUL] = { GEN6_OPCODE_MUL, 1, 2 }, [TGSI_OPCODE_ADD] = { GEN6_OPCODE_ADD, 1, 2 }, [TGSI_OPCODE_DP3] = { GEN6_OPCODE_DP3, 1, 2 }, [TGSI_OPCODE_DP4] = { GEN6_OPCODE_DP4, 1, 2 }, [TGSI_OPCODE_MIN] = { GEN6_OPCODE_SEL, 1, 2 }, [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 }, /* a later pass will move src[2] to accumulator */ [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 }, - [TGSI_OPCODE_SUB] = { GEN6_OPCODE_ADD, 1, 2 }, [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 }, [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 }, [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 }, [TGSI_OPCODE_ROUND] = { GEN6_OPCODE_RNDE, 1, 1 }, [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 }, [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 }, [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 }, [TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 }, [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 }, [TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 }, @@ -137,23 +136,20 @@ aos_simple(struct toy_compiler *tc, case TGSI_OPCODE_MIN: case TGSI_OPCODE_IMIN: case TGSI_OPCODE_UMIN: cond_modifier = GEN6_COND_L; break; case TGSI_OPCODE_MAX: case TGSI_OPCODE_IMAX: case TGSI_OPCODE_UMAX: cond_modifier = GEN6_COND_GE; break; - case TGSI_OPCODE_SUB: - src[1] = tsrc_negate(src[1]); - break; case TGSI_OPCODE_IABS: src[0] = tsrc_absolute(src[0]); break; case TGSI_OPCODE_IF: cond_modifier = GEN6_COND_NZ; num_src = 2; assert(src[0].type == TOY_TYPE_F); src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); src[1] = tsrc_imm_f(0.0f); break; @@ -769,21 +765,20 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_MUL] = aos_simple, [TGSI_OPCODE_ADD] = aos_simple, [TGSI_OPCODE_DP3] = aos_simple, [TGSI_OPCODE_DP4] = aos_simple, [TGSI_OPCODE_DST] = aos_DST, [TGSI_OPCODE_MIN] = aos_simple, [TGSI_OPCODE_MAX] = aos_simple, [TGSI_OPCODE_SLT] = aos_set_on_cond, [TGSI_OPCODE_SGE] = aos_set_on_cond, [TGSI_OPCODE_MAD] = aos_simple, - [TGSI_OPCODE_SUB] = aos_simple, [TGSI_OPCODE_LRP] = aos_LRP, [TGSI_OPCODE_SQRT] = aos_simple, [TGSI_OPCODE_DP2A] = aos_DP2A, [TGSI_OPCODE_FRC] = aos_simple, [TGSI_OPCODE_CLAMP] = aos_CLAMP, [TGSI_OPCODE_FLR] = aos_simple, [TGSI_OPCODE_ROUND] = aos_simple, [TGSI_OPCODE_EX2] = aos_simple, [TGSI_OPCODE_LG2] = aos_simple, [TGSI_OPCODE_POW] = aos_simple, @@ -1311,21 +1306,20 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_MUL] = soa_per_channel, [TGSI_OPCODE_ADD] = soa_per_channel, [TGSI_OPCODE_DP3] = soa_dot_product, [TGSI_OPCODE_DP4] = soa_dot_product, [TGSI_OPCODE_DST] = soa_DST, [TGSI_OPCODE_MIN] = soa_per_channel, [TGSI_OPCODE_MAX] = soa_per_channel, [TGSI_OPCODE_SLT] = soa_per_channel, [TGSI_OPCODE_SGE] = soa_per_channel, [TGSI_OPCODE_MAD] = soa_per_channel, - [TGSI_OPCODE_SUB] = soa_per_channel, [TGSI_OPCODE_LRP] = soa_per_channel, [TGSI_OPCODE_SQRT] = soa_scalar_replicate, [TGSI_OPCODE_DP2A] = soa_dot_product, [TGSI_OPCODE_FRC] = soa_per_channel, [TGSI_OPCODE_CLAMP] = soa_per_channel, [TGSI_OPCODE_FLR] = soa_per_channel, [TGSI_OPCODE_ROUND] = soa_per_channel, [TGSI_OPCODE_EX2] = soa_scalar_replicate, [TGSI_OPCODE_LG2] = soa_scalar_replicate, [TGSI_OPCODE_POW] = soa_scalar_replicate, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index b919098..86348e7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -716,21 +716,20 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(MUL, MUL); NV50_IR_OPCODE_CASE(ADD, ADD); NV50_IR_OPCODE_CASE(MIN, MIN); NV50_IR_OPCODE_CASE(MAX, MAX); NV50_IR_OPCODE_CASE(SLT, SET); NV50_IR_OPCODE_CASE(SGE, SET); NV50_IR_OPCODE_CASE(MAD, MAD); NV50_IR_OPCODE_CASE(FMA, FMA); - NV50_IR_OPCODE_CASE(SUB, SUB); NV50_IR_OPCODE_CASE(FLR, FLOOR); NV50_IR_OPCODE_CASE(ROUND, CVT); NV50_IR_OPCODE_CASE(EX2, EX2); NV50_IR_OPCODE_CASE(LG2, LG2); NV50_IR_OPCODE_CASE(POW, POW); NV50_IR_OPCODE_CASE(COS, COS); NV50_IR_OPCODE_CASE(DDX, DFDX); NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); @@ -2981,21 +2980,20 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_MOD: case TGSI_OPCODE_UMOD: case TGSI_OPCODE_MUL: case TGSI_OPCODE_UMUL: case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: case TGSI_OPCODE_OR: case TGSI_OPCODE_SHL: case TGSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_XOR: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); geni = mkOp2(op, dstTy, dst0[c], src0, src1); geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); } break; case TGSI_OPCODE_MAD: case TGSI_OPCODE_UMAD: diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c index d031c68..4924d21 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c @@ -743,23 +743,20 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc, insn.cc_test = NVFX_COND_GT; nvfx_fp_emit(fpc, insn); if(!sat) { insn = arith(0, MOV, dst, mask, minones, none, none); insn.cc_test = NVFX_COND_LT; nvfx_fp_emit(fpc, insn); } break; } - case TGSI_OPCODE_SUB: - nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none)); - break; case TGSI_OPCODE_TEX: nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_TRUNC: tmp = nvfx_src(temp(fpc)); insn = arith(0, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; nvfx_fp_emit(fpc, insn); nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none)); diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c index a802c43..baea701 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c @@ -665,23 +665,20 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc, break; case TGSI_OPCODE_SLT: nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SNE: nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SSG: nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none)); break; - case TGSI_OPCODE_SUB: - nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); - break; case TGSI_OPCODE_TRUNC: tmp = nvfx_src(temp(vpc)); insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; nvfx_vp_emit(vpc, insn); nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 9d1e59f..59dfa05 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -43,21 +43,20 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_MUL: return RC_OPCODE_MUL; case TGSI_OPCODE_ADD: return RC_OPCODE_ADD; case TGSI_OPCODE_DP3: return RC_OPCODE_DP3; case TGSI_OPCODE_DP4: return RC_OPCODE_DP4; case TGSI_OPCODE_DST: return RC_OPCODE_DST; case TGSI_OPCODE_MIN: return RC_OPCODE_MIN; case TGSI_OPCODE_MAX: return RC_OPCODE_MAX; case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; - case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; case TGSI_OPCODE_POW: return RC_OPCODE_POW; case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c2996aa..ebe2744 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3797,23 +3797,20 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); } } else { r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); } /* handle some special cases */ if (i == 1 || i == 3) { switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; case TGSI_OPCODE_DABS: r600_bytecode_src_set_abs(&alu.src[0]); break; default: break; } } if (i == lasti) { alu.last = 1; } @@ -3924,28 +3921,20 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) alu.op = ctx->inst_info->op; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } } else { r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } - /* handle some special cases */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; - default: - break; - } if (i == lasti || trans_only) { alu.last = 1; } r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } if (use_tmp) { /* move result from temp to dst */ @@ -8999,21 +8988,20 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, @@ -9198,21 +9186,20 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, @@ -9421,21 +9408,20 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 47a0afc..0efd72d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -1396,44 +1396,20 @@ emit_ssg(struct svga_shader_emitter *emit, zero )) return FALSE; /* ADD DST, TMP0, TMP1 */ return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), src( temp1 ) ); } /** - * Translate/emit TGSI SUB instruction as: - * ADD DST, SRC0, negate(SRC1) - */ -static boolean -emit_sub(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - - src1 = negate(src1); - - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src0, src1 )) - return FALSE; - - return TRUE; -} - - -/** * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). */ static boolean emit_kill_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { const struct tgsi_full_src_register *reg = &insn->Src[0]; struct src_register src0, srcIn; const boolean special = (reg->Register.Absolute || reg->Register.Negate || @@ -2982,23 +2958,20 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SGE: return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); case TGSI_OPCODE_SLT: return emit_select_op( emit, PIPE_FUNC_LESS, insn ); case TGSI_OPCODE_SLE: return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); - case TGSI_OPCODE_SUB: - return emit_sub( emit, insn ); - case TGSI_OPCODE_POW: return emit_pow( emit, insn ); case TGSI_OPCODE_EX2: return emit_ex2( emit, insn ); case TGSI_OPCODE_EXP: return emit_exp( emit, insn ); case TGSI_OPCODE_LOG: diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index e7cfb40..3131444 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -4708,43 +4708,20 @@ emit_issg(struct svga_shader_emitter_v10 *emit, emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], &tmp1_src, &neg_tmp2, FALSE); free_temp_indexes(emit); return TRUE; } /** - * Emit code for TGSI_OPCODE_SUB instruction. - */ -static boolean -emit_sub(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst = SUB(s0, s1): - * dst = s0 - s1 - * Translates into: - * ADD dst, s0, neg(s1) - */ - struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); - - /* ADD dst, s0, neg(s1) */ - emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], - &inst->Src[0], &neg_src1, - inst->Instruction.Saturate); - - return TRUE; -} - - -/** * Emit a comparison instruction. The dest register will get * 0 or ~0 values depending on the outcome of comparing src0 to src1. */ static void emit_comparison(struct svga_shader_emitter_v10 *emit, SVGA3dCmpFunc func, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src0, const struct tgsi_full_src_register *src1) { @@ -5794,22 +5771,20 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_SLE: return emit_sle(emit, inst); case TGSI_OPCODE_SLT: return emit_slt(emit, inst); case TGSI_OPCODE_SNE: return emit_sne(emit, inst); case TGSI_OPCODE_SSG: return emit_ssg(emit, inst); case TGSI_OPCODE_ISSG: return emit_issg(emit, inst); - case TGSI_OPCODE_SUB: - return emit_sub(emit, inst); case TGSI_OPCODE_TEX: return emit_tex(emit, inst); case TGSI_OPCODE_TXP: return emit_txp(emit, inst); case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL: return emit_txl_txb(emit, inst); case TGSI_OPCODE_TXD: return emit_txd(emit, inst); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 3538090..3384035 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -338,21 +338,21 @@ struct tgsi_property_data { #define TGSI_OPCODE_MUL 7 #define TGSI_OPCODE_ADD 8 #define TGSI_OPCODE_DP3 9 #define TGSI_OPCODE_DP4 10 #define TGSI_OPCODE_DST 11 #define TGSI_OPCODE_MIN 12 #define TGSI_OPCODE_MAX 13 #define TGSI_OPCODE_SLT 14 #define TGSI_OPCODE_SGE 15 #define TGSI_OPCODE_MAD 16 -#define TGSI_OPCODE_SUB 17 +/* gap */ #define TGSI_OPCODE_LRP 18 #define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 #define TGSI_OPCODE_DP2A 21 #define TGSI_OPCODE_F2U64 22 #define TGSI_OPCODE_F2I64 23 #define TGSI_OPCODE_FRC 24 #define TGSI_OPCODE_CLAMP 25 #define TGSI_OPCODE_FLR 26 #define TGSI_OPCODE_ROUND 27 diff --git a/src/gallium/state_trackers/xa/xa_tgsi.c b/src/gallium/state_trackers/xa/xa_tgsi.c index f3f665d..344a576 100644 --- a/src/gallium/state_trackers/xa/xa_tgsi.c +++ b/src/gallium/state_trackers/xa/xa_tgsi.c @@ -232,24 +232,24 @@ radial_gradient(struct ureg_program *ureg, ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); ureg_MAD(ureg, temp4, ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp3)); ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4))); ureg_MUL(ureg, temp2, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_src(temp4)); ureg_MUL(ureg, temp0, ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2)); ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1)); - ureg_SUB(ureg, temp2, ureg_src(temp3), ureg_src(temp0)); + ureg_ADD(ureg, temp2, ureg_src(temp3), ureg_negate(ureg_src(temp0))); ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); ureg_RCP(ureg, temp2, ureg_src(temp2)); - ureg_SUB(ureg, temp1, ureg_src(temp2), ureg_src(temp1)); + ureg_ADD(ureg, temp1, ureg_src(temp2), ureg_negate(ureg_src(temp1))); ureg_ADD(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_scalar(coords, TGSI_SWIZZLE_Z)); ureg_RCP(ureg, temp0, ureg_src(temp0)); ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_src(temp0)); ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); ureg_release_temporary(ureg, temp0); ureg_release_temporary(ureg, temp1); ureg_release_temporary(ureg, temp2); diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c index 3aa7f84..b28c55c 100644 --- a/src/mesa/state_tracker/st_atifs_to_tgsi.c +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -59,21 +59,21 @@ struct instruction_desc { unsigned TGSI_opcode; const char *name; unsigned char arg_count; }; static const struct instruction_desc inst_desc[] = { {TGSI_OPCODE_MOV, "MOV", 1}, {TGSI_OPCODE_NOP, "UND", 0}, /* unused */ {TGSI_OPCODE_ADD, "ADD", 2}, {TGSI_OPCODE_MUL, "MUL", 2}, - {TGSI_OPCODE_SUB, "SUB", 2}, + {TGSI_OPCODE_NOP, "SUB", 2}, {TGSI_OPCODE_DP3, "DOT3", 2}, {TGSI_OPCODE_DP4, "DOT4", 2}, {TGSI_OPCODE_MAD, "MAD", 3}, {TGSI_OPCODE_LRP, "LERP", 3}, {TGSI_OPCODE_NOP, "CND", 3}, {TGSI_OPCODE_NOP, "CND0", 3}, {TGSI_OPCODE_NOP, "DOT2_ADD", 3} }; static struct ureg_dst @@ -168,30 +168,30 @@ prepare_argument(struct st_translate *t, const unsigned argId, break; case GL_ALPHA: src = ureg_scalar(src, TGSI_SWIZZLE_W); break; } ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1); if (srcReg->argMod & GL_COMP_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_imm1f(t->ureg, 1.0f); - modsrc[1] = ureg_src(arg); + modsrc[1] = ureg_negate(ureg_src(arg)); - ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_BIAS_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); - modsrc[1] = ureg_imm1f(t->ureg, 0.5f); + modsrc[1] = ureg_imm1f(t->ureg, -0.5f); - ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_2X_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); modsrc[1] = ureg_src(arg); ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_NEGATE_BIT_ATI) { struct ureg_src modsrc[2]; @@ -204,25 +204,27 @@ prepare_argument(struct st_translate *t, const unsigned argId, } /* These instructions need special treatment */ static void emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, struct ureg_dst *dst, struct ureg_src *args, unsigned argcount) { struct ureg_dst tmp[1]; struct ureg_src src[3]; - if (!strcmp(desc->name, "CND")) { + if (!strcmp(desc->name, "SUB")) { + ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1])); + } else if (!strcmp(desc->name, "CND")) { tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */ src[0] = ureg_imm1f(t->ureg, 0.5f); - src[1] = args[2]; - ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2); + src[1] = ureg_negate(args[2]); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2); src[0] = ureg_src(tmp[0]); src[1] = args[0]; src[2] = args[1]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); } else if (!strcmp(desc->name, "CND0")) { src[0] = args[2]; src[1] = args[1]; src[2] = args[0]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); } else if (!strcmp(desc->name, "DOT2_ADD")) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1be1f6c..f738084 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1695,21 +1695,22 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) * driver. */ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + op[1].negate = 1; + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_mul: emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE) assert(!"not reached: should be handled by ir_div_to_mul_rcp"); else emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 1768356..f906fed 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -474,22 +474,20 @@ translate_opcode( unsigned op ) case OPCODE_RCP: return TGSI_OPCODE_RCP; case OPCODE_SCS: return TGSI_OPCODE_SCS; case OPCODE_SGE: return TGSI_OPCODE_SGE; case OPCODE_SIN: return TGSI_OPCODE_SIN; case OPCODE_SLT: return TGSI_OPCODE_SLT; - case OPCODE_SUB: - return TGSI_OPCODE_SUB; case OPCODE_TEX: return TGSI_OPCODE_TEX; case OPCODE_TXB: return TGSI_OPCODE_TXB; case OPCODE_TXP: return TGSI_OPCODE_TXP; case OPCODE_XPD: return TGSI_OPCODE_XPD; case OPCODE_END: return TGSI_OPCODE_END; @@ -559,20 +557,24 @@ compile_instruction( break; case OPCODE_RSQ: ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) ); break; case OPCODE_ABS: ureg_MOV(ureg, dst[0], ureg_abs(src[0])); break; + case OPCODE_SUB: + ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1])); + break; + default: ureg_insn( ureg, translate_opcode( inst->Opcode ), dst, num_dst, src, num_src ); break; } } diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c index e346b97..6acd173 100644 --- a/src/mesa/state_tracker/st_tgsi_lower_yuv.c +++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c @@ -251,27 +251,28 @@ yuv_to_rgb(struct tgsi_transform_context *tctx, /* * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 } * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 } * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 } * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 } */ /* SUB tmpA.xyz, tmpA, imm[3] */ inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.Saturate = 0; inst.Instruction.NumDstRegs = 1; inst.Instruction.NumSrcRegs = 2; reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _)); + inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &inst); /* DP3 dst.x, tmpA, imm[0] */ inst = dp3_instruction(); reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); tctx->emit_instruction(tctx, &inst); /* DP3 dst.y, tmpA, imm[1] */ -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev