Piglit quick tests including sqrt pass, no other regressions, tested on radeon 6670. --- Should be slightly more precise than the invsqrt/recip/mul combination used previously, I reckon up to about 2 bits of mantissa, and saves two instructions per sqrt emitted.
It would be good if someone could test this on Cayman since it uses a slightly different codepath. src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 5bf9c00..ee6a416 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - return 0; + return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index db928f3..907547d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22, 0, ALU_OP0_NOP, tgsi_unsupported}, -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev