From: Neil Roberts <n...@linux.intel.com> Adds an optimisation pass which recognises LD sampler messages whose LOD parameter is either a constant zero or not given, and replaces them with the LD_LZ message type. LD_LZ behaves the same as LD except that the LOD is hardcoded to zero and therefore doesn't need to be included in the message. This can benefit shaders using texelFetch with 3 coordinates: in an LD message the LOD parameter comes before the r coordinate in the payload, so otherwise it can't be optimised out whenever the r coordinate is present.
[mattst88]: Does not affect anything in shader-db. Reviewed-by: Matt Turner <matts...@gmail.com> --- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_disasm.c | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 97 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 ++ .../drivers/dri/i965/brw_schedule_instructions.cpp | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 3 + 7 files changed, 110 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 60b696c..e23f372 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -977,6 +977,7 @@ enum opcode { SHADER_OPCODE_TXD_LOGICAL, SHADER_OPCODE_TXF, SHADER_OPCODE_TXF_LOGICAL, + SHADER_OPCODE_TXF_LZ, SHADER_OPCODE_TXL, SHADER_OPCODE_TXL_LOGICAL, SHADER_OPCODE_TXS, @@ -1636,6 +1637,7 @@ enum brw_message_target { #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18 #define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20 +#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ 26 #define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 1778419..046e1b8 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -550,6 +550,7 @@ static const char *const gen5_sampler_msg_type[] = { [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c", [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", + [GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ] = "ld_lz", [GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w", [GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs", [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms", diff --git 
a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 18760dd..15df298 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -949,6 +949,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_MCS: @@ -2482,6 +2483,100 @@ fs_visitor::opt_zero_samples() return progress; } +static bool +lod_source_is_zero(const fs_inst *send_inst) +{ + int reg_offset = send_inst->exec_size / 8 * 2 + send_inst->header_size; + const fs_reg src = byte_offset(send_inst->src[0], reg_offset * 32); + + /* Look for the last instruction that writes to the source */ + foreach_inst_in_block_reverse_starting_from(const fs_inst, inst, send_inst) { + if (inst->overwrites_reg(src)) { + return (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD && + inst->src[inst->header_size + 2].is_zero()); + } + } + + return false; +} + +/** + * Replace LD sample messages that have a zero LOD with LD_LZ. This + * instruction is available since Gen9. It would help for doing texelFetch + * when passing three coordinates because then the LOD can be skipped. + */ +bool +fs_visitor::opt_ld_lz() +{ + if (devinfo->gen < 9) + return false; + + bool progress = false; + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode != SHADER_OPCODE_TXF) + continue; + + /* If the LOD parameter is not sent or is a constant zero then we can + * change the instruction. + */ + bool lod_included = (inst->mlen - inst->header_size >= + inst->exec_size / 8 * 3); + if (lod_included && !lod_source_is_zero(inst)) + continue; + + inst->opcode = SHADER_OPCODE_TXF_LZ; + + if (lod_included) { + inst->mlen -= inst->exec_size / 8; + + /* If the r coordinate is included then we need a new LOAD_PAYLOAD + * instruction which has it in the right place. 
+ */ + if (inst->mlen - inst->header_size >= inst->exec_size / 8 * 3) { + const fs_builder ibld(this, block, inst); + fs_reg send_header = fs_reg(VGRF, alloc.allocate(inst->mlen), + BRW_REGISTER_TYPE_F); + int n_sources = ((inst->mlen - inst->header_size) * + 8 / inst->exec_size + + inst->header_size); + fs_reg *new_sources = ralloc_array(mem_ctx, fs_reg, n_sources); + + for (int i = 0; i < n_sources; i++) { + int j; + if (i >= inst->header_size + 2) + j = i + 1; + else + j = i; + new_sources[i] = offset(inst->src[0], ibld, j); + } + + /* The LOAD_PAYLOAD helper is not used for the same reasons given + * in fs_visitor::opt_sample_eot. + */ + fs_inst *new_load_payload = + new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, + inst->exec_size, + send_header, + new_sources, + n_sources); + + new_load_payload->regs_written = inst->mlen; + new_load_payload->header_size = inst->header_size; + inst->insert_before(block, new_load_payload); + inst->src[0] = send_header; + } + } + + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + /** * Optimize sample messages which are followed by the final RT write. 
* @@ -4156,6 +4251,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, coordinate_done = true; break; + case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: @@ -5329,6 +5425,7 @@ fs_visitor::optimize() OPT(opt_redundant_discard_jumps); OPT(opt_saturate_propagation); OPT(opt_zero_samples); + OPT(opt_ld_lz); OPT(register_coalesce); OPT(compute_to_mrf); OPT(eliminate_find_live_channel); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index ba6bd3f..66b39dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -227,6 +227,7 @@ public: bool opt_saturate_propagation(); bool opt_cmod_propagation(); bool opt_zero_samples(); + bool opt_ld_lz(); void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg, uint32_t spill_offset, int count); void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8654ca4..0516d28 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -807,6 +807,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; + case SHADER_OPCODE_TXF_LZ: + assert(devinfo->gen >= 9); + msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; + break; case SHADER_OPCODE_TXF_CMS_W: assert(devinfo->gen >= 9); msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; @@ -2115,6 +2119,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 
8d92584..557811c 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -215,6 +215,7 @@ schedule_node::set_latency_gen7(bool is_haswell) case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXL: /* 18 cycles: * mov(8) g115<1>F 0F { align1 WE_normal 1Q }; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 068244b..69f62d9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -213,6 +213,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op) return "txf"; case SHADER_OPCODE_TXF_LOGICAL: return "txf_logical"; + case SHADER_OPCODE_TXF_LZ: + return "txf_lz"; case SHADER_OPCODE_TXL: return "txl"; case SHADER_OPCODE_TXL_LOGICAL: @@ -749,6 +751,7 @@ backend_instruction::is_tex() const opcode == FS_OPCODE_TXB || opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || + opcode == SHADER_OPCODE_TXF_LZ || opcode == SHADER_OPCODE_TXF_CMS || opcode == SHADER_OPCODE_TXF_CMS_W || opcode == SHADER_OPCODE_TXF_UMS || -- 2.7.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev