On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <curroje...@riseup.net> wrote:
> This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies > the code and will allow the optimization to succeed in more cases > independent of whether the LOAD_PAYLOAD instruction can be found or > not. > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 79 > +++++++++++------------------------- > 1 file changed, 23 insertions(+), 56 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 45c4753..6abf776 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot() > if (key->nr_color_regions != 1) > return false; > > + /* Requires clamping the color payload in the shader. */ > + if (key->clamp_fragment_color) > + return false; > This looks new... Is it actually needed? Could we simply do "if (tex_inst->saturate) return false;"? In any case, it should probably be it's own commit. > + > /* Look for a texturing instruction immediately before the final > FB_WRITE. */ > bblock_t *block = cfg->blocks[cfg->num_blocks - 1]; > fs_inst *fb_write = (fs_inst *)block->end(); > assert(fb_write->eot); > - assert(fb_write->opcode == FS_OPCODE_FB_WRITE); > + assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL); > > /* There wasn't one; nothing to do. */ > if (unlikely(fb_write->prev->is_head_sentinel())) > @@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot() > * “Response Length of zero is allowed on all SIMD8* and SIMD16* > sampler > * messages except sample+killpix, resinfo, sampleinfo, LOD, and > gather4*” > */ > - if (!tex_inst->is_tex() || > - tex_inst->opcode == SHADER_OPCODE_TXS || > - tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO || > - tex_inst->opcode == SHADER_OPCODE_LOD || > - tex_inst->opcode == SHADER_OPCODE_TG4 || > - tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) > + if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL && > + tex_inst->opcode != FS_OPCODE_TXB_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL && > + tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL) > return false; > Same here. This isn't just a rename, it's a switch from listing invalid cases to explicitly listing valid ones. Other than those two comments, I really like this. It's a nice cleanup. > > - /* If there's no header present, we need to munge the LOAD_PAYLOAD as > well. > - * It's very likely to be the previous instruction. > - */ > + /* XXX - This shouldn't be necessary. */ > if (tex_inst->prev->is_head_sentinel()) > return false; > > - fs_inst *load_payload = (fs_inst *) tex_inst->prev; > - if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) > - return false; > - > assert(!tex_inst->eot); /* We can't get here twice */ > assert((tex_inst->offset & (0xff << 24)) == 0); > > @@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot() > tex_inst->regs_written = 0; > fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); > > - /* If a header is present, marking the eot is sufficient. Otherwise, > we need > - * to create a new LOAD_PAYLOAD command with the same sources and a > space > - * saved for the header. Using a new destination register not only > makes sure > - * we have enough space, but it will make sure the dead code > eliminator kills > - * the instruction that this will replace. > + /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT > + * flag and submit a header together with the sampler message as > required > + * by the hardware. > */ > - if (tex_inst->header_size != 0) { > - invalidate_live_intervals(); > - return true; > - } > - > - fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F, > - load_payload->sources + 1); > - fs_reg *new_sources = > - ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); > - > - new_sources[0] = fs_reg(); > - for (int i = 0; i < load_payload->sources; i++) > - new_sources[i+1] = load_payload->src[i]; > - > - /* The LOAD_PAYLOAD helper seems like the obvious choice here. > However, it > - * requires a lot of information about the sources to appropriately > figure > - * out the number of registers needed to be used. Given this stage in > our > - * optimization, we may not have the appropriate GRFs required by > - * LOAD_PAYLOAD at this point (copy propagation). Therefore, we need to > - * manually emit the instruction. > - */ > - fs_inst *new_load_payload = new(mem_ctx) > fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, > - > load_payload->exec_size, > - send_header, > - new_sources, > - load_payload->sources > + 1); > - > - new_load_payload->regs_written = load_payload->regs_written + 1; > - new_load_payload->header_size = 1; > - tex_inst->mlen++; > - tex_inst->header_size = 1; > - tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], > new_load_payload); > - tex_inst->src[0] = send_header; > - > invalidate_live_intervals(); > return true; > } > @@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder > &bld, fs_inst *inst, opcode op, > sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); > > if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || > - offset_value.file != BAD_FILE || > + offset_value.file != BAD_FILE || inst->eot || > op == SHADER_OPCODE_SAMPLEINFO || > is_high_sampler(devinfo, sampler)) { > /* For general texture offsets (no txf workaround), we need a > header to > @@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder > &bld, fs_inst *inst, opcode op, > * and we have an explicit header, we need to set up the sampler > * writemask. It's reversed from normal: 1 means "don't write". > */ > - if (inst->regs_written != 4 * reg_width) { > + if (!inst->eot && inst->regs_written != 4 * reg_width) { > assert((inst->regs_written % reg_width) == 0); > unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & > 0xf; > inst->offset |= mask << 12; > @@ -5738,6 +5702,10 @@ fs_visitor::optimize() > pass_num = 0; > > OPT(lower_simd_width); > + > + /* After SIMD lowering just in case we had to unroll the EOT send. */ > + OPT(opt_sampler_eot); > + > OPT(lower_logical_sends); > > if (progress) { > @@ -5761,7 +5729,6 @@ fs_visitor::optimize() > } > > OPT(opt_redundant_discard_jumps); > - OPT(opt_sampler_eot); > > if (OPT(lower_load_payload)) { > split_virtual_grfs(); > -- > 2.7.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev