On Sat, May 28, 2016 at 1:21 PM, Francisco Jerez <curroje...@riseup.net> wrote:
> Jason Ekstrand <ja...@jlekstrand.net> writes: > > > On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <curroje...@riseup.net> > > wrote: > > > >> This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies > >> the code and will allow the optimization to succeed in more cases > >> independent of whether the LOAD_PAYLOAD instruction can be found or > >> not. > >> --- > >> src/mesa/drivers/dri/i965/brw_fs.cpp | 79 > >> +++++++++++------------------------- > >> 1 file changed, 23 insertions(+), 56 deletions(-) > >> > >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > >> b/src/mesa/drivers/dri/i965/brw_fs.cpp > >> index 45c4753..6abf776 100644 > >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > >> @@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot() > >> if (key->nr_color_regions != 1) > >> return false; > >> > >> + /* Requires clamping the color payload in the shader. */ > >> + if (key->clamp_fragment_color) > >> + return false; > >> > > > > This looks new... Is it actually needed? Could we simply do "if > > (tex_inst->saturate) return false;"? In any case, it should probably be > > it's own commit. > > > > It is necessary to preserve the current behavior and skip the > optimization when fragment color clamping is required, because right now > the MOVs used to saturate the color values are inserted during logical > send lowering so this pass would have no other way to find out. I can > make the change in a separate commit just before this one if you like. > Right. No need to make it a separate commit. Just update the comment so it says something about the MOVs inserted during lowering. Incidentally, I think Rob Clark recently added some stuff to do that lowering in NIR... In the future, we might want to use that and delete more back-end code. For now, an udpated comment will do. :-) > > > >> + > >> /* Look for a texturing instruction immediately before the final > >> FB_WRITE. */ > >> bblock_t *block = cfg->blocks[cfg->num_blocks - 1]; > >> fs_inst *fb_write = (fs_inst *)block->end(); > >> assert(fb_write->eot); > >> - assert(fb_write->opcode == FS_OPCODE_FB_WRITE); > >> + assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL); > >> > >> /* There wasn't one; nothing to do. */ > >> if (unlikely(fb_write->prev->is_head_sentinel())) > >> @@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot() > >> * “Response Length of zero is allowed on all SIMD8* and SIMD16* > >> sampler > >> * messages except sample+killpix, resinfo, sampleinfo, LOD, and > >> gather4*” > >> */ > >> - if (!tex_inst->is_tex() || > >> - tex_inst->opcode == SHADER_OPCODE_TXS || > >> - tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO || > >> - tex_inst->opcode == SHADER_OPCODE_LOD || > >> - tex_inst->opcode == SHADER_OPCODE_TG4 || > >> - tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) > >> + if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL && > >> + tex_inst->opcode != FS_OPCODE_TXB_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL && > >> + tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL) > >> return false; > >> > > > > Same here. This isn't just a rename, it's a switch from listing invalid > > cases to explicitly listing valid ones. > > > > The reason is that is_tex() is unaware of the logical messages, I > considered changing that but a few places expect instructions marked > is_tex to be send-like (e.g. fs_inst::is_send_from_grf, > fs_inst::regs_read), so we would have to stop using is_tex() in those > places if we change it. > Right. Makes sense. Seems fine to me. Feel free to leave this patch as-is apart from a better comment above. Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > > > Other than those two comments, I really like this. It's a nice cleanup. > > > > > >> > >> - /* If there's no header present, we need to munge the LOAD_PAYLOAD > as > >> well. > >> - * It's very likely to be the previous instruction. > >> - */ > >> + /* XXX - This shouldn't be necessary. */ > >> if (tex_inst->prev->is_head_sentinel()) > >> return false; > >> > >> - fs_inst *load_payload = (fs_inst *) tex_inst->prev; > >> - if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) > >> - return false; > >> - > >> assert(!tex_inst->eot); /* We can't get here twice */ > >> assert((tex_inst->offset & (0xff << 24)) == 0); > >> > >> @@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot() > >> tex_inst->regs_written = 0; > >> fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); > >> > >> - /* If a header is present, marking the eot is sufficient. Otherwise, > >> we need > >> - * to create a new LOAD_PAYLOAD command with the same sources and a > >> space > >> - * saved for the header. Using a new destination register not only > >> makes sure > >> - * we have enough space, but it will make sure the dead code > >> eliminator kills > >> - * the instruction that this will replace. > >> + /* Marking EOT is sufficient, lower_logical_sends() will notice the > EOT > >> + * flag and submit a header together with the sampler message as > >> required > >> + * by the hardware. > >> */ > >> - if (tex_inst->header_size != 0) { > >> - invalidate_live_intervals(); > >> - return true; > >> - } > >> - > >> - fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F, > >> - load_payload->sources + 1); > >> - fs_reg *new_sources = > >> - ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); > >> - > >> - new_sources[0] = fs_reg(); > >> - for (int i = 0; i < load_payload->sources; i++) > >> - new_sources[i+1] = load_payload->src[i]; > >> - > >> - /* The LOAD_PAYLOAD helper seems like the obvious choice here. > >> However, it > >> - * requires a lot of information about the sources to appropriately > >> figure > >> - * out the number of registers needed to be used. Given this stage > in > >> our > >> - * optimization, we may not have the appropriate GRFs required by > >> - * LOAD_PAYLOAD at this point (copy propagation). Therefore, we > need to > >> - * manually emit the instruction. > >> - */ > >> - fs_inst *new_load_payload = new(mem_ctx) > >> fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, > >> - > >> load_payload->exec_size, > >> - send_header, > >> - new_sources, > >> - > load_payload->sources > >> + 1); > >> - > >> - new_load_payload->regs_written = load_payload->regs_written + 1; > >> - new_load_payload->header_size = 1; > >> - tex_inst->mlen++; > >> - tex_inst->header_size = 1; > >> - tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], > >> new_load_payload); > >> - tex_inst->src[0] = send_header; > >> - > >> invalidate_live_intervals(); > >> return true; > >> } > >> @@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder > >> &bld, fs_inst *inst, opcode op, > >> sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); > >> > >> if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || > >> - offset_value.file != BAD_FILE || > >> + offset_value.file != BAD_FILE || inst->eot || > >> op == SHADER_OPCODE_SAMPLEINFO || > >> is_high_sampler(devinfo, sampler)) { > >> /* For general texture offsets (no txf workaround), we need a > >> header to > >> @@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder > >> &bld, fs_inst *inst, opcode op, > >> * and we have an explicit header, we need to set up the sampler > >> * writemask. It's reversed from normal: 1 means "don't write". > >> */ > >> - if (inst->regs_written != 4 * reg_width) { > >> + if (!inst->eot && inst->regs_written != 4 * reg_width) { > >> assert((inst->regs_written % reg_width) == 0); > >> unsigned mask = ~((1 << (inst->regs_written / reg_width)) - > 1) & > >> 0xf; > >> inst->offset |= mask << 12; > >> @@ -5738,6 +5702,10 @@ fs_visitor::optimize() > >> pass_num = 0; > >> > >> OPT(lower_simd_width); > >> + > >> + /* After SIMD lowering just in case we had to unroll the EOT send. > */ > >> + OPT(opt_sampler_eot); > >> + > >> OPT(lower_logical_sends); > >> > >> if (progress) { > >> @@ -5761,7 +5729,6 @@ fs_visitor::optimize() > >> } > >> > >> OPT(opt_redundant_discard_jumps); > >> - OPT(opt_sampler_eot); > >> > >> if (OPT(lower_load_payload)) { > >> split_virtual_grfs(); > >> -- > >> 2.7.3 > >> > >> _______________________________________________ > >> mesa-dev mailing list > >> mesa-dev@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > >> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev