Jason Ekstrand <ja...@jlekstrand.net> writes: > On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <curroje...@riseup.net> > wrote: > >> This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies >> the code and will allow the optimization to succeed in more cases >> independent of whether the LOAD_PAYLOAD instruction can be found or >> not. >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 79 >> +++++++++++------------------------- >> 1 file changed, 23 insertions(+), 56 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 45c4753..6abf776 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot() >> if (key->nr_color_regions != 1) >> return false; >> >> + /* Requires clamping the color payload in the shader. */ >> + if (key->clamp_fragment_color) >> + return false; >> > > This looks new... Is it actually needed? Could we simply do "if > (tex_inst->saturate) return false;"? In any case, it should probably be > it's own commit. >
It is necessary to preserve the current behavior and skip the optimization when fragment color clamping is required, because right now the MOVs used to saturate the color values are inserted during logical send lowering so this pass would have no other way to find out. I can make the change in a separate commit just before this one if you like. > >> + >> /* Look for a texturing instruction immediately before the final >> FB_WRITE. */ >> bblock_t *block = cfg->blocks[cfg->num_blocks - 1]; >> fs_inst *fb_write = (fs_inst *)block->end(); >> assert(fb_write->eot); >> - assert(fb_write->opcode == FS_OPCODE_FB_WRITE); >> + assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL); >> >> /* There wasn't one; nothing to do. */ >> if (unlikely(fb_write->prev->is_head_sentinel())) >> @@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot() >> * “Response Length of zero is allowed on all SIMD8* and SIMD16* >> sampler >> * messages except sample+killpix, resinfo, sampleinfo, LOD, and >> gather4*” >> */ >> - if (!tex_inst->is_tex() || >> - tex_inst->opcode == SHADER_OPCODE_TXS || >> - tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO || >> - tex_inst->opcode == SHADER_OPCODE_LOD || >> - tex_inst->opcode == SHADER_OPCODE_TG4 || >> - tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) >> + if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL && >> + tex_inst->opcode != FS_OPCODE_TXB_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL && >> + tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL) >> return false; >> > > Same here. This isn't just a rename, it's a switch from listing invalid > cases to explicitly listing valid ones. > The reason is that is_tex() is unaware of the logical messages, I considered changing that but a few places expect instructions marked is_tex to be send-like (e.g. fs_inst::is_send_from_grf, fs_inst::regs_read), so we would have to stop using is_tex() in those places if we change it. > Other than those two comments, I really like this. It's a nice cleanup. > > >> >> - /* If there's no header present, we need to munge the LOAD_PAYLOAD as >> well. >> - * It's very likely to be the previous instruction. >> - */ >> + /* XXX - This shouldn't be necessary. */ >> if (tex_inst->prev->is_head_sentinel()) >> return false; >> >> - fs_inst *load_payload = (fs_inst *) tex_inst->prev; >> - if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) >> - return false; >> - >> assert(!tex_inst->eot); /* We can't get here twice */ >> assert((tex_inst->offset & (0xff << 24)) == 0); >> >> @@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot() >> tex_inst->regs_written = 0; >> fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); >> >> - /* If a header is present, marking the eot is sufficient. Otherwise, >> we need >> - * to create a new LOAD_PAYLOAD command with the same sources and a >> space >> - * saved for the header. Using a new destination register not only >> makes sure >> - * we have enough space, but it will make sure the dead code >> eliminator kills >> - * the instruction that this will replace. >> + /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT >> + * flag and submit a header together with the sampler message as >> required >> + * by the hardware. >> */ >> - if (tex_inst->header_size != 0) { >> - invalidate_live_intervals(); >> - return true; >> - } >> - >> - fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F, >> - load_payload->sources + 1); >> - fs_reg *new_sources = >> - ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); >> - >> - new_sources[0] = fs_reg(); >> - for (int i = 0; i < load_payload->sources; i++) >> - new_sources[i+1] = load_payload->src[i]; >> - >> - /* The LOAD_PAYLOAD helper seems like the obvious choice here. >> However, it >> - * requires a lot of information about the sources to appropriately >> figure >> - * out the number of registers needed to be used. Given this stage in >> our >> - * optimization, we may not have the appropriate GRFs required by >> - * LOAD_PAYLOAD at this point (copy propagation). Therefore, we need to >> - * manually emit the instruction. >> - */ >> - fs_inst *new_load_payload = new(mem_ctx) >> fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, >> - >> load_payload->exec_size, >> - send_header, >> - new_sources, >> - load_payload->sources >> + 1); >> - >> - new_load_payload->regs_written = load_payload->regs_written + 1; >> - new_load_payload->header_size = 1; >> - tex_inst->mlen++; >> - tex_inst->header_size = 1; >> - tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], >> new_load_payload); >> - tex_inst->src[0] = send_header; >> - >> invalidate_live_intervals(); >> return true; >> } >> @@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder >> &bld, fs_inst *inst, opcode op, >> sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); >> >> if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || >> - offset_value.file != BAD_FILE || >> + offset_value.file != BAD_FILE || inst->eot || >> op == SHADER_OPCODE_SAMPLEINFO || >> is_high_sampler(devinfo, sampler)) { >> /* For general texture offsets (no txf workaround), we need a >> header to >> @@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder >> &bld, fs_inst *inst, opcode op, >> * and we have an explicit header, we need to set up the sampler >> * writemask. It's reversed from normal: 1 means "don't write". >> */ >> - if (inst->regs_written != 4 * reg_width) { >> + if (!inst->eot && inst->regs_written != 4 * reg_width) { >> assert((inst->regs_written % reg_width) == 0); >> unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & >> 0xf; >> inst->offset |= mask << 12; >> @@ -5738,6 +5702,10 @@ fs_visitor::optimize() >> pass_num = 0; >> >> OPT(lower_simd_width); >> + >> + /* After SIMD lowering just in case we had to unroll the EOT send. */ >> + OPT(opt_sampler_eot); >> + >> OPT(lower_logical_sends); >> >> if (progress) { >> @@ -5761,7 +5729,6 @@ fs_visitor::optimize() >> } >> >> OPT(opt_redundant_discard_jumps); >> - OPT(opt_sampler_eot); >> >> if (OPT(lower_load_payload)) { >> split_virtual_grfs(); >> -- >> 2.7.3 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev