Jason Ekstrand <ja...@jlekstrand.net> writes: > On Sat, May 28, 2016 at 1:21 PM, Francisco Jerez <curroje...@riseup.net> > wrote: > >> Jason Ekstrand <ja...@jlekstrand.net> writes: >> >> > On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <curroje...@riseup.net> >> > wrote: >> > >> >> This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies >> >> the code and will allow the optimization to succeed in more cases >> >> independent of whether the LOAD_PAYLOAD instruction can be found or >> >> not. >> >> --- >> >> src/mesa/drivers/dri/i965/brw_fs.cpp | 79 >> >> +++++++++++------------------------- >> >> 1 file changed, 23 insertions(+), 56 deletions(-) >> >> >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> >> index 45c4753..6abf776 100644 >> >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> >> @@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot() >> >> if (key->nr_color_regions != 1) >> >> return false; >> >> >> >> + /* Requires clamping the color payload in the shader. */ >> >> + if (key->clamp_fragment_color) >> >> + return false; >> >> >> > >> > This looks new... Is it actually needed? Could we simply do "if >> > (tex_inst->saturate) return false;"? In any case, it should probably be >> > it's own commit. >> > >> >> It is necessary to preserve the current behavior and skip the >> optimization when fragment color clamping is required, because right now >> the MOVs used to saturate the color values are inserted during logical >> send lowering so this pass would have no other way to find out. I can >> make the change in a separate commit just before this one if you like. >> > > Right. No need to make it a separate commit. Just update the comment so > it says something about the MOVs inserted during lowering. > Sure, fixed locally.
> Incidentally, I think Rob Clark recently added some stuff to do that > lowering in NIR... In the future, we might want to use that and delete > more back-end code. For now, an udpated comment will do. :-) > Yeah, it would probably make sense to use that at some point. > >> > >> >> + >> >> /* Look for a texturing instruction immediately before the final >> >> FB_WRITE. */ >> >> bblock_t *block = cfg->blocks[cfg->num_blocks - 1]; >> >> fs_inst *fb_write = (fs_inst *)block->end(); >> >> assert(fb_write->eot); >> >> - assert(fb_write->opcode == FS_OPCODE_FB_WRITE); >> >> + assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL); >> >> >> >> /* There wasn't one; nothing to do. */ >> >> if (unlikely(fb_write->prev->is_head_sentinel())) >> >> @@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot() >> >> * “Response Length of zero is allowed on all SIMD8* and SIMD16* >> >> sampler >> >> * messages except sample+killpix, resinfo, sampleinfo, LOD, and >> >> gather4*” >> >> */ >> >> - if (!tex_inst->is_tex() || >> >> - tex_inst->opcode == SHADER_OPCODE_TXS || >> >> - tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO || >> >> - tex_inst->opcode == SHADER_OPCODE_LOD || >> >> - tex_inst->opcode == SHADER_OPCODE_TG4 || >> >> - tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) >> >> + if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL && >> >> + tex_inst->opcode != FS_OPCODE_TXB_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL && >> >> + tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL) >> >> return false; >> >> >> > >> > Same here. This isn't just a rename, it's a switch from listing invalid >> > cases to explicitly listing valid ones. >> > >> >> The reason is that is_tex() is unaware of the logical messages, I >> considered changing that but a few places expect instructions marked >> is_tex to be send-like (e.g. fs_inst::is_send_from_grf, >> fs_inst::regs_read), so we would have to stop using is_tex() in those >> places if we change it. >> > > Right. Makes sense. Seems fine to me. Feel free to leave this patch > as-is apart from a better comment above. > > Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > Thanks! > >> >> > Other than those two comments, I really like this. It's a nice cleanup. >> > >> > >> >> >> >> - /* If there's no header present, we need to munge the LOAD_PAYLOAD >> as >> >> well. >> >> - * It's very likely to be the previous instruction. >> >> - */ >> >> + /* XXX - This shouldn't be necessary. */ >> >> if (tex_inst->prev->is_head_sentinel()) >> >> return false; >> >> >> >> - fs_inst *load_payload = (fs_inst *) tex_inst->prev; >> >> - if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) >> >> - return false; >> >> - >> >> assert(!tex_inst->eot); /* We can't get here twice */ >> >> assert((tex_inst->offset & (0xff << 24)) == 0); >> >> >> >> @@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot() >> >> tex_inst->regs_written = 0; >> >> fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); >> >> >> >> - /* If a header is present, marking the eot is sufficient. Otherwise, >> >> we need >> >> - * to create a new LOAD_PAYLOAD command with the same sources and a >> >> space >> >> - * saved for the header. Using a new destination register not only >> >> makes sure >> >> - * we have enough space, but it will make sure the dead code >> >> eliminator kills >> >> - * the instruction that this will replace. >> >> + /* Marking EOT is sufficient, lower_logical_sends() will notice the >> EOT >> >> + * flag and submit a header together with the sampler message as >> >> required >> >> + * by the hardware. >> >> */ >> >> - if (tex_inst->header_size != 0) { >> >> - invalidate_live_intervals(); >> >> - return true; >> >> - } >> >> - >> >> - fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F, >> >> - load_payload->sources + 1); >> >> - fs_reg *new_sources = >> >> - ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); >> >> - >> >> - new_sources[0] = fs_reg(); >> >> - for (int i = 0; i < load_payload->sources; i++) >> >> - new_sources[i+1] = load_payload->src[i]; >> >> - >> >> - /* The LOAD_PAYLOAD helper seems like the obvious choice here. >> >> However, it >> >> - * requires a lot of information about the sources to appropriately >> >> figure >> >> - * out the number of registers needed to be used. Given this stage >> in >> >> our >> >> - * optimization, we may not have the appropriate GRFs required by >> >> - * LOAD_PAYLOAD at this point (copy propagation). Therefore, we >> need to >> >> - * manually emit the instruction. >> >> - */ >> >> - fs_inst *new_load_payload = new(mem_ctx) >> >> fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, >> >> - >> >> load_payload->exec_size, >> >> - send_header, >> >> - new_sources, >> >> - >> load_payload->sources >> >> + 1); >> >> - >> >> - new_load_payload->regs_written = load_payload->regs_written + 1; >> >> - new_load_payload->header_size = 1; >> >> - tex_inst->mlen++; >> >> - tex_inst->header_size = 1; >> >> - tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], >> >> new_load_payload); >> >> - tex_inst->src[0] = send_header; >> >> - >> >> invalidate_live_intervals(); >> >> return true; >> >> } >> >> @@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder >> >> &bld, fs_inst *inst, opcode op, >> >> sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); >> >> >> >> if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || >> >> - offset_value.file != BAD_FILE || >> >> + offset_value.file != BAD_FILE || inst->eot || >> >> op == SHADER_OPCODE_SAMPLEINFO || >> >> is_high_sampler(devinfo, sampler)) { >> >> /* For general texture offsets (no txf workaround), we need a >> >> header to >> >> @@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder >> >> &bld, fs_inst *inst, opcode op, >> >> * and we have an explicit header, we need to set up the sampler >> >> * writemask. It's reversed from normal: 1 means "don't write". >> >> */ >> >> - if (inst->regs_written != 4 * reg_width) { >> >> + if (!inst->eot && inst->regs_written != 4 * reg_width) { >> >> assert((inst->regs_written % reg_width) == 0); >> >> unsigned mask = ~((1 << (inst->regs_written / reg_width)) - >> 1) & >> >> 0xf; >> >> inst->offset |= mask << 12; >> >> @@ -5738,6 +5702,10 @@ fs_visitor::optimize() >> >> pass_num = 0; >> >> >> >> OPT(lower_simd_width); >> >> + >> >> + /* After SIMD lowering just in case we had to unroll the EOT send. >> */ >> >> + OPT(opt_sampler_eot); >> >> + >> >> OPT(lower_logical_sends); >> >> >> >> if (progress) { >> >> @@ -5761,7 +5729,6 @@ fs_visitor::optimize() >> >> } >> >> >> >> OPT(opt_redundant_discard_jumps); >> >> - OPT(opt_sampler_eot); >> >> >> >> if (OPT(lower_load_payload)) { >> >> split_virtual_grfs(); >> >> -- >> >> 2.7.3 >> >> >> >> _______________________________________________ >> >> mesa-dev mailing list >> >> mesa-dev@lists.freedesktop.org >> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> >> >>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev