On Sun, Jun 1, 2014 at 12:01 AM, Kenneth Graunke <kenn...@whitecape.org> wrote: > On Friday, April 18, 2014 11:56:51 AM Matt Turner wrote: >> --- >> .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 59 > ++++++++++++++++++---- >> 1 file changed, 49 insertions(+), 10 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp > b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> index cb4ab57..a363c4c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> @@ -46,7 +46,14 @@ >> static bool >> is_nop_mov(const fs_inst *inst) >> { >> - if (inst->opcode == BRW_OPCODE_MOV) { >> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { >> + for (int i = 0; i < inst->sources; i++) { >> + if (!inst->dst.equals(inst->src[i])) { > > I don't think this is what you want. You're not altering dst.reg_offset, > here, so dst.reg_offset will presumably always be 0. > > For dst.equals(inst->src[i]) to be true for every source, then > > dst.reg == src[i].reg for all sources. > dst.reg_offset == src[i].reg_offset for all sources. > > which means src[i].reg_offset == 0 for all sources.
Right, will fix. > In other words, I think this detects LOAD_PAYLOAD operations that splats the > first component of a size > 1 VGRF dst to all components. > > That's definitely not a nop mov. > > I think if you add dst.reg_offset++ on each iteration of the loop, you'll be > okay here. > >> + return false; >> + } >> + } >> + return true; >> + } else if (inst->opcode == BRW_OPCODE_MOV) { >> return inst->dst.equals(inst->src[0]); >> } >> >> @@ -54,9 +61,26 @@ is_nop_mov(const fs_inst *inst) >> } >> >> static bool >> +is_copy_payload(const fs_inst *inst) >> +{ >> + const int reg = inst->src[0].reg; >> + if (inst->src[0].reg_offset != 0) >> + return false; > > This looks okay, for both src[0] == reg_undef and where it's an actual value. > >> + >> + for (int i = 1; i < inst->sources; i++) { >> + if (inst->src[i].reg != reg || >> + inst->src[i].reg_offset != i) { >> + return false; >> + } >> + } >> + return true; >> +} >> + >> +static bool >> is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes) >> { >> - if (inst->opcode != BRW_OPCODE_MOV || >> + if ((inst->opcode != BRW_OPCODE_MOV && >> + inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) || >> inst->is_partial_write() || >> inst->saturate || >> inst->src[0].file != GRF || >> @@ -72,6 +96,12 @@ is_coalesce_candidate(const fs_inst *inst, const int > *virtual_grf_sizes) >> virtual_grf_sizes[inst->dst.reg]) >> return false; >> >> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { >> + if (!is_copy_payload(inst)) { >> + return false; >> + } >> + } >> + >> return true; >> } >> >> @@ -144,10 +174,18 @@ fs_visitor::register_coalesce() >> if (reg_to != inst->dst.reg) >> continue; >> >> - const int offset = inst->src[0].reg_offset; >> - reg_to_offset[offset] = inst->dst.reg_offset; >> - mov[offset] = inst; >> - channels_remaining--; >> + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { >> + for (int i = 0; i < src_size; i++) { > > Isn't src_size always 1 for LOAD_PAYLOAD? Earlier: > > src_size = virtual_grf_sizes[inst->src[0].reg]; No, since inst->src[0].reg is just the number of the virtual register (doesn't include the offset) and virtual_grf_sizes[] contains virtual register sizes. > Presumably you want inst->sources here... I think you touched on something. is_copy_payload would say "true" to a load_payload instruction that copied the first three components of a size 4 vgrf. But here we actually do want the number of sources. Since returning true for this doesn't seem useful, I'll add a check to is_copy_payload to return false if the number of sources doesn't match the size of the vgrf. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev