On Friday, April 18, 2014 11:56:51 AM Matt Turner wrote: > --- > .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 59 ++++++++++++++++++---- > 1 file changed, 49 insertions(+), 10 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp > index cb4ab57..a363c4c 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp > @@ -46,7 +46,14 @@ > static bool > is_nop_mov(const fs_inst *inst) > { > - if (inst->opcode == BRW_OPCODE_MOV) { > + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { > + for (int i = 0; i < inst->sources; i++) { > + if (!inst->dst.equals(inst->src[i])) {
I don't think this is what you want. You're not altering dst.reg_offset, here, so dst.reg_offset will presumably always be 0. For dst.equals(inst->src[i]) to be true for every source, then dst.reg == src[i].reg for all sources. dst.reg_offset == src[i].reg_offset for all sources. which means src[i].reg_offset == 0 for all sources. In other words, I think this detects LOAD_PAYLOAD operations that splats the first component of a size > 1 VGRF dst to all components. That's definitely not a nop mov. I think if you add dst.reg_offset++ on each iteration of the loop, you'll be okay here. > + return false; > + } > + } > + return true; > + } else if (inst->opcode == BRW_OPCODE_MOV) { > return inst->dst.equals(inst->src[0]); > } > > @@ -54,9 +61,26 @@ is_nop_mov(const fs_inst *inst) > } > > static bool > +is_copy_payload(const fs_inst *inst) > +{ > + const int reg = inst->src[0].reg; > + if (inst->src[0].reg_offset != 0) > + return false; This looks okay, for both src[0] == reg_undef and where it's an actual value. > + > + for (int i = 1; i < inst->sources; i++) { > + if (inst->src[i].reg != reg || > + inst->src[i].reg_offset != i) { > + return false; > + } > + } > + return true; > +} > + > +static bool > is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes) > { > - if (inst->opcode != BRW_OPCODE_MOV || > + if ((inst->opcode != BRW_OPCODE_MOV && > + inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) || > inst->is_partial_write() || > inst->saturate || > inst->src[0].file != GRF || > @@ -72,6 +96,12 @@ is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes) > virtual_grf_sizes[inst->dst.reg]) > return false; > > + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { > + if (!is_copy_payload(inst)) { > + return false; > + } > + } > + > return true; > } > > @@ -144,10 +174,18 @@ fs_visitor::register_coalesce() > if (reg_to != inst->dst.reg) > continue; > > - const int offset = inst->src[0].reg_offset; > - reg_to_offset[offset] = inst->dst.reg_offset; > - mov[offset] = inst; > - channels_remaining--; > + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { > + for (int i = 0; i < src_size; i++) { Isn't src_size always 1 for LOAD_PAYLOAD? Earlier: src_size = virtual_grf_sizes[inst->src[0].reg]; Presumably you want inst->sources here... > + reg_to_offset[i] = i; > + } > + mov[0] = inst; > + channels_remaining -= inst->sources; > + } else { > + const int offset = inst->src[0].reg_offset; > + reg_to_offset[offset] = inst->dst.reg_offset; > + mov[offset] = inst; > + channels_remaining--; > + } > > if (channels_remaining) > continue; > @@ -169,15 +207,16 @@ fs_visitor::register_coalesce() > continue; > > progress = true; > + bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD; > > for (int i = 0; i < src_size; i++) { > if (mov[i]) { > mov[i]->opcode = BRW_OPCODE_NOP; > mov[i]->conditional_mod = BRW_CONDITIONAL_NONE; > mov[i]->dst = reg_undef; > - mov[i]->src[0] = reg_undef; > - mov[i]->src[1] = reg_undef; > - mov[i]->src[2] = reg_undef; > + for (int j = 0; j < mov[i]->sources; j++) { > + mov[i]->src[j] = reg_undef; > + } > } > } > > @@ -185,7 +224,7 @@ fs_visitor::register_coalesce() > fs_inst *scan_inst = (fs_inst *)node; > > for (int i = 0; i < src_size; i++) { > - if (mov[i]) { > + if (mov[i] || was_load_payload) { > if (scan_inst->dst.file == GRF && > scan_inst->dst.reg == reg_from && > scan_inst->dst.reg_offset == i) { >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev