Patches 7 & 8 Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com>
On Fri, Jan 10, 2014 at 1:41 PM, Matt Turner <matts...@gmail.com> wrote: > On Fri, Jan 10, 2014 at 11:12 AM, Jordan Justen <jljus...@gmail.com> wrote: >> On Thu, Dec 19, 2013 at 1:40 PM, Matt Turner <matts...@gmail.com> wrote: >>> total instructions in shared programs: 1550048 -> 1549880 (-0.01%) >>> instructions in affected programs: 1896 -> 1728 (-8.86%) >>> --- >>> src/mesa/drivers/dri/i965/brw_fs.cpp | 80 >>> ++++++++++++++++++++++++++---------- >>> 1 file changed, 58 insertions(+), 22 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs.cpp >>> index 1a16f4e..39041e3 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >>> @@ -2265,6 +2265,12 @@ fs_visitor::register_coalesce() >>> >>> calculate_live_intervals(); >>> >>> + int src_size = 0; >>> + int channels_remaining = 0; >>> + int reg_from = -1, reg_to = -1; >>> + int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE]; >>> + fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE]; >>> + >>> foreach_list_safe(node, &this->instructions) { >>> fs_inst *inst = (fs_inst *)node; >>> >>> @@ -2276,11 +2282,14 @@ fs_visitor::register_coalesce() >>> inst->src[0].abs || >>> inst->src[0].smear != -1 || >>> inst->dst.file != GRF || >>> - inst->dst.type != inst->src[0].type || >>> - virtual_grf_sizes[inst->src[0].reg] != 1) { >>> + inst->dst.type != inst->src[0].type) { >>> continue; >>> } >>> >>> + if (virtual_grf_sizes[inst->src[0].reg] > >>> + virtual_grf_sizes[inst->dst.reg]) >>> + continue; >> >> Why isn't this != rather than >? > > Because coalescing a size=1 register into part of a size>1 is okay. > >>> int var_from = live_intervals->var_from_reg(&inst->src[0]); >>> int var_to = live_intervals->var_from_reg(&inst->dst); >>> >>> @@ -2288,31 +2297,58 @@ fs_visitor::register_coalesce() >>> !inst->dst.equals(inst->src[0])) >>> continue; >>> >>> - int reg_from = inst->src[0].reg; >>> - assert(inst->src[0].reg_offset == 0); >>> - int reg_to = inst->dst.reg; >>> - int reg_to_offset = inst->dst.reg_offset; >>> + if (reg_from != inst->src[0].reg) { >>> + reg_from = inst->src[0].reg; >>> + >>> + src_size = virtual_grf_sizes[inst->src[0].reg]; >>> + assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE); >>> + >>> + channels_remaining = src_size; >>> + memset(mov, 0, sizeof(mov)); >>> + >>> + reg_to = inst->dst.reg; >>> + } >>> + >>> + if (reg_to != inst->dst.reg) >>> + continue; >>> + >>> + const int offset = inst->src[0].reg_offset; >>> + reg_to_offset[offset] = inst->dst.reg_offset; >>> + mov[offset] = inst; >>> + channels_remaining--; >>> + >>> + if (channels_remaining) >>> + continue; >>> + >>> + for (int i = 0; i < src_size; i++) { >>> + if (mov[i]) >>> + mov[i]->remove(); >>> + } >>> >>> foreach_list(node, &this->instructions) { >>> fs_inst *scan_inst = (fs_inst *)node; >>> >>> - if (scan_inst->dst.file == GRF && >>> - scan_inst->dst.reg == reg_from) { >>> - scan_inst->dst.reg = reg_to; >>> - scan_inst->dst.reg_offset = reg_to_offset; >>> - } >>> - for (int i = 0; i < 3; i++) { >>> - if (scan_inst->src[i].file == GRF && >>> - scan_inst->src[i].reg == reg_from) { >>> - scan_inst->src[i].reg = reg_to; >>> - scan_inst->src[i].reg_offset = reg_to_offset; >>> - } >>> - } >>> - } >>> + for (int i = 0; i < src_size; i++) { >>> + if (mov[i]) { >> >> Shouldn't this always be taken? >> >>> + if (scan_inst->dst.file == GRF && >>> + scan_inst->dst.reg == reg_from && >>> + scan_inst->dst.reg_offset == i) { >>> + scan_inst->dst.reg = reg_to; >>> + scan_inst->dst.reg_offset = reg_to_offset[i]; >>> + } >>> + for (int j = 0; j < 3; j++) { >>> + if (scan_inst->src[j].file == GRF && >>> + scan_inst->src[j].reg == reg_from && >>> + scan_inst->src[j].reg_offset == i) { >>> + scan_inst->src[j].reg = reg_to; >>> + scan_inst->src[j].reg_offset = reg_to_offset[i]; >>> + } >>> + } >>> >>> - inst->remove(); >>> - progress = true; >>> - continue; >>> + progress = true; >>> + } >>> + } >>> + } >> >> Couldn't we assign progress = true here instead? Ie, outside 2 of the >> 3 nested for loops. > > I think made this patch a little more confusing that necessary so that > the next one was much easier to follow. At least, I'm pretty sure. :) _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev