On Wed, 2016-09-07 at 18:48 -0700, Francisco Jerez wrote: > This makes sure that overlap checks are done correctly throughout the > back-end when the '*this' register starts before the register/size > pair provided as argument, and is actually less annoying to use than > in_range() at this point since regions_overlap() takes its size > arguments in bytes. > --- > src/mesa/drivers/dri/i965/brw_shader.cpp | 9 ----- > ---- > src/mesa/drivers/dri/i965/brw_shader.h | 1 - > src/mesa/drivers/dri/i965/brw_vec4.cpp | 14 > ++++++++------ > src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp | 4 ++-- > src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 4 ++-- > 5 files changed, 12 insertions(+), 20 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index e599235..951e6b2 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -745,15 +745,6 @@ backend_reg::is_accumulator() const > } > > bool > -backend_reg::in_range(const backend_reg &r, unsigned n) const > -{ > - return (file == r.file && > - nr == r.nr && > - offset >= r.offset && > - offset < r.offset + n * REG_SIZE); > -} > - > -bool > backend_instruction::is_commutative() const > { > switch (opcode) { > diff --git a/src/mesa/drivers/dri/i965/brw_shader.h > b/src/mesa/drivers/dri/i965/brw_shader.h > index 0de0808..ba2404a 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.h > +++ b/src/mesa/drivers/dri/i965/brw_shader.h > @@ -62,7 +62,6 @@ struct backend_reg : private brw_reg > bool is_negative_one() const; > bool is_null() const; > bool is_accumulator() const; > - bool in_range(const backend_reg &r, unsigned n) const; > > /** Offset from the start of the (virtual) register in bytes. */ > uint16_t offset; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 561170c..eb2888f 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -1143,7 +1143,8 @@ vec4_visitor::opt_register_coalesce() > inst) { > _scan_inst = scan_inst; > > - if (inst->src[0].in_range(scan_inst->dst, > DIV_ROUND_UP(scan_inst->size_written, REG_SIZE))) { > + if (regions_overlap(inst->src[0], inst->size_read(0), > + scan_inst->dst, scan_inst- > >size_written)) { > /* Found something writing to the reg we want to > coalesce away. */ > if (to_mrf) { > /* SEND instructions can't have MRF as a destination. > */ > @@ -1197,8 +1198,8 @@ vec4_visitor::opt_register_coalesce() > */ > bool interfered = false; > for (int i = 0; i < 3; i++) { > - if (inst->src[0].in_range(scan_inst->src[i], > - DIV_ROUND_UP(scan_inst- > >size_read(i), REG_SIZE))) > + if (regions_overlap(inst->src[0], inst->size_read(0), > + scan_inst->src[i], scan_inst- > >size_read(i))) > interfered = true; > } > if (interfered) > @@ -1207,7 +1208,8 @@ vec4_visitor::opt_register_coalesce() > /* If somebody else writes the same channels of our > destination here, > * we can't coalesce before that. > */ > - if (inst->dst.in_range(scan_inst->dst, > DIV_ROUND_UP(scan_inst->size_written, REG_SIZE)) && > + if (regions_overlap(inst->dst, inst->size_written, > + scan_inst->dst, scan_inst- > >size_written) && > (inst->dst.writemask & scan_inst->dst.writemask) != 0) > { > break; > } > @@ -1223,8 +1225,8 @@ vec4_visitor::opt_register_coalesce() > } > } else { > for (int i = 0; i < 3; i++) { > - if (inst->dst.in_range(scan_inst->src[i], > - DIV_ROUND_UP(scan_inst- > >size_read(i), REG_SIZE))) > + if (regions_overlap(inst->dst, inst->size_written, > + scan_inst->src[i], scan_inst- > >size_read(i))) > interfered = true; > } > if (interfered) > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp > index e74bc15..9977317 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp > @@ -68,8 +68,8 @@ opt_cmod_propagation_local(bblock_t *block) > > bool read_flag = false; > foreach_inst_in_block_reverse_starting_from(vec4_instruction, > scan_inst, inst) { > - if (inst->src[0].in_range(scan_inst->dst, > - DIV_ROUND_UP(scan_inst- > >size_written, REG_SIZE))) { > + if (regions_overlap(inst->src[0], inst->size_read(0), > + scan_inst->dst, scan_inst- > >size_written)) { > if ((scan_inst->predicate && scan_inst->opcode != > BRW_OPCODE_SEL) || > scan_inst->dst.offset / REG_SIZE != inst- > >src[0].offset / REG_SIZE || > (scan_inst->dst.writemask != WRITEMASK_X && > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > index 777d252..fe76dea 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp > @@ -72,8 +72,8 @@ is_channel_updated(vec4_instruction *inst, src_reg > *values[4], int ch) > if (!src || src->file != VGRF) > return false; > > - return (src->in_range(inst->dst, DIV_ROUND_UP(inst->size_written, > REG_SIZE)) && > - inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, > ch))); > + return regions_overlap(*src, REG_SIZE, inst->dst, inst- > >size_written) && > + inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, > ch));
I suppose this (assuming that src always reads REG_SIZE) can be bogus, at least for fp64 programs, right? In any case, this would not be a problem introduced by this patch since the previous implementation would fail in the same case. If I am right, then I suppose that in order to fix this we would need to store the real size read for each value in copy_entry. > } > > static bool _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev