On Wed, 2017-10-25 at 16:25 -0700, Jason Ekstrand wrote: > This makes it far more explicit where we're inserting the > instructions > rather than the magic "before and after" stuff that the emit_[un]zip > helpers did based on block and inst. > > Cc: mesa-sta...@lists.freedesktop.org > --- > src/intel/compiler/brw_fs.cpp | 50 ++++++++++++++++++++++++--------- > ---------- > 1 file changed, 28 insertions(+), 22 deletions(-) > > diff --git a/src/intel/compiler/brw_fs.cpp > b/src/intel/compiler/brw_fs.cpp > index 4616529..49ca58d 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -5025,8 +5025,7 @@ needs_src_copy(const fs_builder &lbld, const > fs_inst *inst, unsigned i) > * will be emitted before the given \p inst in \p block. > */ > static fs_reg > -emit_unzip(const fs_builder &lbld, bblock_t *block, fs_inst *inst, > - unsigned i) > +emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
The comment right above this function needs to be updated accordingly. > { > /* Specified channel group from the source region. */ > const fs_reg src = horiz_offset(inst->src[i], lbld.group()); > @@ -5041,8 +5040,7 @@ emit_unzip(const fs_builder &lbld, bblock_t > *block, fs_inst *inst, > const fs_reg tmp = lbld.vgrf(inst->src[i].type, inst- > >components_read(i)); > > for (unsigned k = 0; k < inst->components_read(i); ++k) > - cbld.at(block, inst) > - .MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, > k)); > + cbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, > k)); > > return tmp; > > @@ -5112,36 +5110,43 @@ needs_dst_copy(const fs_builder &lbld, const > fs_inst *inst) > * be emitted around the given \p inst in \p block. > */ > static fs_reg > -emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst) > +emit_zip(const fs_builder &lbld_before, const fs_builder > &lbld_after, > + fs_inst *inst) > { Same here. > - /* Builder of the right width to perform the copy avoiding > uninitialized > - * data if the lowered execution size is greater than the > original > - * execution size of the instruction. > - */ > - const fs_builder cbld = lbld.group(MIN2(lbld.dispatch_width(), > - inst->exec_size), 0); > + assert(lbld_before.dispatch_width() == > lbld_after.dispatch_width()); > + assert(lbld_before.group() == lbld_after.group()); > > /* Specified channel group from the destination region. */ > - const fs_reg dst = horiz_offset(inst->dst, lbld.group()); > + const fs_reg dst = horiz_offset(inst->dst, lbld_after.group()); > const unsigned dst_size = inst->size_written / > inst->dst.component_size(inst->exec_size); > > - if (needs_dst_copy(lbld, inst)) { > - const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size); > + if (needs_dst_copy(lbld_after, inst)) { > + const fs_reg tmp = lbld_after.vgrf(inst->dst.type, dst_size); > > if (inst->predicate) { > /* Handle predication by copying the original contents of > * the destination into the temporary before emitting the > * lowered instruction. > */ > - for (unsigned k = 0; k < dst_size; ++k) > - cbld.at(block, inst) > - .MOV(offset(tmp, lbld, k), offset(dst, inst- > >exec_size, k)); > + for (unsigned k = 0; k < dst_size; ++k) { > + lbld_before.group(MIN2(lbld_before.dispatch_width(), > + inst->exec_size), 0) It doesn't make sense to do: lbld_before.group(MIN2(lbld_before.dispatch_width(), inst->exec_size), 0) for every iteration in the loop since it is constant for all iterations. > + .MOV(offset(tmp, lbld_before, k), > + offset(dst, inst->exec_size, k)); > + } > } > > - for (unsigned k = 0; k < dst_size; ++k) > - cbld.at(block, inst->next) > - .MOV(offset(dst, inst->exec_size, k), offset(tmp, lbld, > k)); > + for (unsigned k = 0; k < dst_size; ++k) { > + /* Use a builder of the right width to perform the copy > avoiding > + * uninitialized data if the lowered execution size is > greater than > + * the original execution size of the instruction. > + */ > + lbld_after.group(MIN2(lbld_after.dispatch_width(), > + inst->exec_size), 0) Same here. Iago > + .MOV(offset(dst, inst->exec_size, k), > + offset(tmp, lbld_after, k)); > + } > > return tmp; > > @@ -5197,9 +5202,10 @@ fs_visitor::lower_simd_width() > const fs_builder lbld = ibld.group(lower_width, i); > > for (unsigned j = 0; j < inst->sources; j++) > - split_inst.src[j] = emit_unzip(lbld, block, inst, j); > + split_inst.src[j] = emit_unzip(lbld.at(block, inst), > inst, j); > > - split_inst.dst = emit_zip(lbld, block, inst); > + split_inst.dst = emit_zip(lbld.at(block, inst), > + lbld.at(block, inst->next), > inst); > split_inst.size_written = > split_inst.dst.component_size(lower_width) * > dst_size; _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev