On Mon, 2016-08-29 at 01:30 -0400, Connor Abbott wrote: > On Mon, Aug 29, 2016 at 12:54 AM, Timothy Arceri > <timothy.arc...@collabora.com> wrote: > > > > Don't do copy propagation inside loops until after we try > > unrolling them. > > > > This helps avoid propagating everything to the phis which > > makes loop unrolling more difficult. > > > > For example without this: > > > > loop { > > block block_1: > > /* preds: block_0 block_4 */ > > vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15 > > vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_17 > > vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_18 > > vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_19 > > vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_20 > > vec1 32 ssa_15 = iadd ssa_10, ssa_2 > > vec1 32 ssa_16 = ige ssa_15, ssa_1 > > /* succs: block_2 block_3 */ > > if ssa_16 { > > block block_2: > > /* preds: block_1 */ > > break > > /* succs: block_5 */ > > } else { > > block block_3: > > /* preds: block_1 */ > > /* succs: block_4 */ > > } > > block block_4: > > /* preds: block_3 */ > > vec1 32 ssa_17 = imov ssa_12 > > vec1 32 ssa_18 = imov ssa_13 > > vec1 32 ssa_19 = imov ssa_14 > > vec1 32 ssa_20 = imov ssa_11 > > /* succs: block_1 */ > > } > > > > Will end up as: > > > > loop { > > /* preds: block_0 block_4 */ > > block block_1: > > vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15 > > vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_12 > > vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_13 > > vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_14 > > vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_11 > > vec1 32 ssa_15 = iadd ssa_10, ssa_2 > > vec1 32 ssa_16 = ige ssa_15, ssa_1 > > /* succs: block_2 block_3 */ > > if ssa_16 { > > block block_2: > > /* preds: block_1 */ > > break > > /* succs: block_5 */ > > } else { > > block block_3: > > /* preds: block_1 */ > > /* succs: block_4 */ > > } > > block block_4: > > /* preds: block_3 */ > > /* succs: block_1 */ > > } > > This change seems really fishy to me, since moves like those in your > example are just a trivial renaming of values.
If you look closely this is not just a trivial renaming of values. This example was from a loop that looked something like this: int i = 0; while (i < 2) { a = a.yzwx; } > Just turning off copy > propagation isn't a very robust solution (what if whatever's > producing > the NIR doesn't insert the moves in the first place?), I haven't seen this happen when testing shader-db or any of the test suites. > and it can hurt > other optimization passes if they can't see through moves. While true loops in glsl tend to be fairly simple. I haven't been able to detect any negative results in my copy of shader-db. Also we should be unrolling a high percentage of loops we come across which means copy propagation will kick in once they are unrolled. > Why is the > second loop harder to unroll? Because manipulating the control flow in NIR is painful. When the movs remain in place we can simply clone them as we unroll. > Why can't it be handled by inserting > moves before unrolling the loop, or directly during the loop > unrolling > pass? We could possibly do this and I did attempt it at first, but it would mean an extra pass over all phi's in the loop header that would likely be expensive and complicated. So instead I went with the simple solution that had no measurable impact as far as I could tell. > > > > > --- > > src/compiler/nir/nir.h | 2 +- > > src/compiler/nir/nir_opt_copy_propagate.c | 47 > > ++++++++++++++++++++----------- > > src/mesa/drivers/dri/i965/brw_nir.c | 6 ++-- > > 3 files changed, 35 insertions(+), 20 deletions(-) > > > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > > index d0dfb0d..7ff5394 100644 > > --- a/src/compiler/nir/nir.h > > +++ b/src/compiler/nir/nir.h > > @@ -2573,7 +2573,7 @@ bool nir_opt_constant_folding(nir_shader > > *shader); > > > > bool nir_opt_global_to_local(nir_shader *shader); > > > > -bool nir_copy_prop(nir_shader *shader); > > +bool nir_copy_prop(nir_shader *shader, bool prop_loops); > > > > bool nir_opt_cse(nir_shader *shader); > > > > diff --git a/src/compiler/nir/nir_opt_copy_propagate.c > > b/src/compiler/nir/nir_opt_copy_propagate.c > > index c26e07f..12daeb6 100644 > > --- a/src/compiler/nir/nir_opt_copy_propagate.c > > +++ b/src/compiler/nir/nir_opt_copy_propagate.c > > @@ -99,11 +99,14 @@ is_swizzleless_move(nir_alu_instr *instr) > > } > > > > static bool > > -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if > > *parent_if) > > +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if > > *parent_if, > > + bool prop_loops) > > { > > if (!src->is_ssa) { > > - if (src->reg.indirect) > > - return copy_prop_src(src->reg.indirect, parent_instr, > > parent_if); > > + if (src->reg.indirect) { > > + return copy_prop_src(src->reg.indirect, parent_instr, > > parent_if, > > + prop_loops); > > + } > > return false; > > } > > > > @@ -125,6 +128,14 @@ copy_prop_src(nir_src *src, nir_instr > > *parent_instr, nir_if *parent_if) > > if (phi->dest.ssa.num_components != > > alu_instr->src[0].src.ssa->num_components) > > return false; > > + > > + /* Avoid propagating moves inside a loop into phis which > > makes > > + * unrolling difficult. > > + */ > > + if (!prop_loops) { > > + if (phi->instr.block->cf_node.parent->type == > > nir_cf_node_loop) > > + return false; > > + } > > } > > > > if (parent_instr) { > > @@ -140,13 +151,14 @@ copy_prop_src(nir_src *src, nir_instr > > *parent_instr, nir_if *parent_if) > > } > > > > static bool > > -copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) > > +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index, > > + bool prop_loops) > > { > > nir_alu_src *src = &parent_alu_instr->src[index]; > > if (!src->src.is_ssa) { > > if (src->src.reg.indirect) > > return copy_prop_src(src->src.reg.indirect, > > &parent_alu_instr->instr, > > - NULL); > > + NULL, prop_loops); > > return false; > > } > > > > @@ -195,6 +207,7 @@ copy_prop_alu_src(nir_alu_instr > > *parent_alu_instr, unsigned index) > > > > typedef struct { > > nir_instr *parent_instr; > > + bool prop_loops; > > bool progress; > > } copy_prop_state; > > > > @@ -202,25 +215,26 @@ static bool > > copy_prop_src_cb(nir_src *src, void *_state) > > { > > copy_prop_state *state = (copy_prop_state *) _state; > > - while (copy_prop_src(src, state->parent_instr, NULL)) > > + while (copy_prop_src(src, state->parent_instr, NULL, state- > > >prop_loops)) > > state->progress = true; > > > > return true; > > } > > > > static bool > > -copy_prop_instr(nir_instr *instr) > > +copy_prop_instr(nir_instr *instr, bool prop_loops) > > { > > if (instr->type == nir_instr_type_alu) { > > nir_alu_instr *alu_instr = nir_instr_as_alu(instr); > > bool progress = false; > > > > for (unsigned i = 0; i < nir_op_infos[alu_instr- > > >op].num_inputs; i++) > > - while (copy_prop_alu_src(alu_instr, i)) > > + while (copy_prop_alu_src(alu_instr, i, prop_loops)) > > progress = true; > > > > if (!alu_instr->dest.dest.is_ssa && alu_instr- > > >dest.dest.reg.indirect) > > - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, > > instr, NULL)) > > + while (copy_prop_src(alu_instr->dest.dest.reg.indirect, > > instr, NULL, > > + prop_loops)) > > progress = true; > > > > return progress; > > @@ -229,30 +243,31 @@ copy_prop_instr(nir_instr *instr) > > copy_prop_state state; > > state.parent_instr = instr; > > state.progress = false; > > + state.prop_loops = prop_loops; > > nir_foreach_src(instr, copy_prop_src_cb, &state); > > > > return state.progress; > > } > > > > static bool > > -copy_prop_if(nir_if *if_stmt) > > +copy_prop_if(nir_if *if_stmt, bool prop_loops) > > { > > - return copy_prop_src(&if_stmt->condition, NULL, if_stmt); > > + return copy_prop_src(&if_stmt->condition, NULL, if_stmt, > > prop_loops); > > } > > > > static bool > > -nir_copy_prop_impl(nir_function_impl *impl) > > +nir_copy_prop_impl(nir_function_impl *impl, bool prop_loops) > > { > > bool progress = false; > > > > nir_foreach_block(block, impl) { > > nir_foreach_instr(instr, block) { > > - if (copy_prop_instr(instr)) > > + if (copy_prop_instr(instr, prop_loops)) > > progress = true; > > } > > > > nir_if *if_stmt = nir_block_get_following_if(block); > > - if (if_stmt && copy_prop_if(if_stmt)) > > + if (if_stmt && copy_prop_if(if_stmt, prop_loops)) > > progress = true; > > } > > > > @@ -265,12 +280,12 @@ nir_copy_prop_impl(nir_function_impl *impl) > > } > > > > bool > > -nir_copy_prop(nir_shader *shader) > > +nir_copy_prop(nir_shader *shader, bool prop_loops) > > { > > bool progress = false; > > > > nir_foreach_function(function, shader) { > > - if (function->impl && nir_copy_prop_impl(function->impl)) > > + if (function->impl && nir_copy_prop_impl(function->impl, > > prop_loops)) > > progress = true; > > } > > > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > > b/src/mesa/drivers/dri/i965/brw_nir.c > > index 29a3e3e..b8457d2 100644 > > --- a/src/mesa/drivers/dri/i965/brw_nir.c > > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > > @@ -378,13 +378,13 @@ nir_optimize(nir_shader *nir, bool is_scalar) > > OPT_V(nir_lower_alu_to_scalar); > > } > > > > - OPT(nir_copy_prop); > > + OPT(nir_copy_prop, false); > > > > if (is_scalar) { > > OPT_V(nir_lower_phis_to_scalar); > > } > > > > - OPT(nir_copy_prop); > > + OPT(nir_copy_prop, false); > > OPT(nir_opt_dce); > > OPT(nir_opt_cse); > > OPT(nir_opt_peephole_select); > > @@ -499,7 +499,7 @@ brw_postprocess_nir(nir_shader *nir, > > OPT(nir_lower_locals_to_regs); > > > > OPT_V(nir_lower_to_source_mods); > > - OPT(nir_copy_prop); > > + OPT(nir_copy_prop, true); > > OPT(nir_opt_dce); > > > > if (unlikely(debug_enabled)) { > > -- > > 2.7.4 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev