Don't do copy propagation inside loops until after we have tried unrolling them. This helps avoid propagating everything to the phis, which makes loop unrolling more difficult.
For example without this: loop { block block_1: /* preds: block_0 block_4 */ vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15 vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_17 vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_18 vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_19 vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_20 vec1 32 ssa_15 = iadd ssa_10, ssa_2 vec1 32 ssa_16 = ige ssa_15, ssa_1 /* succs: block_2 block_3 */ if ssa_16 { block block_2: /* preds: block_1 */ break /* succs: block_5 */ } else { block block_3: /* preds: block_1 */ /* succs: block_4 */ } block block_4: /* preds: block_3 */ vec1 32 ssa_17 = imov ssa_12 vec1 32 ssa_18 = imov ssa_13 vec1 32 ssa_19 = imov ssa_14 vec1 32 ssa_20 = imov ssa_11 /* succs: block_1 */ } Will end up as: loop { /* preds: block_0 block_4 */ block block_1: vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15 vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_12 vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_13 vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_14 vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_11 vec1 32 ssa_15 = iadd ssa_10, ssa_2 vec1 32 ssa_16 = ige ssa_15, ssa_1 /* succs: block_2 block_3 */ if ssa_16 { block block_2: /* preds: block_1 */ break /* succs: block_5 */ } else { block block_3: /* preds: block_1 */ /* succs: block_4 */ } block block_4: /* preds: block_3 */ /* succs: block_1 */ } --- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_opt_copy_propagate.c | 47 ++++++++++++++++++++----------- src/mesa/drivers/dri/i965/brw_nir.c | 6 ++-- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5e527d8..e3e06b1 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2562,7 +2562,7 @@ bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_global_to_local(nir_shader *shader); -bool nir_copy_prop(nir_shader *shader); +bool nir_copy_prop(nir_shader *shader, bool prop_loops); bool 
nir_opt_cse(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c index c26e07f..12daeb6 100644 --- a/src/compiler/nir/nir_opt_copy_propagate.c +++ b/src/compiler/nir/nir_opt_copy_propagate.c @@ -99,11 +99,14 @@ is_swizzleless_move(nir_alu_instr *instr) } static bool -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if, + bool prop_loops) { if (!src->is_ssa) { - if (src->reg.indirect) - return copy_prop_src(src->reg.indirect, parent_instr, parent_if); + if (src->reg.indirect) { + return copy_prop_src(src->reg.indirect, parent_instr, parent_if, + prop_loops); + } return false; } @@ -125,6 +128,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) if (phi->dest.ssa.num_components != alu_instr->src[0].src.ssa->num_components) return false; + + /* Avoid propagating moves inside a loop into phis which makes + * unrolling difficult. 
+ */ + if (!prop_loops) { + if (phi->instr.block->cf_node.parent->type == nir_cf_node_loop) + return false; + } } if (parent_instr) { @@ -140,13 +151,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) } static bool -copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index, + bool prop_loops) { nir_alu_src *src = &parent_alu_instr->src[index]; if (!src->src.is_ssa) { if (src->src.reg.indirect) return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, - NULL); + NULL, prop_loops); return false; } @@ -195,6 +207,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) typedef struct { nir_instr *parent_instr; + bool prop_loops; bool progress; } copy_prop_state; @@ -202,25 +215,26 @@ static bool copy_prop_src_cb(nir_src *src, void *_state) { copy_prop_state *state = (copy_prop_state *) _state; - while (copy_prop_src(src, state->parent_instr, NULL)) + while (copy_prop_src(src, state->parent_instr, NULL, state->prop_loops)) state->progress = true; return true; } static bool -copy_prop_instr(nir_instr *instr) +copy_prop_instr(nir_instr *instr, bool prop_loops) { if (instr->type == nir_instr_type_alu) { nir_alu_instr *alu_instr = nir_instr_as_alu(instr); bool progress = false; for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) - while (copy_prop_alu_src(alu_instr, i)) + while (copy_prop_alu_src(alu_instr, i, prop_loops)) progress = true; if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) + while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL, + prop_loops)) progress = true; return progress; @@ -229,30 +243,31 @@ copy_prop_instr(nir_instr *instr) copy_prop_state state; state.parent_instr = instr; state.progress = false; + state.prop_loops = prop_loops; nir_foreach_src(instr, copy_prop_src_cb, &state); return 
state.progress; } static bool -copy_prop_if(nir_if *if_stmt) +copy_prop_if(nir_if *if_stmt, bool prop_loops) { - return copy_prop_src(&if_stmt->condition, NULL, if_stmt); + return copy_prop_src(&if_stmt->condition, NULL, if_stmt, prop_loops); } static bool -nir_copy_prop_impl(nir_function_impl *impl) +nir_copy_prop_impl(nir_function_impl *impl, bool prop_loops) { bool progress = false; nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { - if (copy_prop_instr(instr)) + if (copy_prop_instr(instr, prop_loops)) progress = true; } nir_if *if_stmt = nir_block_get_following_if(block); - if (if_stmt && copy_prop_if(if_stmt)) + if (if_stmt && copy_prop_if(if_stmt, prop_loops)) progress = true; } @@ -265,12 +280,12 @@ nir_copy_prop_impl(nir_function_impl *impl) } bool -nir_copy_prop(nir_shader *shader) +nir_copy_prop(nir_shader *shader, bool prop_loops) { bool progress = false; nir_foreach_function(function, shader) { - if (function->impl && nir_copy_prop_impl(function->impl)) + if (function->impl && nir_copy_prop_impl(function->impl, prop_loops)) progress = true; } diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 8aec0d3..6122c2a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -374,13 +374,13 @@ nir_optimize(nir_shader *nir, bool is_scalar) OPT_V(nir_lower_alu_to_scalar); } - OPT(nir_copy_prop); + OPT(nir_copy_prop, false); if (is_scalar) { OPT_V(nir_lower_phis_to_scalar); } - OPT(nir_copy_prop); + OPT(nir_copy_prop, false); OPT(nir_opt_dce); OPT(nir_opt_cse); OPT(nir_opt_peephole_select); @@ -495,7 +495,7 @@ brw_postprocess_nir(nir_shader *nir, OPT(nir_lower_locals_to_regs); OPT_V(nir_lower_to_source_mods); - OPT(nir_copy_prop); + OPT(nir_copy_prop, true); OPT(nir_opt_dce); if (unlikely(debug_enabled)) { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev