On Mon, Nov 16, 2020 at 10:26 AM Jiufu Guo <guoji...@linux.ibm.com> wrote: > > Jiufu Guo <guoji...@linux.ibm.com> writes: > > > Richard Biener <rguent...@suse.de> writes: > > > >> On Wed, 11 Nov 2020, Jiufu Guo wrote: > >> > >>> > >>> Thanks a lot for the sugguestion from previous mails. > >>> The patch was updated accordingly. > >>> > >>> This updated patch propagates loop-closed PHIs them out after > >>> loop_optimizer_finalize under a new introduced flag. At some cases, > >>> to clean up loop-closed PHIs would save efforts of optimization passes > >>> after loopdone. > >>> > >>> This patch passes bootstrap and regtest on ppc64le. Is this ok for trunk? > >> > >> Comments below > >> > >>> free_numbers_of_iterations_estimates (fn); > >>> > >>> + if (flag_clean_up_loop_closed_phi > >> > >> Sorry if there was miscommunication but I've not meant to add a > >> new user-visible flag but instead a flag argument to > >> loop_optimizer_finalize > >> (as said, you can default it to false to only need to change the > >> one in fini_loops) > > Sorry for misunderstand. > > Updated the patch. > > Thanks for the comments! The patch was updated accordingly. > > This updated patch clean up loop closed PHIs in loop_optimizer_finalize, > which is called when some loop optimizations are done. For some cases, > this would save efforts of optimization passes after loopdone. > > gcc/ChangeLog: > 2020-10-16 Jiufu Guo <guoji...@linux.ibm.com> > > * cfgloop.h (loop_optimizer_finalize): Add flag argument. > * loop-init.c (loop_optimizer_finalize): Call > clean_up_loop_closed_phi. > * tree-cfgcleanup.h (clean_up_loop_closed_phi): New declare. > * tree-ssa-loop.c (tree_ssa_loop_done): Call loop_optimizer_finalize > with flag argument. > * tree-ssa-propagate.c (clean_up_loop_closed_phi): New function. > > gcc/testsuite/ChangeLog: > 2020-10-16 Jiufu Guo <guoji...@linux.ibm.com> > > * gcc.dg/tree-ssa/loopclosedphi.c: New test. > > --- > gcc/cfgloop.h | 2 +- > gcc/loop-init.c | 9 ++- > gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c | 21 +++++++ > gcc/tree-cfgcleanup.h | 1 + > gcc/tree-ssa-loop.c | 2 +- > gcc/tree-ssa-propagate.c | 63 +++++++++++++++++++ > 6 files changed, 95 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c > > diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h > index d14689dc31f..438b1f779bb 100644 > --- a/gcc/cfgloop.h > +++ b/gcc/cfgloop.h > @@ -824,7 +824,7 @@ extern void init_set_costs (void); > > /* Loop optimizer initialization. */ > extern void loop_optimizer_init (unsigned); > -extern void loop_optimizer_finalize (function *); > +extern void loop_optimizer_finalize (function *, bool = false); > inline void > loop_optimizer_finalize () > { > diff --git a/gcc/loop-init.c b/gcc/loop-init.c > index 401e5282907..0cb6f509b89 100644 > --- a/gcc/loop-init.c > +++ b/gcc/loop-init.c > @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-ssa-loop-niter.h" > #include "loop-unroll.h" > #include "tree-scalar-evolution.h" > +#include "tree-cfgcleanup.h" > > > /* Apply FLAGS to the loop state. */ > @@ -133,7 +134,7 @@ loop_optimizer_init (unsigned flags) > /* Finalize loop structures. */ > > void > -loop_optimizer_finalize (struct function *fn) > +loop_optimizer_finalize (struct function *fn, bool clean_loop_closed_phi) > { > class loop *loop; > basic_block bb; > @@ -145,6 +146,12 @@ loop_optimizer_finalize (struct function *fn) > > free_numbers_of_iterations_estimates (fn); > > + if (clean_loop_closed_phi && loops_state_satisfies_p (fn, LOOP_CLOSED_SSA)) > + { > + clean_up_loop_closed_phi (fn); > + loops_state_clear (fn, LOOP_CLOSED_SSA); > + } > + > /* If we should preserve loop structure, do not free it but clear > flags that advanced properties are there as we are not preserving > that in full. */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c > b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c > new file mode 100644 > index 00000000000..d71b757fbca > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-tree-ch -w -fdump-tree-loopdone-details" } */ > + > +void > +t6 (int qz, int wh) > +{ > + int jl = wh; > + > + while (1.0 * qz / wh < 1) > + { > + qz = wh * (wh + 2); > + > + while (wh < 1) > + jl = 0; > + } > + > + while (qz < 1) > + qz = jl * wh; > +} > + > +/* { dg-final { scan-tree-dump-times "Replacing" 2 "loopdone"} } */ > diff --git a/gcc/tree-cfgcleanup.h b/gcc/tree-cfgcleanup.h > index 6ff6726bfe4..9e368d63709 100644 > --- a/gcc/tree-cfgcleanup.h > +++ b/gcc/tree-cfgcleanup.h > @@ -26,5 +26,6 @@ extern bool cleanup_tree_cfg (unsigned = 0); > extern bool fixup_noreturn_call (gimple *stmt); > extern bool delete_unreachable_blocks_update_callgraph (cgraph_node > *dst_node, > bool update_clones); > +extern unsigned clean_up_loop_closed_phi (function *); > > #endif /* GCC_TREE_CFGCLEANUP_H */ > diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c > index 5e8365d4e83..339a0c50bc8 100644 > --- a/gcc/tree-ssa-loop.c > +++ b/gcc/tree-ssa-loop.c > @@ -529,7 +529,7 @@ tree_ssa_loop_done (void) > { > free_numbers_of_iterations_estimates (cfun); > scev_finalize (); > - loop_optimizer_finalize (); > + loop_optimizer_finalize (cfun, true); > return 0; > } > > diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c > index 87dbf55fab9..daa1db82afc 100644 > --- a/gcc/tree-ssa-propagate.c > +++ b/gcc/tree-ssa-propagate.c > @@ -1549,3 +1549,66 @@ propagate_tree_value_into_stmt (gimple_stmt_iterator > *gsi, tree val) > else > gcc_unreachable (); > } > + > +/* Check exits of each loop in FUN, walk over loop closed PHIs in > + each exit basic block and propagate degenerate PHIs. */ > + > +unsigned > +clean_up_loop_closed_phi (function *fun) > +{ > + unsigned i; > + edge e; > + gphi *phi; > + tree rhs; > + tree lhs; > + gphi_iterator gsi; > + struct loop *loop; > + > + /* Check dominator info before get loop-close PHIs from loop exits. */ > + if (dom_info_state (CDI_DOMINATORS) != DOM_OK)
Please change this to /* Avoid possibly quadratic work when scanning for loop exits across all loops of a nest. */ if (!loop_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS)) return 0; > + return 0; > + > + /* Walk over loop in function. */ > + FOR_EACH_LOOP_FN (fun, loop, 0) > + { > + /* Check each exit edege of loop. */ > + auto_vec<edge> exits = get_loop_exit_edges (loop); > + FOR_EACH_VEC_ELT (exits, i, e) > + if (single_pred_p (e->dest)) > + /* Walk over loop-closed PHIs. */ > + for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi);) > + { > + phi = gsi.phi (); > + rhs = degenerate_phi_result (phi); rhs = gimple_phi_arg_def (phi, 0); OK with that changes. Thanks, Richard. > + lhs = gimple_phi_result (phi); > + > + if (rhs && may_propagate_copy (lhs, rhs)) > + { > + /* Dump details. */ > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, " Replacing '"); > + print_generic_expr (dump_file, lhs, dump_flags); > + fprintf (dump_file, "' with '"); > + print_generic_expr (dump_file, rhs, dump_flags); > + fprintf (dump_file, "'\n"); > + } > + > + use_operand_p use_p; > + imm_use_iterator iter; > + gimple *use_stmt; > + FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) > + { > + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) > + replace_exp (use_p, rhs); > + update_stmt (use_stmt); > + } > + remove_phi_node (&gsi, true); > + } > + else > + gsi_next (&gsi); > + } > + } > + > + return 0; > +} > -- > 2.25.1 > > > > > >> > >>> + && loops_state_satisfies_p (fn, LOOP_CLOSED_SSA)) > >>> + { > >>> + clean_up_loop_closed_phi (fn); > >>> + loops_state_clear (fn, LOOP_CLOSED_SSA); > >>> + } > >>> + > > ...... > >>> + gphi *phi; > >>> + tree rhs; > >>> + tree lhs; > >>> + gphi_iterator gsi; > >>> + struct loop *loop; > >>> + bool cfg_altered = false; > >>> + > >>> + /* Check dominator info before get loop-close PHIs from loop exits. */ > >>> + if (dom_info_state (CDI_DOMINATORS) != DOM_OK) > >> > >> Why? > >> > > As you said, loop_optimizer_finalize is also called from where dominator > > info is not ready, e.g. called from: vrp(execute_vrp). > > At there, loop exits info is not ready, and then > > get_loop_exit_edges/get_loop_body_with_size function does not work. > > > >>> + /* Walk over loop in function. */ > >>> + FOR_EACH_LOOP_FN (fun, loop, 0) > >>> + { > >>> + /* Check each exit edege of loop. */ > >>> + auto_vec<edge> exits = get_loop_exit_edges (loop); > >>> + FOR_EACH_VEC_ELT (exits, i, e) > >>> + if (single_pred_p (e->dest)) > >>> + /* Walk over loop-closed PHIs. */ > >>> + for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi);) > >>> + { > >>> + phi = gsi.phi (); > >>> + rhs = degenerate_phi_result (phi); > >> > >> You have a single predecessor, thus the result is always degenerate. > >> > >>> + lhs = gimple_phi_result (phi); > >>> + > >>> + if (rhs && may_propagate_copy (lhs, rhs)) > >>> + { > >>> + gimple_stmt_iterator psi = gsi; > >>> + /* Advance the iterator before stmt is removed. */ > >>> + gsi_next (&gsi); > >> > >> remove_phi_node should take care of this, you shouldn't need this > >> (just do not advance the iterator when you remove the PHI node). > >> > > Yeap, get it! Thanks, will update the patch accordingly. > >>> + > > ...... > >>> + > >>> + replace_uses_by (lhs, rhs); > >>> + remove_phi_node (&psi, true); > >>> + cfg_altered = true; > >> > >> in the end the return value is unused but I think we should avoid > >> altering the CFG since doing so requires it to be cleaned up for > >> unreachable blocks. That means to open-code replace_uses_by as > >> > >> imm_use_iterator imm_iter; > >> use_operand_p use; > >> gimple *stmt; > >> FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name) > >> { > >> FOR_EACH_IMM_USE_ON_STMT (use, imm_iter) > >> replace_exp (use, val); > >> update_stmt (stmt); > >> } > > > > Thansk! This could also save some code in replace_uses_by. > > > > BR. > > Jiufu Guo > >> > >> Thanks, > >> Richard. > >> > >>> + } > >>> + else > >>> + gsi_next (&gsi); > >>> + } > >>> + } > >>> + > >>> + return cfg_altered; > >>> +} > >>>