Hi! What is holding up this patch still? Ke Wen has pinged it every month since May, and there has still not been a review.
Segher On Thu, May 28, 2020 at 08:19:59PM +0800, Kewen.Lin wrote: > > gcc/ChangeLog > > 2020-MM-DD Kewen Lin <li...@gcc.gnu.org> > > * cfgloop.h (struct loop): New field estimated_unroll. > * tree-ssa-loop-manip.c (decide_unroll_const_iter): New function. > (decide_unroll_runtime_iter): Likewise. > (decide_unroll_stupid): Likewise. > (estimate_unroll_factor): Likewise. > * tree-ssa-loop-manip.h (estimate_unroll_factor): New declaration. > * tree-ssa-loop.c (tree_average_num_loop_insns): New function. > * tree-ssa-loop.h (tree_average_num_loop_insns): New declaration. > > ---- > --- > gcc/cfgloop.h | 3 + > gcc/tree-ssa-loop-manip.c | 253 > ++++++++++++++++++++++++++++++++++++++++++++++ > gcc/tree-ssa-loop-manip.h | 3 +- > gcc/tree-ssa-loop.c | 33 ++++++ > gcc/tree-ssa-loop.h | 2 + > 5 files changed, 292 insertions(+), 2 deletions(-) > > diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h > index 11378ca..c5bcca7 100644 > --- a/gcc/cfgloop.h > +++ b/gcc/cfgloop.h > @@ -232,6 +232,9 @@ public: > Other values means unroll with the given unrolling factor. */ > unsigned short unroll; > > + /* Like unroll field above, but it's estimated in middle-end. */ > + unsigned short estimated_unroll; > + > /* If this loop was inlined the main clique of the callee which does > not need remapping when copying the loop body. */ > unsigned short owned_clique; > diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c > index 120b35b..8a5a1a9 100644 > --- a/gcc/tree-ssa-loop-manip.c > +++ b/gcc/tree-ssa-loop-manip.c > @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see > #include "system.h" > #include "coretypes.h" > #include "backend.h" > +#include "target.h" > #include "tree.h" > #include "gimple.h" > #include "cfghooks.h" > @@ -42,6 +43,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfgloop.h" > #include "tree-scalar-evolution.h" > #include "tree-inline.h" > +#include "wide-int.h" > > /* All bitmaps for rewriting into loop-closed SSA go on this obstack, > so that we can free them all at once. */ > @@ -1592,3 +1594,254 @@ canonicalize_loop_ivs (class loop *loop, tree *nit, > bool bump_in_latch) > > return var_before; > } > + > +/* Try to determine estimated unroll factor for given LOOP with constant > number > + of iterations, mainly refer to decide_unroll_constant_iterations. > + - NITER_DESC holds number of iteration description if it isn't NULL. > + - NUNROLL holds a unroll factor value computed with instruction numbers. > + - ITER holds estimated or likely max loop iterations. > + Return true if it succeeds, also update estimated_unroll. */ > + > +static bool > +decide_unroll_const_iter (class loop *loop, const tree_niter_desc > *niter_desc, > + unsigned nunroll, const widest_int *iter) > +{ > + /* Skip big loops. */ > + if (nunroll <= 1) > + return false; > + > + gcc_assert (niter_desc && niter_desc->assumptions); > + > + /* Check number of iterations is constant, return false if no. */ > + if ((niter_desc->may_be_zero && !integer_zerop (niter_desc->may_be_zero)) > + || !tree_fits_uhwi_p (niter_desc->niter)) > + return false; > + > + unsigned HOST_WIDE_INT const_niter = tree_to_uhwi (niter_desc->niter); > + > + /* If unroll factor is set explicitly, use it as estimated_unroll. */ > + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) > + { > + /* It should have been peeled instead. */ > + if (const_niter == 0 || (unsigned) loop->unroll > const_niter - 1) > + loop->estimated_unroll = 1; > + else > + loop->estimated_unroll = loop->unroll; > + return true; > + } > + > + /* Check whether the loop rolls enough to consider. */ > + if (const_niter < 2 * nunroll || wi::ltu_p (*iter, 2 * nunroll)) > + return false; > + > + /* Success; now compute number of iterations to unroll. */ > + unsigned best_unroll = 0, n_copies = 0; > + unsigned best_copies = 2 * nunroll + 10; > + unsigned i = 2 * nunroll + 2; > + > + if (i > const_niter - 2) > + i = const_niter - 2; > + > + for (; i >= nunroll - 1; i--) > + { > + unsigned exit_mod = const_niter % (i + 1); > + > + if (!empty_block_p (loop->latch)) > + n_copies = exit_mod + i + 1; > + else if (exit_mod != i) > + n_copies = exit_mod + i + 2; > + else > + n_copies = i + 1; > + > + if (n_copies < best_copies) > + { > + best_copies = n_copies; > + best_unroll = i; > + } > + } > + > + loop->estimated_unroll = best_unroll + 1; > + return true; > +} > + > +/* Try to determine estimated unroll factor for given LOOP with countable but > + non-constant number of iterations, mainly refer to > + decide_unroll_runtime_iterations. > + - NITER_DESC holds number of iteration description if it isn't NULL. > + - NUNROLL_IN holds a unroll factor value computed with instruction > numbers. > + - ITER holds estimated or likely max loop iterations. > + Return true if it succeeds, also update estimated_unroll. */ > + > +static bool > +decide_unroll_runtime_iter (class loop *loop, const tree_niter_desc > *niter_desc, > + unsigned nunroll_in, const widest_int *iter) > +{ > + unsigned nunroll = nunroll_in; > + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) > + nunroll = loop->unroll; > + > + /* Skip big loops. */ > + if (nunroll <= 1) > + return false; > + > + gcc_assert (niter_desc && niter_desc->assumptions); > + > + /* Skip constant number of iterations. */ > + if ((!niter_desc->may_be_zero || !integer_zerop (niter_desc->may_be_zero)) > + && tree_fits_uhwi_p (niter_desc->niter)) > + return false; > + > + /* Check whether the loop rolls. */ > + if (wi::ltu_p (*iter, 2 * nunroll)) > + return false; > + > + /* Success; now force nunroll to be power of 2. */ > + unsigned i; > + for (i = 1; 2 * i <= nunroll; i *= 2) > + continue; > + > + loop->estimated_unroll = i; > + return true; > +} > + > +/* Try to determine estimated unroll factor for given LOOP with uncountable > + number of iterations, mainly refer to decide_unroll_stupid. > + - NITER_DESC holds number of iteration description if it isn't NULL. > + - NUNROLL_IN holds a unroll factor value computed with instruction > numbers. > + - ITER holds estimated or likely max loop iterations. > + Return true if it succeeds, also update estimated_unroll. */ > + > +static bool > +decide_unroll_stupid (class loop *loop, const tree_niter_desc *niter_desc, > + unsigned nunroll_in, const widest_int *iter) > +{ > + if (!flag_unroll_all_loops && !loop->unroll) > + return false; > + > + unsigned nunroll = nunroll_in; > + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) > + nunroll = loop->unroll; > + > + /* Skip big loops. */ > + if (nunroll <= 1) > + return false; > + > + gcc_assert (!niter_desc || !niter_desc->assumptions); > + > + /* Skip loop with multiple branches for now. */ > + if (num_loop_branches (loop) > 1) > + return false; > + > + /* Check whether the loop rolls. */ > + if (wi::ltu_p (*iter, 2 * nunroll)) > + return false; > + > + /* Success; now force nunroll to be power of 2. */ > + unsigned i; > + for (i = 1; 2 * i <= nunroll; i *= 2) > + continue; > + > + loop->estimated_unroll = i; > + return true; > +} > + > +/* Try to estimate whether this given LOOP can be unrolled or not, and > compute > + its estimated unroll factor if it can. To avoid duplicated computation, > you > + can pass number of iterations information by DESC. The heuristics mainly > + refer to decide_unrolling in loop-unroll.c. */ > + > +void > +estimate_unroll_factor (class loop *loop, tree_niter_desc *desc) > +{ > + /* Return the existing estimated unroll factor. */ > + if (loop->estimated_unroll) > + return; > + > + /* Don't unroll explicitly. */ > + if (loop->unroll == 1) > + { > + loop->estimated_unroll = loop->unroll; > + return; > + } > + > + /* Like decide_unrolling, don't unroll if: > + 1) the loop is cold. > + 2) the loop can't be manipulated. > + 3) the loop isn't innermost. */ > + if (optimize_loop_for_size_p (loop) || !can_duplicate_loop_p (loop) > + || loop->inner != NULL) > + { > + loop->estimated_unroll = 1; > + return; > + } > + > + /* Don't unroll without explicit information. */ > + if (!loop->unroll && !flag_unroll_loops && !flag_unroll_all_loops) > + { > + loop->estimated_unroll = 1; > + return; > + } > + > + /* Check for instruction number and average instruction number. */ > + loop->ninsns = tree_num_loop_insns (loop, &eni_size_weights); > + loop->av_ninsns = tree_average_num_loop_insns (loop, &eni_size_weights); > + unsigned nunroll = param_max_unrolled_insns / loop->ninsns; > + unsigned nunroll_by_av = param_max_average_unrolled_insns / > loop->av_ninsns; > + > + if (nunroll > nunroll_by_av) > + nunroll = nunroll_by_av; > + if (nunroll > (unsigned) param_max_unroll_times) > + nunroll = param_max_unroll_times; > + > + if (targetm.loop_unroll_adjust) > + nunroll = targetm.loop_unroll_adjust (nunroll, loop); > + > + tree_niter_desc *niter_desc = NULL; > + bool desc_need_delete = false; > + > + /* Compute number of iterations if need. */ > + if (!desc) > + { > + /* For now, use single_dom_exit for simplicity. TODO: Support multiple > + exits like find_simple_exit if we finds some profitable cases. */ > + niter_desc = XNEW (class tree_niter_desc); > + gcc_assert (niter_desc); > + edge exit = single_dom_exit (loop); > + if (!exit || !number_of_iterations_exit (loop, exit, niter_desc, true)) > + { > + XDELETE (niter_desc); > + niter_desc = NULL; > + } > + else > + desc_need_delete = true; > + } > + else > + niter_desc = desc; > + > + /* For checking the loop rolls enough to consider, also consult loop bounds > + and profile. */ > + widest_int iterations; > + if (!get_estimated_loop_iterations (loop, &iterations) > + && !get_likely_max_loop_iterations (loop, &iterations)) > + iterations = 0; > + > + if (niter_desc && niter_desc->assumptions) > + { > + /* For countable loops. */ > + if (!decide_unroll_const_iter (loop, niter_desc, nunroll, &iterations) > + && !decide_unroll_runtime_iter (loop, niter_desc, nunroll, > &iterations)) > + loop->estimated_unroll = 1; > + } > + else > + { > + if (!decide_unroll_stupid (loop, niter_desc, nunroll, &iterations)) > + loop->estimated_unroll = 1; > + } > + > + if (desc_need_delete) > + { > + XDELETE (niter_desc); > + niter_desc = NULL; > + } > +} > + > diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h > index e789e4f..773a2b3 100644 > --- a/gcc/tree-ssa-loop-manip.h > +++ b/gcc/tree-ssa-loop-manip.h > @@ -55,7 +55,6 @@ extern void tree_transform_and_unroll_loop (class loop *, > unsigned, > extern void tree_unroll_loop (class loop *, unsigned, > edge, class tree_niter_desc *); > extern tree canonicalize_loop_ivs (class loop *, tree *, bool); > - > - > +extern void estimate_unroll_factor (class loop *, tree_niter_desc *); > > #endif /* GCC_TREE_SSA_LOOP_MANIP_H */ > diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c > index 5e8365d..25320fb 100644 > --- a/gcc/tree-ssa-loop.c > +++ b/gcc/tree-ssa-loop.c > @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see > #include "diagnostic-core.h" > #include "stringpool.h" > #include "attribs.h" > +#include "sreal.h" > > > /* A pass making sure loops are fixed up. */ > @@ -790,5 +791,37 @@ tree_num_loop_insns (class loop *loop, eni_weights > *weights) > return size; > } > > +/* Computes an estimated number of insns on average per iteration in LOOP, > + weighted by WEIGHTS. Refer to function average_num_loop_insns. */ > > +unsigned > +tree_average_num_loop_insns (class loop *loop, eni_weights *weights) > +{ > + basic_block *body = get_loop_body (loop); > + gimple_stmt_iterator gsi; > + unsigned bb_size, i; > + sreal nsize = 0; > + > + for (i = 0; i < loop->num_nodes; i++) > + { > + bb_size = 0; > + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) > + bb_size += estimate_num_insns (gsi_stmt (gsi), weights); > + nsize += (sreal) bb_size > + * body[i]->count.to_sreal_scale (loop->header->count); > + /* Avoid overflows. */ > + if (nsize > 1000000) > + { > + free (body); > + return 1000000; > + } > + } > + free (body); > + > + unsigned ret = nsize.to_int (); > + if (!ret) > + ret = 1; /* To avoid division by zero. */ > + > + return ret; > +} > > diff --git a/gcc/tree-ssa-loop.h b/gcc/tree-ssa-loop.h > index 9e35125..af36177 100644 > --- a/gcc/tree-ssa-loop.h > +++ b/gcc/tree-ssa-loop.h > @@ -67,6 +67,8 @@ public: > extern bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *); > extern char *get_lsm_tmp_name (tree ref, unsigned n, const char *suffix = > NULL); > extern unsigned tree_num_loop_insns (class loop *, struct eni_weights *); > +extern unsigned tree_average_num_loop_insns (class loop *, > + struct eni_weights *); > > /* Returns the loop of the statement STMT. */ > > -- > 2.7.4 >