Segher Boessenkool <seg...@kernel.crashing.org> writes: > Hi! > > What is holding up this patch still? Ke Wen has pinged it every month > since May, and there has still not been a review.
FAOD (since I'm on cc:), I don't feel qualified to review this. Tree-level loop stuff isn't really my area. Thanks, Richard > > > Segher > > > On Thu, May 28, 2020 at 08:19:59PM +0800, Kewen.Lin wrote: >> >> gcc/ChangeLog >> >> 2020-MM-DD Kewen Lin <li...@gcc.gnu.org> >> >> * cfgloop.h (struct loop): New field estimated_unroll. >> * tree-ssa-loop-manip.c (decide_unroll_const_iter): New function. >> (decide_unroll_runtime_iter): Likewise. >> (decide_unroll_stupid): Likewise. >> (estimate_unroll_factor): Likewise. >> * tree-ssa-loop-manip.h (estimate_unroll_factor): New declaration. >> * tree-ssa-loop.c (tree_average_num_loop_insns): New function. >> * tree-ssa-loop.h (tree_average_num_loop_insns): New declaration. >> >> ---- > >> --- >> gcc/cfgloop.h | 3 + >> gcc/tree-ssa-loop-manip.c | 253 >> ++++++++++++++++++++++++++++++++++++++++++++++ >> gcc/tree-ssa-loop-manip.h | 3 +- >> gcc/tree-ssa-loop.c | 33 ++++++ >> gcc/tree-ssa-loop.h | 2 + >> 5 files changed, 292 insertions(+), 2 deletions(-) >> >> diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h >> index 11378ca..c5bcca7 100644 >> --- a/gcc/cfgloop.h >> +++ b/gcc/cfgloop.h >> @@ -232,6 +232,9 @@ public: >> Other values means unroll with the given unrolling factor. */ >> unsigned short unroll; >> >> + /* Like unroll field above, but it's estimated in middle-end. */ >> + unsigned short estimated_unroll; >> + >> /* If this loop was inlined the main clique of the callee which does >> not need remapping when copying the loop body. */ >> unsigned short owned_clique; >> diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c >> index 120b35b..8a5a1a9 100644 >> --- a/gcc/tree-ssa-loop-manip.c >> +++ b/gcc/tree-ssa-loop-manip.c >> @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see >> #include "system.h" >> #include "coretypes.h" >> #include "backend.h" >> +#include "target.h" >> #include "tree.h" >> #include "gimple.h" >> #include "cfghooks.h" >> @@ -42,6 +43,7 @@ along with GCC; see the file COPYING3. If not see >> #include "cfgloop.h" >> #include "tree-scalar-evolution.h" >> #include "tree-inline.h" >> +#include "wide-int.h" >> >> /* All bitmaps for rewriting into loop-closed SSA go on this obstack, >> so that we can free them all at once. */ >> @@ -1592,3 +1594,254 @@ canonicalize_loop_ivs (class loop *loop, tree *nit, >> bool bump_in_latch) >> >> return var_before; >> } >> + >> +/* Try to determine estimated unroll factor for given LOOP with constant >> number >> + of iterations, mainly refer to decide_unroll_constant_iterations. >> + - NITER_DESC holds number of iteration description if it isn't NULL. >> + - NUNROLL holds a unroll factor value computed with instruction numbers. >> + - ITER holds estimated or likely max loop iterations. >> + Return true if it succeeds, also update estimated_unroll. */ >> + >> +static bool >> +decide_unroll_const_iter (class loop *loop, const tree_niter_desc >> *niter_desc, >> + unsigned nunroll, const widest_int *iter) >> +{ >> + /* Skip big loops. */ >> + if (nunroll <= 1) >> + return false; >> + >> + gcc_assert (niter_desc && niter_desc->assumptions); >> + >> + /* Check number of iterations is constant, return false if no. */ >> + if ((niter_desc->may_be_zero && !integer_zerop (niter_desc->may_be_zero)) >> + || !tree_fits_uhwi_p (niter_desc->niter)) >> + return false; >> + >> + unsigned HOST_WIDE_INT const_niter = tree_to_uhwi (niter_desc->niter); >> + >> + /* If unroll factor is set explicitly, use it as estimated_unroll. */ >> + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) >> + { >> + /* It should have been peeled instead. */ >> + if (const_niter == 0 || (unsigned) loop->unroll > const_niter - 1) >> + loop->estimated_unroll = 1; >> + else >> + loop->estimated_unroll = loop->unroll; >> + return true; >> + } >> + >> + /* Check whether the loop rolls enough to consider. */ >> + if (const_niter < 2 * nunroll || wi::ltu_p (*iter, 2 * nunroll)) >> + return false; >> + >> + /* Success; now compute number of iterations to unroll. */ >> + unsigned best_unroll = 0, n_copies = 0; >> + unsigned best_copies = 2 * nunroll + 10; >> + unsigned i = 2 * nunroll + 2; >> + >> + if (i > const_niter - 2) >> + i = const_niter - 2; >> + >> + for (; i >= nunroll - 1; i--) >> + { >> + unsigned exit_mod = const_niter % (i + 1); >> + >> + if (!empty_block_p (loop->latch)) >> + n_copies = exit_mod + i + 1; >> + else if (exit_mod != i) >> + n_copies = exit_mod + i + 2; >> + else >> + n_copies = i + 1; >> + >> + if (n_copies < best_copies) >> + { >> + best_copies = n_copies; >> + best_unroll = i; >> + } >> + } >> + >> + loop->estimated_unroll = best_unroll + 1; >> + return true; >> +} >> + >> +/* Try to determine estimated unroll factor for given LOOP with countable >> but >> + non-constant number of iterations, mainly refer to >> + decide_unroll_runtime_iterations. >> + - NITER_DESC holds number of iteration description if it isn't NULL. >> + - NUNROLL_IN holds a unroll factor value computed with instruction >> numbers. >> + - ITER holds estimated or likely max loop iterations. >> + Return true if it succeeds, also update estimated_unroll. */ >> + >> +static bool >> +decide_unroll_runtime_iter (class loop *loop, const tree_niter_desc >> *niter_desc, >> + unsigned nunroll_in, const widest_int *iter) >> +{ >> + unsigned nunroll = nunroll_in; >> + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) >> + nunroll = loop->unroll; >> + >> + /* Skip big loops. */ >> + if (nunroll <= 1) >> + return false; >> + >> + gcc_assert (niter_desc && niter_desc->assumptions); >> + >> + /* Skip constant number of iterations. */ >> + if ((!niter_desc->may_be_zero || !integer_zerop (niter_desc->may_be_zero)) >> + && tree_fits_uhwi_p (niter_desc->niter)) >> + return false; >> + >> + /* Check whether the loop rolls. */ >> + if (wi::ltu_p (*iter, 2 * nunroll)) >> + return false; >> + >> + /* Success; now force nunroll to be power of 2. */ >> + unsigned i; >> + for (i = 1; 2 * i <= nunroll; i *= 2) >> + continue; >> + >> + loop->estimated_unroll = i; >> + return true; >> +} >> + >> +/* Try to determine estimated unroll factor for given LOOP with uncountable >> + number of iterations, mainly refer to decide_unroll_stupid. >> + - NITER_DESC holds number of iteration description if it isn't NULL. >> + - NUNROLL_IN holds a unroll factor value computed with instruction >> numbers. >> + - ITER holds estimated or likely max loop iterations. >> + Return true if it succeeds, also update estimated_unroll. */ >> + >> +static bool >> +decide_unroll_stupid (class loop *loop, const tree_niter_desc *niter_desc, >> + unsigned nunroll_in, const widest_int *iter) >> +{ >> + if (!flag_unroll_all_loops && !loop->unroll) >> + return false; >> + >> + unsigned nunroll = nunroll_in; >> + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) >> + nunroll = loop->unroll; >> + >> + /* Skip big loops. */ >> + if (nunroll <= 1) >> + return false; >> + >> + gcc_assert (!niter_desc || !niter_desc->assumptions); >> + >> + /* Skip loop with multiple branches for now. */ >> + if (num_loop_branches (loop) > 1) >> + return false; >> + >> + /* Check whether the loop rolls. */ >> + if (wi::ltu_p (*iter, 2 * nunroll)) >> + return false; >> + >> + /* Success; now force nunroll to be power of 2. */ >> + unsigned i; >> + for (i = 1; 2 * i <= nunroll; i *= 2) >> + continue; >> + >> + loop->estimated_unroll = i; >> + return true; >> +} >> + >> +/* Try to estimate whether this given LOOP can be unrolled or not, and >> compute >> + its estimated unroll factor if it can. To avoid duplicated computation, >> you >> + can pass number of iterations information by DESC. The heuristics mainly >> + refer to decide_unrolling in loop-unroll.c. */ >> + >> +void >> +estimate_unroll_factor (class loop *loop, tree_niter_desc *desc) >> +{ >> + /* Return the existing estimated unroll factor. */ >> + if (loop->estimated_unroll) >> + return; >> + >> + /* Don't unroll explicitly. */ >> + if (loop->unroll == 1) >> + { >> + loop->estimated_unroll = loop->unroll; >> + return; >> + } >> + >> + /* Like decide_unrolling, don't unroll if: >> + 1) the loop is cold. >> + 2) the loop can't be manipulated. >> + 3) the loop isn't innermost. */ >> + if (optimize_loop_for_size_p (loop) || !can_duplicate_loop_p (loop) >> + || loop->inner != NULL) >> + { >> + loop->estimated_unroll = 1; >> + return; >> + } >> + >> + /* Don't unroll without explicit information. */ >> + if (!loop->unroll && !flag_unroll_loops && !flag_unroll_all_loops) >> + { >> + loop->estimated_unroll = 1; >> + return; >> + } >> + >> + /* Check for instruction number and average instruction number. */ >> + loop->ninsns = tree_num_loop_insns (loop, &eni_size_weights); >> + loop->av_ninsns = tree_average_num_loop_insns (loop, &eni_size_weights); >> + unsigned nunroll = param_max_unrolled_insns / loop->ninsns; >> + unsigned nunroll_by_av = param_max_average_unrolled_insns / >> loop->av_ninsns; >> + >> + if (nunroll > nunroll_by_av) >> + nunroll = nunroll_by_av; >> + if (nunroll > (unsigned) param_max_unroll_times) >> + nunroll = param_max_unroll_times; >> + >> + if (targetm.loop_unroll_adjust) >> + nunroll = targetm.loop_unroll_adjust (nunroll, loop); >> + >> + tree_niter_desc *niter_desc = NULL; >> + bool desc_need_delete = false; >> + >> + /* Compute number of iterations if need. */ >> + if (!desc) >> + { >> + /* For now, use single_dom_exit for simplicity. TODO: Support multiple >> + exits like find_simple_exit if we finds some profitable cases. */ >> + niter_desc = XNEW (class tree_niter_desc); >> + gcc_assert (niter_desc); >> + edge exit = single_dom_exit (loop); >> + if (!exit || !number_of_iterations_exit (loop, exit, niter_desc, >> true)) >> + { >> + XDELETE (niter_desc); >> + niter_desc = NULL; >> + } >> + else >> + desc_need_delete = true; >> + } >> + else >> + niter_desc = desc; >> + >> + /* For checking the loop rolls enough to consider, also consult loop >> bounds >> + and profile. */ >> + widest_int iterations; >> + if (!get_estimated_loop_iterations (loop, &iterations) >> + && !get_likely_max_loop_iterations (loop, &iterations)) >> + iterations = 0; >> + >> + if (niter_desc && niter_desc->assumptions) >> + { >> + /* For countable loops. */ >> + if (!decide_unroll_const_iter (loop, niter_desc, nunroll, &iterations) >> + && !decide_unroll_runtime_iter (loop, niter_desc, nunroll, >> &iterations)) >> + loop->estimated_unroll = 1; >> + } >> + else >> + { >> + if (!decide_unroll_stupid (loop, niter_desc, nunroll, &iterations)) >> + loop->estimated_unroll = 1; >> + } >> + >> + if (desc_need_delete) >> + { >> + XDELETE (niter_desc); >> + niter_desc = NULL; >> + } >> +} >> + >> diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h >> index e789e4f..773a2b3 100644 >> --- a/gcc/tree-ssa-loop-manip.h >> +++ b/gcc/tree-ssa-loop-manip.h >> @@ -55,7 +55,6 @@ extern void tree_transform_and_unroll_loop (class loop *, >> unsigned, >> extern void tree_unroll_loop (class loop *, unsigned, >> edge, class tree_niter_desc *); >> extern tree canonicalize_loop_ivs (class loop *, tree *, bool); >> - >> - >> +extern void estimate_unroll_factor (class loop *, tree_niter_desc *); >> >> #endif /* GCC_TREE_SSA_LOOP_MANIP_H */ >> diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c >> index 5e8365d..25320fb 100644 >> --- a/gcc/tree-ssa-loop.c >> +++ b/gcc/tree-ssa-loop.c >> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see >> #include "diagnostic-core.h" >> #include "stringpool.h" >> #include "attribs.h" >> +#include "sreal.h" >> >> >> /* A pass making sure loops are fixed up. */ >> @@ -790,5 +791,37 @@ tree_num_loop_insns (class loop *loop, eni_weights >> *weights) >> return size; >> } >> >> +/* Computes an estimated number of insns on average per iteration in LOOP, >> + weighted by WEIGHTS. Refer to function average_num_loop_insns. */ >> >> +unsigned >> +tree_average_num_loop_insns (class loop *loop, eni_weights *weights) >> +{ >> + basic_block *body = get_loop_body (loop); >> + gimple_stmt_iterator gsi; >> + unsigned bb_size, i; >> + sreal nsize = 0; >> + >> + for (i = 0; i < loop->num_nodes; i++) >> + { >> + bb_size = 0; >> + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) >> + bb_size += estimate_num_insns (gsi_stmt (gsi), weights); >> + nsize += (sreal) bb_size >> + * body[i]->count.to_sreal_scale (loop->header->count); >> + /* Avoid overflows. */ >> + if (nsize > 1000000) >> + { >> + free (body); >> + return 1000000; >> + } >> + } >> + free (body); >> + >> + unsigned ret = nsize.to_int (); >> + if (!ret) >> + ret = 1; /* To avoid division by zero. */ >> + >> + return ret; >> +} >> >> diff --git a/gcc/tree-ssa-loop.h b/gcc/tree-ssa-loop.h >> index 9e35125..af36177 100644 >> --- a/gcc/tree-ssa-loop.h >> +++ b/gcc/tree-ssa-loop.h >> @@ -67,6 +67,8 @@ public: >> extern bool for_each_index (tree *, bool (*) (tree, tree *, void *), void >> *); >> extern char *get_lsm_tmp_name (tree ref, unsigned n, const char *suffix = >> NULL); >> extern unsigned tree_num_loop_insns (class loop *, struct eni_weights *); >> +extern unsigned tree_average_num_loop_insns (class loop *, >> + struct eni_weights *); >> >> /* Returns the loop of the statement STMT. */ >> >> -- >> 2.7.4 >>