Segher Boessenkool <seg...@kernel.crashing.org> writes:
> Hi!
>
> What is holding up this patch still?  Ke Wen has pinged it every month
> since May, and there has still not been a review.

FAOD (since I'm on cc:), I don't feel qualified to review this.
Tree-level loop stuff isn't really my area.

Thanks,
Richard

>
>
> Segher
>
>
> On Thu, May 28, 2020 at 08:19:59PM +0800, Kewen.Lin wrote:
>> 
>> gcc/ChangeLog
>> 
>> 2020-MM-DD  Kewen Lin  <li...@gcc.gnu.org>
>> 
>>      * cfgloop.h (struct loop): New field estimated_unroll.
>>      * tree-ssa-loop-manip.c (decide_unroll_const_iter): New function.
>>      (decide_unroll_runtime_iter): Likewise.
>>      (decide_unroll_stupid): Likewise.
>>      (estimate_unroll_factor): Likewise.
>>      * tree-ssa-loop-manip.h (estimate_unroll_factor): New declaration.
>>      * tree-ssa-loop.c (tree_average_num_loop_insns): New function.
>>      * tree-ssa-loop.h (tree_average_num_loop_insns): New declaration.
>> 
>> ----
>
>> ---
>>  gcc/cfgloop.h             |   3 +
>>  gcc/tree-ssa-loop-manip.c | 253 
>> ++++++++++++++++++++++++++++++++++++++++++++++
>>  gcc/tree-ssa-loop-manip.h |   3 +-
>>  gcc/tree-ssa-loop.c       |  33 ++++++
>>  gcc/tree-ssa-loop.h       |   2 +
>>  5 files changed, 292 insertions(+), 2 deletions(-)
>> 
>> diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
>> index 11378ca..c5bcca7 100644
>> --- a/gcc/cfgloop.h
>> +++ b/gcc/cfgloop.h
>> @@ -232,6 +232,9 @@ public:
>>       Other values means unroll with the given unrolling factor.  */
>>    unsigned short unroll;
>>  
>> +  /* Like unroll field above, but it's estimated in middle-end.  */
>> +  unsigned short estimated_unroll;
>> +
>>    /* If this loop was inlined the main clique of the callee which does
>>       not need remapping when copying the loop body.  */
>>    unsigned short owned_clique;
>> diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
>> index 120b35b..8a5a1a9 100644
>> --- a/gcc/tree-ssa-loop-manip.c
>> +++ b/gcc/tree-ssa-loop-manip.c
>> @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "system.h"
>>  #include "coretypes.h"
>>  #include "backend.h"
>> +#include "target.h"
>>  #include "tree.h"
>>  #include "gimple.h"
>>  #include "cfghooks.h"
>> @@ -42,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "cfgloop.h"
>>  #include "tree-scalar-evolution.h"
>>  #include "tree-inline.h"
>> +#include "wide-int.h"
>>  
>>  /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
>>     so that we can free them all at once.  */
>> @@ -1592,3 +1594,254 @@ canonicalize_loop_ivs (class loop *loop, tree *nit, 
>> bool bump_in_latch)
>>  
>>    return var_before;
>>  }
>> +
>> +/* Try to determine estimated unroll factor for given LOOP with constant 
>> number
>> +   of iterations, mainly refer to decide_unroll_constant_iterations.
>> +    - NITER_DESC holds number of iteration description if it isn't NULL.
>> +    - NUNROLL holds a unroll factor value computed with instruction numbers.
>> +    - ITER holds estimated or likely max loop iterations.
>> +   Return true if it succeeds, also update estimated_unroll.  */
>> +
>> +static bool
>> +decide_unroll_const_iter (class loop *loop, const tree_niter_desc 
>> *niter_desc,
>> +                  unsigned nunroll, const widest_int *iter)
>> +{
>> +  /* Skip big loops.  */
>> +  if (nunroll <= 1)
>> +    return false;
>> +
>> +  gcc_assert (niter_desc && niter_desc->assumptions);
>> +
>> +  /* Check number of iterations is constant, return false if no.  */
>> +  if ((niter_desc->may_be_zero && !integer_zerop (niter_desc->may_be_zero))
>> +      || !tree_fits_uhwi_p (niter_desc->niter))
>> +    return false;
>> +
>> +  unsigned HOST_WIDE_INT const_niter = tree_to_uhwi (niter_desc->niter);
>> +
>> +  /* If unroll factor is set explicitly, use it as estimated_unroll.  */
>> +  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
>> +    {
>> +      /* It should have been peeled instead.  */
>> +      if (const_niter == 0 || (unsigned) loop->unroll > const_niter - 1)
>> +    loop->estimated_unroll = 1;
>> +      else
>> +    loop->estimated_unroll = loop->unroll;
>> +      return true;
>> +    }
>> +
>> +  /* Check whether the loop rolls enough to consider.  */
>> +  if (const_niter < 2 * nunroll || wi::ltu_p (*iter, 2 * nunroll))
>> +    return false;
>> +
>> +  /* Success; now compute number of iterations to unroll.  */
>> +  unsigned best_unroll = 0, n_copies = 0;
>> +  unsigned best_copies = 2 * nunroll + 10;
>> +  unsigned i = 2 * nunroll + 2;
>> +
>> +  if (i > const_niter - 2)
>> +    i = const_niter - 2;
>> +
>> +  for (; i >= nunroll - 1; i--)
>> +    {
>> +      unsigned exit_mod = const_niter % (i + 1);
>> +
>> +      if (!empty_block_p (loop->latch))
>> +    n_copies = exit_mod + i + 1;
>> +      else if (exit_mod != i)
>> +    n_copies = exit_mod + i + 2;
>> +      else
>> +    n_copies = i + 1;
>> +
>> +      if (n_copies < best_copies)
>> +    {
>> +      best_copies = n_copies;
>> +      best_unroll = i;
>> +    }
>> +    }
>> +
>> +  loop->estimated_unroll = best_unroll + 1;
>> +  return true;
>> +}
>> +
>> +/* Try to determine estimated unroll factor for given LOOP with countable 
>> but
>> +   non-constant number of iterations, mainly refer to
>> +   decide_unroll_runtime_iterations.
>> +    - NITER_DESC holds number of iteration description if it isn't NULL.
>> +    - NUNROLL_IN holds a unroll factor value computed with instruction 
>> numbers.
>> +    - ITER holds estimated or likely max loop iterations.
>> +   Return true if it succeeds, also update estimated_unroll.  */
>> +
>> +static bool
>> +decide_unroll_runtime_iter (class loop *loop, const tree_niter_desc 
>> *niter_desc,
>> +                    unsigned nunroll_in, const widest_int *iter)
>> +{
>> +  unsigned nunroll = nunroll_in;
>> +  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
>> +    nunroll = loop->unroll;
>> +
>> +  /* Skip big loops.  */
>> +  if (nunroll <= 1)
>> +    return false;
>> +
>> +  gcc_assert (niter_desc && niter_desc->assumptions);
>> +
>> +  /* Skip constant number of iterations.  */
>> +  if ((!niter_desc->may_be_zero || !integer_zerop (niter_desc->may_be_zero))
>> +      && tree_fits_uhwi_p (niter_desc->niter))
>> +    return false;
>> +
>> +  /* Check whether the loop rolls.  */
>> +  if (wi::ltu_p (*iter, 2 * nunroll))
>> +    return false;
>> +
>> +  /* Success; now force nunroll to be power of 2.  */
>> +  unsigned i;
>> +  for (i = 1; 2 * i <= nunroll; i *= 2)
>> +    continue;
>> +
>> +  loop->estimated_unroll = i;
>> +  return true;
>> +}
>> +
>> +/* Try to determine estimated unroll factor for given LOOP with uncountable
>> +   number of iterations, mainly refer to decide_unroll_stupid.
>> +    - NITER_DESC holds number of iteration description if it isn't NULL.
>> +    - NUNROLL_IN holds a unroll factor value computed with instruction 
>> numbers.
>> +    - ITER holds estimated or likely max loop iterations.
>> +   Return true if it succeeds, also update estimated_unroll.  */
>> +
>> +static bool
>> +decide_unroll_stupid (class loop *loop, const tree_niter_desc *niter_desc,
>> +              unsigned nunroll_in, const widest_int *iter)
>> +{
>> +  if (!flag_unroll_all_loops && !loop->unroll)
>> +    return false;
>> +
>> +  unsigned nunroll = nunroll_in;
>> +  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
>> +    nunroll = loop->unroll;
>> +
>> +  /* Skip big loops.  */
>> +  if (nunroll <= 1)
>> +    return false;
>> +
>> +  gcc_assert (!niter_desc || !niter_desc->assumptions);
>> +
>> +  /* Skip loop with multiple branches for now.  */
>> +  if (num_loop_branches (loop) > 1)
>> +    return false;
>> +
>> +  /* Check whether the loop rolls.  */
>> +  if (wi::ltu_p (*iter, 2 * nunroll))
>> +    return false;
>> +
>> +  /* Success; now force nunroll to be power of 2.  */
>> +  unsigned i;
>> +  for (i = 1; 2 * i <= nunroll; i *= 2)
>> +    continue;
>> +
>> +  loop->estimated_unroll = i;
>> +  return true;
>> +}
>> +
>> +/* Try to estimate whether this given LOOP can be unrolled or not, and 
>> compute
>> +   its estimated unroll factor if it can.  To avoid duplicated computation, 
>> you
>> +   can pass number of iterations information by DESC.  The heuristics mainly
>> +   refer to decide_unrolling in loop-unroll.c.  */
>> +
>> +void
>> +estimate_unroll_factor (class loop *loop, tree_niter_desc *desc)
>> +{
>> +  /* Return the existing estimated unroll factor.  */
>> +  if (loop->estimated_unroll)
>> +    return;
>> +
>> +  /* Don't unroll explicitly.  */
>> +  if (loop->unroll == 1)
>> +    {
>> +      loop->estimated_unroll = loop->unroll;
>> +      return;
>> +    }
>> +
>> +  /* Like decide_unrolling, don't unroll if:
>> +     1) the loop is cold.
>> +     2) the loop can't be manipulated.
>> +     3) the loop isn't innermost.  */
>> +  if (optimize_loop_for_size_p (loop) || !can_duplicate_loop_p (loop)
>> +      || loop->inner != NULL)
>> +    {
>> +      loop->estimated_unroll = 1;
>> +      return;
>> +    }
>> +
>> +  /* Don't unroll without explicit information.  */
>> +  if (!loop->unroll && !flag_unroll_loops && !flag_unroll_all_loops)
>> +    {
>> +      loop->estimated_unroll = 1;
>> +      return;
>> +    }
>> +
>> +  /* Check for instruction number and average instruction number.  */
>> +  loop->ninsns = tree_num_loop_insns (loop, &eni_size_weights);
>> +  loop->av_ninsns = tree_average_num_loop_insns (loop, &eni_size_weights);
>> +  unsigned nunroll = param_max_unrolled_insns / loop->ninsns;
>> +  unsigned nunroll_by_av = param_max_average_unrolled_insns / 
>> loop->av_ninsns;
>> +
>> +  if (nunroll > nunroll_by_av)
>> +    nunroll = nunroll_by_av;
>> +  if (nunroll > (unsigned) param_max_unroll_times)
>> +    nunroll = param_max_unroll_times;
>> +
>> +  if (targetm.loop_unroll_adjust)
>> +    nunroll = targetm.loop_unroll_adjust (nunroll, loop);
>> +
>> +  tree_niter_desc *niter_desc = NULL;
>> +  bool desc_need_delete = false;
>> +
>> +  /* Compute number of iterations if need.  */
>> +  if (!desc)
>> +    {
>> +      /* For now, use single_dom_exit for simplicity. TODO: Support multiple
>> +     exits like find_simple_exit if we finds some profitable cases.  */
>> +      niter_desc = XNEW (class tree_niter_desc);
>> +      gcc_assert (niter_desc);
>> +      edge exit = single_dom_exit (loop);
>> +      if (!exit || !number_of_iterations_exit (loop, exit, niter_desc, 
>> true))
>> +    {
>> +      XDELETE (niter_desc);
>> +      niter_desc = NULL;
>> +    }
>> +      else
>> +    desc_need_delete = true;
>> +    }
>> +  else
>> +    niter_desc = desc;
>> +
>> +  /* For checking the loop rolls enough to consider, also consult loop 
>> bounds
>> +     and profile.  */
>> +  widest_int iterations;
>> +  if (!get_estimated_loop_iterations (loop, &iterations)
>> +      && !get_likely_max_loop_iterations (loop, &iterations))
>> +    iterations = 0;
>> +
>> +  if (niter_desc && niter_desc->assumptions)
>> +    {
>> +      /* For countable loops.  */
>> +      if (!decide_unroll_const_iter (loop, niter_desc, nunroll, &iterations)
>> +      && !decide_unroll_runtime_iter (loop, niter_desc, nunroll, 
>> &iterations))
>> +    loop->estimated_unroll = 1;
>> +    }
>> +  else
>> +    {
>> +      if (!decide_unroll_stupid (loop, niter_desc, nunroll, &iterations))
>> +    loop->estimated_unroll = 1;
>> +    }
>> +
>> +  if (desc_need_delete)
>> +    {
>> +      XDELETE (niter_desc);
>> +      niter_desc = NULL;
>> +    }
>> +}
>> +
>> diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
>> index e789e4f..773a2b3 100644
>> --- a/gcc/tree-ssa-loop-manip.h
>> +++ b/gcc/tree-ssa-loop-manip.h
>> @@ -55,7 +55,6 @@ extern void tree_transform_and_unroll_loop (class loop *, 
>> unsigned,
>>  extern void tree_unroll_loop (class loop *, unsigned,
>>                            edge, class tree_niter_desc *);
>>  extern tree canonicalize_loop_ivs (class loop *, tree *, bool);
>> -
>> -
>> +extern void estimate_unroll_factor (class loop *, tree_niter_desc *);
>>  
>>  #endif /* GCC_TREE_SSA_LOOP_MANIP_H */
>> diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
>> index 5e8365d..25320fb 100644
>> --- a/gcc/tree-ssa-loop.c
>> +++ b/gcc/tree-ssa-loop.c
>> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "diagnostic-core.h"
>>  #include "stringpool.h"
>>  #include "attribs.h"
>> +#include "sreal.h"
>>  
>>  
>>  /* A pass making sure loops are fixed up.  */
>> @@ -790,5 +791,37 @@ tree_num_loop_insns (class loop *loop, eni_weights 
>> *weights)
>>    return size;
>>  }
>>  
>> +/* Computes an estimated number of insns on average per iteration in LOOP,
>> +   weighted by WEIGHTS.  Refer to function average_num_loop_insns.  */
>>  
>> +unsigned
>> +tree_average_num_loop_insns (class loop *loop, eni_weights *weights)
>> +{
>> +  basic_block *body = get_loop_body (loop);
>> +  gimple_stmt_iterator gsi;
>> +  unsigned bb_size, i;
>> +  sreal nsize = 0;
>> +
>> +  for (i = 0; i < loop->num_nodes; i++)
>> +    {
>> +      bb_size = 0;
>> +      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    bb_size += estimate_num_insns (gsi_stmt (gsi), weights);
>> +      nsize += (sreal) bb_size
>> +           * body[i]->count.to_sreal_scale (loop->header->count);
>> +      /* Avoid overflows.   */
>> +      if (nsize > 1000000)
>> +    {
>> +      free (body);
>> +      return 1000000;
>> +    }
>> +    }
>> +  free (body);
>> +
>> +  unsigned ret = nsize.to_int ();
>> +  if (!ret)
>> +    ret = 1; /* To avoid division by zero.  */
>> +
>> +  return ret;
>> +}
>>  
>> diff --git a/gcc/tree-ssa-loop.h b/gcc/tree-ssa-loop.h
>> index 9e35125..af36177 100644
>> --- a/gcc/tree-ssa-loop.h
>> +++ b/gcc/tree-ssa-loop.h
>> @@ -67,6 +67,8 @@ public:
>>  extern bool for_each_index (tree *, bool (*) (tree, tree *, void *), void 
>> *);
>>  extern char *get_lsm_tmp_name (tree ref, unsigned n, const char *suffix = 
>> NULL);
>>  extern unsigned tree_num_loop_insns (class loop *, struct eni_weights *);
>> +extern unsigned tree_average_num_loop_insns (class loop *,
>> +                                         struct eni_weights *);
>>  
>>  /* Returns the loop of the statement STMT.  */
>>  
>> -- 
>> 2.7.4
>> 

Reply via email to