Re: [PATCH V2] Split loop for NE condition.

Bernd Edlinger Mon, 17 May 2021 23:36:53 -0700

On 5/17/21 4:01 AM, Jiufu Guo via Gcc-patches wrote:
> When there is the possibility that overflow/wrap may happen on the loop index,
> a few optimizations would not happen. For example code:
> 
> foo (int *a, int *b, unsigned k, unsigned n)
> {
>   while (++k != n)
>     a[k] = b[k]  + 1;
> }
> 
> For this code, if "k > n", k would wrap.  if "k < n" at begining,
> it could be optimized (e.g. vectorization).
> 
> We can split the loop into two loops:
> 
>   while (++k > n)
>     a[k] = b[k]  + 1;
>   while (l++ < n)
>     a[k] = b[k]  + 1;
> 
> then for the second loop, it could be optimized.
> 
> This patch is spltting this kind of small loop to achieve better performance.
> 
> Bootstrap and regtest pass on ppc64le.  Is this ok for trunk?
> 
> Thanks!
> 
> Jiufu Guo.
> 
> gcc/ChangeLog:
> 
> 2021-05-15  Jiufu Guo  <guoji...@linux.ibm.com>
> 
>       * tree-ssa-loop-split.c (connect_loop_phis): Add new param.
>       (get_ne_cond_branch): New function.
>       (split_ne_loop): New function.
>       (split_loop_on_ne_cond): New function.
>       (tree_ssa_split_loops): Use split_loop_on_ne_cond.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-05-15  Jiufu Guo  <guoji...@linux.ibm.com>
> 
>       * gcc.dg/loop-split1.c: New test.
>       * g++.dg/vect/pr98064.cc: Suppress warning.
> ---
>  gcc/testsuite/g++.dg/vect/pr98064.cc |   4 +-
>  gcc/testsuite/gcc.dg/loop-split1.c   | 108 +++++++++++++++
>  gcc/tree-ssa-loop-split.c            | 188 ++++++++++++++++++++++++++-
>  3 files changed, 296 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c
> 
> diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc 
> b/gcc/testsuite/g++.dg/vect/pr98064.cc
> index 74043ce7725..dcb2985d05a 100644
> --- a/gcc/testsuite/g++.dg/vect/pr98064.cc
> +++ b/gcc/testsuite/g++.dg/vect/pr98064.cc
> @@ -1,5 +1,7 @@
>  // { dg-do compile }
> -// { dg-additional-options "-O3" }
> +// { dg-additional-options "-O3 -Wno-stringop-overflow" }
> +/* There is warning message when "short g = var_8; g; g++"
> +   is optimized/analyzed as string operation,e.g. memset.  */
>  
>  const long long &min(const long long &__a, long long &__b) {
>    if (__b < __a)
> diff --git a/gcc/testsuite/gcc.dg/loop-split1.c 
> b/gcc/testsuite/gcc.dg/loop-split1.c
> new file mode 100644
> index 00000000000..30b006b1b14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/loop-split1.c
> @@ -0,0 +1,108 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
> +
> +void
> +foo (int *a, int *b, unsigned l, unsigned n)
> +{
> +  while (++l != n)
> +    a[l] = b[l]  + 1;
> +}
> +void
> +foo_1 (int *a, int *b, unsigned n)
> +{
> +  unsigned l = 0;
> +  while (++l != n)
> +    a[l] = b[l]  + 1;
> +}
> +
> +void
> +foo1 (int *a, int *b, unsigned l, unsigned n)
> +{
> +  while (l++ != n)
> +    a[l] = b[l]  + 1;
> +}
> +
> +/* No wrap.  */
> +void
> +foo1_1 (int *a, int *b, unsigned n)
> +{
> +  unsigned l = 0;
> +  while (l++ != n)
> +    a[l] = b[l]  + 1;
> +}
> +
> +unsigned
> +foo2 (char *a, char *b, unsigned l, unsigned n)
> +{
> +  while (++l != n)
> +    if (a[l] != b[l])
> +      break;
> +
> +  return l;
> +}
> +
> +unsigned
> +foo2_1 (char *a, char *b, unsigned l, unsigned n)
> +{
> +  l = 0;
> +  while (++l != n)
> +    if (a[l] != b[l])
> +      break;
> +
> +  return l;
> +}
> +
> +unsigned
> +foo3 (char *a, char *b, unsigned l, unsigned n)
> +{
> +  while (l++ != n)
> +    if (a[l] != b[l])
> +      break;
> +
> +  return l;
> +}
> +
> +/* No wrap.  */
> +unsigned
> +foo3_1 (char *a, char *b, unsigned l, unsigned n)
> +{
> +  l = 0;
> +  while (l++ != n)
> +    if (a[l] != b[l])
> +      break;
> +
> +  return l;
> +}
> +
> +void bar();
> +void foo4(unsigned n,  unsigned i)
> +{
> +  do
> +    {
> +      if (i == n)
> +        return;
> +      bar();
> +      ++i;
> +    }
> +  while (1);
> +}
> +
> +unsigned
> +foo5 (double *a, unsigned n, unsigned i)
> +{
> +  while (a[i] > 0 && i != n)
> +    i++;
> +
> +  return i;
> +}
> +
> +unsigned
> +find_skip_diff (char *p, char *q, unsigned n, unsigned i)
> +{
> +  while (p[i] == q[i] && ++i != n)
> +    p++,q++;
> +
> +  return i;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Loop split" 9 "lsplit" } } */
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 3a09bbc39e5..5c1742b5ff4 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfghooks.h"
>  #include "gimple-fold.h"
>  #include "gimplify-me.h"
> +#include "tree-ssa-loop-ivopts.h"
>  
>  /* This file implements two kinds of loop splitting.
>  
> @@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
>     this.  The loops need to fulfill easy_exit_values().  */
>  
>  static void
> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
> +                bool use_prev = false)
>  {
>    basic_block rest = loop_preheader_edge (loop2)->src;
>    gcc_assert (new_e->dest == rest);
> @@ -279,7 +281,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, 
> edge new_e)
>  
>        gphi * newphi = create_phi_node (new_init, rest);
>        add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
> -      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
> +      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e,
> +                UNKNOWN_LOCATION);
>        SET_USE (op, new_init);
>      }
>  }
> @@ -1593,6 +1596,184 @@ split_loop_on_cond (struct loop *loop)
>    return do_split;
>  }
>  
> +/* Check if the LOOP exit branch likes "if (idx != bound)",
> +   Return the branch edge which exit loop, if overflow/wrap
> +   may happen on "idx".  */
> +
> +static edge
> +get_ne_cond_branch (struct loop *loop)
> +{
> +  int i;
> +  edge e;
> +
> +  auto_vec<edge> edges = get_loop_exit_edges (loop);
> +  FOR_EACH_VEC_ELT (edges, i, e)
> +    {
> +      /* Check if there is possible wrap/overflow.  */
> +      class tree_niter_desc niter;
> +      if (!number_of_iterations_exit (loop, e, &niter, false, false))
> +     continue;
> +      if (niter.control.no_overflow)
> +     return NULL;
> +      if (niter.cmp != NE_EXPR)
> +     continue;
> +
> +      /* Check loop is simple to split.  */
> +      if (single_pred_p (loop->latch)
> +       && single_pred_edge (loop->latch)->src == e->src
> +       && (gsi_end_p (gsi_start_nondebug_bb (loop->latch))))
> +     return e;
> +
> +      /* Simple header.  */
> +      if (e->src == loop->header)
> +     {
> +       if (get_virtual_phi (e->src))
> +         continue;
> +
> +       /* Only one phi.  */
> +       gphi_iterator psi = gsi_start_phis (e->src);
> +       if (gsi_end_p (psi))
> +         continue;
> +       gsi_next (&psi);
> +       if (!gsi_end_p (psi))
> +         continue;
> +
> +       /* ++i or ++i */
> +       gimple_stmt_iterator gsi = gsi_start_bb (e->src);
> +       if (gsi_end_p (gsi))
> +         continue;
> +
> +       gimple *gc = last_stmt (e->src);
> +       tree idx = gimple_cond_lhs (gc);
> +       if (expr_invariant_in_loop_p (loop, idx))
> +         idx = gimple_cond_rhs (gc);
> +
> +       gimple *s1 = gsi_stmt (gsi);
> +       if (!(is_gimple_assign (s1) && idx
> +             && (idx == gimple_assign_lhs (s1)
> +                 || idx == gimple_assign_rhs1 (s1))))
> +         continue;
> +
> +       gsi_next (&gsi);
> +       if (!gsi_end_p (gsi) && gsi_stmt (gsi) == gc)
> +         return e;
> +     }
> +    }
> +
> +  return NULL;
> +}
> +
> +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR.  */
> +
> +static bool
> +split_ne_loop (struct loop *loop, edge cond_e)
> +{
> +  initialize_original_copy_tables ();
> +
> +  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
> +                                  profile_probability::always (),
> +                                  profile_probability::never (),
> +                                  profile_probability::always (),
> +                                  profile_probability::always (), true);
> +
> +  gcc_assert (loop2);
> +  update_ssa (TODO_update_ssa);
> +
> +  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
> +  free_original_copy_tables ();
> +
> +  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
> +  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
> +
> +  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
> +  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
> +  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
> +  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
> +
> +  gimple_cond_set_code (gc, up_code);
> +  gimple_cond_set_code (dup_gc, down_code);
> +
> +  /* Link the exit cond edge to new loop.  */
> +  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
> +  edge pred_e = single_pred_edge (loop->latch);
> +  bool simple_loop = pred_e && pred_e->src == cond_e->src
> +                  && (gsi_end_p (gsi_start_nondebug_bb (loop->latch)));
> +  if (simple_loop)
> +    gimple_cond_set_code (break_cond, down_code);
> +  else
> +    gimple_cond_make_true (break_cond);
> +
> +  basic_block break_bb = split_edge (cond_e);
> +  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
> +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> +
> +  edge to_exit = single_succ_edge (break_bb);
> +  edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 
> 0);
> +  to_new_loop->flags |= EDGE_TRUE_VALUE;
> +  to_exit->flags |= EDGE_FALSE_VALUE;
> +  to_exit->flags &= ~EDGE_FALLTHRU;
> +  to_exit->probability = cond_e->probability;
> +  to_new_loop->probability = to_exit->probability.invert ();
> +
> +  update_ssa (TODO_update_ssa);
> +
> +  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
> +
> +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, ";; Loop split on wrap index.\n");
> +
> +  return true;
> +}
> +
> +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split.
> +L_H:
> + if (i!=N)
> +   S;
> + i++;
> + goto L_H;
> +
> +The "i!=N" is like "i>N || i<N", then it could be transform to:
> +
> +L_H:
> + if (i>N)
> +   S;
> + i++;
> + goto L_H;
> +L1_H:
> + if (i<N)
> +   S;
> + i++;
> + goto L1_H;
> +
> +The loop with "i<N" is in favor both GIMPLE and RTL passes.  */
> +
> +static bool
> +split_loop_on_ne_cond (class loop *loop)
> +{
> +  int num = 0;
> +  basic_block *bbs = get_loop_body (loop);
> +
> +  if (!can_copy_bbs_p (bbs, loop->num_nodes))
> +    {
> +      free (bbs);
> +      return false;
> +    }
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> +  free (bbs);
> +
> +  if (num > param_max_peeled_insns)
> +    return false;
> +
> +  edge branch_edge = get_ne_cond_branch (loop);
> +  if (branch_edge && split_ne_loop (loop, branch_edge))
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Main entry point.  Perform loop splitting on all suitable loops.  */
>  
>  static unsigned int
> @@ -1622,7 +1803,8 @@ tree_ssa_split_loops (void)
>        if (optimize_loop_for_size_p (loop))
>       continue;
>  
> -      if (split_loop (loop) || split_loop_on_cond (loop))
> +      if (split_loop (loop) || split_loop_on_cond (loop)
> +       || split_loop_on_ne_cond (loop))
>       {
>         /* Mark our containing loop as having had some split inner loops.  */
>         loop_outer (loop)->aux = loop;
>


Hi,

I've tried this patch and it does not seem to pass its own test:

FAIL: gcc.dg/loop-split1.c scan-tree-dump-times lsplit "Loop split" 9

Note you should always do a bootstrap and regression test as described
here: https://gcc.gnu.org/contribute.html#testing

Also please state in your future patch submissions how you tested the patch,
like "bootstrap and regression test on x86_64-pc-linux-gnu" for instance.


Bernd.

Re: [PATCH V2] Split loop for NE condition.

Reply via email to