Re: [PATCH V3] Split loop for NE condition.

guojiufu via Gcc-patches Wed, 09 Jun 2021 04:20:35 -0700

On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:

On 2021-06-08 18:13, Richard Biener wrote:

On Fri, 4 Jun 2021, Jiufu Guo wrote:

cut...

+      gcond *cond = as_a<gcond *> (last);
+      enum tree_code code = gimple_cond_code (cond);
+      if (!(code == NE_EXPR
+           || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE))))

The NE_EXPR check misses a corresponding && (e->flags &EDGE_FALSE_VALUE)

check.

Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer.

+       continue;
+
+      /* Check if bound is invarant.  */
+      tree idx = gimple_cond_lhs (cond);
+      tree bnd = gimple_cond_rhs (cond);
+      if (expr_invariant_in_loop_p (loop, idx))
+       std::swap (idx, bnd);
+      else if (!expr_invariant_in_loop_p (loop, bnd))
+       continue;
+
+      /* Only unsigned type conversion could cause wrap.  */
+      tree type = TREE_TYPE (idx);
+      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
+         || !TYPE_UNSIGNED (type))
+       continue;
+
+      /* Avoid to split if bound is MAX/MIN val.  */
+      tree bound_type = TREE_TYPE (bnd);

+ if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P(bound_type)

+         && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type))
+             || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))))
+       continue;


Note you do not require 'bnd' to be constant and thus at runtime those
cases still need to be handled correctly.

Yes, bnd is not required to be constant. The above code is filteringthe casewhere bnd is const max/min value of the type. So, the code could beupdated as:

      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type))
          || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))

+      /* Check if there is possible wrap.  */
+      class tree_niter_desc niter;
+ if (!number_of_iterations_exit (loop, e, &niter, false,false))

cut...

+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */


It now occurs to me that we nowhere check the evolution of IDX
(split_at_bb_p uses simple_iv for this for example).  The transform
assumes that we will actually hit i == n and that i increments, but
while you check the control IV from number_of_iterations_exit
for NE_EXPR that does not guarantee a positive evolution.

If I do not correctly reply your question, please point out:
number_of_iterations_exit is similar with simple_iv to invoke
simple_iv_with_niters
which check the evolution, and number_of_iterations_exit check
number_of_iterations_cond

which check no_overflow more accurate, this is one reason I use thisfunction.


This transform assumes that the last run hits i==n.
Otherwise, the loop may run infinitely wrap after wrap.
For safe, if the step is 1 or -1,  this assumption would be true.  I
would add this check.

Thanks so much for pointing out I missed the negative step!

Your testcases do not include any negative step examples, but I guess
the conditions need to be swapped in this case?


I would add cases and code to support step 1/-1.


I think you also have to consider the order we split, say with

  for (i = start; i != end; ++i)
    {
      push (i);
      if (a[i] != b[i])
        break;
    }

push (i) calls need to be in the same order for all cases of
start < end, start == end and start > end (and also cover
runtime testcases with end == 0 or end == UINT_MAX, likewise
for start).

I add tests for the above cases. If missing sth, please point out,thanks!

+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;

cut....

Thanks again for the very helpful review!

BR,
Jiufu Guo.


Here is the updated patch, thanks for your time!

diff --git a/gcc/testsuite/gcc.dg/loop-split1.cb/gcc/testsuite/gcc.dg/loop-split1.c

new file mode 100644
index 00000000000..dd2d03a7b96
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split1.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+void
+foo_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    a[l] = b[l] + 1;
+}
+
+/* No wrap.  */
+void
+foo1_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (l++ != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo2_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo3 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+/* No wrap.  */
+unsigned
+foo3_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+void
+bar ();
+void
+foo4 (unsigned n, unsigned i)
+{
+  do
+    {
+      if (i == n)
+       return;
+      bar ();
+      ++i;
+    }
+  while (1);
+}
+
+unsigned
+find_skip_diff (char *p, char *q, unsigned n, unsigned i)
+{
+  while (p[i] == q[i] && ++i != n)
+    p++, q++;
+
+  return i;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */

diff --git a/gcc/testsuite/gcc.dg/loop-split2.cb/gcc/testsuite/gcc.dg/loop-split2.c

new file mode 100644
index 00000000000..56377e2f2f5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split2.c
@@ -0,0 +1,155 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+extern void
+abort (void);
+extern void
+exit (int);
+void
+push (int);
+
+#define NI __attribute__ ((noinline))
+
+void NI
+foo (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (++l != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned NI
+bar (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (l++ != n)
+    {
+      push (l);
+      if (a[l] != b[l])
+       break;
+      push (l + 1);
+    }
+  return l;
+}
+
+void NI
+foo_1 (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (--l != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned NI
+bar_1 (int *a, int *b, unsigned char l, unsigned char n)
+{
+  while (l-- != n)
+    {
+      push (l);
+      if (a[l] != b[l])
+       break;
+      push (l + 1);
+    }
+
+  return l;
+}
+
+int a[258];
+int b[258];
+int c[1024];
+static int top = 0;
+void
+push (int e)
+{
+  c[top++] = e;
+}
+
+void
+reset ()
+{
+  top = 0;
+  __builtin_memset (c, 0, sizeof (c));
+}
+
+#define check(a, b) (a == b)
+
+int
+check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5)
+{
+  return check (c[0], a0) && check (c[1], a1) && check (c[2], a2)
+        && check (c[3], a3) && check (c[4], a4) && check (c[5], a5);
+}
+
+int
+main ()
+{
+  __builtin_memcpy (b, a, sizeof (a));
+  reset ();
+  if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0))
+    abort ();
+
+  reset ();
+  if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9)
+      || !check_c (c + 496, 254, 255, 255, 256, 0, 1))
+    abort ();
+
+  reset ();
+  if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0))
+    abort ();
+
+  reset ();

+ if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0,0))

+    abort ();
+
+  reset ();

+ if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0,1))

+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4))
+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0))
+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 6, 6) != 5)
+    abort ();
+
+  reset ();

+ if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255,256))

+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0))
+    abort ();
+
+  b[100] += 1;
+  reset ();
+  if (bar (a, b, 90, 110) != 100)
+    abort ();
+
+  reset ();
+  if (bar (a, b, 110, 105) != 100)
+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 90, 110) != 109)
+    abort ();
+
+  reset ();
+  if (bar_1 (a, b, 2, 90) != 100)
+    abort ();
+
+  foo (a, b, 99, 99);
+  a[99] = b[99] + 1;
+  for (int i = 0; i < 256; i++)
+    if (a[i] != b[i] + 1)
+      abort ();
+
+  foo_1 (a, b, 99, 99);
+  a[99] = b[99] + 1;
+  for (int i = 0; i < 256; i++)
+    if (a[i] != b[i] + 1)
+      abort ();
+
+  exit (0);
+}

diff --git a/gcc/testsuite/gcc.dg/loop-split3.cb/gcc/testsuite/gcc.dg/loop-split3.c

new file mode 100644
index 00000000000..ec93ee8bd12
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split3.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (--l != n)
+    a[l] = b[l] + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l-- != n)
+    a[l] = b[l] + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (--l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo3 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (l-- != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+void
+bar ();
+void
+foo4 (unsigned n, unsigned i)
+{
+  do
+    {
+      if (i == n)
+       return;
+      bar ();
+      --i;
+    }
+  while (1);
+}
+
+unsigned
+find_skip_diff (char *p, char *q, unsigned n, unsigned i)
+{
+  while (p[i] == q[i] && --i != n)
+    p--, q--;
+
+  return i;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..e9f23b32186 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "gimple-fold.h"
 #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"

 /* This file implements two kinds of loop splitting.

@@ -229,11 +230,14 @@ easy_exit_values (class loop *loop)
    conditional).  I.e. the second loop can now be entered either
    via the original entry or via NEW_E, so the entry values of LOOP2
    phi nodes are either the original ones or those at the exit
-   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
-   this.  The loops need to fulfill easy_exit_values().  */
+   of LOOP1.  Selecting the previous value instead next value as the
+   exit value of LOOP1 if USE_PREV is true.  Insert new phi nodes in
+   LOOP2 pre-header reflecting this.  The loops need to fulfill
+   easy_exit_values().  */

 static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+                  bool use_prev = false)
 {
   basic_block rest = loop_preheader_edge (loop2)->src;
   gcc_assert (new_e->dest == rest);

@@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop*loop2, edge new_e)


       gphi * newphi = create_phi_node (new_init, rest);
       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);

+ add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next,new_e,

+                  UNKNOWN_LOCATION);
       SET_USE (op, new_init);
     }
 }
@@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop)
   return do_split;
 }

+/* Check if the LOOP exit branch is like "if (idx != bound)",

+ Return the branch edge which exit loop, if wrap may happen on "idx".*/

+
+static edge
+get_ne_cond_branch (struct loop *loop, tree *step)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      basic_block bb = e->src;
+
+      /* Check if exit at gcond.  */
+      gimple *last = last_stmt (bb);
+      if (!last || gimple_code (last) != GIMPLE_COND)
+       continue;
+      gcond *cond = as_a<gcond *> (last);
+      enum tree_code code = gimple_cond_code (cond);
+      if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE))
+           || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE))))
+       continue;
+
+      /* Check if bound is invarant.  */
+      tree idx = gimple_cond_lhs (cond);
+      tree bnd = gimple_cond_rhs (cond);
+      if (expr_invariant_in_loop_p (loop, idx))
+       std::swap (idx, bnd);
+      else if (!expr_invariant_in_loop_p (loop, bnd))
+       continue;
+
+      /* Only unsigned type conversion could cause wrap.  */
+      tree type = TREE_TYPE (idx);
+      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
+         || !TYPE_UNSIGNED (type))
+       continue;
+
+      /* Avoid to split if bound is MAX/MIN val.  */
+      tree bound_type = TREE_TYPE (bnd);
+      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type))
+         || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))
+       continue;
+
+      /* Check if there is possible wrap.  */
+      class tree_niter_desc niter;
+      if (!number_of_iterations_exit (loop, e, &niter, false, false))
+       continue;
+      if (niter.control.no_overflow)
+       return NULL;
+      if (niter.cmp != NE_EXPR)
+       continue;
+      if (!integer_onep (niter.control.step)
+         && !integer_minus_onep (niter.control.step))
+       continue;
+      *step = niter.control.step;
+

+ /* If exit edge is just before the empty latch, it is easy tolink

+        the split loops: just jump from the exit edge of one loop to the
+        header of new loop.  */
+      if (single_pred_p (loop->latch)
+         && single_pred_edge (loop->latch)->src == bb
+         && empty_block_p (loop->latch))
+       return e;
+

+ /* If exit edge is at end of header, and header contains i++ or++i,

+        only, it is simple to link the split loops: jump from the end of
+        one loop header to the new loop header, and use unchanged PHI result
+        of the first loop as the entry PHI value of the second loop.  */
+      if (bb == loop->header)
+       {
+         /* Only one phi.  */
+         gphi_iterator psi = gsi_start_phis (bb);
+         if (gsi_end_p (psi))
+           continue;
+         gphi *phi = psi.phi ();
+         gsi_next (&psi);
+         if (!gsi_end_p (psi))
+           continue;
+
+         /* Check it is ++i or ++i */
+         tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+         tree prev = PHI_RESULT (phi);
+         if (idx != prev && idx != next)
+           continue;
+
+         gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb);
+         if (gsi_end_p (gsi))
+           continue;
+         gimple *s1 = gsi_stmt (gsi);
+         if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next
+             || gimple_assign_rhs1 (s1) != prev)
+           continue;
+
+         gsi_next_nondebug (&gsi);
+         if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond)
+           return e;
+       }
+    }
+
+  return NULL;
+}
+

+/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR.*/

+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e, tree step)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+                                    profile_probability::always (),
+                                    profile_probability::never (),
+                                    profile_probability::always (),
+                                    profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
+  free_original_copy_tables ();
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
+
+  /* Invert edges for gcond.  */
+  if (gimple_cond_code (gc) == EQ_EXPR)
+    {
+      auto invert_edge = [](basic_block bb) {
+       edge out = EDGE_SUCC (bb, 0);
+       edge in = EDGE_SUCC (bb, 1);
+       if (in->flags & EDGE_TRUE_VALUE)
+         std::swap (in, out);
+       in->flags |= EDGE_TRUE_VALUE;
+       in->flags &= ~EDGE_FALSE_VALUE;
+       out->flags |= EDGE_FALSE_VALUE;
+       out->flags &= ~EDGE_TRUE_VALUE;
+      };
+
+      invert_edge (gimple_bb (gc));
+      invert_edge (gimple_bb (dup_gc));
+    }
+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  if (tree_int_cst_sign_bit (step))
+    inv = !inv;
+  enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR;
+
+  gimple_cond_set_code (gc, first_loop_code);
+  gimple_cond_set_code (dup_gc, second_loop_code);
+
+  /* Link the exit cond edge to new loop.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  bool simple_loop

+ = pred_e && pred_e->src == cond_e->src && empty_block_p(loop->latch);

+  if (simple_loop)
+    gimple_cond_set_code (break_cond, second_loop_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);

+ edge to_new_loop = make_edge (break_bb, loop_preheader_edge(loop2)->src, 0);

+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+

+/* Checks if LOOP contains a suitable NE_EXPR conditional block tosplit.

+L_H:
+ if (i!=N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", then it could be transformed to:
+
+L_H:
+ if (i>N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ else
+   goto EXIT;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is in favor of both GIMPLE and RTL passes.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  tree step;
+  edge branch_edge = get_ne_cond_branch (loop, &step);
+  if (!branch_edge)
+    return false;
+
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+
+  if (num > param_max_peeled_insns)
+    {
+      free (bbs);
+      return false;
+    }
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+  free (bbs);
+
+  if (split_ne_loop (loop, branch_edge, step))
+    return true;
+
+  return false;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */

 static unsigned int
@@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void)
       if (optimize_loop_for_size_p (loop))
        continue;

-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+         || split_loop_on_ne_cond (loop))
        {

/* Mark our containing loop as having had some split inner loops.*/

          loop_outer (loop)->aux = loop;

Re: [PATCH V3] Split loop for NE condition.

Reply via email to