Re: [PATCH GCC][3/3]Refine CFG and bound information for split loops

Bin Cheng Fri, 20 Oct 2017 06:08:40 -0700






From: Richard Biener <richard.guent...@gmail.com>
Sent: 20 October 2017 12:24
To: Bin Cheng
Cc: gcc-patches@gcc.gnu.org; nd
Subject: Re: [PATCH GCC][3/3]Refine CFG and bound information for split loops
    
On Thu, Oct 19, 2017 at 3:26 PM, Bin Cheng <bin.ch...@arm.com> wrote:
> Hi,
> This is a rework of patch at  
> https://gcc.gnu.org/ml/gcc-patches/2017-06/msg01037.html.
> The new patch doesn't try to handle all cases, instead, it only handles 
> obvious cases.
> It also tries to add tests illustrating different cases handled.
> Bootstrap and test for patch set on x86_64 and AArch64.  Comments?

ENOPATCH

Sorry for the mistake, here is the one.

Thanks,
bin

> Thanks,
> bin
> 2017-10-16  Bin Cheng  <bin.ch...@arm.com>
>
>         * tree-ssa-loop-split.c (compute_new_first_bound): New parameter.
>         Compute and return bound information for the second split loop.
>         (adjust_loop_split): New function.
>         (split_loop): Update use and call above function.
>
> gcc/testsuite/ChangeLog
> 2017-10-16  Bin Cheng  <bin.ch...@arm.com>
>
>         * gcc.dg/loop-split-1.c: New test.
>         * gcc.dg/loop-split-2.c: New test.
>         * gcc.dg/loop-split-3.c: New test.

From 3bf8b382682b6a6c6aedf6f085d663e6379f003a Mon Sep 17 00:00:00 2001
From: Bin Cheng <binch...@e108451-lin.cambridge.arm.com>
Date: Wed, 2 Aug 2017 14:57:27 +0100
Subject: [PATCH 3/3] lsplit-refine-cfg-niter-bound-20171017.txt

---
 gcc/testsuite/gcc.dg/loop-split-1.c |  40 ++++++++
 gcc/testsuite/gcc.dg/loop-split-2.c |  34 +++++++
 gcc/testsuite/gcc.dg/loop-split-3.c |  40 ++++++++
 gcc/tree-ssa-loop-split.c           | 179 +++++++++++++++++++++++++++++++++---
 4 files changed, 282 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-split-1.c
 create mode 100644 gcc/testsuite/gcc.dg/loop-split-2.c
 create mode 100644 gcc/testsuite/gcc.dg/loop-split-3.c

diff --git a/gcc/testsuite/gcc.dg/loop-split-1.c b/gcc/testsuite/gcc.dg/loop-split-1.c
new file mode 100644
index 0000000..7cf6a37
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split-1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+#define NUM (100)
+int x[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int y[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int r[NUM] = {0, 2, 4, 6, 8, 12, 14, 18, 20, 24, 29, 31};
+
+extern void abort (void);
+int __attribute__((noinline)) foo (int *a, int *b, int len)
+{
+  int k;
+  for (k = 1; k <= len; k++)
+    {
+      a[k]++;
+
+      if (k < len)
+	b[k]++;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  foo (x, y, 9);
+
+  for (i = 0; i < NUM; ++i)
+    {
+      if (i != 9
+	  && (x[i] != r[i] || y[i] != r[i]))
+	abort ();
+      if (i == 9
+	  && (x[i] != r[i] || y[i] != r[i] - 1))
+	abort ();
+    }
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump "The second split loop iterates at 0 latch times." "lsplit" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-split-2.c b/gcc/testsuite/gcc.dg/loop-split-2.c
new file mode 100644
index 0000000..3659a7a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split-2.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+#define NUM (100)
+int x[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int y[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int r[NUM] = {1, 1, 4, 5, 8, 11, 14, 17, 20, 23, 29, 31};
+
+extern void abort (void);
+int __attribute__((noinline)) foo (int *a, int *b, int len)
+{
+  int k, i;
+  for (k = 0, i = 1; k < len; k += 2, i += 2)
+    {
+      a[k]++;
+
+      if (i < 1 + len)
+	b[k]++;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  foo (x, y, 9);
+
+  for (i = 0; i < NUM; ++i)
+    if (x[i] != r[i] || y[i] != r[i])
+      abort ();
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump "The second split loop is never executed." "lsplit" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-split-3.c b/gcc/testsuite/gcc.dg/loop-split-3.c
new file mode 100644
index 0000000..10e7cfd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split-3.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+#define NUM (100)
+int x[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int y[NUM] = {0, 1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31};
+int r[NUM] = {0, 2, 4, 6, 8, 12, 14, 18, 20, 24, 30, 31};
+
+extern void abort (void);
+int __attribute__((noinline)) foo (int *a, int *b, int start, int end)
+{
+  int k;
+  for (k = start; k >= end; k--)
+    {
+      a[k]++;
+
+      if (k > end)
+	b[k]++;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  foo (x, y, 10, 1);
+
+  for (i = 0; i < NUM; ++i)
+    {
+      if (i != 1
+	  && (x[i] != r[i] || y[i] != r[i]))
+	abort ();
+      if (i == 1
+	  && (x[i] != r[i] || y[i] != r[i] - 1))
+	abort ();
+    }
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump "The second split loop iterates at 0 latch times." "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index e454cc5..ee398a2 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -364,8 +364,8 @@ connect_loops (struct loop *loop1, struct loop *loop2)
 
 /* This returns the new bound for iterations given the original iteration
    space in NITER, an arbitrary new bound BORDER, assumed to be some
-   comparison value with a different IV, the initial value GUARD_INIT of
-   that other IV, and the comparison code GUARD_CODE that compares
+   comparison value with a different IV, the initial value GUARD_INIT and
+   STEP of that other IV, and the comparison code GUARD_CODE that compares
    that other IV with BORDER.  We return an SSA name, and place any
    necessary statements for that computation into *STMTS.
 
@@ -387,12 +387,26 @@ connect_loops (struct loop *loop1, struct loop *loop2)
 
    Depending on the direction of the IVs and if the exit tests
    are strict or non-strict we need to use MIN or MAX,
-   and add or subtract 1.  This routine computes newend above.  */
+   and add or subtract 1.  This routine computes newend above.
+
+   After computing the new bound (on j), we may be able to compare the
+   first loop's iteration space against the original loop's.  If it is
+   comparable at compilation time, we may be able to compute the niter
+   bound of the second loop.  Record the second loop's iteration bound
+   to SECOND_LOOP_NITER_BOUND which has below meaning:
+
+     -3: Don't know anything about the second loop;
+     -2: The second loop must not be executed;
+     -1: The second loop must be executed, but niter bound is unknown;
+      n: The second loop must be executed, niter bound is n (>= 0);
+
+   Note we compute niter bound for the second loop's latch.  */
 
 static tree
 compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
-			 tree border,
-			 enum tree_code guard_code, tree guard_init)
+			 tree border, enum tree_code guard_code,
+			 tree guard_init, tree step,
+			 HOST_WIDE_INT *second_loop_niter_bound)
 {
   /* The niter structure contains the after-increment IV, we need
      the loop-enter base, so subtract STEP once.  */
@@ -447,15 +461,16 @@ compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
   /* Depending on the direction of the IVs the new bound for the first
      loop is the minimum or maximum of old bound and border.
      Also, if the guard condition isn't strictly less or greater,
-     we need to adjust the bound.  */ 
+     we need to adjust the bound.  */
   int addbound = 0;
-  enum tree_code minmax;
+  enum tree_code minmax, cmp_code;
   if (niter->cmp == LT_EXPR)
     {
       /* GT and LE are the same, inverted.  */
       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
 	addbound = -1;
       minmax = MIN_EXPR;
+      cmp_code = LT_EXPR;
     }
   else
     {
@@ -463,6 +478,7 @@ compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
 	addbound = 1;
       minmax = MAX_EXPR;
+      cmp_code = GT_EXPR;
     }
 
   if (addbound)
@@ -478,9 +494,146 @@ compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
 			       build_int_cst (type2, addbound));
     }
 
-  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
-			      border, newbound);
-  return newend;
+  gimple_seq tmp = NULL;
+  tree cmp_rslt = gimple_build (&tmp, cmp_code, boolean_type_node, border,
+				gimple_convert (&tmp, TREE_TYPE (border),
+						newbound));
+  /* For case in which second loop must be executed, we only handle simple
+     case with unit step.  */
+  if (cmp_rslt != NULL_TREE
+      && integer_nonzerop (cmp_rslt)
+      && types_compatible_p (TREE_TYPE (border), TREE_TYPE (newbound))
+      && wi::eq_p (wi::abs (wi::to_wide (step)), 1))
+    {
+      tree niter_type, diff;
+
+      niter_type = unsigned_type_for (TREE_TYPE (newbound));
+      /* The split condition indicates smaller iteration space than the
+	 original loop, so the second loop must be executed.  */
+      if (cmp_code == LT_EXPR)
+	diff = gimple_build (&tmp, MINUS_EXPR, niter_type,
+			     gimple_convert (&tmp, niter_type, newbound),
+			     gimple_convert (&tmp, niter_type, border));
+      else
+	diff = gimple_build (&tmp, MINUS_EXPR, niter_type,
+			     gimple_convert (&tmp, niter_type, border),
+			     gimple_convert (&tmp, niter_type, newbound));
+      /* Check if niter can be computed.  */
+      if (tree_fits_shwi_p (diff) && !tree_int_cst_sign_bit (diff))
+	*second_loop_niter_bound = tree_to_shwi (diff) - 1;
+      else
+	*second_loop_niter_bound = -1;
+
+      gimple_seq_discard (tmp);
+      return border;
+    }
+
+  /* For newbound equals border case, we can handle arbitrary steps.  */
+  cmp_rslt = gimple_build (&tmp, EQ_EXPR, boolean_type_node, border, newbound);
+  gimple_seq_discard (tmp);
+  if (cmp_rslt != NULL_TREE && integer_nonzerop (cmp_rslt))
+    {
+      /* The split condition indicates the same iteration space as the
+	 original space, so the second loop must not be executed.  */
+      *second_loop_niter_bound = -2;
+      return border;
+    }
+  *second_loop_niter_bound = -3;
+  return gimple_build (stmts, minmax, TREE_TYPE (border), border, newbound);
+}
+
+/* We have below control flow graph after versioning the original loop:
+
+               .------ guard1  ------.
+               v                     v
+         pre1(loop1)    .---------->pre2(loop2)
+              |         |            |
+        .--->h1         |           h2<----.
+        |     |         |            |     |
+        |    ex1---.    |       .---ex2    |
+        |    /     v    |       |     \    |
+        '---l1 guard2---'       |     l2---'
+                   |            |
+                   |            |
+                   '--->join<---'
+
+   LOOP2 is the second loop after split, GUARD1 and GUARD2 are the two bbs
+   controling if loop2 should be executed.  SECOND_LOOP_NITER_BOUND is the
+   niter bound for LOOP2's latch which has below meaning:
+
+     -3: Don't know anything about the second loop;
+     -2: The second loop must not be executed;
+     -1: The second loop must be executed, but niter bound is unknown;
+      n: The second loop must be executed, niter bound is n (>= 0).
+
+   With the information, this function folds GUARD1 and GUARD2 to true or
+   false so that LOOP2 is always executed or skipped.  It also sets upper
+   bound for LOOP2's niter.  */
+
+static void
+adjust_loop_split (struct loop *loop2,
+		   basic_block guard1, basic_block guard2,
+		   HOST_WIDE_INT second_loop_niter_bound)
+{
+  if (second_loop_niter_bound == -3)
+    return;
+
+  gcond *cond1 = as_a<gcond *> (last_stmt (guard1));
+  gcond *cond2 = as_a<gcond *> (last_stmt (guard2));
+
+  gcc_assert (EDGE_COUNT (guard1->succs) == 2);
+  gcc_assert (EDGE_COUNT (guard2->succs) == 2);
+  edge e = EDGE_SUCC (guard2, 0);
+  if (e->dest != loop_preheader_edge (loop2)->src)
+    {
+      e = EDGE_SUCC (guard2, 1);
+      gcc_assert (e->dest == loop_preheader_edge (loop2)->src);
+    }
+
+  bool invert_cond2 = (e->flags & EDGE_FALSE_VALUE);
+
+  /* Loop2 should never be executed.  */
+  if (second_loop_niter_bound == -2)
+    {
+      gimple_cond_make_true (cond1);
+      if (invert_cond2)
+	gimple_cond_make_true (cond2);
+      else
+	gimple_cond_make_false (cond2);
+
+      update_stmt (cond1);
+      update_stmt (cond2);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, ";; The second split loop is never executed.\n");
+
+      return;
+    }
+  /* Loop2 must be executed for unknown times.  */
+  if (second_loop_niter_bound == -1)
+    {
+      if (invert_cond2)
+	gimple_cond_make_false (cond2);
+      else
+	gimple_cond_make_true (cond2);
+
+      update_stmt (cond2);
+      return;
+    }
+
+  /* Loop2 must be executed for known times.  */
+  gcc_assert (second_loop_niter_bound >= 0);
+  if (invert_cond2)
+    gimple_cond_make_false (cond2);
+  else
+    gimple_cond_make_true (cond2);
+
+  update_stmt (cond2);
+  record_niter_bound (loop2, second_loop_niter_bound, true, true);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; The second split loop iterates at %i latch"
+	     " times.\n", (int)max_loop_iterations_int (loop2));
 }
 
 /* Checks if LOOP contains an conditional block whose condition
@@ -579,13 +732,17 @@ split_loop (struct loop *loop1, struct tree_niter_desc *niter)
 	   Compute the new bound for the guarding IV and patch the
 	   loop exit to use it instead of original IV and bound.  */
 	gimple_seq stmts = NULL;
+	HOST_WIDE_INT second_loop_niter_bound;
 	tree newend = compute_new_first_bound (&stmts, niter, border,
-					       guard_code, guard_init);
+					       guard_code, guard_init, iv.step,
+					       &second_loop_niter_bound);
 	if (stmts)
 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 					    stmts);
 	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
+	adjust_loop_split (loop2, cond_bb, new_e->src,
+			   second_loop_niter_bound);
 
 	/* Finally patch out the two copies of the condition to be always
 	   true/false (or opposite).  */
-- 
1.9.1

Re: [PATCH GCC][3/3]Refine CFG and bound information for split loops

Reply via email to