Re: [PATCH] Avoid peeling in cunrolli

Richard Biener Wed, 29 Nov 2017 23:53:07 -0800

On Wed, 29 Nov 2017, Richard Biener wrote:

> 
> It turns out that we don't vectorize the 2nd testcase in PR83202
> (or rather we do that in weird ways during BB vectorization) because
> cunrolli decides to peel the inner loop completely based on
> the size of the accessed arrays.  That unfortunately leaves exit
> tests in the outer loop body which in turn makes us not vectorize
> the loop.
> 
> We have a late unrolling pass for this kind of unrolling, so this
> patch simply avoids doing it during cunrolli.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.


And this is what I applied after reviewing the testsuite regressions of
the first version of the patch.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2017-11-30  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/83202
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
        allow_peel argument and guard peeling.
        (canonicalize_loop_induction_variables): Likewise.
        (canonicalize_induction_variables): Pass false.
        (tree_unroll_loops_completely_1): Pass unroll_outer to disallow
        peeling from cunrolli.

        * gcc.dg/vect/pr83202-1.c: New testcase.
        * gcc.dg/tree-ssa/pr61743-1.c: Adjust.

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c (revision 255201)
+++ gcc/tree-ssa-loop-ivcanon.c (working copy)
@@ -679,7 +679,7 @@ try_unroll_loop_completely (struct loop
                            edge exit, tree niter,
                            enum unroll_level ul,
                            HOST_WIDE_INT maxiter,
-                           location_t locus)
+                           location_t locus, bool allow_peel)
 {
   unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
@@ -711,7 +711,8 @@ try_unroll_loop_completely (struct loop
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
@@ -1139,7 +1140,7 @@ try_peel_loop (struct loop *loop,
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
                                       bool create_iv, enum unroll_level ul,
-                                      bool try_eval)
+                                      bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
@@ -1207,7 +1208,8 @@ canonicalize_loop_induction_variables (s
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
+                                 allow_peel))
     return true;
 
   if (create_iv
@@ -1238,7 +1240,7 @@ canonicalize_induction_variables (void)
     {
       changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
-                                                       true);
+                                                       true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1353,7 +1355,7 @@ tree_unroll_loops_completely_1 (bool may
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
         within the new basic blocks to fold away induction variable
Index: gcc/testsuite/gcc.dg/vect/pr83202-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr83202-1.c       (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr83202-1.c       (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[8][8])
+{
+  for (int i = 0; i < 8; i++)
+    {
+      for (int j = 0; j < i; j+=4)
+       {
+         data[i][j] *= data[i][j];
+         data[i][j+1] *= data[i][j+1];
+         data[i][j+2] *= data[i][j+2];
+         data[i][j+3] *= data[i][j+3];
+       }
+    }
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "ectorized 1 loops" "vect" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c   (revision 255201)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c   (working copy)
@@ -48,5 +48,6 @@ int foo1 (e_u8 a[4][N], int b1, int b2,
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 8 "cunroll" } } */
-/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "completely unrolled" "cunrolli" } } */

Re: [PATCH] Avoid peeling in cunrolli

Reply via email to