After the previous fix of mine to no longer peel loops during cunrolli
to avoid littering an outer loop with tons of loop exit tests and thus
disabling vectorization there's a reported slowdown in the exchange
benchmark.  This is because we no longer fully unroll a loop with
appearant constant iterations early.  This is a long-standing issue
of cunrolli running before loop header copying and thus receiving
niters like (a < b) ? 2 : 0.  The fix is to treat zero or constant
iteration counts like if they were constant, doing loop-header copying
on-the fly during unrolling by simply not removing the first peeled
copies exit test.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-12-14  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/83326
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
        may_be_zero parameter and handle it by not marking the first
        peeled copy as not exiting the loop.
        (try_peel_loop): Likewise.
        (canonicalize_loop_induction_variables): Use number_of_iterations_exit
        to handle the case of constant or zero iterations and perform
        loop header copying on-the-fly.

        * gcc.dg/tree-ssa/pr81388-2.c: Adjust.

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c (revision 255622)
+++ gcc/tree-ssa-loop-ivcanon.c (working copy)
@@ -681,7 +681,7 @@ unloop_loops (bitmap loop_closed_ssa_inv
 
 static bool
 try_unroll_loop_completely (struct loop *loop,
-                           edge exit, tree niter,
+                           edge exit, tree niter, bool may_be_zero,
                            enum unroll_level ul,
                            HOST_WIDE_INT maxiter,
                            location_t locus, bool allow_peel)
@@ -893,6 +893,8 @@ try_unroll_loop_completely (struct loop
          exit = NULL;
          bitmap_clear (wont_exit);
        }
+      if (may_be_zero)
+       bitmap_clear_bit (wont_exit, 1);
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge 
(loop),
                                                 n_unroll, wont_exit,
@@ -977,7 +979,7 @@ estimated_peeled_sequence_size (struct l
 
 static bool
 try_peel_loop (struct loop *loop,
-              edge exit, tree niter,
+              edge exit, tree niter, bool may_be_zero,
               HOST_WIDE_INT maxiter)
 {
   HOST_WIDE_INT npeel;
@@ -1080,6 +1082,8 @@ try_peel_loop (struct loop *loop,
       exit = NULL;
       bitmap_clear (wont_exit);
     }
+  if (may_be_zero)
+    bitmap_clear_bit (wont_exit, 1);
   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                             npeel, wont_exit,
                                             exit, &edges_to_remove,
@@ -1152,13 +1156,35 @@ canonicalize_loop_induction_variables (s
   HOST_WIDE_INT maxiter;
   bool modified = false;
   location_t locus = UNKNOWN_LOCATION;
+  struct tree_niter_desc niter_desc;
+  bool may_be_zero = false;
 
-  niter = number_of_latch_executions (loop);
+  /* For unrolling allow conditional constant or zero iterations, thus
+     perform loop-header copying on-the-fly.  */
   exit = single_exit (loop);
+  niter = chrec_dont_know;
+  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
+    {
+      niter = niter_desc.niter;
+      may_be_zero
+       = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
+    }
   if (TREE_CODE (niter) == INTEGER_CST)
     locus = gimple_location (last_stmt (exit->src));
   else
     {
+      /* For non-constant niter fold may_be_zero into niter again.  */
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
+                                niter_desc.may_be_zero,
+                                build_int_cst (TREE_TYPE (niter), 0), niter);
+         else
+           niter = chrec_dont_know;
+         may_be_zero = false;
+       }
+
       /* If the loop has more than one exit, try checking all of them
         for # of iterations determinable through scev.  */
       if (!exit)
@@ -1213,17 +1239,31 @@ canonicalize_loop_induction_variables (s
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
-                                 allow_peel))
+  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
+                                 maxiter, locus, allow_peel))
     return true;
 
   if (create_iv
       && niter && !chrec_contains_undetermined (niter)
       && exit && just_once_each_iteration_p (loop, exit->src))
-    create_canonical_iv (loop, exit, niter);
+    {
+      tree iv_niter = niter;
+      if (may_be_zero)
+       {
+         if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
+           iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
+                                   niter_desc.may_be_zero,
+                                   build_int_cst (TREE_TYPE (iv_niter), 0),
+                                   iv_niter);
+         else
+           iv_niter = NULL_TREE;
+       }
+      if (iv_niter)
+       create_canonical_iv (loop, exit, iv_niter);
+    }
 
   if (ul == UL_ALL)
-    modified |= try_peel_loop (loop, exit, niter, maxiter);
+    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
 
   return modified;
 }
Index: gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c   (revision 255622)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr81388-2.c   (working copy)
@@ -11,4 +11,4 @@ void foo(unsigned dst)
   } while (dst < end);
 }
 
-/* { dg-final { scan-tree-dump-times " zero if " 1 "ivcanon" } } */
+/* { dg-final { scan-tree-dump " zero if " "ivcanon" } } */

Reply via email to