This patch tries to make the CFG profile consistent when adding a guard
edge to skip the epilog during peeling.

The changes can be summarized as follows:
 - We avoid adding the guard edge entirely if the guard condition folds
   to false; otherwise the profile would become inconsistent, since
   the cfgcleanup code doesn't attempt to update it when removing the dead
   edge.
 - If the guard condition instead folds to true, we account for this by
   giving the skip edge 100% probability (otherwise the profile will
   again become inconsistent when removing the other now-dead edge).
 - Finally, we use the new helper scale_loop_freqs_with_new_exit_count instead
   of scale_loop_profile to update the epilog frequencies / probabilities.
   We make the assumption here that if the IV exit is taken in the vector loop,
   then it will also be taken in the epilog (and not an early exit).  Since we
   add the guard to the vector iv exit, we know any reduction in count
   associated with the epilog skip should be accounted for by a reduction in the
   epilog's iv exit edge count.

Bootstrapped/regtested as a series on aarch64-linux-gnu, arm-linux-gnueabihf,
and x86_64-linux-gnu.  OK for trunk?

Thanks,
Alex

gcc/ChangeLog:

        PR tree-optimization/117790
        * tree-vect-loop-manip.cc (vect_do_peeling): Attempt to maintain
        consistency of the CFG profile when adding an epilog skip edge.

gcc/testsuite/ChangeLog:

        PR tree-optimization/117790
        * gcc.dg/vect/vect-early-break-profile-1.c: New test.
---
 .../gcc.dg/vect/vect-early-break-profile-1.c  | 10 ++++
 gcc/tree-vect-loop-manip.cc                   | 48 ++++++++++++++-----
 2 files changed, 47 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
new file mode 100644
index 00000000000..5387e3a0465
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */
+int a[100];
+void f()
+{
+  for (int i = 0; i < 100 && a[i]; i++)
+    a[i]++;
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 53d36eaa25f..4d472ab56ab 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3546,18 +3546,23 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 
       /* If we have a peeled vector iteration we will never skip the epilog loop
 	 and we can simplify the cfg a lot by not doing the edge split.  */
-      if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+      guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
+				niters, niters_vector_mult_vf);
+      if ((skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+	  && !integer_zerop (guard_cond))
 	{
-	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
-				    niters, niters_vector_mult_vf);
+	  profile_probability prob_skip
+	    = integer_onep (guard_cond)
+	    ? profile_probability::always ()
+	    : prob_epilog.invert ();
 
 	  guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+	  edge enter_e = single_succ_edge (guard_bb);
 	  edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
 	  guard_to = epilog_e->dest;
 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
 					   skip_vector ? anchor : guard_bb,
-					   prob_epilog.invert (),
-					   irred_flag);
+					   prob_skip, irred_flag);
 	  doms.safe_push (guard_to);
 	  if (vect_epilogues)
 	    epilogue_vinfo->skip_this_loop_edge = guard_e;
@@ -3586,15 +3591,36 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 		}
 	    }
 
-	  /* Only need to handle basic block before epilog loop if it's not
-	     the guard_bb, which is the case when skip_vector is true.  */
-	  if (guard_bb != bb_before_epilog)
+	  basic_block epilog_ph = loop_preheader_edge (epilog)->src;
+
+	  profile_probability epilog_scale
+	    = (epilog_ph->count - guard_e->count ()).probability_in (epilog_ph->count);
+
+	  enter_e->dest->count -= guard_e->count ();
+
+	  /* If we added a vector skip then ENTER_E may not target the epilog
+	     preheader but a block that has the epilog preheader as its single
+	     successor: handle that case.  */
+	  if (enter_e->dest != epilog_ph)
 	    {
-	      prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
+	      gcc_assert (single_succ (enter_e->dest) == epilog_ph);
+	      epilog_ph->count -= guard_e->count ();
+	    }
 
-	      scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
+	  if (epilog_e->count () < guard_e->count ())
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "epilog iv exit count < guard count, "
+				 "profile will become inconsistent\n");
+	    }
+	  else
+	    {
+	      profile_count new_iv_count
+		= epilog_e->count () - guard_e->count ();
+	      scale_loop_freqs_with_new_exit_count (epilog, epilog_scale,
+						    epilog_e, new_iv_count);
 	    }
-	  scale_loop_profile (epilog, prob_epilog, -1);
 	}
 
       /* Recalculate the dominators after adding the guard edge.  */

Reply via email to