This adjusts scale_profile_for_vect_loop to do the right thing for loops with
multiple exits, namely by using scale_loop_profile_hold_exit_counts instead of
scale_loop_profile and scaling the expected niters by 1 / VF.

Tested as a series on aarch64-linux-gnu, arm-linux-gnueabihf, and
x86_64-linux-gnu.  OK for trunk?

Thanks,
Alex

gcc/ChangeLog:

        PR tree-optimization/117790
        * tree-vect-loop.cc (scale_profile_for_vect_loop): Use
        scale_loop_profile_hold_exit_counts instead of scale_loop_profile.  Drop
        the exit edge parameter, since the code now handles multiple exits.
        Adjust the caller ...
        (vect_transform_loop): ... here.

gcc/testsuite/ChangeLog:

        PR tree-optimization/117790
        * gcc.dg/vect/vect-early-break-profile-2.c: New test.
---
 .../gcc.dg/vect/vect-early-break-profile-2.c  | 21 +++++++++++++++++++
 gcc/tree-vect-loop.cc                         | 21 ++++++-------------
 2 files changed, 27 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c
new file mode 100644
index 00000000000..03c67802b74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */
+int DECPOWERS[11];
+int multies[] = {5, 3, 1049, 0};
+short decNumberSquareRoot_accnext;
+int decNumberSquareRoot_accunits;
+void decGetDigits(short *, int);
+void decNumberSquareRoot() {
+  int exponent, drop = 0;
+  for (;; drop++) {
+    if (exponent >= 0)
+      break;
+    if (decNumberSquareRoot_accnext * multies[drop] >> 7 * DECPOWERS[drop])
+      break;
+    exponent++;
+  }
+  if (drop)
+    decGetDigits(&decNumberSquareRoot_accnext, decNumberSquareRoot_accunits);
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2b9d5956635..ca65c0058db 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -12022,7 +12022,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
    profile.  */
 
 static void
-scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool flat)
+scale_profile_for_vect_loop (class loop *loop, unsigned vf, bool flat)
 {
   /* For flat profiles do not scale down proportionally by VF and only
      cap by known iteration count bounds.  */
@@ -12053,18 +12053,10 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
       vf /= 2;
     }
 
-  if (entry_count.nonzero_p ())
-    set_edge_probability_and_rescale_others
-	    (exit_e,
-	     entry_count.probability_in (loop->header->count / vf));
-  /* Avoid producing very large exit probability when we do not have
-     sensible profile.  */
-  else if (exit_e->probability < profile_probability::always () / (vf * 2))
-    set_edge_probability_and_rescale_others (exit_e, exit_e->probability * vf);
-  loop->latch->count = single_pred_edge (loop->latch)->count ();
-
-  scale_loop_profile (loop, profile_probability::always () / vf,
-		      get_likely_max_loop_iterations_int (loop));
+  const auto likely_max_niters = get_likely_max_loop_iterations_int (loop);
+  scale_loop_profile_hold_exit_counts (loop,
+				       profile_probability::always () / vf,
+				       likely_max_niters);
 }
 
 /* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
@@ -12874,8 +12866,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 			  assumed_vf) - 1
 	 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed,
 			   assumed_vf) - 1);
-  scale_profile_for_vect_loop (loop, LOOP_VINFO_IV_EXIT (loop_vinfo),
-			       assumed_vf, flat);
+  scale_profile_for_vect_loop (loop, assumed_vf, flat);
 
   if (dump_enabled_p ())
     {

Reply via email to