This adjusts scale_profile_for_vect_loop to do the right thing for loops with multiple exits, namely by using scale_loop_profile_hold_exit_counts instead of scale_loop_profile and by scaling the expected number of iterations (niters) by 1 / VF.
Tested as a series on aarch64-linux-gnu, arm-linux-gnueabihf, and x86_64-linux-gnu. OK for trunk?

Thanks,
Alex

gcc/ChangeLog:

	PR tree-optimization/117790
	* tree-vect-loop.cc (scale_profile_for_vect_loop): Use
	scale_loop_profile_hold_exit_counts instead of scale_loop_profile.
	Drop the exit edge parameter, since the code now handles multiple
	exits. Adjust the caller ...
	(vect_transform_loop): ... here.

gcc/testsuite/ChangeLog:

	PR tree-optimization/117790
	* gcc.dg/vect/vect-early-break-profile-2.c: New test.
---
 .../gcc.dg/vect/vect-early-break-profile-2.c | 21 +++++++++++++++++++
 gcc/tree-vect-loop.cc | 21 ++++++-------------
 2 files changed, 27 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c new file mode 100644 index 00000000000..03c67802b74 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */ +int DECPOWERS[11]; +int multies[] = {5, 3, 1049, 0}; +short decNumberSquareRoot_accnext; +int decNumberSquareRoot_accunits; +void decGetDigits(short *, int); +void decNumberSquareRoot() { + int exponent, drop = 0; + for (;; drop++) { + if (exponent >= 0) + break; + if (decNumberSquareRoot_accnext * multies[drop] >> 7 * DECPOWERS[drop]) + break; + exponent++; + } + if (drop) + decGetDigits(&decNumberSquareRoot_accnext, decNumberSquareRoot_accunits); +} +/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2b9d5956635..ca65c0058db 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -12022,7 +12022,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, profile. */ static void -scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool flat) +scale_profile_for_vect_loop (class loop *loop, unsigned vf, bool flat) { /* For flat profiles do not scale down proportionally by VF and only cap by known iteration count bounds. */ @@ -12053,18 +12053,10 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl vf /= 2; } - if (entry_count.nonzero_p ()) - set_edge_probability_and_rescale_others - (exit_e, - entry_count.probability_in (loop->header->count / vf)); - /* Avoid producing very large exit probability when we do not have - sensible profile. 
*/ - else if (exit_e->probability < profile_probability::always () / (vf * 2)) - set_edge_probability_and_rescale_others (exit_e, exit_e->probability * vf); - loop->latch->count = single_pred_edge (loop->latch)->count (); - - scale_loop_profile (loop, profile_probability::always () / vf, - get_likely_max_loop_iterations_int (loop)); + const auto likely_max_niters = get_likely_max_loop_iterations_int (loop); + scale_loop_profile_hold_exit_counts (loop, + profile_probability::always () / vf, + likely_max_niters); } /* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI @@ -12874,8 +12866,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) assumed_vf) - 1 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed, assumed_vf) - 1); - scale_profile_for_vect_loop (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), - assumed_vf, flat); + scale_profile_for_vect_loop (loop, assumed_vf, flat); if (dump_enabled_p ()) {