This is a v2. v1 was posted here: https://gcc.gnu.org/pipermail/gcc-patches/2025-January/672678.html
Changes since v1: - Made epilog profile update code more resilient against already-inconsistent profiles (e.g. PR120065), avoid ICEing in such a situation. Bootstrapped/regtested as a series on aarch64-linux-gnu and x86_64-linux-gnu. OK for trunk? Thanks, Alex -- >8 -- This patch tries to make the CFG profile consistent when adding a guard edge to skip the epilog during peeling. The changes can be summarized as follows: - We avoid adding the guard edge entirely if the guard condition folds to false, otherwise the profile will become inconsistent since the cfgcleanup code doesn't attempt to update it on removing the dead edge. - If the guard condition instead folds to true, we account for this by giving the skip edge 100% probability (otherwise the profile will again become inconsistent when removing the other now-dead edge). - Finally, we use the new helper scale_loop_freqs_with_new_exit_count instead of scale_loop_profile to update the epilog frequencies / probabiltiies. We make the assumption here that if the IV exit is taken in the vector loop, then it will also be taken in the epilog (and not an early exit). Since we add the guard to the vector iv exit, we know any reduction in count associated with the epilog skip should be accounted for by a reduction in the epilog's iv exit edge count. gcc/ChangeLog: PR tree-optimization/117790 * tree-vect-loop-manip.cc (vect_do_peeling): Attempt to maintain consistency of the CFG profile when adding an epilog skip edge. gcc/testsuite/ChangeLog: PR tree-optimization/117790 * gcc.dg/vect/vect-early-break-profile-1.c: New test. --- .../gcc.dg/vect/vect-early-break-profile-1.c | 10 ++++ gcc/tree-vect-loop-manip.cc | 53 ++++++++++++++----- 2 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c new file mode 100644 index 00000000000..5387e3a0465 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */ +int a[100]; +void f() +{ + for (int i = 0; i < 100 && a[i]; i++) + a[i]++; +} +/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */ diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 56a4e9a8b63..127d596ce79 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3564,20 +3564,25 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* If we have a peeled vector iteration we will never skip the epilog loop and we can simplify the cfg a lot by not doing the edge split. */ - if (skip_epilog - || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))) + guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, + niters, niters_vector_mult_vf); + if ((skip_epilog + || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))) + && !integer_zerop (guard_cond)) { - guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, - niters, niters_vector_mult_vf); + profile_probability prob_skip + = integer_onep (guard_cond) + ? profile_probability::always () + : prob_epilog.invert (); guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; + edge enter_e = single_succ_edge (guard_bb); edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); guard_to = epilog_e->dest; guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to, skip_vector ? anchor : guard_bb, - prob_epilog.invert (), - irred_flag); + prob_skip, irred_flag); doms.safe_push (guard_to); if (vect_epilogues) epilogue_vinfo->skip_this_loop_edge = guard_e; @@ -3606,15 +3611,37 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } } - /* Only need to handle basic block before epilog loop if it's not - the guard_bb, which is the case when skip_vector is true. */ - if (guard_bb != bb_before_epilog) + basic_block epilog_ph = loop_preheader_edge (epilog)->src; + if (enter_e->dest->count < guard_e->count () + || epilog_ph->count < guard_e->count () + || epilog_e->count () < guard_e->count ()) { - prob_epilog = prob_vector * prob_epilog + prob_vector.invert (); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "epilog profile cannot be updated; " + "profile is inconsistent\n"); + } + else + { + profile_probability epilog_scale + = (epilog_ph->count - guard_e->count ()).probability_in (epilog_ph->count); + + enter_e->dest->count -= guard_e->count (); + + /* If we added a vector skip then ENTER_E may not target the epilog + preheader but a block that has the epilog preheader as its single + successor: handle that case. */ + if (enter_e->dest != epilog_ph) + { + gcc_assert (single_succ (enter_e->dest) == epilog_ph); + epilog_ph->count -= guard_e->count (); + } - scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); + profile_count new_iv_count + = epilog_e->count () - guard_e->count (); + scale_loop_freqs_with_new_exit_count (epilog, epilog_scale, + epilog_e, new_iv_count); } - scale_loop_profile (epilog, prob_epilog, -1); } /* Recalculate the dominators after adding the guard edge. */