This patch tries to make the CFG profile consistent when adding a guard edge to skip the epilog during peeling.
The changes can be summarized as follows: - We avoid adding the guard edge entirely if the guard condition folds to false; otherwise the profile will become inconsistent since the cfgcleanup code doesn't attempt to update it on removing the dead edge. - If the guard condition instead folds to true, we account for this by giving the skip edge 100% probability (otherwise the profile will again become inconsistent when removing the other now-dead edge). - Finally, we use the new helper scale_loop_freqs_with_new_exit_count instead of scale_loop_profile to update the epilog frequencies / probabilities. We make the assumption here that if the IV exit is taken in the vector loop, then it will also be taken in the epilog (and not an early exit). Since we add the guard to the vector IV exit, we know any reduction in count associated with the epilog skip should be accounted for by a reduction in the epilog's IV exit edge count. Bootstrapped/regtested as a series on aarch64-linux-gnu, arm-linux-gnueabihf, and x86_64-linux-gnu. OK for trunk? Thanks, Alex gcc/ChangeLog: PR tree-optimization/117790 * tree-vect-loop-manip.cc (vect_do_peeling): Attempt to maintain consistency of the CFG profile when adding an epilog skip edge. gcc/testsuite/ChangeLog: PR tree-optimization/117790 * gcc.dg/vect/vect-early-break-profile-1.c: New test. --- .../gcc.dg/vect/vect-early-break-profile-1.c | 10 ++++ gcc/tree-vect-loop-manip.cc | 48 ++++++++++++++----- 2 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c new file mode 100644 index 00000000000..5387e3a0465 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-profile-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-fdump-tree-vect-blocks-details" } */ +int a[100]; +void f() +{ + for (int i = 0; i < 100 && a[i]; i++) + a[i]++; +} +/* { dg-final { scan-tree-dump-not "Invalid sum" "vect" } } */ diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 53d36eaa25f..4d472ab56ab 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3546,18 +3546,23 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* If we have a peeled vector iteration we will never skip the epilog loop and we can simplify the cfg a lot by not doing the edge split. */ - if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, + niters, niters_vector_mult_vf); + if ((skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + && !integer_zerop (guard_cond)) { - guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, - niters, niters_vector_mult_vf); + profile_probability prob_skip + = integer_onep (guard_cond) + ? profile_probability::always () + : prob_epilog.invert (); guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; + edge enter_e = single_succ_edge (guard_bb); edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); guard_to = epilog_e->dest; guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to, skip_vector ? 
anchor : guard_bb, - prob_epilog.invert (), - irred_flag); + prob_skip, irred_flag); doms.safe_push (guard_to); if (vect_epilogues) epilogue_vinfo->skip_this_loop_edge = guard_e; @@ -3586,15 +3591,36 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } } - /* Only need to handle basic block before epilog loop if it's not - the guard_bb, which is the case when skip_vector is true. */ - if (guard_bb != bb_before_epilog) + basic_block epilog_ph = loop_preheader_edge (epilog)->src; + + profile_probability epilog_scale + = (epilog_ph->count - guard_e->count ()).probability_in (epilog_ph->count); + + enter_e->dest->count -= guard_e->count (); + + /* If we added a vector skip then ENTER_E may not target the epilog + preheader but a block that has the epilog preheader as its single + successor: handle that case. */ + if (enter_e->dest != epilog_ph) { - prob_epilog = prob_vector * prob_epilog + prob_vector.invert (); + gcc_assert (single_succ (enter_e->dest) == epilog_ph); + epilog_ph->count -= guard_e->count (); + } - scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); + if (epilog_e->count () < guard_e->count ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "epilog iv exit count < guard count, " + "profile will become inconsistent\n"); + } + else + { + profile_count new_iv_count + = epilog_e->count () - guard_e->count (); + scale_loop_freqs_with_new_exit_count (epilog, epilog_scale, + epilog_e, new_iv_count); } - scale_loop_profile (epilog, prob_epilog, -1); } /* Recalculate the dominators after adding the guard edge. */