On Sat, Jul 1, 2017 at 7:14 PM, Jan Hubicka <hubi...@ucw.cz> wrote: > Hi, > this patch makes loop profile scaling to use profile_probability. This > is mostly trivial change except for vect_do_peeling which seems to scale > profile down and then back up. This is a bad idea, because things may simply > drop to 0. So I kept that one to use integer scaling (because probability > can not represent value greater than 1). > > Bootstrapped/regtested x86_64-linux.
This likely regressed FAIL: gcc.dg/vect/pr79347.c scan-tree-dump-not vect "Invalid sum of " Richard. > Honza > * cfg.c (scale_bbs_frequencies): New function. > * cfg.h (scale_bbs_frequencies): Declare it. > * cfgloopanal.c (single_likely_exit): Cleanup. > * cfgloopmanip.c (scale_loop_frequencies): Take profile_probability > as parameter. > (scale_loop_profile): Likewise. > (loop_version): Likewise. > (create_empty_loop_on_edge): Update. > * cfgloopmanip.h (scale_loop_frequencies, scale_loop_profile, > scale_loop_frequencies, scale_loop_profile, loopify, > loop_version): Update prototypes. > * modulo-sched.c (sms_schedule): Update. > * predict.c (unlikely_executed_edge_p): Also check probability. > (probably_never_executed_edge_p): Fix typo. > * tree-if-conv.c (version_loop_for_if_conversion): Update. > * tree-parloops.c (gen_parallel_loop): Update. > * tree-ssa-loop-ivcanon.c (try_peel_loop): Update. > * tree-ssa-loop-manip.c (tree_transform_and_unroll_loop): Update. > * tree-ssa-loop-split.c (split_loop): Update. > * tree-ssa-loop-unswitch.c (tree_unswitch_loop): Update. > * tree-vect-loop-manip.c (vect_do_peeling): Update. > (vect_loop_versioning): Update. > * tree-vect-loop.c (scale_profile_for_vect_loop): Update. > Index: cfg.c > =================================================================== > --- cfg.c (revision 249866) > +++ cfg.c (working copy) > @@ -1051,6 +1051,26 @@ scale_bbs_frequencies_profile_count (bas > } > } > > +/* Multiply all frequencies of basic blocks in array BBS of length NBBS > + by NUM/DEN, in profile_count arithmetic. More accurate than previous > + function but considerably slower. */ > +void > +scale_bbs_frequencies (basic_block *bbs, int nbbs, > + profile_probability p) > +{ > + int i; > + edge e; > + > + for (i = 0; i < nbbs; i++) > + { > + edge_iterator ei; > + bbs[i]->frequency = p.apply (bbs[i]->frequency); > + bbs[i]->count = bbs[i]->count.apply_probability (p); > + FOR_EACH_EDGE (e, ei, bbs[i]->succs) > + e->count = e->count.apply_probability (p); > + } > +} > + > /* Helper types for hash tables. */ > > struct htab_bb_copy_original_entry > Index: cfg.h > =================================================================== > --- cfg.h (revision 249866) > +++ cfg.h (working copy) > @@ -109,6 +109,7 @@ extern void scale_bbs_frequencies_gcov_t > gcov_type); > extern void scale_bbs_frequencies_profile_count (basic_block *, int, > profile_count, profile_count); > +extern void scale_bbs_frequencies (basic_block *, int, profile_probability); > extern void initialize_original_copy_tables (void); > extern void reset_original_copy_tables (void); > extern void free_original_copy_tables (void); > Index: cfgloopanal.c > =================================================================== > --- cfgloopanal.c (revision 249866) > +++ cfgloopanal.c (working copy) > @@ -469,16 +469,12 @@ single_likely_exit (struct loop *loop) > exits = get_loop_exit_edges (loop); > FOR_EACH_VEC_ELT (exits, i, ex) > { > - if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL)) > - continue; > - /* The constant of 5 is set in a way so noreturn calls are > - ruled out by this test. The static branch prediction algorithm > - will not assign such a low probability to conditionals for usual > - reasons. > - FIXME: Turn to likely_never_executed */ > - if ((profile_status_for_fn (cfun) != PROFILE_ABSENT > - && ex->probability < profile_probability::from_reg_br_prob_base > (5)) > - || ex->count == profile_count::zero ()) > + if (probably_never_executed_edge_p (cfun, ex) > + /* We want to rule out paths to noreturns but not low probabilities > + resulting from adjustments or combining. > + FIXME: once we have better quality tracking, make this more > + robust. */ > + || ex->probability <= profile_probability::very_unlikely ()) > continue; > if (!found) > found = ex; > Index: cfgloopmanip.c > =================================================================== > --- cfgloopmanip.c (revision 249866) > +++ cfgloopmanip.c (working copy) > @@ -488,38 +488,42 @@ add_loop (struct loop *loop, struct loop > free (bbs); > } > > -/* Multiply all frequencies in LOOP by NUM/DEN. */ > +/* Scale profile of loop by P. */ > > void > -scale_loop_frequencies (struct loop *loop, int num, int den) > +scale_loop_frequencies (struct loop *loop, profile_probability p) > { > basic_block *bbs; > > bbs = get_loop_body (loop); > - scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den); > + scale_bbs_frequencies (bbs, loop->num_nodes, p); > free (bbs); > } > > -/* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE. > +/* Scale profile in LOOP by P. > If ITERATION_BOUND is non-zero, scale even further if loop is predicted > to iterate too many times. */ > > void > -scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound) > +scale_loop_profile (struct loop *loop, profile_probability p, > + gcov_type iteration_bound) > { > gcov_type iterations = expected_loop_iterations_unbounded (loop); > edge e; > edge_iterator ei; > > if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, ";; Scaling loop %i with scale %f, " > - "bounding iterations to %i from guessed %i\n", > - loop->num, (double)scale / REG_BR_PROB_BASE, > - (int)iteration_bound, (int)iterations); > + { > + fprintf (dump_file, ";; Scaling loop %i with scale ", > + loop->num); > + p.dump (dump_file); > + fprintf (dump_file, " bounding iterations to %i from guessed %i\n", > + (int)iteration_bound, (int)iterations); > + } > > /* See if loop is predicted to iterate too many times. */ > if (iteration_bound && iterations > 0 > - && apply_probability (iterations, scale) > iteration_bound) > + && p.apply (iterations) > iteration_bound) > { > /* Fixing loop profile for different trip count is not trivial; the > exit > probabilities has to be updated to match and frequencies propagated > down > @@ -569,7 +573,7 @@ scale_loop_profile (struct loop *loop, i > /* Roughly speaking we want to reduce the loop body profile by the > difference of loop iterations. We however can do better if > we look at the actual profile, if it is available. */ > - scale = RDIV (iteration_bound * scale, iterations); > + p = p.apply_scale (iteration_bound, iterations); > > bool determined = false; > if (loop->header->count.initialized_p ()) > @@ -582,9 +586,8 @@ scale_loop_profile (struct loop *loop, i > > if (count_in > profile_count::zero () ) > { > - scale = GCOV_COMPUTE_SCALE (count_in.to_gcov_type () > - * iteration_bound, > - loop->header->count.to_gcov_type > ()); > + p = count_in.probability_in (loop->header->count.apply_scale > + (iteration_bound, 1)); > determined = true; > } > } > @@ -597,18 +600,19 @@ scale_loop_profile (struct loop *loop, i > freq_in += EDGE_FREQUENCY (e); > > if (freq_in != 0) > - scale = GCOV_COMPUTE_SCALE (freq_in * iteration_bound, > - loop->header->frequency); > + p = profile_probability::probability_in_gcov_type > + (freq_in * iteration_bound, loop->header->frequency); > } > - if (!scale) > - scale = 1; > + if (!(p > profile_probability::never ())) > + p = profile_probability::very_unlikely (); > } > > - if (scale == REG_BR_PROB_BASE) > + if (p >= profile_probability::always () > + || !p.initialized_p ()) > return; > > /* Scale the actual probabilities. */ > - scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE); > + scale_loop_frequencies (loop, p); > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, ";; guessed iterations are now %i\n", > (int)expected_loop_iterations_unbounded (loop)); > @@ -778,7 +782,6 @@ create_empty_loop_on_edge (edge entry_ed > gcond *cond_expr; > tree exit_test; > edge exit_e; > - int prob; > > gcc_assert (entry_edge && initial_value && stride && upper_bound && iv); > > @@ -802,9 +805,7 @@ create_empty_loop_on_edge (edge entry_ed > add_loop (loop, outer); > > /* TODO: Fix frequencies and counts. */ > - prob = REG_BR_PROB_BASE / 2; > - > - scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE); > + scale_loop_frequencies (loop, profile_probability::even ()); > > /* Update dominators. */ > update_dominators_in_loop (loop); > @@ -862,7 +863,8 @@ create_empty_loop_on_edge (edge entry_ed > struct loop * > loopify (edge latch_edge, edge header_edge, > basic_block switch_bb, edge true_edge, edge false_edge, > - bool redirect_all_edges, unsigned true_scale, unsigned false_scale) > + bool redirect_all_edges, profile_probability true_scale, > + profile_probability false_scale) > { > basic_block succ_bb = latch_edge->dest; > basic_block pred_bb = header_edge->src; > @@ -915,8 +917,8 @@ loopify (edge latch_edge, edge header_ed > e->count = switch_bb->count.apply_probability (e->probability); > } > } > - scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE); > - scale_loop_frequencies (succ_bb->loop_father, true_scale, > REG_BR_PROB_BASE); > + scale_loop_frequencies (loop, false_scale); > + scale_loop_frequencies (succ_bb->loop_father, true_scale); > update_dominators_in_loop (loop); > > return loop; > @@ -1683,7 +1685,7 @@ struct loop * > loop_version (struct loop *loop, > void *cond_expr, basic_block *condition_bb, > profile_probability then_prob, profile_probability else_prob, > - unsigned then_scale, unsigned else_scale, > + profile_probability then_scale, profile_probability else_scale, > bool place_after) > { > basic_block first_head, second_head; > Index: cfgloopmanip.h > =================================================================== > --- cfgloopmanip.h (revision 249866) > +++ cfgloopmanip.h (working copy) > @@ -37,14 +37,14 @@ extern edge mfb_kj_edge; > extern bool remove_path (edge, bool * = NULL, bitmap = NULL); > extern void place_new_loop (struct function *, struct loop *); > extern void add_loop (struct loop *, struct loop *); > -extern void scale_loop_frequencies (struct loop *, int, int); > -extern void scale_loop_profile (struct loop *, int, gcov_type); > +extern void scale_loop_frequencies (struct loop *, profile_probability); > +extern void scale_loop_profile (struct loop *, profile_probability, > gcov_type); > extern edge create_empty_if_region_on_edge (edge, tree); > extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree, > tree *, tree *, struct loop *); > extern struct loop *loopify (edge, edge, > basic_block, edge, edge, bool, > - unsigned, unsigned); > + profile_probability, profile_probability); > extern void unloop (struct loop *, bool *, bitmap); > extern void copy_loop_info (struct loop *loop, struct loop *target); > extern struct loop * duplicate_loop (struct loop *, struct loop *); > @@ -60,6 +60,6 @@ extern void force_single_succ_latches (v > struct loop * loop_version (struct loop *, void *, > basic_block *, > profile_probability, profile_probability, > - unsigned, unsigned, bool); > + profile_probability, profile_probability, bool); > > #endif /* GCC_CFGLOOPMANIP_H */ > Index: modulo-sched.c > =================================================================== > --- modulo-sched.c (revision 249866) > +++ modulo-sched.c (working copy) > @@ -1718,9 +1718,7 @@ sms_schedule (void) > > loop_version (loop, comp_rtx, &condition_bb, > prob, prob.invert (), > - prob.to_reg_br_prob_base (), > - prob.invert ().to_reg_br_prob_base (), > - true); > + prob, prob.invert (), true); > } > > /* Set new iteration count of loop kernel. */ > Index: predict.c > =================================================================== > --- predict.c (revision 249866) > +++ predict.c (working copy) > @@ -245,7 +245,8 @@ probably_never_executed_bb_p (struct fun > static bool > unlikely_executed_edge_p (edge e) > { > - return e->count == profile_count::zero () > + return (e->count == profile_count::zero () > + || e->probability == profile_probability::never ()) > || (e->flags & (EDGE_EH | EDGE_FAKE)); > } > > @@ -254,8 +255,8 @@ unlikely_executed_edge_p (edge e) > bool > probably_never_executed_edge_p (struct function *fun, edge e) > { > - if (e->count.initialized_p ()) > - unlikely_executed_edge_p (e); > + if (unlikely_executed_edge_p (e)) > + return true; > return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); > } > > Index: tree-if-conv.c > =================================================================== > --- tree-if-conv.c (revision 249870) > +++ tree-if-conv.c (working copy) > @@ -2569,7 +2569,8 @@ version_loop_for_if_conversion (struct l > new_loop = loop_version (loop, cond, &cond_bb, > profile_probability::always (), > profile_probability::always (), > - REG_BR_PROB_BASE, REG_BR_PROB_BASE, true); > + profile_probability::always (), > + profile_probability::always (), true); > free_original_copy_tables (); > > for (unsigned i = 0; i < save_length; i++) > Index: tree-parloops.c > =================================================================== > --- tree-parloops.c (revision 249866) > +++ tree-parloops.c (working copy) > @@ -2251,7 +2251,6 @@ gen_parallel_loop (struct loop *loop, > gimple_seq stmts; > edge entry, exit; > struct clsn_data clsn_data; > - unsigned prob; > location_t loc; > gimple *cond_stmt; > unsigned int m_p_thread=2; > @@ -2358,12 +2357,11 @@ gen_parallel_loop (struct loop *loop, > initialize_original_copy_tables (); > > /* We assume that the loop usually iterates a lot. */ > - prob = 4 * REG_BR_PROB_BASE / 5; > loop_version (loop, many_iterations_cond, NULL, > - profile_probability::from_reg_br_prob_base (prob), > - profile_probability::from_reg_br_prob_base > - (REG_BR_PROB_BASE - prob), > - prob, REG_BR_PROB_BASE - prob, true); > + profile_probability::likely (), > + profile_probability::unlikely (), > + profile_probability::likely (), > + profile_probability::unlikely (), true); > update_ssa (TODO_update_ssa); > free_original_copy_tables (); > } > Index: tree-ssa-loop-ivcanon.c > =================================================================== > --- tree-ssa-loop-ivcanon.c (revision 249866) > +++ tree-ssa-loop-ivcanon.c (working copy) > @@ -1104,12 +1104,13 @@ try_peel_loop (struct loop *loop, > entry_freq += e->src->frequency; > gcc_assert (!flow_bb_inside_loop_p (loop, e->src)); > } > - int scale = 1; > + profile_probability p = profile_probability::very_unlikely (); > if (loop->header->count > 0) > - scale = entry_count.probability_in > (loop->header->count).to_reg_br_prob_base (); > + p = entry_count.probability_in (loop->header->count); > else if (loop->header->frequency) > - scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency); > - scale_loop_profile (loop, scale, 0); > + p = profile_probability::probability_in_gcov_type > + (entry_freq, loop->header->frequency); > + scale_loop_profile (loop, p, 0); > bitmap_set_bit (peeled_loops, loop->num); > return true; > } > Index: tree-ssa-loop-manip.c > =================================================================== > --- tree-ssa-loop-manip.c (revision 249866) > +++ tree-ssa-loop-manip.c (working copy) > @@ -1248,7 +1248,10 @@ tree_transform_and_unroll_loop (struct l > (prob_entry), > profile_probability::from_reg_br_prob_base > (REG_BR_PROB_BASE - prob_entry), > - scale_unrolled, scale_rest, true); > + profile_probability::from_reg_br_prob_base > + (scale_unrolled), > + profile_probability::guessed_always (), > + true); > gcc_assert (new_loop != NULL); > update_ssa (TODO_update_ssa); > > @@ -1374,9 +1377,7 @@ tree_transform_and_unroll_loop (struct l > in loop's preheader. */ > if (freq_e == profile_count::zero ()) > freq_e = profile_count::from_gcov_type (1); > - /* This should not overflow. */ > - scale = freq_e.probability_in (freq_h).to_reg_br_prob_base (); > - scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE); > + scale_loop_frequencies (loop, freq_e.probability_in (freq_h)); > } > > exit_bb = single_pred (loop->latch); > Index: tree-ssa-loop-split.c > =================================================================== > --- tree-ssa-loop-split.c (revision 249866) > +++ tree-ssa-loop-split.c (working copy) > @@ -564,7 +564,8 @@ split_loop (struct loop *loop1, struct t > struct loop *loop2 = loop_version (loop1, cond, &cond_bb, > profile_probability::always (), > profile_probability::always (), > - REG_BR_PROB_BASE, REG_BR_PROB_BASE, > + profile_probability::always (), > + profile_probability::always (), > true); > gcc_assert (loop2); > update_ssa (TODO_update_ssa); > Index: tree-ssa-loop-unswitch.c > =================================================================== > --- tree-ssa-loop-unswitch.c (revision 249866) > +++ tree-ssa-loop-unswitch.c (working copy) > @@ -490,12 +490,10 @@ tree_unswitch_loop (struct loop *loop, > > extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false); > prob_true = edge_true->probability; > - int p = prob_true.initialized_p () ? prob_true.to_reg_br_prob_base () > - : REG_BR_PROB_BASE / 2; > return loop_version (loop, unshare_expr (cond), > NULL, prob_true, > prob_true.invert (), > - p, REG_BR_PROB_BASE - p, > + prob_true, prob_true.invert (), > false); > } > > Index: tree-vect-loop-manip.c > =================================================================== > --- tree-vect-loop-manip.c (revision 249866) > +++ tree-vect-loop-manip.c (working copy) > @@ -1720,10 +1720,8 @@ vect_do_peeling (loop_vec_info loop_vinf > needs to be scaled back later. */ > basic_block bb_before_loop = loop_preheader_edge (loop)->src; > if (prob_vector.initialized_p ()) > - scale_bbs_frequencies_int (&bb_before_loop, 1, > - prob_vector.to_reg_br_prob_base (), > - REG_BR_PROB_BASE); > - scale_loop_profile (loop, prob_vector.to_reg_br_prob_base (), bound); > + scale_bbs_frequencies (&bb_before_loop, 1, prob_vector); > + scale_loop_profile (loop, prob_vector, bound); > } > > tree niters_prolog = build_int_cst (type, 0); > @@ -1771,11 +1769,8 @@ vect_do_peeling (loop_vec_info loop_vinf > e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); > slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e); > > - scale_bbs_frequencies_int (&bb_after_prolog, 1, > - prob_prolog.to_reg_br_prob_base (), > - REG_BR_PROB_BASE); > - scale_loop_profile (prolog, prob_prolog.to_reg_br_prob_base (), > - bound_prolog); > + scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); > + scale_loop_profile (prolog, prob_prolog, bound_prolog); > } > /* Update init address of DRs. */ > vect_update_inits_of_drs (loop_vinfo, niters_prolog); > @@ -1850,10 +1845,15 @@ vect_do_peeling (loop_vec_info loop_vinf > guard_to->frequency = guard_bb->frequency; > guard_to->count = guard_bb->count; > single_succ_edge (guard_to)->count = guard_to->count; > - /* Scale probability of epilog loop back. */ > + /* Scale probability of epilog loop back. > + FIXME: We should avoid scaling down and back up. Profile may > + get lost if we scale down to 0. */ > int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE > / prob_vector.to_reg_br_prob_base (); > - scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE); > + basic_block *bbs = get_loop_body (loop); > + scale_bbs_frequencies_int (bbs, loop->num_nodes, scale_up, > + REG_BR_PROB_BASE); > + free (bbs); > } > > basic_block bb_before_epilog = loop_preheader_edge (epilog)->src; > @@ -1891,11 +1891,9 @@ vect_do_peeling (loop_vec_info loop_vinf > { > prob_epilog = prob_vector * prob_epilog + prob_vector.invert (); > > - scale_bbs_frequencies_int (&bb_before_epilog, 1, > - prob_epilog.to_reg_br_prob_base (), > - REG_BR_PROB_BASE); > + scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); > } > - scale_loop_profile (epilog, prob_epilog.to_reg_br_prob_base (), > bound); > + scale_loop_profile (epilog, prob_epilog, bound); > } > else > slpeel_update_phi_nodes_for_lcssa (epilog); > @@ -2138,7 +2136,7 @@ vect_loop_versioning (loop_vec_info loop > tree cond_expr = NULL_TREE; > gimple_seq cond_expr_stmt_list = NULL; > tree arg; > - unsigned prob = 4 * REG_BR_PROB_BASE / 5; > + profile_probability prob = profile_probability::likely (); > gimple_seq gimplify_stmt_list = NULL; > tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); > bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); > @@ -2177,12 +2175,8 @@ vect_loop_versioning (loop_vec_info loop > /* We don't want to scale SCALAR_LOOP's frequencies, we need to > scale LOOP's frequencies instead. */ > nloop = loop_version (scalar_loop, cond_expr, &condition_bb, > - profile_probability::guessed_always ().apply_scale > - (prob, REG_BR_PROB_BASE), > - profile_probability::guessed_always ().apply_scale > - (REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE), > - REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true); > - scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE); > + prob, prob.invert (), prob, prob.invert (), true); > + scale_loop_frequencies (loop, prob); > /* CONDITION_BB was created above SCALAR_LOOP's preheader, > while we need to move it above LOOP's preheader. */ > e = loop_preheader_edge (loop); > @@ -2209,11 +2203,7 @@ vect_loop_versioning (loop_vec_info loop > } > else > nloop = loop_version (loop, cond_expr, &condition_bb, > - profile_probability::guessed_always ().apply_scale > - (prob, REG_BR_PROB_BASE), > - profile_probability::guessed_always ().apply_scale > - (REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE), > - prob, REG_BR_PROB_BASE - prob, true); > + prob, prob.invert (), prob, prob.invert (), true); > > if (version_niter) > { > Index: tree-vect-loop.c > =================================================================== > --- tree-vect-loop.c (revision 249867) > +++ tree-vect-loop.c (working copy) > @@ -7118,16 +7118,14 @@ scale_profile_for_vect_loop (struct loop > } > if (freq_h > 0) > { > - gcov_type scale; > + profile_probability p; > > /* Avoid dropping loop body profile counter to 0 because of zero count > in loop's preheader. */ > if (!(freq_e > profile_count::from_gcov_type (1))) > freq_e = profile_count::from_gcov_type (1); > - /* This should not overflow. */ > - scale = freq_e.apply_scale (new_est_niter + 1, 1).probability_in > (freq_h) > - .to_reg_br_prob_base (); > - scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE); > + p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); > + scale_loop_frequencies (loop, p); > } > > basic_block exit_bb = single_pred (loop->latch);