On Thu, Oct 3, 2013 at 10:15 PM, Teresa Johnson <tejohn...@google.com> wrote:
> This patch handles the fact that small profile count values sometimes
> get truncated down to 0 during updates due to integer arithmetic. This
> causes sub-optimal function splitting under
> -freorder-blocks-and-partition.
>
> The first part fixes the logic in probably_never_executed that looks
> at the bb frequency when the bb count is zero. It now correctly
> compares the count computed from the frequency to the number of
> profile runs instead of to REG_BR_PROB_BASE. The minimum ratio of
> profile counts to training runs required for a block to not be
> considered cold is now encoded in a parameter, with the default value
> of 4 to match the existing code.
>
> The second part ensures that frequencies are correctly updated after
> inlining. The problem is that after inlining the frequencies were
> being recomputed directly from the corresponding bb counts in
> rebuild_frequencies. If the counts had been truncated to 0, then the
> recomputed frequencies would become 0 as well (often leading to
> inconsistencies in the frequencies between blocks). Then the above
> change to probably_never_executed would not help identify these blocks
> as non-cold from the frequencies. The fix was to use the existing
> logic used during static profile rebuilding to also
> recompute/propagate the frequencies from the branch probabilities in
> the profile feedback case. I also renamed a number of estimate_*
> routines to compute_* and updated the comments to reflect the fact
> that these routines are not doing estimation (from a static profile),
> but in fact recomputing/propagating frequencies from the existing
> (either guessed or profile-feedback-based) probabilities.
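To make the first part concrete: the truncation happens through ordinary integer
scaling, after which only the frequency is left to classify the block. Below is a
small standalone sketch of the count-versus-runs test described above. It is not
the GCC code; the names, the 1/8 scale and the ratio of 4 are stand-ins matching
the description, and the rounding division is only in the spirit of the RDIV used
in the patch.

/* Standalone illustration (not GCC code): a small count passes the
   count-versus-runs test, but the same count truncated to 0 by integer
   scaling no longer does.  All names and values here are made up.  */
#include <inttypes.h>
#include <stdio.h>

/* Rounding division, in the spirit of the RDIV macro used in the patch.  */
#define RDIV(a, b) (((a) + (b) / 2) / (b))

int
main (void)
{
  int64_t count = 3;     /* measured execution count of the block */
  int64_t runs = 10;     /* number of training runs */
  int64_t ratio = 4;     /* default minimum ratio from the description above */

  /* An update scales the count with integer arithmetic, e.g. by an
     inline site's relative frequency of 1/8.  */
  int64_t scale_num = 1, scale_den = 8;
  int64_t scaled = count * scale_num / scale_den;   /* truncates to 0 */

  printf ("cold before scaling: %d\n", RDIV (count * ratio, runs) == 0);
  printf ("cold after scaling:  %d\n", RDIV (scaled * ratio, runs) == 0);
  return 0;
}

With count = 3 over 10 runs the test still says non-cold, but once an update has
truncated the count to 0 the same test calls the block cold, which is what the
frequency-based fallback then has to repair.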
For the second part, it seems to assume the branch probabilities are
better maintained than bb counts?

David

>
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
> (One more round of regression testing in progress after making some
> slight cleanup changes.)
>
> Thanks,
> Teresa
>
> 2013-10-03  Teresa Johnson  <tejohn...@google.com>
>
>         * params.def (PARAM_MIN_HOT_RUN_RATIO): New parameter.
>         * predict.c (probably_never_executed): Compare frequency-based
>         count to number of training runs.
>         (tree_estimate_probability): Update function call to new name
>         compute_bb_frequencies and pass new parameter.
>         (compute_loops_at_level): Renamed.
>         (compute_loops): Ditto.
>         (compute_bb_frequencies): Renamed, and new parameter to
>         force recomputing frequencies.
>         (rebuild_frequencies): Recompute bb frequencies from the
>         probabilities instead of from counts due to truncation issues.
>         * predict.h (compute_bb_frequencies): Update declaration.
>
> Index: params.def
> ===================================================================
> --- params.def  (revision 203152)
> +++ params.def  (working copy)
> @@ -373,6 +373,12 @@ DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
>          "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
>          1000, 0, 0)
>
> +DEFPARAM(PARAM_MIN_HOT_RUN_RATIO,
> +        "min-hot-run-ratio",
> +        "The minimum ratio of profile runs to basic block execution count "
> +        "for the block to be considered hot",
> +        4, 0, 10000)
> +
>  DEFPARAM (PARAM_ALIGN_THRESHOLD,
>           "align-threshold",
>           "Select fraction of the maximal frequency of executions of basic block in function given basic block get alignment",
> Index: predict.c
> ===================================================================
> --- predict.c   (revision 203152)
> +++ predict.c   (working copy)
> @@ -237,17 +237,31 @@ probably_never_executed (struct function *fun,
>    gcc_checking_assert (fun);
>    if (profile_status_for_function (fun) == PROFILE_READ)
>      {
> -      if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
> +      int min_run_ratio = PARAM_VALUE (PARAM_MIN_HOT_RUN_RATIO);
> +      if (RDIV (count * min_run_ratio, profile_info->runs) > 0)
>         return false;
>        if (!frequency)
>         return true;
>        if (!ENTRY_BLOCK_PTR->frequency)
>         return false;
> -      if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE)
> +      if (ENTRY_BLOCK_PTR->count)
>         {
> -         return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
> -                       ENTRY_BLOCK_PTR->frequency)
> -                 < REG_BR_PROB_BASE / 4);
> +         gcov_type scaled_count
> +           = frequency * ENTRY_BLOCK_PTR->count * min_run_ratio;
> +         gcov_type computed_count;
> +         /* Check for overflow, in which case ENTRY_BLOCK_PTR->count should
> +            be large enough to do the division first without losing much
> +            precision.  */
> +         if (scaled_count/frequency/min_run_ratio != ENTRY_BLOCK_PTR->count)
> +           {
> +             computed_count = RDIV (ENTRY_BLOCK_PTR->count,
> +                                    ENTRY_BLOCK_PTR->frequency);
> +             computed_count *= frequency * min_run_ratio;
> +           }
> +         else
> +           computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
> +         if (RDIV (computed_count * min_run_ratio, profile_info->runs) > 0)
> +           return false;
>         }
>        return true;
>      }
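A note on the overflow check in the hunk just above: the pattern is to multiply
first for precision and verify the product by dividing it back, falling back to
dividing first only when that verification fails (which also means the check
relies on the wrapped product, something that is only reliable where signed
overflow wraps). Here is a minimal sketch of that idiom with stand-in names, not
the GCC code, assuming the frequencies are nonzero as they are on this path.

/* Sketch of the multiply-then-verify idiom above, with stand-in names
   (not the GCC code).  Assumes frequency and entry_freq are nonzero.  */
#include <inttypes.h>
#include <stdio.h>

#define RDIV(a, b) (((a) + (b) / 2) / (b))

static int64_t
frequency_based_count (int64_t frequency, int64_t entry_count,
                       int64_t entry_freq, int64_t ratio)
{
  int64_t scaled = frequency * entry_count * ratio;

  /* If the product cannot be divided back into entry_count, the multiply
     overflowed; entry_count is then large enough that dividing first
     loses little precision.  */
  if (scaled / frequency / ratio != entry_count)
    return RDIV (entry_count, entry_freq) * frequency * ratio;

  return RDIV (scaled, entry_freq);
}

int
main (void)
{
  /* Small entry count: multiply first, then divide by the entry frequency.  */
  printf ("%" PRId64 "\n", frequency_based_count (500, 100, 10000, 4));
  return 0;
}

With a very large entry count the product no longer divides back to the original
value and the fallback path divides by the entry frequency before multiplying,
trading a little precision for not overflowing, which is what the comment in the
patch describes.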
> @@ -2388,7 +2402,7 @@ tree_estimate_probability (void)
>    pointer_map_destroy (bb_predictions);
>    bb_predictions = NULL;
>
> -  estimate_bb_frequencies ();
> +  compute_bb_frequencies (false);
>    free_dominance_info (CDI_POST_DOMINATORS);
>    remove_fake_exit_edges ();
>  }
> @@ -2551,7 +2565,7 @@ typedef struct edge_info_def
>  #define BLOCK_INFO(B) ((block_info) (B)->aux)
>  #define EDGE_INFO(E)  ((edge_info) (E)->aux)
>
> -/* Helper function for estimate_bb_frequencies.
> +/* Helper function for compute_bb_frequencies.
>     Propagate the frequencies in blocks marked in
>     TOVISIT, starting in HEAD.  */
>
> @@ -2691,10 +2705,10 @@ propagate_freq (basic_block head, bitmap tovisit)
>      }
>  }
>
> -/* Estimate probabilities of loopback edges in loops at same nest level.  */
> +/* Compute frequencies in loops at same nest level.  */
>
>  static void
> -estimate_loops_at_level (struct loop *first_loop)
> +compute_loops_at_level (struct loop *first_loop)
>  {
>    struct loop *loop;
>
> @@ -2705,7 +2719,7 @@ static void
>        unsigned i;
>        bitmap tovisit = BITMAP_ALLOC (NULL);
>
> -      estimate_loops_at_level (loop->inner);
> +      compute_loops_at_level (loop->inner);
>
>        /* Find current loop back edge and mark it.  */
>        e = loop_latch_edge (loop);
> @@ -2723,14 +2737,14 @@ static void
>  /* Propagates frequencies through structure of loops.  */
>
>  static void
> -estimate_loops (void)
> +compute_loops (void)
>  {
>    bitmap tovisit = BITMAP_ALLOC (NULL);
>    basic_block bb;
>
> -  /* Start by estimating the frequencies in the loops.  */
> +  /* Start by computing the frequencies in the loops.  */
>    if (number_of_loops (cfun) > 1)
> -    estimate_loops_at_level (current_loops->tree_root->inner);
> +    compute_loops_at_level (current_loops->tree_root->inner);
>
>    /* Now propagate the frequencies through all the blocks.  */
>    FOR_ALL_BB (bb)
> @@ -2800,15 +2814,16 @@ expensive_function_p (int threshold)
>    return false;
>  }
>
> -/* Estimate basic blocks frequency by given branch probabilities.  */
> +/* Compute and propagate basic block frequencies using the given branch
> +   probabilities.  */
>
>  void
> -estimate_bb_frequencies (void)
> +compute_bb_frequencies (bool rebuild)
>  {
>    basic_block bb;
>    sreal freq_max;
>
> -  if (profile_status != PROFILE_READ || !counts_to_freqs ())
> +  if (rebuild || profile_status != PROFILE_READ || !counts_to_freqs ())
>      {
>        static int real_values_initialized = 0;
>
> @@ -2845,9 +2860,9 @@ void
>      }
>    }
>
> -  /* First compute probabilities locally for each loop from innermost
> +  /* First compute frequencies locally for each loop from innermost
>       to outermost to examine probabilities for back edges.  */
> -  estimate_loops ();
> +  compute_loops ();
>
>    memcpy (&freq_max, &real_zero, sizeof (real_zero));
>    FOR_EACH_BB (bb)
> @@ -3027,18 +3042,16 @@ void
>  rebuild_frequencies (void)
>  {
>    timevar_push (TV_REBUILD_FREQUENCIES);
> -  if (profile_status == PROFILE_GUESSED)
> +  if (profile_status == PROFILE_GUESSED || profile_status == PROFILE_READ)
>      {
>        loop_optimizer_init (0);
>        add_noreturn_fake_exit_edges ();
>        mark_irreducible_loops ();
>        connect_infinite_loops_to_exit ();
> -      estimate_bb_frequencies ();
> +      compute_bb_frequencies (true);
>        remove_fake_exit_edges ();
>        loop_optimizer_finalize ();
>      }
> -  else if (profile_status == PROFILE_READ)
> -    counts_to_freqs ();
>    else
>      gcc_unreachable ();
>    timevar_pop (TV_REBUILD_FREQUENCIES);
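The rebuild_frequencies change is easiest to see on a toy CFG: once the counts
have all collapsed to 0, scaling frequencies from counts gives 0 everywhere,
whereas re-propagating from the branch probabilities, which are ratios on a fixed
scale rather than absolute counts, still preserves the relative hotness. That is
essentially the assumption I am asking about above. A standalone sketch with
made-up numbers and a PROB_BASE stand-in for REG_BR_PROB_BASE, not the GCC
implementation:

/* Toy illustration (not GCC code) of frequencies derived from counts
   versus frequencies re-propagated from branch probabilities, on a
   simple diamond:  A -> {B (p), C (1-p)} -> D.  */
#include <stdint.h>
#include <stdio.h>

#define PROB_BASE 10000   /* stand-in for REG_BR_PROB_BASE */

int
main (void)
{
  /* Counts as they might look after updates truncated them all to 0.  */
  int64_t count[4] = { 0, 0, 0, 0 };          /* A, B, C, D */
  int64_t max_count = 0;

  /* The branch probabilities survive, because they are ratios.  */
  int prob_a_to_b = 2500;                     /* 25% */
  int prob_a_to_c = PROB_BASE - prob_a_to_b;  /* 75% */

  /* counts_to_freqs-style: frequencies scaled from counts are all 0.  */
  int freq_from_counts[4];
  for (int i = 0; i < 4; i++)
    freq_from_counts[i]
      = max_count ? (int) (count[i] * PROB_BASE / max_count) : 0;

  /* Propagation-style: push the entry frequency along the edges weighted
     by probability (acyclic case; loops need the back-edge handling done
     by the compute_loops/propagate_freq machinery in the patch).  */
  int freq[4];
  freq[0] = PROB_BASE;                                            /* A */
  freq[1] = (int) ((int64_t) freq[0] * prob_a_to_b / PROB_BASE);  /* B */
  freq[2] = (int) ((int64_t) freq[0] * prob_a_to_c / PROB_BASE);  /* C */
  freq[3] = freq[1] + freq[2];                                    /* D */

  for (int i = 0; i < 4; i++)
    printf ("block %d: freq from counts = %d, from probabilities = %d\n",
            i, freq_from_counts[i], freq[i]);
  return 0;
}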
> Index: predict.h
> ===================================================================
> --- predict.h   (revision 203152)
> +++ predict.h   (working copy)
> @@ -37,7 +37,7 @@ enum prediction
>
>  extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
>  extern int counts_to_freqs (void);
> -extern void estimate_bb_frequencies (void);
> +extern void compute_bb_frequencies (bool rebuild);
>  extern const char *predictor_name (enum br_predictor);
>  extern tree build_predict_expr (enum br_predictor, enum prediction);
>  extern void tree_estimate_probability (void);
>
>
> --
> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413