On Thu, Oct 3, 2013 at 10:15 PM, Teresa Johnson <tejohn...@google.com> wrote:
> This patch handles the fact that small profile count values sometimes
> get truncated down to 0 during updates due to integer arithmetic. This
> causes sub-optimal function splitting under
> -freorder-blocks-and-partition.
>
> The first part fixes the logic in probably_never_executed that looks
> at the bb frequency when the bb count is zero. It now correctly
> compares the count computed from the frequency to the number of
> profile runs instead of to REG_BR_PROB_BASE. The minimum ratio of
> profile counts to training runs required for a block to not be
> considered cold is now encoded in a parameter, with the default value
> of 4 to match the existing code.
>
> The second part ensures that frequencies are correctly updated after
> inlining. The problem is that after inlining the frequencies were
> being recomputed directly from the corresponding bb counts in
> rebuild_frequencies. If the counts had been truncated to 0, then the
> recomputed frequencies would become 0 as well (often leading to
> inconsistencies in the frequencies between blocks). Then the above
> change to probably_never_executed would not help identify these blocks
> as non-cold from the frequencies. The fix was to use the existing
> logic used during static profile rebuilding to also
> recompute/propagate the frequencies from the branch probabilities in
> the profile feedback case. I also renamed a number of estimate_*
> routines to compute_* and updated the comments to reflect the fact
> that these routines are not doing estimation (from a static profile),
> but in fact recomputing/propagating frequencies from the existing
> (either guessed or profile-feedback-based) probabilities.

For the second part, it seems to assume the branch probabilities are
better maintained than bb counts?

David

>
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
> (One more round of regression testing in progress after making some
> slight cleanup changes.)
>
> Thanks,
> Teresa
>
> 2013-10-03  Teresa Johnson  <tejohn...@google.com>
>
>         * params.def (PARAM_MIN_HOT_RUN_RATIO): New parameter.
>         * predict.c (probably_never_executed): Compare frequency-based
>         count to number of training runs.
>         (tree_estimate_probability): Update function call to new name
>         compute_bb_frequencies and pass new parameter.
>         (compute_loops_at_level): Renamed.
>         (compute_loops): Ditto.
>         (compute_bb_frequencies): Renamed, and new parameter to
>         force recomputing frequencies.
>         (rebuild_frequencies): Recompute bb frequencies from the
>         probabilities instead of from counts due to truncation issues.
>         * predict.h (compute_bb_frequencies): Update declaration.
>
> Index: params.def
> ===================================================================
> --- params.def  (revision 203152)
> +++ params.def  (working copy)
> @@ -373,6 +373,12 @@ DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
>          "Select fraction of the maximal frequency of executions of
> basic block in function given basic block needs to have to be
> considered hot",
>          1000, 0, 0)
>
> +DEFPARAM(PARAM_MIN_HOT_RUN_RATIO,
> +        "min-hot-run-ratio",
> +         "The minimum ratio of basic block execution count to profile "
> +         "training runs for the block to be considered hot"
> +        4, 0, 10000)
> +
>  DEFPARAM (PARAM_ALIGN_THRESHOLD,
>           "align-threshold",
>           "Select fraction of the maximal frequency of executions of
> basic block in function given basic block get alignment",
> Index: predict.c
> ===================================================================
> --- predict.c   (revision 203152)
> +++ predict.c   (working copy)
> @@ -237,17 +237,31 @@ probably_never_executed (struct function *fun,
>    gcc_checking_assert (fun);
>    if (profile_status_for_function (fun) == PROFILE_READ)
>      {
> -      if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
> +      int min_run_ratio = PARAM_VALUE (PARAM_MIN_HOT_RUN_RATIO);
> +      if (RDIV (count * min_run_ratio, profile_info->runs) > 0)
>         return false;
>        if (!frequency)
>         return true;
>        if (!ENTRY_BLOCK_PTR->frequency)
>         return false;
> -      if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < 
> REG_BR_PROB_BASE)
> +      if (ENTRY_BLOCK_PTR->count)
>         {
> -         return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
> -                       ENTRY_BLOCK_PTR->frequency)
> -                 < REG_BR_PROB_BASE / 4);
> +          gcov_type scaled_count
> +              = frequency * ENTRY_BLOCK_PTR->count * min_run_ratio;
> +          gcov_type computed_count;
> +          /* Check for overflow, in which case ENTRY_BLOCK_PTR->count should
> +             be large enough to do the division first without losing much
> +             precision.  */
> +          if (scaled_count/frequency/min_run_ratio != ENTRY_BLOCK_PTR->count)
> +            {
> +              computed_count = RDIV (ENTRY_BLOCK_PTR->count,
> +                                     ENTRY_BLOCK_PTR->frequency);
> +              computed_count *= frequency * min_run_ratio;
> +            }
> +          else
> +            computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
> +          if (RDIV (computed_count, profile_info->runs) > 0)
> +            return false;
>         }
>        return true;
>      }
> @@ -2388,7 +2402,7 @@ tree_estimate_probability (void)
>    pointer_map_destroy (bb_predictions);
>    bb_predictions = NULL;
>
> -  estimate_bb_frequencies ();
> +  compute_bb_frequencies (false);
>    free_dominance_info (CDI_POST_DOMINATORS);
>    remove_fake_exit_edges ();
>  }
> @@ -2551,7 +2565,7 @@ typedef struct edge_info_def
>  #define BLOCK_INFO(B)  ((block_info) (B)->aux)
>  #define EDGE_INFO(E)   ((edge_info) (E)->aux)
>
> -/* Helper function for estimate_bb_frequencies.
> +/* Helper function for compute_bb_frequencies.
>     Propagate the frequencies in blocks marked in
>     TOVISIT, starting in HEAD.  */
>
> @@ -2691,10 +2705,10 @@ propagate_freq (basic_block head, bitmap tovisit)
>      }
>  }
>
> -/* Estimate probabilities of loopback edges in loops at same nest level.  */
> +/* Compute frequencies in loops at same nest level.  */
>
>  static void
> -estimate_loops_at_level (struct loop *first_loop)
> +compute_loops_at_level (struct loop *first_loop)
>  {
>    struct loop *loop;
>
> @@ -2705,7 +2719,7 @@ static void
>        unsigned i;
>        bitmap tovisit = BITMAP_ALLOC (NULL);
>
> -      estimate_loops_at_level (loop->inner);
> +      compute_loops_at_level (loop->inner);
>
>        /* Find current loop back edge and mark it.  */
>        e = loop_latch_edge (loop);
> @@ -2723,14 +2737,14 @@ static void
>  /* Propagates frequencies through structure of loops.  */
>
>  static void
> -estimate_loops (void)
> +compute_loops (void)
>  {
>    bitmap tovisit = BITMAP_ALLOC (NULL);
>    basic_block bb;
>
> -  /* Start by estimating the frequencies in the loops.  */
> +  /* Start by computing the frequencies in the loops.  */
>    if (number_of_loops (cfun) > 1)
> -    estimate_loops_at_level (current_loops->tree_root->inner);
> +    compute_loops_at_level (current_loops->tree_root->inner);
>
>    /* Now propagate the frequencies through all the blocks.  */
>    FOR_ALL_BB (bb)
> @@ -2800,15 +2814,16 @@ expensive_function_p (int threshold)
>    return false;
>  }
>
> -/* Estimate basic blocks frequency by given branch probabilities.  */
> +/* Compute and propagate basic block frequencies using the given branch
> +   probabilities.  */
>
>  void
> -estimate_bb_frequencies (void)
> +compute_bb_frequencies (bool rebuild)
>  {
>    basic_block bb;
>    sreal freq_max;
>
> -  if (profile_status != PROFILE_READ || !counts_to_freqs ())
> +  if (rebuild || profile_status != PROFILE_READ || !counts_to_freqs ())
>      {
>        static int real_values_initialized = 0;
>
> @@ -2845,9 +2860,9 @@ void
>             }
>         }
>
> -      /* First compute probabilities locally for each loop from innermost
> +      /* First compute frequencies locally for each loop from innermost
>           to outermost to examine probabilities for back edges.  */
> -      estimate_loops ();
> +      compute_loops ();
>
>        memcpy (&freq_max, &real_zero, sizeof (real_zero));
>        FOR_EACH_BB (bb)
> @@ -3027,18 +3042,16 @@ void
>  rebuild_frequencies (void)
>  {
>    timevar_push (TV_REBUILD_FREQUENCIES);
> -  if (profile_status == PROFILE_GUESSED)
> +  if (profile_status == PROFILE_GUESSED || profile_status == PROFILE_READ)
>      {
>        loop_optimizer_init (0);
>        add_noreturn_fake_exit_edges ();
>        mark_irreducible_loops ();
>        connect_infinite_loops_to_exit ();
> -      estimate_bb_frequencies ();
> +      compute_bb_frequencies (true);
>        remove_fake_exit_edges ();
>        loop_optimizer_finalize ();
>      }
> -  else if (profile_status == PROFILE_READ)
> -    counts_to_freqs ();
>    else
>      gcc_unreachable ();
>    timevar_pop (TV_REBUILD_FREQUENCIES);
> Index: predict.h
> ===================================================================
> --- predict.h   (revision 203152)
> +++ predict.h   (working copy)
> @@ -37,7 +37,7 @@ enum prediction
>
>  extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
>  extern int counts_to_freqs (void);
> -extern void estimate_bb_frequencies (void);
> +extern void compute_bb_frequencies (bool rebuild);
>  extern const char *predictor_name (enum br_predictor);
>  extern tree build_predict_expr (enum br_predictor, enum prediction);
>  extern void tree_estimate_probability (void);
>
>
> --
> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413

Reply via email to