Hi Honza,

This change seems to have cost a 0.2% regression in SPEC2017 vs a codesize 
reduction of 0.02%.

Only leela seems to have made any noticable difference at 2.64% codesize 
reduction.

Is there anyway to keep the performance at -O3 where the codesize doesn't 
matter much?

Thanks,
Tamar

> -----Original Message-----
> From: gcc-patches-ow...@gcc.gnu.org <gcc-patches-ow...@gcc.gnu.org>
> On Behalf Of Jan Hubicka
> Sent: Thursday, January 16, 2020 23:00
> To: gcc-patches@gcc.gnu.org
> Subject: Make profile estimation more precise
> 
> Hi,
> while analyzing code size regression in SPEC2k GCC binary I noticed that we
> perform some inline decisions because we think that number of executions
> are very high.
> In particular there was inline decision inlining gen_rtx_fmt_ee to
> find_reloads believing that it is called 4 billion times.  This turned out to 
> be
> cummulation of roundoff errors in propagate_freq which was bit
> mechanically updated from original sreals to C++ sreals and later to new
> probabilities.
> 
> This led us to estimate that a loopback edge is reached with probability 2.3
> which was capped to 1-1/10000 and since this happened in nested loop it
> quickly escalated to large values.
> 
> Originally capping to REG_BR_PROB_BASE avoided such problems but now
> we have much higher range.
> 
> This patch avoids going from probabilites to REG_BR_PROB_BASE so
> precision is kept.  In addition it makes the propagation to not estimate more
> than param-max-predicted-loop-iterations.  The first change makes the cap
> to not be triggered on the gcc build, but it is still better to be safe than 
> sorry.
> 
> 2020-01-16  Jan Hubicka  <hubi...@ucw.cz>
> 
>       * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
>       dump.
>       * params.opt: (max-predicted-iterations): Set bounds.
>       * predict.c (real_almost_one, real_br_prob_base,
>       real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
>       (propagate_freq): Add max_cyclic_prob parameter; cap cyclic
>       probabilities; do not truncate to reg_br_prob_bases.
>       (estimate_loops_at_level): Pass max_cyclic_prob.
>       (estimate_loops): Compute max_cyclic_prob.
>       (estimate_bb_frequencies): Do not initialize real_*; update
> calculation
>       of back edge prob.
>       * profile-count.c (profile_probability::to_sreal): New.
>       * profile-count.h (class sreal): Move up in file.
>       (profile_probability::to_sreal): Declare.
> 
> diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index
> 4e1c587b101..dbd53f12a40 100644
> --- a/gcc/ipa-fnsummary.c
> +++ b/gcc/ipa-fnsummary.c
> @@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node
> *node, int *size,
>         gcc_assert (*size == old_size);
>         if (time && (*time - old_time > 1 || *time - old_time < -1)
>             && dump_file)
> -         fprintf (dump_file, "Time mismatch in call summary %f!=%f",
> +         fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
>                    old_time.to_double (),
>                    time->to_double ());
>       }
> diff --git a/gcc/params.opt b/gcc/params.opt index
> 31cc20031b1..f02c769d0e3 100644
> --- a/gcc/params.opt
> +++ b/gcc/params.opt
> @@ -555,7 +555,7 @@ Common Joined UInteger
> Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)  Maximum
> depth of sqrt chains to use when synthesizing exponentiation by a real
> constant.
> 
>  -param=max-predicted-iterations=
> -Common Joined UInteger Var(param_max_predicted_iterations) Init(100)
> Param Optimization
> +Common Joined UInteger Var(param_max_predicted_iterations) Init(100)
> +IntegerRange(1, 65536) Param Optimization
>  The maximum number of loop iterations we predict statically.
> 
>  -param=max-reload-search-insns=
> diff --git a/gcc/predict.c b/gcc/predict.c index 02c5fe0667d..c3aed9ed854
> 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -76,10 +76,6 @@ enum predictor_reason  static const char
> *reason_messages[] = {"", " (ignored)",
>      " (single edge duplicate)", " (edge pair duplicate)"};
> 
> -/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
> -                1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
> -static sreal real_almost_one, real_br_prob_base,
> -          real_inv_br_prob_base, real_one_half, real_bb_freq_max;
> 
>  static void combine_predictions_for_insn (rtx_insn *, basic_block);  static
> void dump_prediction (FILE *, enum br_predictor, int, basic_block, @@ -
> 3266,7 +3262,8 @@ public:
>     TOVISIT, starting in HEAD.  */
> 
>  static void
> -propagate_freq (basic_block head, bitmap tovisit)
> +propagate_freq (basic_block head, bitmap tovisit,
> +             sreal max_cyclic_prob)
>  {
>    basic_block bb;
>    basic_block last;
> @@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
> 
>         FOR_EACH_EDGE (e, ei, bb->preds)
>           if (EDGE_INFO (e)->back_edge)
> -           {
> -             cyclic_probability += EDGE_INFO (e)->back_edge_prob;
> -           }
> +           cyclic_probability += EDGE_INFO (e)->back_edge_prob;
>           else if (!(e->flags & EDGE_DFS_BACK))
>             {
> -             /*  frequency += (e->probability
> -                               * BLOCK_INFO (e->src)->frequency /
> -                               REG_BR_PROB_BASE);  */
> -
>               /* FIXME: Graphite is producing edges with no profile. Once
>                  this is fixed, drop this.  */
>               sreal tmp = e->probability.initialized_p () ?
> -                         e->probability.to_reg_br_prob_base () : 0;
> -             tmp *= BLOCK_INFO (e->src)->frequency;
> -             tmp *= real_inv_br_prob_base;
> -             frequency += tmp;
> +                         e->probability.to_sreal () : 0;
> +             frequency += tmp * BLOCK_INFO (e->src)->frequency;
>             }
> 
>         if (cyclic_probability == 0)
> @@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
>           }
>         else
>           {
> -           if (cyclic_probability > real_almost_one)
> -             cyclic_probability = real_almost_one;
> -
> -           /* BLOCK_INFO (bb)->frequency = frequency
> -                                           / (1 - cyclic_probability) */
> -
> -           cyclic_probability = sreal (1) - cyclic_probability;
> -           BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
> +           if (cyclic_probability > max_cyclic_prob)
> +             {
> +               if (dump_file)
> +                 fprintf (dump_file,
> +                          "cyclic probability of bb %i is %f (capped to %f)"
> +                          "; turning freq %f",
> +                          bb->index, cyclic_probability.to_double (),
> +                          max_cyclic_prob.to_double (),
> +                          frequency.to_double ());
> +
> +               cyclic_probability = max_cyclic_prob;
> +             }
> +           else if (dump_file)
> +             fprintf (dump_file,
> +                      "cyclic probability of bb %i is %f; turning freq %f",
> +                      bb->index, cyclic_probability.to_double (),
> +                      frequency.to_double ());
> +
> +           BLOCK_INFO (bb)->frequency = frequency
> +                              / (sreal (1) - cyclic_probability);
> +           if (dump_file)
> +             fprintf (dump_file, " to %f\n",
> +                      BLOCK_INFO (bb)->frequency.to_double ());
>           }
>       }
> 
> @@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
>        e = find_edge (bb, head);
>        if (e)
>       {
> -       /* EDGE_INFO (e)->back_edge_prob
> -          = ((e->probability * BLOCK_INFO (bb)->frequency)
> -          / REG_BR_PROB_BASE); */
> -
>         /* FIXME: Graphite is producing edges with no profile. Once
>            this is fixed, drop this.  */
>         sreal tmp = e->probability.initialized_p () ?
> -                   e->probability.to_reg_br_prob_base () : 0;
> -       tmp *= BLOCK_INFO (bb)->frequency;
> -       EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
> +                   e->probability.to_sreal () : 0;
> +       EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)-
> >frequency;
>       }
> 
>        /* Propagate to successor blocks.  */ @@ -3396,7 +3395,7 @@
> propagate_freq (basic_block head, bitmap tovisit)
>  /* Estimate frequencies in loops at same nest level.  */
> 
>  static void
> -estimate_loops_at_level (class loop *first_loop)
> +estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
>  {
>    class loop *loop;
> 
> @@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
>        unsigned i;
>        auto_bitmap tovisit;
> 
> -      estimate_loops_at_level (loop->inner);
> +      estimate_loops_at_level (loop->inner, max_cyclic_prob);
> 
>        /* Find current loop back edge and mark it.  */
>        e = loop_latch_edge (loop);
> @@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
>        for (i = 0; i < loop->num_nodes; i++)
>       bitmap_set_bit (tovisit, bbs[i]->index);
>        free (bbs);
> -      propagate_freq (loop->header, tovisit);
> +      propagate_freq (loop->header, tovisit, max_cyclic_prob);
>      }
>  }
> 
> @@ -3428,17 +3427,18 @@ estimate_loops (void)  {
>    auto_bitmap tovisit;
>    basic_block bb;
> +  sreal max_cyclic_prob = (sreal)1 - (sreal)1 /
> + param_max_predicted_iterations;
> 
>    /* Start by estimating the frequencies in the loops.  */
>    if (number_of_loops (cfun) > 1)
> -    estimate_loops_at_level (current_loops->tree_root->inner);
> +    estimate_loops_at_level (current_loops->tree_root->inner,
> + max_cyclic_prob);
> 
>    /* Now propagate the frequencies through all the blocks.  */
>    FOR_ALL_BB_FN (bb, cfun)
>      {
>        bitmap_set_bit (tovisit, bb->index);
>      }
> -  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
> +  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit,
> + max_cyclic_prob);
>  }
> 
>  /* Drop the profile for NODE to guessed, and update its frequency based on
> @@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
>    if (force || profile_status_for_fn (cfun) != PROFILE_READ
>        || !update_max_bb_count ())
>      {
> -      static int real_values_initialized = 0;
> -
> -      if (!real_values_initialized)
> -        {
> -       real_values_initialized = 1;
> -       real_br_prob_base = REG_BR_PROB_BASE;
> -       /* Scaling frequencies up to maximal profile count may result in
> -          frequent overflows especially when inlining loops.
> -          Small scalling results in unnecesary precision loss.  Stay in
> -          the half of the (exponential) range.  */
> -       real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
> -       real_one_half = sreal (1, -1);
> -       real_inv_br_prob_base = sreal (1) / real_br_prob_base;
> -       real_almost_one = sreal (1) - real_inv_br_prob_base;
> -     }
> 
>        mark_dfs_back_edges ();
> 
> @@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
>                this is fixed, drop this.  */
>             if (e->probability.initialized_p ())
>               EDGE_INFO (e)->back_edge_prob
> -                = e->probability.to_reg_br_prob_base ();
> +                = e->probability.to_sreal ();
>             else
> -             EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
> -           EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
> +             /* back_edge_prob = 0.5 */
> +             EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
>           }
>       }
> 
> @@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
>       if (freq_max < BLOCK_INFO (bb)->frequency)
>         freq_max = BLOCK_INFO (bb)->frequency;
> 
> -      freq_max = real_bb_freq_max / freq_max;
> +      /* Scaling frequencies up to maximal profile count may result in
> +      frequent overflows especially when inlining loops.
> +      Small scalling results in unnecesary precision loss.  Stay in
> +      the half of the (exponential) range.  */
> +      freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
>        if (freq_max < 16)
>       freq_max = 16;
>        profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa
> ();
>        cfun->cfg->count_max = profile_count::uninitialized ();
>        FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL,
> next_bb)
>       {
> -       sreal tmp = BLOCK_INFO (bb)->frequency * freq_max +
> real_one_half;
> +       sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
>         profile_count count = profile_count::from_gcov_type (tmp.to_int
> ());
> 
>         /* If we have profile feedback in which this function was never diff -
> -git a/gcc/profile-count.c b/gcc/profile-count.c index
> d463ddd5cce..0c792297826 100644
> --- a/gcc/profile-count.c
> +++ b/gcc/profile-count.c
> @@ -446,3 +446,12 @@ profile_probability::combine_with_count
> (profile_count count1,
>    else
>      return *this * even () + other * even ();  }
> +
> +/* Return probability as sreal in range [0, 1].  */
> +
> +sreal
> +profile_probability::to_sreal () const
> +{
> +  gcc_checking_assert (initialized_p ());
> +  return ((sreal)m_val) >> (n_bits - 2); }
> diff --git a/gcc/profile-count.h b/gcc/profile-count.h index
> 987d3ce9a49..09217a8699b 100644
> --- a/gcc/profile-count.h
> +++ b/gcc/profile-count.h
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
> 
>  struct function;
>  struct profile_count;
> +class sreal;
> 
>  /* Quality of the profile count.  Because gengtype does not support enums
>     inside of classes, this is in global namespace.  */ @@ -614,6 +615,8 @@
> public:
>                                         profile_probability other,
>                                         profile_count count2) const;
> 
> +  /* Return probability as sreal.  */
> +  sreal to_sreal () const;
>    /* LTO streaming support.  */
>    static profile_probability stream_in (class lto_input_block *);
>    void stream_out (struct output_block *); @@ -674,8 +677,6 @@ public:
> 
>   */
> 
> -class sreal;
> -
>  struct GTY(()) profile_count
>  {
>  public:

Reply via email to