On Thu, 6 Jul 2023, Jan Hubicka wrote:

> Hi,
> original scale_loop_profile was implemented to only handle very simple loops
> produced by vectorizer at that time (basically loops with only one exit and no
> subloops). It also has not been updated to new profile-count API very 
> carefully.
> Since I want to use it from loop peeling and unlooping, I need the
> function to at least not get profile worse on general loops.
> 
> The function does two thigs
>  1) scales down the loop profile by a given probability.
>     This is useful, for example, to scale down profile after peeling when loop
>     body is executed less often than before
>  2) after scaling is done and if profile indicates too large iteration
>     count update profile to cap iteration count by ITERATION_BOUND parameter.
> 
> Step 1 is easy and unchanged.
> 
> I changed ITERATION_BOUND to be actual bound on number of iterations as
> used elsewhere (i.e. number of executions of latch edge) rather then
> number of iterations + 1 as it was before.
> 
> To do 2) one needs to do the following
>   a) scale own loop profile so frquency o header is at most
>      the sum of in-edge counts * (iteration_bound + 1)
>   b) update loop exit probabilities so their count is the same
>      as before scaling.
>   c) reduce frequencies of basic blocks after loop exit
> 
> old code did b) by setting probability to 1 / iteration_bound which is
> correctly only of the basic block containing exit executes precisely one per
> iteration (it is not insie other conditional or inner loop).  This is fixed
> now by using set_edge_probability_and_rescale_others
> 
> aldo c) was implemented only for special case when the exit was just before
> latch bacis block.  I now use dominance info to get right some of addional
> case.
> 
> I still did not try to do anything for multiple exit loops, though the
> implementatoin could be generalized.
> 
> Bootstrapped/regtested x86_64-linux.  Plan to cmmit it tonight if there
> are no complains.

Looks good, but I wonder what we can do to at least make the
multiple exit case behave reasonably?  The vectorizer keeps track
of a "canonical" exit, would it be possible to pass in the main
exit edge and use that instead of single_exit (), would other
exits then behave somewhat reasonable or would we totally screw
things up here?  That is, the "canonical" exit would be the
counting exit while the other exits are on data driven conditions
and thus wouldn't change probability when we reduce the number
of iterations(?)

Richard.

> gcc/ChangeLog:
> 
>       * cfgloopmanip.cc (scale_loop_profile): Rewrite exit edge
>       probability update to be safe on loops with subloops.
>       Make bound parameter to be iteration bound.
>       * tree-ssa-loop-ivcanon.cc (try_peel_loop): Update call
>       of scale_loop_profile.
>       * tree-vect-loop-manip.cc (vect_do_peeling): Likewise.
> 
> diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
> index 6e09dcbb0b1..524b979a546 100644
> --- a/gcc/cfgloopmanip.cc
> +++ b/gcc/cfgloopmanip.cc
> @@ -499,7 +499,7 @@ scale_loop_frequencies (class loop *loop, 
> profile_probability p)
>  }
>  
>  /* Scale profile in LOOP by P.
> -   If ITERATION_BOUND is non-zero, scale even further if loop is predicted
> +   If ITERATION_BOUND is not -1, scale even further if loop is predicted
>     to iterate too many times.
>     Before caling this function, preheader block profile should be already
>     scaled to final count.  This is necessary because loop iterations are
> @@ -510,106 +510,123 @@ void
>  scale_loop_profile (class loop *loop, profile_probability p,
>                   gcov_type iteration_bound)
>  {
> -  edge e, preheader_e;
> -  edge_iterator ei;
> -
> -  if (dump_file && (dump_flags & TDF_DETAILS))
> +  if (!(p == profile_probability::always ()))
>      {
> -      fprintf (dump_file, ";; Scaling loop %i with scale ",
> -            loop->num);
> -      p.dump (dump_file);
> -      fprintf (dump_file, " bounding iterations to %i\n",
> -            (int)iteration_bound);
> -    }
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +     {
> +       fprintf (dump_file, ";; Scaling loop %i with scale ",
> +                loop->num);
> +       p.dump (dump_file);
> +       fprintf (dump_file, "\n");
> +     }
>  
> -  /* Scale the probabilities.  */
> -  scale_loop_frequencies (loop, p);
> +      /* Scale the probabilities.  */
> +      scale_loop_frequencies (loop, p);
> +    }
>  
> -  if (iteration_bound == 0)
> +  if (iteration_bound == -1)
>      return;
>  
>    gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, 
> true);
> +  if (iterations == -1)
> +    return;
>  
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      {
> -      fprintf (dump_file, ";; guessed iterations after scaling %i\n",
> -            (int)iterations);
> +      fprintf (dump_file,
> +            ";; guessed iterations of loop %i:%i new upper bound %i:\n",
> +            loop->num,
> +            (int)iterations,
> +            (int)iteration_bound);
>      }
>  
>    /* See if loop is predicted to iterate too many times.  */
>    if (iterations <= iteration_bound)
>      return;
>  
> -  preheader_e = loop_preheader_edge (loop);
> -
> -  /* We could handle also loops without preheaders, but bounding is
> -     currently used only by optimizers that have preheaders constructed.  */
> -  gcc_checking_assert (preheader_e);
> -  profile_count count_in = preheader_e->count ();
> +  /* Compute number of invocations of the loop.  */
> +  profile_count count_in = profile_count::zero ();
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, loop->header->preds)
> +    count_in += e->count ();
>  
> -  if (count_in > profile_count::zero ()
> -      && loop->header->count.initialized_p ())
> +  /* Now scale the loop body so header count is
> +     count_in * (iteration_bound + 1)  */
> +  profile_probability scale_prob
> +    = (count_in *= iteration_bound).probability_in (loop->header->count);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
>      {
> -      profile_count count_delta = profile_count::zero ();
> -
> -      e = single_exit (loop);
> -      if (e)
> -     {
> -       edge other_e;
> -       FOR_EACH_EDGE (other_e, ei, e->src->succs)
> -         if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
> -             && e != other_e)
> -           break;
> -
> -       /* Probability of exit must be 1/iterations.  */
> -       count_delta = e->count ();
> -       e->probability = profile_probability::always () / iteration_bound;
> -       other_e->probability = e->probability.invert ();
> -
> -       /* In code below we only handle the following two updates.  */
> -       if (other_e->dest != loop->header
> -           && other_e->dest != loop->latch
> -           && (dump_file && (dump_flags & TDF_DETAILS)))
> -         {
> -           fprintf (dump_file, ";; giving up on update of paths from "
> -                    "exit condition to latch\n");
> -         }
> -     }
> +      fprintf (dump_file, ";; Scaling loop %i with scale ",
> +            loop->num);
> +      p.dump (dump_file);
> +      fprintf (dump_file, " to reach upper bound %i\n",
> +            (int)iteration_bound);
> +    }
> +  /* Finally attempt to fix exit edge probability.  */
> +  auto_vec<edge> exits = get_loop_exit_edges  (loop);
> +  edge exit_edge = single_likely_exit (loop, exits);
> +
> +  /* In a consistent profile unadjusted_exit_count should be same as 
> count_in,
> +     however to preserve as much of the original info, avoid recomputing
> +     it.  */
> +  profile_count unadjusted_exit_count;
> +  if (exit_edge)
> +    unadjusted_exit_count = exit_edge->count ();
> +  scale_loop_frequencies (loop, scale_prob);
> +
> +  if (exit_edge)
> +    {
> +      profile_count old_exit_count = exit_edge->count ();
> +      profile_probability new_probability;
> +      if (iteration_bound > 0)
> +     new_probability
> +       = unadjusted_exit_count.probability_in (exit_edge->src->count);
>        else
> -        if (dump_file && (dump_flags & TDF_DETAILS))
> -       fprintf (dump_file, ";; Loop has multiple exit edges; "
> -                           "giving up on exit condition update\n");
> -
> -      /* Roughly speaking we want to reduce the loop body profile by the
> -      difference of loop iterations.  We however can do better if
> -      we look at the actual profile, if it is available.  */
> -      p = profile_probability::always ();
> -
> -      count_in *= iteration_bound;
> -      p = count_in.probability_in (loop->header->count);
> -      if (!(p > profile_probability::never ()))
> -     p = profile_probability::very_unlikely ();
> -
> -      if (p == profile_probability::always ()
> -       || !p.initialized_p ())
> -     return;
> -
> -      /* If latch exists, change its count, since we changed
> -      probability of exit.  Theoretically we should update everything from
> -      source of exit edge to latch, but for vectorizer this is enough.  */
> -      if (loop->latch && loop->latch != e->src)
> -     loop->latch->count += count_delta;
> -
> -      /* Scale the probabilities.  */
> -      scale_loop_frequencies (loop, p);
> +     new_probability = profile_probability::always ();
> +      set_edge_probability_and_rescale_others (exit_edge, new_probability);
> +      profile_count new_exit_count = exit_edge->count ();
> +
> +      /* Rescale the remaining edge probabilities and see if there is only
> +      one.  */
> +      edge other_edge = NULL;
> +      bool found = false;
> +      FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
> +     if (!(e->flags & EDGE_FAKE)
> +         && !(e->probability == profile_probability::never ())
> +         && !loop_exit_edge_p (loop, e))
> +       {
> +         if (found)
> +           {
> +             other_edge = NULL;
> +             break;
> +           }
> +         other_edge = e;
> +         found = true;
> +       }
> +      /* If there is only loop latch after other edge,
> +      update its profile.  */
> +      if (other_edge && other_edge->dest == loop->latch)
> +     loop->latch->count -= new_exit_count - old_exit_count;
> +      else
> +     {
> +       basic_block *body = get_loop_body (loop);
> +       profile_count new_count = exit_edge->src->count - new_exit_count;
> +       profile_count old_count = exit_edge->src->count - old_exit_count;
>  
> -      /* Change latch's count back.  */
> -      if (loop->latch && loop->latch != e->src)
> -     loop->latch->count -= count_delta;
> +       for (unsigned int i = 0; i < loop->num_nodes; i++)
> +         if (body[i] != exit_edge->src
> +             && dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
> +           body[i]->count.apply_scale (new_count, old_count);
>  
> -      if (dump_file && (dump_flags & TDF_DETAILS))
> -     fprintf (dump_file, ";; guessed iterations are now %i\n",
> -              (int)expected_loop_iterations_unbounded (loop, NULL, true));
> +       free (body);
> +     }
> +    }
> +  else if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file,
> +            ";; Loop has mulitple exits;"
> +            " will leave exit probabilities inconsistent\n");
>      }
>  }
>  
> diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
> index 491b57ec0f1..184c08eec75 100644
> --- a/gcc/tree-ssa-loop-ivcanon.cc
> +++ b/gcc/tree-ssa-loop-ivcanon.cc
> @@ -1173,7 +1179,7 @@ try_peel_loop (class loop *loop,
>        }
>    profile_probability p;
>    p = entry_count.probability_in (loop->header->count);
> -  scale_loop_profile (loop, p, 0);
> +  scale_loop_profile (loop, p, -1);
>    bitmap_set_bit (peeled_loops, loop->num);
>    return true;
>  }
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index d66d4a6de69..2361cb328ab 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -3191,7 +3191,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
> tree nitersm1,
>        if (prob_vector.initialized_p ())
>       {
>         scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
> -       scale_loop_profile (loop, prob_vector, 0);
> +       scale_loop_profile (loop, prob_vector, -1);
>       }
>      }
>  
> @@ -3236,7 +3236,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
> tree nitersm1,
>         slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
>  
>         scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
> -       scale_loop_profile (prolog, prob_prolog, bound_prolog);
> +       scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
>       }
>  
>        /* Update init address of DRs.  */
> @@ -3378,7 +3378,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
> tree nitersm1,
>  
>             scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
>           }
> -       scale_loop_profile (epilog, prob_epilog, 0);
> +       scale_loop_profile (epilog, prob_epilog, -1);
>       }
>        else
>       slpeel_update_phi_nodes_for_lcssa (epilog);
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Reply via email to