On Wed, Dec 8, 2021 at 7:32 AM Xionghu Luo <luo...@linux.ibm.com> wrote:
>
>
>
> On 2021/12/7 20:17, Richard Biener wrote:
> >>> +  class loop *coldest_loop = coldest_outermost_loop[loop->num];
> >>> +  if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
> >>> +    {
> >>> +      class loop *hotter_loop = hotter_than_inner_loop[loop->num];
> >>> +      if (!hotter_loop
> >>> +       || loop_depth (hotter_loop) < loop_depth (outermost_loop))
> >>> +     return outermost_loop;
> >>> +
> >>> +      /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
> >>> +     [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
> >>> +     hotter_loop, second_coldest_loop, ..., loop]
> >>> +     return second_coldest_loop to be the hoist target.  */
> >>> +      class loop *aloop;
> >>> +      for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
> >>> +     if (flow_loop_nested_p (aloop, loop))
> >> should be:
> >>
> >>         if (aloop == loop || flow_loop_nested_p (aloop, loop))
> > OK with that fixed.
> >
> > Are necessary prerequesites committed to avoid regressions?
> > I guess we need to keep a watchful eye and eventually revert
> > (or gate with a --param disabled by default) the new behavior if
> > severe regressions are discovered.
> >
> > Thanks and sorry for the repeated delays.
> > Richard.
> >
>
> Thanks for your review, I learned quite a lot and gained very useful
> comments & help through the period :)  There are still 3 patches required
> to avoid regression or so, I've reorganized them and sent it out.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586371.html
>
>
> In addition, cooked the patch to add option for disable/enable it.
> Is it OK to merge it to current patch?

Hmm, let's go without this flag for now, we can add something like this
when we see a testcase where that makes sense (and where profile info
is not wrecked by other bugs).

Adding a --param would be a no brainer, but for new user-facing options
we should be more careful.

Richard.

>
> [PATCH] Add option -fhoist-to-cold-loop
>
>
> gcc/ChangeLog:
>
>         * common.opt: New.
>         * loop-invariant.c (find_invariants_bb):
>         * tree-ssa-loop-im.c (get_coldest_out_loop):
>         (can_sm_ref_p):
>         (loop_invariant_motion_in_fun):
> ---
>  gcc/common.opt         |  4 ++++
>  gcc/loop-invariant.c   |  2 +-
>  gcc/tree-ssa-loop-im.c | 33 ++++++++++++++++++++++-----------
>  3 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index b921f5e3b25..62b82bd8b95 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1171,6 +1171,10 @@ fcode-hoisting
>  Common Var(flag_code_hoisting) Optimization
>  Enable code hoisting.
>
> +fhoist-to-cold-loop
> +Common Var(flag_hoist_to_cold_loop) Init(1) Optimization
> +Enable hoisting code to cold loop.
> +
>  fcombine-stack-adjustments
>  Common Var(flag_combine_stack_adjustments) Optimization
>  Looks for opportunities to reduce stack adjustments and stack references.
> diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
> index 5c3be7bf0eb..75b9dd47cd7 100644
> --- a/gcc/loop-invariant.c
> +++ b/gcc/loop-invariant.c
> @@ -1189,7 +1189,7 @@ find_invariants_bb (class loop *loop, basic_block bb, 
> bool always_reached,
>    rtx_insn *insn;
>    basic_block preheader = loop_preheader_edge (loop)->src;
>
> -  if (preheader->count > bb->count)
> +  if (flag_hoist_to_cold_loop && preheader->count > bb->count)
>      return;
>
>    FOR_BB_INSNS (bb, insn)
> diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
> index 565ee62d3f7..d745f66851b 100644
> --- a/gcc/tree-ssa-loop-im.c
> +++ b/gcc/tree-ssa-loop-im.c
> @@ -450,6 +450,9 @@ static class loop *
>  get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
>                       basic_block curr_bb)
>  {
> +  if (!flag_hoist_to_cold_loop)
> +    return outermost_loop;
> +
>    gcc_assert (outermost_loop == loop
>               || flow_loop_nested_p (outermost_loop, loop));
>
> @@ -3031,8 +3034,9 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref)
>    /* Verify whether the candidate is hot for LOOP.  Only do store motion if 
> the
>      candidate's profile count is hot.  Statement in cold BB shouldn't be 
> moved
>      out of it's loop_father.  */
> -  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
> -    return false;
> +  if (flag_hoist_to_cold_loop)
> +    if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
> +      return false;
>
>    return true;
>  }
> @@ -3373,8 +3377,11 @@ tree_ssa_lim_finalize (void)
>
>    free (bb_loop_postorder);
>
> -  coldest_outermost_loop.release ();
> -  hotter_than_inner_loop.release ();
> +  if (flag_hoist_to_cold_loop)
> +    {
> +      coldest_outermost_loop.release ();
> +      hotter_than_inner_loop.release ();
> +    }
>  }
>
>  /* Moves invariants from loops.  Only "expensive" invariants are moved out --
> @@ -3396,13 +3403,17 @@ loop_invariant_motion_in_fun (function *fun, bool 
> store_motion)
>
>    /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
>     */
> -  class loop *loop;
> -  coldest_outermost_loop.create (number_of_loops (cfun));
> -  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
> -  hotter_than_inner_loop.create (number_of_loops (cfun));
> -  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
> -  for (loop = current_loops->tree_root->inner; loop != NULL; loop = 
> loop->next)
> -    fill_coldest_and_hotter_out_loop (loop, NULL, loop);
> +  if (flag_hoist_to_cold_loop)
> +    {
> +      class loop *loop;
> +      coldest_outermost_loop.create (number_of_loops (cfun));
> +      coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
> +      hotter_than_inner_loop.create (number_of_loops (cfun));
> +      hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
> +      for (loop = current_loops->tree_root->inner; loop != NULL;
> +          loop = loop->next)
> +       fill_coldest_and_hotter_out_loop (loop, NULL, loop);
> +    }
>
>    int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
>    int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
> --
> 2.27.0.90.geebb51ba8c
>
>

Reply via email to