"Kewen.Lin via Gcc-patches" <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As PR100794 shows, in the current implementation PRE bypasses
> some optimization to avoid introducing loop carried dependence
> which stops loop vectorizer to vectorize the loop.  At -O2,
> there is no downstream pass to re-catch this kind of opportunity
> if loop vectorizer fails to vectorize that loop.
>
> This patch follows Richi's suggestion in the PR, if predcom flag
> isn't set and loop vectorization will enable predcom without any
> unrolling implicitly.  The Power9 SPEC2017 evaluation showed it
> can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap
> cost model, no remarkable impact at cheap cost model, the build
> time and size impact is fine (see the PR for the details).
>
> By the way, I tested another proposal to guard PRE not skip the
> optimization for cheap and very-cheap vect cost models, the
> evaluation results showed it's fine with very cheap cost model,
> but it can degrade some bmks like 521.wrf_r -9.17% and
> 549.fotonik3d_r -2.07% etc.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -----
> gcc/ChangeLog:
>
>       PR tree-optimization/100794
>       * tree-predcom.c (tree_predictive_commoning_loop): Add parameter
>       allow_unroll_p and only allow unrolling when it's true.
>       (tree_predictive_commoning): Add parameter allow_unroll_p and
>       adjust for it.
>       (run_tree_predictive_commoning): Likewise.
>       (class pass_predcom): Add private member allow_unroll_p.
>       (pass_predcom::pass_predcom): Init allow_unroll_p.
>       (pass_predcom::gate): Check flag_tree_loop_vectorize and 
>       global_options_set.x_flag_predictive_commoning.
>       (pass_predcom::execute): Adjust for allow_unroll_p.
>
> gcc/testsuite/ChangeLog:
>
>       PR tree-optimization/100794
>       * gcc.dg/tree-ssa/pr100794.c: New test.
>
>  gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +++++++++
>  gcc/tree-predcom.c                       | 57 +++++++++++++++++-------
>  2 files changed, 60 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
> new file mode 100644
> index 00000000000..6f707ae7fba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details 
> -fdisable-tree-vect" } */
> +
> +extern double arr[100];
> +extern double foo (double, double);
> +extern double sum;
> +
> +void
> +test (int i_0, int i_n)
> +{
> +  int i;
> +  for (i = i_0; i < i_n - 1; i++)
> +    {
> +      double a = arr[i];
> +      double b = arr[i + 1];
> +      sum += a * b;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "Executing predictive commoning without 
> unrolling" "pcom" } } */
> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
> index 02f911a08bb..65a93c8e505 100644
> --- a/gcc/tree-predcom.c
> +++ b/gcc/tree-predcom.c
> @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> 
> chains)
>     applied to this loop.  */
>  
>  static unsigned
> -tree_predictive_commoning_loop (class loop *loop)
> +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
>  {
>    vec<data_reference_p> datarefs;
>    vec<ddr_p> dependences;
>    struct component *components;
>    vec<chain_p> chains = vNULL;
> -  unsigned unroll_factor;
> +  unsigned unroll_factor = 0;
>    class tree_niter_desc desc;
>    bool unroll = false, loop_closed_ssa = false;
>  
> @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop)
>        dump_chains (dump_file, chains);
>      }
>  
> -  /* Determine the unroll factor, and if the loop should be unrolled, ensure
> -     that its number of iterations is divisible by the factor.  */
> -  unroll_factor = determine_unroll_factor (chains);
> -  unroll = (unroll_factor > 1
> -         && can_unroll_loop_p (loop, unroll_factor, &desc));
> +  if (allow_unroll_p)
> +    /* Determine the unroll factor, and if the loop should be unrolled, 
> ensure
> +       that its number of iterations is divisible by the factor.  */
> +    unroll_factor = determine_unroll_factor (chains);
> +
> +  if (unroll_factor > 1)
> +    unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
>  
>    /* Execute the predictive commoning transformations, and possibly unroll 
> the
>       loop.  */
> @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop)
>  /* Runs predictive commoning.  */
>  
>  unsigned
> -tree_predictive_commoning (void)
> +tree_predictive_commoning (bool allow_unroll_p)
>  {
>    class loop *loop;
>    unsigned ret = 0, changed = 0;
> @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void)
>    FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
>      if (optimize_loop_for_speed_p (loop))
>        {
> -     changed |= tree_predictive_commoning_loop (loop);
> +     changed |= tree_predictive_commoning_loop (loop, allow_unroll_p);
>        }
>    free_original_copy_tables ();
>  
> @@ -3355,12 +3357,12 @@ tree_predictive_commoning (void)
>  /* Predictive commoning Pass.  */
>  
>  static unsigned
> -run_tree_predictive_commoning (struct function *fun)
> +run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
>  {
>    if (number_of_loops (fun) <= 1)
>      return 0;
>  
> -  return tree_predictive_commoning ();
> +  return tree_predictive_commoning (allow_unroll_p);
>  }
>  
>  namespace {
> @@ -3382,15 +3384,36 @@ class pass_predcom : public gimple_opt_pass
>  {
>  public:
>    pass_predcom (gcc::context *ctxt)
> -    : gimple_opt_pass (pass_data_predcom, ctxt)
> +    : gimple_opt_pass (pass_data_predcom, ctxt),
> +      allow_unroll_p (true)
>    {}
>  
>    /* opt_pass methods: */
> -  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
> -  virtual unsigned int execute (function *fun)
> -    {
> -      return run_tree_predictive_commoning (fun);
> -    }
> +  virtual bool
> +  gate (function *)
> +  {
> +    if (flag_predictive_commoning != 0)
> +      return true;
> +    /* Loop vectorization enables predictive commoning implicitly
> +       only if predictive commoning isn't set explicitly, and it
> +       doesn't allow unrolling.  */
> +    if (flag_tree_loop_vectorize
> +     && !global_options_set.x_flag_predictive_commoning)
> +      {
> +     allow_unroll_p = false;
> +     return true;
> +      }
> +    return false;
> +  }
> +
> +  virtual unsigned int
> +  execute (function *fun)
> +  {
> +    return run_tree_predictive_commoning (fun, allow_unroll_p);
> +  }
> +
> +private:
> +  bool allow_unroll_p;
>  
>  }; // class pass_predcom

Calculating allow_unroll_p this way doesn't look robust against
changes in options caused by pragmas, etc.  Would it work if we
dropped the member variable and just passed flag_predictive_commoning != 0
to run_tree_predictive commoning?

Thanks,
Richard

Reply via email to