On Tue, 13 Jul 2021, Jiufu Guo wrote:

> Major changes from v1:
> * Add target hook to query preferred doloop mode.
> * Recompute doloop iv base from niter under preferred mode.
> 
> Currently, doloop.xx variable is using the type as niter which may shorter
> than word size.  For some cases, it would be better to use word size type.
> For example, on 64bit system, to access 32bit value, subreg maybe used.
> Then using 64bit type maybe better for niter if it can be present in
> both 32bit and 64bit.
> 
> This patch add target hook for querg perferred mode for doloop iv.
> And update doloop iv mode accordingly.
> 
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> 
> BR.
> Jiufu
> 
> gcc/ChangeLog:
> 
> 2021-07-13  Jiufu Guo  <guoji...@linux.ibm.com>
> 
>       PR target/61837
>       * config/rs6000/rs6000.c (TARGET_PREFERRED_DOLOOP_MODE): New hook.
>       (rs6000_preferred_doloop_mode): New hook.
>       * doc/tm.texi: Regenerated.
>       * doc/tm.texi.in: Add hook preferred_doloop_mode.
>       * target.def (preferred_doloop_mode): New hook.
>       * targhooks.c (default_preferred_doloop_mode): New hook.
>       * targhooks.h (default_preferred_doloop_mode): New hook.
>       * tree-ssa-loop-ivopts.c (compute_doloop_base_on_mode): New function.
>       (add_iv_candidate_for_doloop): Call targetm.preferred_doloop_mode
>       and compute_doloop_base_on_mode.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-07-13  Jiufu Guo  <guoji...@linux.ibm.com>
> 
>       PR target/61837
>       * gcc.target/powerpc/pr61837.c: New test.
> ---
>  gcc/config/rs6000/rs6000.c                 |  9 +++
>  gcc/doc/tm.texi                            |  4 ++
>  gcc/doc/tm.texi.in                         |  2 +
>  gcc/target.def                             |  7 +++
>  gcc/targhooks.c                            |  8 +++
>  gcc/targhooks.h                            |  2 +
>  gcc/testsuite/gcc.target/powerpc/pr61837.c | 16 ++++++
>  gcc/tree-ssa-loop-ivopts.c                 | 66 +++++++++++++++++++++-
>  8 files changed, 112 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr61837.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 9a5db63d0ef..444f3c49288 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1700,6 +1700,9 @@ static const struct attribute_spec 
> rs6000_attribute_table[] =
>  #undef TARGET_DOLOOP_COST_FOR_ADDRESS
>  #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
>  
> +#undef TARGET_PREFERRED_DOLOOP_MODE
> +#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
> +
>  #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
>  #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
>  
> @@ -27867,6 +27870,12 @@ rs6000_predict_doloop_p (struct loop *loop)
>    return true;
>  }
>  
> +static machine_mode
> +rs6000_preferred_doloop_mode (machine_mode)
> +{
> +  return word_mode;
> +}
> +
>  /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */
>  
>  static bool
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 2a41ae5fba1..3f5881220f8 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11984,6 +11984,10 @@ By default, the RTL loop optimizer does not use a 
> present doloop pattern for
>  loops containing function calls or branch on table instructions.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE 
> (machine_mode @var{mode})
> +This hook returns a more preferred mode or the @var{mode} itself.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn 
> *@var{insn})
>  Take an instruction in @var{insn} and return @code{false} if the instruction
>  is not appropriate as a combination of two or more instructions.  The
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index f881cdabe9e..38215149a92 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7917,6 +7917,8 @@ to by @var{ce_info}.
>  
>  @hook TARGET_INVALID_WITHIN_DOLOOP
>  
> +@hook TARGET_PREFERRED_DOLOOP_MODE
> +
>  @hook TARGET_LEGITIMATE_COMBINED_INSN
>  
>  @hook TARGET_CAN_FOLLOW_JUMP
> diff --git a/gcc/target.def b/gcc/target.def
> index c009671c583..91a96150e50 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4454,6 +4454,13 @@ loops containing function calls or branch on table 
> instructions.",
>   const char *, (const rtx_insn *insn),
>   default_invalid_within_doloop)
>  
> +DEFHOOK
> +(preferred_doloop_mode,
> + "This hook returns a more preferred mode or the @var{mode} itself.",
> + machine_mode,
> + (machine_mode mode),
> + default_preferred_doloop_mode)
> +
>  /* Returns true for a legitimate combined insn.  */
>  DEFHOOK
>  (legitimate_combined_insn,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 44a1facedcf..eb5190910dc 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop 
> ATTRIBUTE_UNUSED)
>    return false;
>  }
>  
> +/* By default, just use the input MODE itself.  */
> +
> +machine_mode
> +default_preferred_doloop_mode (machine_mode mode)
> +{
> +  return mode;
> +}
> +
>  /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns
>     an error message.
>  
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index f70a307d26c..f0560b6ae34 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -88,6 +88,8 @@ extern bool default_fixed_point_supported_p (void);
>  extern bool default_has_ifunc_p (void);
>  
>  extern bool default_predict_doloop_p (class loop *);
> +extern machine_mode
> +default_preferred_doloop_mode (machine_mode);
>  extern const char * default_invalid_within_doloop (const rtx_insn *);
>  
>  extern tree default_builtin_vectorized_function (unsigned int, tree, tree);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c 
> b/gcc/testsuite/gcc.target/powerpc/pr61837.c
> new file mode 100644
> index 00000000000..dc44eb9cb41
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +void foo(int *p1, long *p2, int s)
> +{
> +  int n, v, i;
> +
> +  v = 0;
> +  for (n = 0; n <= 100; n++) {
> +     for (i = 0; i < s; i++)
> +        if (p2[i] == n)
> +           p1[i] = v;
> +     v += 88;
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 12a8a49a307..92767a09b6c 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -5657,6 +5657,56 @@ relate_compare_use_with_all_cands (struct ivopts_data 
> *data)
>      }
>  }
>  
> +/* If PREFERRED_MODE is suitable and profitable, use the preferred
> +   PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1.  */
> +
> +static tree
> +compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
> +                          const widest_int &iterations_max)
> +{
> +  tree ntype = TREE_TYPE (niter);
> +  tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);

It's unlikely but type_for_mode can return NULL, so you shouldn't
assert but fall back to using the original type.  The assert for
TYPE_UNSIGNED is superfluous - you asked for an unsigned type already.

Otherwise the IVOPTs changes and the new target hook look OK, please
address Seghers comments though.

Thanks,
Richard.

> +  gcc_assert (pref_type && TYPE_UNSIGNED (ntype));
> +
> +  int prec = TYPE_PRECISION (ntype);
> +  int pref_prec = TYPE_PRECISION (pref_type);
> +
> +  tree base;
> +
> +  /* Check if the PREFERRED_MODED is able to present niter.  */
> +  if (pref_prec > prec
> +      || wi::ltu_p (iterations_max,
> +                 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
> +                                   UNSIGNED)))
> +    {
> +      /* No wrap, it is safe to use preferred type after niter + 1.  */
> +      if (wi::ltu_p (iterations_max,
> +                  widest_int::from (wi::max_value (prec, UNSIGNED),
> +                                    UNSIGNED)))
> +     {
> +       /* This could help to optimize "-1 +1" pair when niter looks
> +          like "n-1": n is in original mode.  "base = (n - 1) + 1"
> +          in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n.  */
> +       base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> +                           build_int_cst (ntype, 1));
> +       base = fold_convert (pref_type, base);
> +     }
> +
> +      /* To avoid wrap, need fold niter to preferred type before plus 1.  */
> +      else
> +     {
> +       niter = fold_convert (pref_type, niter);
> +       base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
> +                           build_int_cst (pref_type, 1));
> +     }
> +    }
> +  else
> +    base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> +                     build_int_cst (ntype, 1));
> +  return base;
> +}
> +
>  /* Add one doloop dedicated IV candidate:
>       - Base is (may_be_zero ? 1 : (niter + 1)).
>       - Step is -1.  */
> @@ -5688,8 +5738,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data)
>       return;
>      }
>  
> -  tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> -                        build_int_cst (ntype, 1));
> +  machine_mode mode = TYPE_MODE (ntype);
> +  machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
> +
> +  tree base;
> +  if (mode != pref_mode)
> +    {
> +      base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
> +      ntype = TREE_TYPE (base);
> +    }
> +  else
> +    base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> +                     build_int_cst (ntype, 1));
> +
> +
>    add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, 
> true);
>  }
>  
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to