On Tue, 13 Jul 2021, Jiufu Guo wrote: > Major changes from v1: > * Add target hook to query preferred doloop mode. > * Recompute doloop iv base from niter under preferred mode. > > Currently, doloop.xx variable is using the type as niter which may shorter > than word size. For some cases, it would be better to use word size type. > For example, on 64bit system, to access 32bit value, subreg maybe used. > Then using 64bit type maybe better for niter if it can be present in > both 32bit and 64bit. > > This patch add target hook for querg perferred mode for doloop iv. > And update doloop iv mode accordingly. > > Bootstrap and regtest pass on powerpc64le, is this ok for trunk? > > BR. > Jiufu > > gcc/ChangeLog: > > 2021-07-13 Jiufu Guo <guoji...@linux.ibm.com> > > PR target/61837 > * config/rs6000/rs6000.c (TARGET_PREFERRED_DOLOOP_MODE): New hook. > (rs6000_preferred_doloop_mode): New hook. > * doc/tm.texi: Regenerated. > * doc/tm.texi.in: Add hook preferred_doloop_mode. > * target.def (preferred_doloop_mode): New hook. > * targhooks.c (default_preferred_doloop_mode): New hook. > * targhooks.h (default_preferred_doloop_mode): New hook. > * tree-ssa-loop-ivopts.c (compute_doloop_base_on_mode): New function. > (add_iv_candidate_for_doloop): Call targetm.preferred_doloop_mode > and compute_doloop_base_on_mode. > > gcc/testsuite/ChangeLog: > > 2021-07-13 Jiufu Guo <guoji...@linux.ibm.com> > > PR target/61837 > * gcc.target/powerpc/pr61837.c: New test. > --- > gcc/config/rs6000/rs6000.c | 9 +++ > gcc/doc/tm.texi | 4 ++ > gcc/doc/tm.texi.in | 2 + > gcc/target.def | 7 +++ > gcc/targhooks.c | 8 +++ > gcc/targhooks.h | 2 + > gcc/testsuite/gcc.target/powerpc/pr61837.c | 16 ++++++ > gcc/tree-ssa-loop-ivopts.c | 66 +++++++++++++++++++++- > 8 files changed, 112 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr61837.c > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 9a5db63d0ef..444f3c49288 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -1700,6 +1700,9 @@ static const struct attribute_spec > rs6000_attribute_table[] = > #undef TARGET_DOLOOP_COST_FOR_ADDRESS > #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000 > > +#undef TARGET_PREFERRED_DOLOOP_MODE > +#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode > + > #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV > #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv > > @@ -27867,6 +27870,12 @@ rs6000_predict_doloop_p (struct loop *loop) > return true; > } > > +static machine_mode > +rs6000_preferred_doloop_mode (machine_mode) > +{ > + return word_mode; > +} > + > /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */ > > static bool > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi > index 2a41ae5fba1..3f5881220f8 100644 > --- a/gcc/doc/tm.texi > +++ b/gcc/doc/tm.texi > @@ -11984,6 +11984,10 @@ By default, the RTL loop optimizer does not use a > present doloop pattern for > loops containing function calls or branch on table instructions. > @end deftypefn > > +@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE > (machine_mode @var{mode}) > +This hook returns a more preferred mode or the @var{mode} itself. > +@end deftypefn > + > @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn > *@var{insn}) > Take an instruction in @var{insn} and return @code{false} if the instruction > is not appropriate as a combination of two or more instructions. The > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in > index f881cdabe9e..38215149a92 100644 > --- a/gcc/doc/tm.texi.in > +++ b/gcc/doc/tm.texi.in > @@ -7917,6 +7917,8 @@ to by @var{ce_info}. > > @hook TARGET_INVALID_WITHIN_DOLOOP > > +@hook TARGET_PREFERRED_DOLOOP_MODE > + > @hook TARGET_LEGITIMATE_COMBINED_INSN > > @hook TARGET_CAN_FOLLOW_JUMP > diff --git a/gcc/target.def b/gcc/target.def > index c009671c583..91a96150e50 100644 > --- a/gcc/target.def > +++ b/gcc/target.def > @@ -4454,6 +4454,13 @@ loops containing function calls or branch on table > instructions.", > const char *, (const rtx_insn *insn), > default_invalid_within_doloop) > > +DEFHOOK > +(preferred_doloop_mode, > + "This hook returns a more preferred mode or the @var{mode} itself.", > + machine_mode, > + (machine_mode mode), > + default_preferred_doloop_mode) > + > /* Returns true for a legitimate combined insn. */ > DEFHOOK > (legitimate_combined_insn, > diff --git a/gcc/targhooks.c b/gcc/targhooks.c > index 44a1facedcf..eb5190910dc 100644 > --- a/gcc/targhooks.c > +++ b/gcc/targhooks.c > @@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop > ATTRIBUTE_UNUSED) > return false; > } > > +/* By default, just use the input MODE itself. */ > + > +machine_mode > +default_preferred_doloop_mode (machine_mode mode) > +{ > + return mode; > +} > + > /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns > an error message. > > diff --git a/gcc/targhooks.h b/gcc/targhooks.h > index f70a307d26c..f0560b6ae34 100644 > --- a/gcc/targhooks.h > +++ b/gcc/targhooks.h > @@ -88,6 +88,8 @@ extern bool default_fixed_point_supported_p (void); > extern bool default_has_ifunc_p (void); > > extern bool default_predict_doloop_p (class loop *); > +extern machine_mode > +default_preferred_doloop_mode (machine_mode); > extern const char * default_invalid_within_doloop (const rtx_insn *); > > extern tree default_builtin_vectorized_function (unsigned int, tree, tree); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c > b/gcc/testsuite/gcc.target/powerpc/pr61837.c > new file mode 100644 > index 00000000000..dc44eb9cb41 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +void foo(int *p1, long *p2, int s) > +{ > + int n, v, i; > + > + v = 0; > + for (n = 0; n <= 100; n++) { > + for (i = 0; i < s; i++) > + if (p2[i] == n) > + p1[i] = v; > + v += 88; > + } > +} > + > +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c > index 12a8a49a307..92767a09b6c 100644 > --- a/gcc/tree-ssa-loop-ivopts.c > +++ b/gcc/tree-ssa-loop-ivopts.c > @@ -5657,6 +5657,56 @@ relate_compare_use_with_all_cands (struct ivopts_data > *data) > } > } > > +/* If PREFERRED_MODE is suitable and profitable, use the preferred > + PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */ > + > +static tree > +compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter, > + const widest_int &iterations_max) > +{ > + tree ntype = TREE_TYPE (niter); > + tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
It's unlikely but type_for_mode can return NULL, so you shouldn't assert but fall back to using the original type. The assert for TYPE_UNSIGNED is superfluous - you asked for an unsigned type already. Otherwise the IVOPTs changes and the new target hook look OK, please address Seghers comments though. Thanks, Richard. > + gcc_assert (pref_type && TYPE_UNSIGNED (ntype)); > + > + int prec = TYPE_PRECISION (ntype); > + int pref_prec = TYPE_PRECISION (pref_type); > + > + tree base; > + > + /* Check if the PREFERRED_MODED is able to present niter. */ > + if (pref_prec > prec > + || wi::ltu_p (iterations_max, > + widest_int::from (wi::max_value (pref_prec, UNSIGNED), > + UNSIGNED))) > + { > + /* No wrap, it is safe to use preferred type after niter + 1. */ > + if (wi::ltu_p (iterations_max, > + widest_int::from (wi::max_value (prec, UNSIGNED), > + UNSIGNED))) > + { > + /* This could help to optimize "-1 +1" pair when niter looks > + like "n-1": n is in original mode. "base = (n - 1) + 1" > + in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */ > + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), > + build_int_cst (ntype, 1)); > + base = fold_convert (pref_type, base); > + } > + > + /* To avoid wrap, need fold niter to preferred type before plus 1. */ > + else > + { > + niter = fold_convert (pref_type, niter); > + base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter), > + build_int_cst (pref_type, 1)); > + } > + } > + else > + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), > + build_int_cst (ntype, 1)); > + return base; > +} > + > /* Add one doloop dedicated IV candidate: > - Base is (may_be_zero ? 1 : (niter + 1)). > - Step is -1. */ > @@ -5688,8 +5738,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data) > return; > } > > - tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), > - build_int_cst (ntype, 1)); > + machine_mode mode = TYPE_MODE (ntype); > + machine_mode pref_mode = targetm.preferred_doloop_mode (mode); > + > + tree base; > + if (mode != pref_mode) > + { > + base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max); > + ntype = TREE_TYPE (base); > + } > + else > + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), > + build_int_cst (ntype, 1)); > + > + > add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, > true); > } > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)