On Wed, May 20, 2020 at 11:08 AM Jiufu Guo <guoji...@linux.ibm.com> wrote: > > Richard Biener <richard.guent...@gmail.com> writes: > > > On Wed, May 20, 2020 at 5:56 AM Jiufu Guo via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > >> > >> Hi, > >> > >> In r10-4525, and r10-4161, loop unroller was enabled for simply loops at > >> -O2. > >> At the same time, the GIMPLE cunroll is also enabled, while it is not only > >> for > >> simple loops. This patch introduces a hook to check if a loop is suitable > >> to > >> unroll completely. The hook can be used to check if a loop is simply > >> enough > >> for complete unroll, then avoid negative effects like size increasing or > >> complex loop unrolling at -O2. > >> > >> This patch could help to handle the code in PR95018 which contains complex > >> loop, and does not cause obvious performance change on SPEC2017. > >> > >> Bootstrap/regtest pass on powerpc64le. OK for trunk? > > > > I fear you're just digging the rs6000 specific hole you dug up deeper > > with this ... > > I'm also thinking to add a check to cunroll directly without digging to > platforms. As patch below. This may not harm for platforms. > Is this acceptable? > > Thanks, > Jiufu > > diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c > index 8ab6ab3330c..67335b8a911 100644 > --- a/gcc/tree-ssa-loop-ivcanon.c > +++ b/gcc/tree-ssa-loop-ivcanon.c > @@ -1231,6 +1231,11 @@ canonicalize_loop_induction_variables (class loop > *loop, > /* Force re-computation of loop bounds so we can remove redundant exits. > */ > maxiter = max_loop_iterations_int (loop); > > + /* For optimize level 2, unroll and peel only for simple loops. */ > + if (allow_peel && optimize == 2 && maxiter >= 4 > + && !(TREE_CODE (niter) == INTEGER_CST && exit)) > + return false; > +
I think this is the wrong way to approach this. You're doing too many things at once. Try to fix the powerpc regression with the extra flag_rtl_unroll_loops, that could be backported. Then you can independently see whether enabling more unrolling at -O2 makes sense. Because currently we _do_ unroll at -O2 when it does not increase size. Its just your patches make this as aggressive as -O3. Richard. > if (dump_file && (dump_flags & TDF_DETAILS) > && TREE_CODE (niter) == INTEGER_CST) > { > > > > > > Richard. > > > >> Jiufu > >> > >> 2020-05-20 Jiufu Guo <guoji...@cn.ibm.com> > >> > >> PR target/95018 > >> * target.def (loop_allow_unroll_completely_peel): New hook. > >> * doc/tm.texi (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook. > >> * doc/tm.texi.in (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New > >> hook. > >> * config/rs6000/rs6000.c > >> (TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL): New hook. > >> (rs6000_loop_allow_unroll_completely_peel): New hook > >> implementation. > >> * tree-ssa-loop-ivcanon.c (canonicalize_loop_induction_variables): > >> Call > >> hook loop_allow_unroll_completely_peel. > >> > >> > >> --- > >> gcc/config/rs6000/rs6000.c | 19 +++++++++++++++++++ > >> gcc/doc/tm.texi | 9 +++++++++ > >> gcc/doc/tm.texi.in | 2 ++ > >> gcc/target.def | 12 ++++++++++++ > >> gcc/tree-ssa-loop-ivcanon.c | 5 +++++ > >> 5 files changed, 47 insertions(+) > >> > >> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > >> index 8435bc15d72..a1a3f9cb583 100644 > >> --- a/gcc/config/rs6000/rs6000.c > >> +++ b/gcc/config/rs6000/rs6000.c > >> @@ -1433,6 +1433,9 @@ static const struct attribute_spec > >> rs6000_attribute_table[] = > >> #undef TARGET_LOOP_UNROLL_ADJUST > >> #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust > >> > >> +#undef TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL > >> +#define TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL > >> rs6000_loop_allow_unroll_completely_peel > >> + > >> #undef TARGET_INIT_BUILTINS > >> #define TARGET_INIT_BUILTINS rs6000_init_builtins > >> #undef TARGET_BUILTIN_DECL > >> @@ -5139,6 +5142,22 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct > >> loop *loop) > >> return nunroll; > >> } > >> > >> +/* Implement targetm.loop_allow_unroll_completely_peel. */ > >> + > >> +static bool > >> +rs6000_loop_allow_unroll_completely_peel (HOST_WIDE_INT maxiter, tree > >> niter, > >> + struct loop *loop) > >> +{ > >> + if (unroll_only_small_loops && optimize == 2) > >> + { > >> + if (maxiter >= 4 > >> + && !(TREE_CODE (niter) == INTEGER_CST && single_exit (loop))) > >> + return false; > >> + } > >> + > >> + return true; > >> +} > >> + > >> /* Handler for the Mathematical Acceleration Subsystem (mass) interface > >> to a > >> library with vectorized intrinsics. */ > >> > >> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi > >> index 6e7d9dc54a9..f7419872c2f 100644 > >> --- a/gcc/doc/tm.texi > >> +++ b/gcc/doc/tm.texi > >> @@ -11893,6 +11893,15 @@ is required only when the target has special > >> constraints like maximum > >> number of memory accesses. > >> @end deftypefn > >> > >> +@deftypefn {Target Hook} bool TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL > >> (HOST_WIDE_INT @var{maxiter}, tree @var{niter}, class loop *@var{loop}) > >> +This target hook check if the loop @var{loop} is suitable to unroll > >> +completely on the target. The parameter @var{maxiter} is the possible max > >> +bound of iterations. The parameter @var{niter} is the number expression of > >> +iterations the loop is executed. The parameter @var{loop} is a pointer to > >> +the loop. This target hook is required only when the target has special > >> +constraints. > >> +@end deftypefn > >> + > >> @defmac POWI_MAX_MULTS > >> If defined, this macro is interpreted as a signed integer C expression > >> that specifies the maximum number of floating point multiplications > >> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in > >> index 3be984bbd5c..4ae079a650c 100644 > >> --- a/gcc/doc/tm.texi.in > >> +++ b/gcc/doc/tm.texi.in > >> @@ -8010,6 +8010,8 @@ lists. > >> > >> @hook TARGET_LOOP_UNROLL_ADJUST > >> > >> +@hook TARGET_LOOP_ALLOW_UNROLL_COMPLETELY_PEEL > >> + > >> @defmac POWI_MAX_MULTS > >> If defined, this macro is interpreted as a signed integer C expression > >> that specifies the maximum number of floating point multiplications > >> diff --git a/gcc/target.def b/gcc/target.def > >> index 07059a87caf..3842ba611a2 100644 > >> --- a/gcc/target.def > >> +++ b/gcc/target.def > >> @@ -2709,6 +2709,18 @@ number of memory accesses.", > >> unsigned, (unsigned nunroll, class loop *loop), > >> NULL) > >> > >> +/* Return if a loop is suitable for complete unroll. */ > >> +DEFHOOK > >> +(loop_allow_unroll_completely_peel, > >> + "This target hook check if the loop @var{loop} is suitable to unroll\n\ > >> +completely on the target. The parameter @var{maxiter} is the possible > >> max\n\ > >> +bound of iterations. The parameter @var{niter} is the number expression > >> of\n\ > >> +iterations the loop is executed. The parameter @var{loop} is a pointer > >> to\n\ > >> +the loop. This target hook is required only when the target has special\n\ > >> +constraints.", > >> + bool, (HOST_WIDE_INT maxiter, tree niter, class loop *loop), > >> + NULL) > >> + > >> /* True if X is a legitimate MODE-mode immediate operand. */ > >> DEFHOOK > >> (legitimate_constant_p, > >> diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c > >> index 8ab6ab3330c..52d45a90a56 100644 > >> --- a/gcc/tree-ssa-loop-ivcanon.c > >> +++ b/gcc/tree-ssa-loop-ivcanon.c > >> @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see > >> #include "system.h" > >> #include "coretypes.h" > >> #include "backend.h" > >> +#include "target.h" > >> #include "tree.h" > >> #include "gimple.h" > >> #include "cfghooks.h" > >> @@ -1231,6 +1232,10 @@ canonicalize_loop_induction_variables (class loop > >> *loop, > >> /* Force re-computation of loop bounds so we can remove redundant > >> exits. */ > >> maxiter = max_loop_iterations_int (loop); > >> > >> + if (allow_peel && targetm.loop_allow_unroll_completely_peel > >> + && !targetm.loop_allow_unroll_completely_peel (maxiter, niter, > >> loop)) > >> + return false; > >> + > >> if (dump_file && (dump_flags & TDF_DETAILS) > >> && TREE_CODE (niter) == INTEGER_CST) > >> { > >> -- > >> 2.17.1 > >>