Jiufu Guo <guoji...@linux.ibm.com> writes:
> Segher Boessenkool <seg...@kernel.crashing.org> writes: > >> Hi! >> >> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote: >>> In this patch, loop unroll adjust hook is introduced for powerpc. We can do >>> target related hueristic adjustment in this hook. In this patch, small loops >>> is unrolled 2 times for O2 and O3 by default. With this patch, we can see >>> some improvement for spec2017. This patch enhanced a little for [Patch V2] >>> to >>> enable small loops unroll for O3 by default like O2. >> >>> * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag. >> >> That's the declaration of a variable. A command line flag is something >> like -munroll-small-loops. Do we want a command line option like that? >> It makes testing simpler. > Thanks for great sugguestion, will update patch to add a command line > option. > >> >>> - /* unroll very small loops 2 time if no -funroll-loops. */ >>> + /* If funroll-loops is not enabled explicitly, then enable small >>> loops >>> + unrolling for -O2, and do not turn fweb or frename-registers on. */ >>> if (!global_options_set.x_flag_unroll_loops >>> && !global_options_set.x_flag_unroll_all_loops) >>> { >>> - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, >>> - global_options.x_param_values, >>> - global_options_set.x_param_values); >>> - >>> - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, >>> - global_options.x_param_values, >>> - global_options_set.x_param_values); >>> + unroll_small_loops = optimize >= 2 ? 1 : 0; >> >> That includes -Os? >> >> I think you shouldn't always set it to some value, only enable it where >> you want to enable it. If you make a command line option for it this is >> especially simple (the table in common/config/rs6000/rs6000-common.c). > Thanks again, update rs6000_option_optimization_table as : > { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, > { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fweb, NULL, 0 }, > { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_frename_registers, NULL, 0 }, Sorry, typo, in patch, above 2 lines are not there. Because they do not turn off flag_web and flag_ename_registers. > { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 }, > > While, I still keep the code to disable unroll_small_loops for explicit > -funroll-loops via checking global_options_set.x_flag_unroll_loops. >> >>> +static unsigned >>> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop) >>> +{ >>> + if (unroll_small_loops) >>> + { >>> + /* TODO: This is hardcoded to 10 right now. It can be refined, for >>> + example we may want to unroll very small loops more times (4 perhaps). >>> + We also should use a PARAM for this. */ >>> + if (loop->ninsns <= 10) >>> + return MIN (2, nunroll); >>> + else >>> + return 0; >>> + } >> >> (Add an empty line here?) > Thanks again, updated accordingly. >> >>> + return nunroll; >>> +} >> >> Great :-) >> >>> @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct >>> cl_target_option *ptr, >>> { >>> ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; >>> ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; >>> + ptr->x_unroll_small_loops = opts->x_unroll_small_loops; >>> } >> >> Yeah we shouldn't need to add that, this should all be automatic. > Yes, through adding new option in .opt, this is handled automaticly. > > Updated patch is at the end of this mail. Thanks for review. > > Jiufu >> >> >> Segher > > Updated patch: > > Index: gcc/common/config/rs6000/rs6000-common.c > =================================================================== > --- gcc/common/config/rs6000/rs6000-common.c (revision 277765) > +++ gcc/common/config/rs6000/rs6000-common.c (working copy) > @@ -35,7 +35,9 @@ static const struct default_options rs6000_option_ > { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, > /* Enable -fsched-pressure for first pass instruction scheduling. */ > { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, > - { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 }, > + /* Enable -funroll-loops with -munroll-small-loops. */ > + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, > + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 }, > { OPT_LEVELS_NONE, 0, NULL, 0 } > }; > > Index: gcc/config/rs6000/rs6000.c > =================================================================== > --- gcc/config/rs6000/rs6000.c (revision 277765) > +++ gcc/config/rs6000/rs6000.c (working copy) > @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut > #undef TARGET_VECTORIZE_DESTROY_COST_DATA > #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data > > +#undef TARGET_LOOP_UNROLL_ADJUST > +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust > + > #undef TARGET_INIT_BUILTINS > #define TARGET_INIT_BUILTINS rs6000_init_builtins > #undef TARGET_BUILTIN_DECL > @@ -4540,24 +4543,21 @@ rs6000_option_override_internal (bool global_init_ > global_options.x_param_values, > global_options_set.x_param_values); > > - /* unroll very small loops 2 time if no -funroll-loops. */ > - if (!global_options_set.x_flag_unroll_loops > - && !global_options_set.x_flag_unroll_all_loops) > + /* Explicit funroll-loops turns -munroll-small-loops off. > + Implicit funroll-loops does not turn fweb or frename-registers on. */ > + if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops) > + || (global_options_set.x_flag_unroll_all_loops > + && flag_unroll_all_loops)) > { > - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, > - global_options.x_param_values, > - global_options_set.x_param_values); > - > - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, > - global_options.x_param_values, > - global_options_set.x_param_values); > - > - /* If fweb or frename-registers are not specificed in command-line, > - do not turn them on implicitly. */ > + if (!global_options_set.x_unroll_small_loops) > + unroll_small_loops = 0; > + } > + else > + { > if (!global_options_set.x_flag_web) > - global_options.x_flag_web = 0; > + flag_web = 0; > if (!global_options_set.x_flag_rename_registers) > - global_options.x_flag_rename_registers = 0; > + flag_rename_registers = 0; > } > > /* If using typedef char *va_list, signal that > @@ -5101,6 +5101,25 @@ rs6000_destroy_cost_data (void *data) > free (data); > } > > +/* Implement targetm.loop_unroll_adjust. */ > + > +static unsigned > +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop) > +{ > + if (unroll_small_loops) > + { > + /* TODO: This is hardcoded to 10 right now. It can be refined, for > + example we may want to unroll very small loops more times (4 perhaps). > + We also should use a PARAM for this. */ > + if (loop->ninsns <= 10) > + return MIN (2, nunroll); > + else > + return 0; > + } > + > + return nunroll; > +} > + > /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a > library with vectorized intrinsics. */ > > Index: gcc/config/rs6000/rs6000.opt > =================================================================== > --- gcc/config/rs6000/rs6000.opt (revision 277765) > +++ gcc/config/rs6000/rs6000.opt (working copy) > @@ -501,6 +501,10 @@ moptimize-swaps > Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save > Analyze and remove doubleword swaps from VSX computations. > > +munroll-small-loops > +Target Undocumented Var(unroll_small_loops) Init(0) Save > +Use conservative small loop unrolling. > + > mpower9-misc > Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags) > Use certain scalar instructions added in ISA 3.0. > Index: gcc/testsuite/gcc.dg/pr59643.c > =================================================================== > --- gcc/testsuite/gcc.dg/pr59643.c (revision 277765) > +++ gcc/testsuite/gcc.dg/pr59643.c (working copy) > @@ -1,9 +1,6 @@ > /* PR tree-optimization/59643 */ > /* { dg-do compile } */ > /* { dg-options "-O3 -fdump-tree-pcom-details" } */ > -/* { dg-additional-options "--param max-unrolled-insns=400" { target { > powerpc*-*-* } } } */ > -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the > - loop of this case. */ > > void > foo (double *a, double *b, double *c, double d, double e, int n)