Hi, In this patch, a loop unroll adjust hook is introduced for powerpc. We can do target-related heuristic adjustment in this hook. In this patch, small loops are unrolled 2 times for -O2 and -O3 by default. With this patch, we can see some improvement for SPEC2017. This patch slightly enhances [Patch V2] to enable small-loop unrolling for -O3 by default, like -O2.
Bootstrapped and regtested on powerpc64le. Is this ok for trunk? Jiufu BR. gcc/ 2019-11-04 Jiufu Guo <guoji...@linux.ibm.com> PR tree-optimization/88760 * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove code which changes PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS. (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook. (rs6000_loop_unroll_adjust): New hook for loop unroll adjust. Unrolling small loop 2 times for -O2 and -O3. (rs6000_function_specific_save): Save unroll_small_loops flag. (rs6000_function_specific_restore): Restore unroll_small_loops flag. * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag. gcc.testsuite/ 2019-11-04 Jiufu Guo <guoji...@linux.ibm.com> PR tree-optimization/88760 * gcc.dg/pr59643.c: Update back to r277550. --- gcc/config/rs6000/rs6000.c | 38 ++++++++++++++++++++++++++++---------- gcc/config/rs6000/rs6000.opt | 7 +++++++ gcc/testsuite/gcc.dg/pr59643.c | 3 --- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 9ed5151..5e1a75d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_VECTORIZE_DESTROY_COST_DATA #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins #undef TARGET_BUILTIN_DECL @@ -4540,25 +4543,20 @@ rs6000_option_override_internal (bool global_init_p) global_options.x_param_values, global_options_set.x_param_values); - /* unroll very small loops 2 time if no -funroll-loops. */ + /* If funroll-loops is not enabled explicitly, then enable small loops + unrolling for -O2, and do not turn fweb or frename-registers on. 
*/ if (!global_options_set.x_flag_unroll_loops && !global_options_set.x_flag_unroll_all_loops) { - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, - global_options.x_param_values, - global_options_set.x_param_values); - - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, - global_options.x_param_values, - global_options_set.x_param_values); + unroll_small_loops = optimize >= 2 ? 1 : 0; - /* If fweb or frename-registers are not specificed in command-line, - do not turn them on implicitly. */ if (!global_options_set.x_flag_web) global_options.x_flag_web = 0; if (!global_options_set.x_flag_rename_registers) global_options.x_flag_rename_registers = 0; } + else + unroll_small_loops = 0; /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) can be optimized to @@ -5101,6 +5099,24 @@ rs6000_destroy_cost_data (void *data) free (data); } +/* Implement targetm.loop_unroll_adjust. */ + +static unsigned +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop) +{ + if (unroll_small_loops) + { + /* TODO: This is hardcoded to 10 right now. It can be refined, for + example we may want to unroll very small loops more times (4 perhaps). + We also should use a PARAM for this. */ + if (loop->ninsns <= 10) + return MIN (2, nunroll); + else + return 0; + } + return nunroll; +} + /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a library with vectorized intrinsics. 
*/ @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct cl_target_option *ptr, { ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; + ptr->x_unroll_small_loops = opts->x_unroll_small_loops; } /* Restore the current options */ @@ -23483,6 +23500,7 @@ rs6000_function_specific_restore (struct gcc_options *opts, { opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; + opts->x_unroll_small_loops = ptr->x_unroll_small_loops; (void) rs6000_option_override_internal (false); } diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 1f37a92..9cd5b4e 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -96,6 +96,13 @@ enum rs6000_cmodel rs6000_current_cmodel = CMODEL_SMALL TargetVariable unsigned int rs6000_recip_control +;; Whether to unroll small loops only +Variable +unsigned char unroll_small_loops + +TargetSave +unsigned char x_unroll_small_loops + ;; Mask of what builtin functions are allowed TargetVariable HOST_WIDE_INT rs6000_builtin_mask diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c index 4446f6e..de78d60 100644 --- a/gcc/testsuite/gcc.dg/pr59643.c +++ b/gcc/testsuite/gcc.dg/pr59643.c @@ -1,9 +1,6 @@ /* PR tree-optimization/59643 */ /* { dg-do compile } */ /* { dg-options "-O3 -fdump-tree-pcom-details" } */ -/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */ -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the - loop of this case. */ void foo (double *a, double *b, double *c, double d, double e, int n) -- 2.7.4