Hi,

In this patch, a loop unroll adjust hook is introduced for powerpc.  We can do
target-related heuristic adjustment in this hook.  In this patch, small loops
are unrolled 2 times for -O2 and -O3 by default.  With this patch, we can see
some improvement for spec2017.  This patch slightly enhances [Patch V2] by
enabling small loop unrolling by default for -O3 as well as -O2.

Bootstrapped and regtested on powerpc64le.  Is this ok for trunk?

Jiufu
BR.

gcc/
2019-11-04  Jiufu Guo  <guoji...@linux.ibm.com>     

        PR tree-optimization/88760
        * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
        code which changes PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
        (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
        (rs6000_loop_unroll_adjust): New hook for loop unroll adjust.
        Unroll small loops 2 times for -O2 and -O3.
        (rs6000_function_specific_save): Save unroll_small_loops flag.
        (rs6000_function_specific_restore): Restore unroll_small_loops flag.
        * config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.

        
gcc/testsuite/
2019-11-04  Jiufu Guo  <guoji...@linux.ibm.com>

        PR tree-optimization/88760
        * gcc.dg/pr59643.c: Update back to r277550.

---
 gcc/config/rs6000/rs6000.c     | 38 ++++++++++++++++++++++++++++----------
 gcc/config/rs6000/rs6000.opt   |  7 +++++++
 gcc/testsuite/gcc.dg/pr59643.c |  3 ---
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9ed5151..5e1a75d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,25 +4543,20 @@ rs6000_option_override_internal (bool global_init_p)
                             global_options.x_param_values,
                             global_options_set.x_param_values);
 
-      /* unroll very small loops 2 time if no -funroll-loops.  */
+      /* If funroll-loops is not enabled explicitly, then enable small loops
+        unrolling for -O2, and do not turn fweb or frename-registers on.  */
       if (!global_options_set.x_flag_unroll_loops
          && !global_options_set.x_flag_unroll_all_loops)
        {
-         maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
-                                global_options.x_param_values,
-                                global_options_set.x_param_values);
-
-         maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
-                                global_options.x_param_values,
-                                global_options_set.x_param_values);
+         unroll_small_loops = optimize >= 2 ? 1 : 0;
 
-         /* If fweb or frename-registers are not specificed in command-line,
-            do not turn them on implicitly.  */
          if (!global_options_set.x_flag_web)
            global_options.x_flag_web = 0;
          if (!global_options_set.x_flag_rename_registers)
            global_options.x_flag_rename_registers = 0;
        }
+      else
+       unroll_small_loops = 0;
 
       /* If using typedef char *va_list, signal that
         __builtin_va_start (&ap, 0) can be optimized to
@@ -5101,6 +5099,24 @@ rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/*  Implement targetm.loop_unroll_adjust.  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
+{
+  if (unroll_small_loops)
+    {
+      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+        example we may want to unroll very small loops more times (4 perhaps).
+        We also should use a PARAM for this.  */
+      if (loop->ninsns <= 10)
+       return MIN (2, nunroll);
+      else
+       return 0;
+    }
+  return nunroll;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
@@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct cl_target_option *ptr,
 {
   ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
   ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
+  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
 }
 
 /* Restore the current options */
@@ -23483,6 +23500,7 @@ rs6000_function_specific_restore (struct gcc_options *opts,
 {
   opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
   opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
+  opts->x_unroll_small_loops = ptr->x_unroll_small_loops;
   (void) rs6000_option_override_internal (false);
 }
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1f37a92..9cd5b4e 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -96,6 +96,13 @@ enum rs6000_cmodel rs6000_current_cmodel = CMODEL_SMALL
 TargetVariable
 unsigned int rs6000_recip_control
 
+;; Whether to unroll small loops only
+Variable
+unsigned char unroll_small_loops
+
+TargetSave
+unsigned char x_unroll_small_loops
+
 ;; Mask of what builtin functions are allowed
 TargetVariable
 HOST_WIDE_INT rs6000_builtin_mask
diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c
index 4446f6e..de78d60 100644
--- a/gcc/testsuite/gcc.dg/pr59643.c
+++ b/gcc/testsuite/gcc.dg/pr59643.c
@@ -1,9 +1,6 @@
 /* PR tree-optimization/59643 */
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-tree-pcom-details" } */
-/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
-/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
-   loop of this case.  */
 
 void
 foo (double *a, double *b, double *c, double d, double e, int n)
-- 
2.7.4

Reply via email to