The following patch fixes the case where unrolling in the absence of profile 
information can cause a loop to no longer look hot and therefor not get 
aligned. In this case, instead of dividing by the unroll factor we now just 
scale by profile_probability::likely (). The diff looks worse than what really 
changed, just the addition of an if test and putting the existing 'scalemain' 
setting code into the else leg. Bootstrap/regtest on powerpc64le with no new 
regressions. I also ran a CPU2006 comparison, there were 4 benchmarks that 
improved 2-3% with the others in the noise range.

Ok for trunk?

-Pat



2018-11-28  Pat Haugen  <pthau...@us.ibm.com>

        PR rtl-optimization/68212
        * cfgloopmanip.c (duplicate_loop_to_header_edge): Adjust scale factor.

testsuite/ChangeLog:
2018-11-28  Pat Haugen  <pthau...@us.ibm.com>

        PR rtl-optimization/68212
        * gcc.dg/pr68212.c: New test.



Index: gcc/cfgloopmanip.c
===================================================================
--- gcc/cfgloopmanip.c  (revision 266522)
+++ gcc/cfgloopmanip.c  (working copy)
@@ -1242,16 +1242,25 @@ duplicate_loop_to_header_edge (struct lo
          profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
                                                        ? prob_pass_wont_exit
                                                        : prob_pass_thru;
-         profile_probability p = prob_pass_main;
-         profile_count scale_main_den = count_in;
-         for (i = 0; i < ndupl; i++)
+
+         /* If not using real profile data then don't scale the loop by ndupl.
+            This can lead to the loop no longer looking hot wrt surrounding
+            code.  */
+         if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
+           scale_main = profile_probability::likely ();
+         else
            {
-             scale_main_den += count_in.apply_probability (p);
-             p = p * scale_step[i];
+             profile_probability p = prob_pass_main;
+             profile_count scale_main_den = count_in;
+             for (i = 0; i < ndupl; i++)
+               {
+                 scale_main_den += count_in.apply_probability (p);
+                 p = p * scale_step[i];
+               }
+             /* If original loop is executed COUNT_IN times, the unrolled
+                loop will account SCALE_MAIN_DEN times.  */
+             scale_main = count_in.probability_in (scale_main_den);
            }
-         /* If original loop is executed COUNT_IN times, the unrolled
-            loop will account SCALE_MAIN_DEN times.  */
-         scale_main = count_in.probability_in (scale_main_den);
          scale_act = scale_main * prob_pass_main;
        }
       else
Index: gcc/testsuite/gcc.dg/pr68212.c
===================================================================
--- gcc/testsuite/gcc.dg/pr68212.c      (nonexistent)
+++ gcc/testsuite/gcc.dg/pr68212.c      (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
max-unroll-times=4 -fdump-rtl-alignments" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
"alignments"} } */

Reply via email to