Hi,

    Currently, the tree unrolling pass (cunroll) does not allow any code
size growth in O2 mode.  Code size growth is permitted only if O3 or
-funroll-loops/-fpeel-loops is used.  I have created a patch to allow
partial code size increase in O2 mode.  With -funroll-loops the maximum
allowed code growth is 100 unrolled insns.  For partial growth, I
experimented with various values of code growth and I have attached
SPEC 2006 performance numbers for code growth from 20 to 100 insns in
steps of 20.

   For this patch, I have set the partial code growth in O2 mode to be
40 insns (tunable via the max-default-unroll-insns param), where we get
performance improvements with minimal code size growth.  The perf. data
shows good improvements in a few benchmarks: h264ref, sjeng and bzip2
get >2% improvement.  calculix shows a big regression (4.5% on
Westmere), which I am investigating along with the povray regression.

   I also ran experiments with -ftree-vectorize turned on with -O2
both in baseline and with the partial unroll to study the effect of
unrolling on vectorization. Loop unrolling seems to benefit more
benchmarks when vectorization is turned on.

   I have attached the patch and pdfs of the perf. data and code size growth.

How to read the attached perf data:

There are two data files.

* spec_perf_O2_unroll.txt contains perf data using unrolling with
various code size growth on O2.
* spec_perf_O2_vectorize_unroll.txt contains perf data using
unrolling with various code size growth on O2 + -ftree-vectorize.

Each file contains perf. improvements and code size growth data.
Experiments were done on Ibis-sandybridge and Ikaria-westmere.

Here is a sample from the file (All perf. numbers are in %):

Unroll insns code growth           20      40     60       80        100
_____________________________________________________
spec/2006/fp/C++/444.namd     -3.2   -0.13   -0.4    -0.57      -0.31

This data shows that namd regressed by 3.2% over baseline when code
size growth was set to 20 insns and regressed by 0.57% over baseline
when growth was 80 insns.

   Please let me know what you think.

Thanks
Sri
        * tree-ssa-loop-ivcanon.c (unroll_level): New enum value UL_PARTIAL.
        (increase_code_size): New enum.
        (try_unroll_loop_completely): Check if max unrolled insns is less than
        the partial growth value when partial growth is set.
        (tree_unroll_loops_completely_1): Change type of may_increase_size.
        Set growth to partial when desired.
        (tree_unroll_loops_completely): Set code growth to partial in O2 mode.
        (tree_complete_unroll_inner): Rewrite code growth block to use enum.
        * params.def (PARAM_MAX_DEFAULT_UNROLL_INSNS): New param.
        


Index: params.def
===================================================================
--- params.def  (revision 205058)
+++ params.def  (working copy)
@@ -304,6 +304,11 @@ DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
        "max-completely-peeled-insns",
        "The maximum number of insns of a completely peeled loop",
        100, 0, 0)
+/* The maximum number of insns in a peeled loop for default unrolling.  */
+DEFPARAM(PARAM_MAX_DEFAULT_UNROLL_INSNS,
+       "max-default-unroll-insns",
+       "The maximum number of insns for the default tree unrolling",
+       40, 0, 0)
 /* The maximum number of peelings of a single loop that is peeled completely.  
*/
 DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
        "max-completely-peel-times",
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c     (revision 205058)
+++ tree-ssa-loop-ivcanon.c     (working copy)
@@ -71,9 +71,18 @@ enum unroll_level
                           iteration.  */
   UL_NO_GROWTH,                /* Only loops whose unrolling will not cause 
increase
                           of code size.  */
+  UL_PARTIAL,          /* All suitable loops whose unrolling will not
+                          increase code size by more than 50% of UL_ALL.  */
   UL_ALL               /* All suitable loops.  */
 };
 
+typedef enum _increase_code_size
+{
+  UNROLL_NO_INCREASE = 0,
+  UNROLL_PARTIAL_INCREASE = 1,
+  UNROLL_FULL_INCREASE = 2
+} increase_code_size;
+
 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
    is the exit edge whose condition is replaced.  */
 
@@ -651,6 +660,7 @@ try_unroll_loop_completely (struct loop *loop,
                            location_t locus)
 {
   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+  unsigned HOST_WIDE_INT max_unroll_insns;
   gimple cond;
   struct loop_size size;
   bool n_unroll_found = false;
@@ -696,6 +706,10 @@ try_unroll_loop_completely (struct loop *loop,
     return false;
 
   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+  max_unroll_insns = (ul != UL_PARTIAL) ?
+                    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) :
+                    PARAM_VALUE (PARAM_MAX_DEFAULT_UNROLL_INSNS);
+
   if (n_unroll > max_unroll)
     return false;
 
@@ -805,8 +819,7 @@ try_unroll_loop_completely (struct loop *loop,
                     loop->num);
          return false;
        }
-      else if (unr_insns
-              > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+      else if (unr_insns > max_unroll_insns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Not unrolling loop %d: "
@@ -1100,7 +1113,8 @@ propagate_constants_for_unrolling (basic_block bb)
    loop we unrolled.  */
 
 static bool
-tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
+tree_unroll_loops_completely_1 (increase_code_size may_increase_size,
+                               bool unroll_outer,
                                vec<loop_p, va_heap>& father_stack,
                                struct loop *loop)
 {
@@ -1135,7 +1149,7 @@ static bool
       /* Unroll outermost loops only if asked to do so or they do
         not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
-    ul = UL_ALL;
+    ul = (may_increase_size == UNROLL_PARTIAL_INCREASE) ? UL_PARTIAL : UL_ALL;
   else
     ul = UL_NO_GROWTH;
 
@@ -1163,7 +1177,8 @@ static bool
    size of the code does not increase.  */
 
 unsigned int
-tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
+tree_unroll_loops_completely (increase_code_size may_increase_size,
+                             bool unroll_outer)
 {
   stack_vec<loop_p, 16> father_stack;
   bool changed;
@@ -1308,12 +1323,19 @@ make_pass_iv_canon (gcc::context *ctxt)
 static unsigned int
 tree_complete_unroll (void)
 {
+  increase_code_size code_size;
+
   if (number_of_loops (cfun) <= 1)
     return 0;
 
-  return tree_unroll_loops_completely (flag_unroll_loops
-                                      || flag_peel_loops
-                                      || optimize >= 3, true);
+  if (flag_unroll_loops || flag_peel_loops || (optimize >= 3))
+    code_size = UNROLL_FULL_INCREASE;
+  else if (optimize == 2)
+    code_size = UNROLL_PARTIAL_INCREASE;
+  else
+    code_size = UNROLL_NO_INCREASE;
+
+  return tree_unroll_loops_completely (code_size, true);
 }
 
 static bool
@@ -1366,13 +1388,20 @@ static unsigned int
 tree_complete_unroll_inner (void)
 {
   unsigned ret = 0;
+  increase_code_size code_size;
 
   loop_optimizer_init (LOOPS_NORMAL
                       | LOOPS_HAVE_RECORDED_EXITS);
   if (number_of_loops (cfun) > 1)
     {
       scev_initialize ();
-      ret = tree_unroll_loops_completely (optimize >= 3, false);
+
+      if (optimize >= 3)
+       code_size = UNROLL_FULL_INCREASE;
+      else
+       code_size = UNROLL_NO_INCREASE;
+
+      ret = tree_unroll_loops_completely (code_size, false);
       free_numbers_of_iterations_estimates ();
       scev_finalize ();
     }
O2 mode, all numbers in %, positive is good and negative is bad.
IBIS SANDYBRIDGE
Unroll insns code growth                 20        40      60       80      100
spec/2006/fp/C++/444.namd              0.18      0.26    0.04      0.44     0.31
spec/2006/fp/C++/450.soplex            0.57      0.29    -0.38    -0.62    -0.19
spec/2006/fp/C++/453.povray            -0.52    -2.04    0.19      -0.5     1.52
spec/2006/fp/C-F/436.cactusADM         -0.18     0.58    0.04      0.44     -0.4
spec/2006/fp/C-F/454.calculix           -4.3    -2.49    -1.89    -1.13    -0.15
spec/2006/fp/C/433.milc                -0.38      0.8    0.51      3.07     2.78
spec/2006/fp/C/470.lbm                 -0.25     0.46    0.46      0.18    -0.13
spec/2006/fp/C/482.sphinx3             -0.81    -1.02    -1.02    -0.74     -0.4
spec/2006/fp/F/410.bwaves              -3.59    -0.21    0.21     -0.36    -0.32
spec/2006/fp/F/434.zeusmp              -0.13     0.04    0.18      0.36     0.13
spec/2006/int/C++/473.astar            -0.75    -0.47    -0.37    -0.14    -0.42
spec/2006/int/C/400.perlbench          -0.52    -0.52     -0.2     0.87    -0.61
spec/2006/int/C/401.bzip2              1.13      2.44    1.57      1.18     1.13
spec/2006/int/C/403.gcc                -0.31     0.15    0.03       0.4    -0.12
spec/2006/int/C/429.mcf                -0.87    -0.07    -0.28    -0.05    -0.45
spec/2006/int/C/445.gobmk               -0.3     0.42    1.63      0.42     1.21
spec/2006/int/C/456.hmmer              0.46         0     0.3      0.17     0.21
spec/2006/int/C/458.sjeng              0.68      2.04    0.83       1.4     1.77
spec/2006/int/C/462.libquantum           0.1    -0.15    0.94     -0.58    -0.52
spec/2006/int/C/464.h264ref            -0.06     2.17    2.47      2.97    -0.41


geometric mean                          -0.5     0.13    0.26      0.38     0.24



IKARIA WESTMERE
Unroll insns code growth                 20        40      60       80      100
spec/2006/fp/C++/444.namd              0.13      0.19    0.13      0.19     0.19
spec/2006/fp/C++/450.soplex            1.35      0.96    -0.99     0.07     1.81
spec/2006/fp/C++/453.povray            0.36        -1    -1.18    -1.04     0.18
spec/2006/fp/C-F/436.cactusADM         0.64      0.18    0.37        0      0.37
spec/2006/fp/C-F/454.calculix           -9.5     -4.5     -3.8     -2.6     -1.4
spec/2006/fp/C/433.milc                0.72      1.13    1.95      4.68     4.73
spec/2006/fp/C/470.lbm                 0.06      0.09    0.03      0.23        0
spec/2006/fp/C/482.sphinx3              -1.5    -0.72    -0.07     -1.6     -1.5
spec/2006/fp/F/410.bwaves              -0.59    -0.95       0     -0.59    -0.59
spec/2006/fp/F/434.zeusmp                0.4     0.13    0.07      -0.2     0.13
spec/2006/int/C++/473.astar            -0.21    -0.28    -0.28       0     -1.25
spec/2006/int/C/400.perlbench          -0.62    -0.12       0        0         0
spec/2006/int/C/401.bzip2                0.3     1.16    -0.67    -0.55    -0.73
spec/2006/int/C/403.gcc                  0.9     -0.6    0.15     -0.05     -0.4
spec/2006/int/C/429.mcf                -1.02     0.39     0.2     -0.63    -0.33
spec/2006/int/C/445.gobmk              -0.15    -0.35    0.75       0.7     1.04
spec/2006/int/C/456.hmmer              0.22      0.05    0.05     -0.05     0.33
spec/2006/int/C/458.sjeng              0.73      0.93    0.64      1.62     1.91
spec/2006/int/C/462.libquantum         -0.47    -0.74    -1.08    -0.08    -0.26
spec/2006/int/C/464.h264ref              0.5     2.15    2.58      3.04     2.91


geometric mean                         -0.41     -0.1    -0.06     0.15     0.35

TEXT SIZE INCREASE
Unroll insns code growth              20         40         60        80        
100
spec/2006/fp/C++/444.namd              0      -0.01      -0.01      -0.02     
-0.01
spec/2006/fp/C++/450.soplex            0          0          0         0        
   0
spec/2006/fp/C++/453.povray       -0.14       -0.34      -0.42      -0.62     
-0.73
spec/2006/fp/C-F/436.cactusADM    -0.01       -0.06      -0.07      -0.09     
-0.14
spec/2006/fp/C-F/454.calculix       1.89       1.54       1.33       1.09       
0.88
spec/2006/fp/C/433.milc           -0.15       -0.37      -0.56       -0.8     
-0.83
spec/2006/fp/C/470.lbm                 0          0          0         0        
   0
spec/2006/fp/C/482.sphinx3             0      -0.01      -0.01      -0.01     
-0.01
spec/2006/fp/F/410.bwaves           0.14       0.12       0.12       0.12       
0.12
spec/2006/fp/F/434.zeusmp           0.27       0.15        0.1       0.03       
   0
spec/2006/int/C++/473.astar            0          0          0         0        
-0.1
spec/2006/int/C/400.perlbench     -0.01       -0.04      -0.05      -0.05     
-0.07
spec/2006/int/C/401.bzip2         -0.07       -0.16      -0.22      -0.22     
-0.32
spec/2006/int/C/403.gcc           -0.12       -0.27      -0.36      -0.43     
-0.45
spec/2006/int/C/429.mcf                0          0          0         0        
   0
spec/2006/int/C/445.gobmk           -0.1      -0.39      -0.78      -0.97     
-1.23
spec/2006/int/C/456.hmmer         -0.02       -0.14       -0.2      -0.31     
-0.31
spec/2006/int/C/458.sjeng         -0.06       -1.64      -1.79      -1.95     
-2.07
spec/2006/int/C/462.libquantum    -0.02       -0.03      -0.03      -0.03       
-0.1
spec/2006/int/C/464.h264ref       -0.38       -1.54      -3.16      -4.04       
-5.3


total size                        0.14%     -0.07%     -0.23%     -0.35%     
-0.48%
geo mean                          0.06%     -0.16%     -0.31%     -0.42%     
-0.54%

O2 + -ftree-vectorize, all numbers in %, positive is good and negative is bad.

IBIS SANDYBRIDGE
Unroll insns code growth                    20       40       60       80      
100
spec/2006/fp/C++/444.namd                  -3.2    -0.13     -0.4    -0.57    
-0.31
spec/2006/fp/C++/450.soplex               0.43     1.76     -1.48    -0.14    
-0.48
spec/2006/fp/C++/453.povray               -2.33    0.36     -2.42    -0.55     
-1.1
spec/2006/fp/C-F/436.cactusADM            0.73      -0.1    0.19     -0.67    -0.35
spec/2006/fp/C-F/454.calculix             -5.24     -1.6    -0.91    -0.76     
1.37
spec/2006/fp/C/433.milc                   0.59     0.59     0.34     3.23      
1.97
spec/2006/fp/C/470.lbm                     -1.2    0.36     0.23     0.03     
-0.15
spec/2006/fp/C/482.sphinx3                -0.76    -0.24    -0.36    -0.17    
-0.02
spec/2006/fp/F/410.bwaves                 4.24     3.29     4.64     4.75      
4.93
spec/2006/fp/F/434.zeusmp                  -0.6    0.43     -0.22    -0.34    
-0.56
spec/2006/int/C++/473.astar               -0.61    0.19     -1.12    -0.84    
-2.38
spec/2006/int/C/400.perlbench             -1.02    -2.37    -0.29     -0.2     
1.11
spec/2006/int/C/401.bzip2                 0.04     0.78     1.69     1.17      
1.99
spec/2006/int/C/403.gcc                   -0.03    -0.37    -0.79    -0.43    
-0.34
spec/2006/int/C/429.mcf                   -0.35    0.19        0     -0.66     
0.14
spec/2006/int/C/445.gobmk                 0.64     1.31     1.35     0.94      
1.39
spec/2006/int/C/456.hmmer                 -0.32    0.36     0.44     0.32      
0.56
spec/2006/int/C/458.sjeng                 0.29     0.66     0.91     1.31      
1.64
spec/2006/int/C/462.libquantum            -2.08    -1.06     -0.9    -1.42    
-1.18
spec/2006/int/C/464.h264ref               0.38     1.28      2.4     2.61      
3.06


geometric mean                            -0.54    0.28     0.15     0.37      
0.55




IKARIA WESTMERE
Unroll insns code growth                    20       40       60       80      
100
spec/2006/fp/C++/444.namd                 -0.06    -0.06    -0.06    -0.13     
0.06
spec/2006/fp/C++/450.soplex               1.49     1.03     0.85     2.49      
0.14
spec/2006/fp/C++/453.povray               -1.17    -0.54    -0.23    -0.09    
-0.63
spec/2006/fp/C-F/436.cactusADM            0.58     0.45     -0.32    -0.19     0.06
spec/2006/fp/C-F/454.calculix            -12.51     -4.9     -4.9     -4.4     
-0.1
spec/2006/fp/C/433.milc                   0.05      2.1     2.05     4.15      
3.69
spec/2006/fp/C/470.lbm                    0.15     0.06     0.67     -0.09     
0.06
spec/2006/fp/C/482.sphinx3                1.03     0.93      0.3     0.53     
-0.66
spec/2006/fp/F/410.bwaves                 0.83     0.65     1.67     1.07      
1.13
spec/2006/fp/F/434.zeusmp                 0.07     -0.21    -0.14    0.21      
0.28
spec/2006/int/C++/473.astar               0.42     -0.28    0.14     0.56      
0.07
spec/2006/int/C/400.perlbench             -0.08    -1.25    0.21     0.17     
-0.33
spec/2006/int/C/401.bzip2                 0.43      -1.4    1.83     1.22      
0.73
spec/2006/int/C/403.gcc                    -0.4    -0.25     0.9      -0.2      
0.7
spec/2006/int/C/429.mcf                   0.33      3.1     1.13       3.3     
1.03
spec/2006/int/C/445.gobmk                 0.25      0.2     1.09     0.74      
1.04
spec/2006/int/C/456.hmmer                 0.36     0.26     0.21     0.16      
0.26
spec/2006/int/C/458.sjeng                 0.14     2.01     1.44     1.92      
2.25
spec/2006/int/C/462.libquantum      -1.2      -0.08      -0.96       1.12       
 0.5
spec/2006/int/C/464.h264ref         0.16       1.89       2.18       2.63       
2.15


geometric mean                      -0.5       0.17       0.39       0.74       
0.62




Text Size
Unroll insns code growth              20         40         60         80       
100
spec/2006/fp/C++/444.namd              0      -0.11      -0.11      -0.12     
-0.11
spec/2006/fp/C++/450.soplex        -0.03      -0.04      -0.14      -0.14     
-0.14
spec/2006/fp/C++/453.povray        -0.13      -0.37      -0.47      -0.75       
-0.9
spec/2006/fp/C-F/436.cactusADM     -0.06      -0.53      -0.61      -0.72      -0.79
spec/2006/fp/C-F/454.calculix       2.18       1.56        1.3       1.01       
0.77
spec/2006/fp/C/433.milc            -0.13      -0.42      -0.55      -0.74     
-0.82
spec/2006/fp/C/470.lbm                 0          0          0          0       
   0
spec/2006/fp/C/482.sphinx3         -0.05       -0.1      -0.12      -0.15     
-0.15
spec/2006/fp/F/410.bwaves           0.26       0.22       0.22       0.22       
0.22
spec/2006/fp/F/434.zeusmp           0.51       0.33       0.28       0.09       
   0
spec/2006/int/C++/473.astar            0      -0.01      -0.03      -0.03       
-0.2
spec/2006/int/C/400.perlbench      -0.01      -0.05      -0.18      -0.27     
-0.33
spec/2006/int/C/401.bzip2          -0.14      -0.38      -0.62      -0.62     
-0.62
spec/2006/int/C/403.gcc            -0.13       -0.3      -0.44      -0.55       
-0.6
spec/2006/int/C/429.mcf                0          0          0          0       
   0
spec/2006/int/C/445.gobmk          -0.14      -0.52      -0.98      -1.15     
-1.45
spec/2006/int/C/456.hmmer          -0.11      -0.24      -0.35      -0.51     
-0.56
spec/2006/int/C/458.sjeng          -0.01      -1.59      -1.74       -1.9     
-2.02
spec/2006/int/C/462.libquantum     -0.02      -0.04      -0.04      -0.04     
-0.12
spec/2006/int/C/464.h264ref        -0.35      -1.56      -3.03      -3.84     
-4.78


total size                        0.18%     -0.13%     -0.32%     -0.48%     
-0.61%
geo mean                          0.08%     -0.20%     -0.38%     -0.51%     
-0.63%


Reply via email to