The patch here, https://gcc.gnu.org/ml/gcc-patches/2014-10/msg01872.html,
attempted to scale the register limit used by -fsched-pressure down to just the
call_clobbered (i.e. call_used) regs for the case where the block in question
executes as frequently as the entry block. But because the code subtracts the
call_used count, it actually scales the limit toward the call_saved registers
instead. The following patch corrects that by computing the number of
call_saved regs per class and subtracting a scaled portion of that count.
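
To make the intent concrete, here is a standalone sketch (illustration only,
not part of the patch; the register counts and frequencies are made up) of the
per-class limit that sched_pressure_start_bb ends up with for a block that is
as hot as the entry block, before and after the change:

#include <stdio.h>

int main (void)
{
  /* Hypothetical pressure class: 32 hard regs, of which 18 are call_used
     (call-clobbered) and 14 are call_saved.  The block is as hot as the
     entry block, so bb_freq == entry_freq.  */
  int hard_regs = 32, n_call_used = 18, n_call_saved = 14;
  int entry_freq = 100, bb_freq = 100;

  /* Old code: subtracting the call_used count leaves only the 14 call_saved
     regs as the limit.  */
  int limit_old = hard_regs - (n_call_used * entry_freq) / bb_freq;

  /* New code: subtracting the call_saved count leaves the 18 call_used
     (call-clobbered) regs, which is what the 2014 patch intended.  */
  int limit_new = hard_regs - (n_call_saved * entry_freq) / bb_freq;

  printf ("old limit %d, new limit %d\n", limit_old, limit_new);
  return 0;
}

For hotter blocks (bb_freq > entry_freq) the subtracted term shrinks
proportionally, so the limit moves back toward the full
ira_class_hard_regs_num[cl].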

Bootstrap/regtest on powerpc64le with no new failures. Ok for trunk?

-Pat


2016-10-07  Pat Haugen  <pthau...@us.ibm.com>

        * haifa-sched.c (call_used_regs_num): Rename to...
        (call_saved_regs_num): ...this.
        (sched_pressure_start_bb): Scale call_saved regs not call_used.
        (alloc_global_sched_pressure_data): Compute call_saved regs.


Index: gcc/haifa-sched.c
===================================================================
--- gcc/haifa-sched.c	(revision 240812)
+++ gcc/haifa-sched.c	(working copy)
@@ -932,9 +932,9 @@ static bitmap region_ref_regs;
 /* Effective number of available registers of a given class (see comment
    in sched_pressure_start_bb).  */
 static int sched_class_regs_num[N_REG_CLASSES];
-/* Number of call_used_regs.  This is a helper for calculating of
+/* Number of call_saved_regs.  This is a helper for calculating
    sched_class_regs_num.  */
-static int call_used_regs_num[N_REG_CLASSES];
+static int call_saved_regs_num[N_REG_CLASSES];
 
 /* Initiate register pressure relative info for scheduling the current
    region.  Currently it is only clearing register mentioned in the
@@ -3900,13 +3900,13 @@ sched_pressure_start_bb (basic_block bb)
      * If the basic block executes as often as the prologue/epilogue,
      then spill in the block is as costly as in the prologue, so the effective
      number of available registers is
-     (ira_class_hard_regs_num[cl] - call_used_regs_num[cl]).
+     (ira_class_hard_regs_num[cl] - call_saved_regs_num[cl]).
      Note that all-else-equal, we prefer to spill in the prologue, since that
      allows "extra" registers for other basic blocks of the function.
      * If the basic block is on the cold path of the function and executes
      rarely, then we should always prefer to spill in the block, rather than
      in the prologue/epilogue.  The effective number of available register is
-     (ira_class_hard_regs_num[cl] - call_used_regs_num[cl]).  */
+     (ira_class_hard_regs_num[cl] - call_saved_regs_num[cl]).  */
   {
     int i;
     int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
@@ -3925,7 +3925,7 @@ sched_pressure_start_bb (basic_block bb)
 	enum reg_class cl = ira_pressure_classes[i];
 	sched_class_regs_num[cl] = ira_class_hard_regs_num[cl];
 	sched_class_regs_num[cl]
-	  -= (call_used_regs_num[cl] * entry_freq) / bb_freq;
+	  -= (call_saved_regs_num[cl] * entry_freq) / bb_freq;
       }
   }
 
@@ -7237,17 +7237,17 @@ alloc_global_sched_pressure_data (void)
 	  region_ref_regs = BITMAP_ALLOC (NULL);
 	}
 
-      /* Calculate number of CALL_USED_REGS in register classes that
+      /* Calculate number of CALL_SAVED_REGS in register classes that
 	 we calculate register pressure for.  */
       for (int c = 0; c < ira_pressure_classes_num; ++c)
 	{
 	  enum reg_class cl = ira_pressure_classes[c];
 
-	  call_used_regs_num[cl] = 0;
+	  call_saved_regs_num[cl] = 0;
 
 	  for (int i = 0; i < ira_class_hard_regs_num[cl]; ++i)
-	    if (call_used_regs[ira_class_hard_regs[cl][i]])
-	      ++call_used_regs_num[cl];
+	    if (!call_used_regs[ira_class_hard_regs[cl][i]])
+	      ++call_saved_regs_num[cl];
 	}
     }
 }
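
For completeness, the change in alloc_global_sched_pressure_data just flips
which side of the call_used_regs[] test gets counted. A minimal standalone
sketch with a made-up 8-register class (not any real target's
call_used_regs[]):

#include <stdio.h>

int main (void)
{
  /* Made-up data: nonzero means the hard reg is call_used (call-clobbered),
     zero means it is call_saved.  */
  int call_used_regs[8] = { 1, 1, 1, 0, 0, 0, 0, 0 };
  int call_used_num = 0, call_saved_num = 0;

  for (int i = 0; i < 8; ++i)
    if (call_used_regs[i])
      ++call_used_num;		/* what the old code counted */
    else
      ++call_saved_num;		/* what the patched code counts instead */

  printf ("call_used %d, call_saved %d\n", call_used_num, call_saved_num);
  return 0;
}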
