New version of the patch, with all of Richard Sandiford's comments applied and retested.

Ok for commit?

Kenny

2012-03-31 Kenneth Zadeck <zad...@naturalbridge.com>

* toplev.c (backend_init_target): Call initializer for lower-subreg pass.

    * lower-subreg.c (target_info): New static var.
    (compute_move_cost, profitable_shift_p, init_lower_subreg): New
    functions.
(find_pseudo_copy, resolve_simple_move): Added code to only split based on costs.
    (find_decomposable_subregs): Added code to mark as decomposable
    moves that are not profitable.
    (find_decomposable_shift_zext): Added code to only decompose
    shifts and zext if profitable.
    (resolve_shift_zext): Added comment.
    (decompose_multiword_subregs): Dump list of profitable
    transformations.  Add code to skip non profitable transformations.

    *rtl.h(init_lower_subreg): Added declaration.

Index: toplev.c
===================================================================
--- toplev.c	(revision 186034)
+++ toplev.c	(working copy)
@@ -1660,6 +1660,7 @@ backend_init_target (void)
   /* rtx_cost is mode-dependent, so cached values need to be recomputed
      on a mode change.  */
   init_expmed ();
+  init_lower_subreg ();
 
   /* We may need to recompute regno_save_code[] and regno_restore_code[]
      after a mode change as well.  */
Index: lower-subreg.c
===================================================================
--- lower-subreg.c	(revision 186034)
+++ lower-subreg.c	(working copy)
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "df.h"
 
+
 #ifdef STACK_GROWS_DOWNWARD
 # undef STACK_GROWS_DOWNWARD
 # define STACK_GROWS_DOWNWARD 1
@@ -52,10 +53,36 @@ DEF_VEC_P (bitmap);
 DEF_VEC_ALLOC_P (bitmap,heap);
 
 /* Decompose multi-word pseudo-registers into individual
-   pseudo-registers when possible.  This is possible when all the uses
-   of a multi-word register are via SUBREG, or are copies of the
-   register to another location.  Breaking apart the register permits
-   more CSE and permits better register allocation.  */
+   pseudo-registers when possible and profitable.  This is possible
+   when all the uses of a multi-word register are via SUBREG, or are
+   copies of the register to another location.  Breaking apart the
+   register permits more CSE and permits better register allocation.
+   This is profitable if the machine does not have move instructions
+   to do this.  
+
+   This pass only splits moves with modes that are wider than
+   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
+   modes that are twice the width of word_mode.  The latter could be
+   generalized if there was a need to do this, but the trend in
+   architectures is to not need this.
+
+   There are two useful preprocessor defines for use by maintainers:  
+
+   #define LOG_COSTS
+
+   if you wish to see the actual cost estimates that are being used
+   for each mode wider than word mode and the cost estimates for zero
+   extension and the shifts.   This can be useful when port maintainers 
+   are tuning insn rtx costs.
+
+   #define FORCE_LOWERING
+
+   if you wish to test the pass with all the transformation forced on.
+   This can be useful for finding bugs in the transformations.
+
+   Both of these should not be enabled at the same time. */
+
+#define FORCE_LOWERING
 
 /* Bit N in this bitmap is set if regno N is used in a context in
    which we can decompose it.  */
@@ -75,8 +102,188 @@ static bitmap subreg_context;
    copy from reg M to reg N.  */
 static VEC(bitmap,heap) *reg_copy_graph;
 
-/* Return whether X is a simple object which we can take a word_mode
-   subreg of.  */
+static struct {
+
+  /* This pass can transform 4 different operations: move, ashift,
+     lshiftrt, and zero_extend.  There is a boolean vector for move
+     splitting that is indexed by mode and is true for each mode that is
+     to have its copies split.  The other three operations are only done
+     for one mode so they are only controlled by a single boolean  .*/
+  bool move_modes_to_split[MAX_MACHINE_MODE];
+  
+  /* Other splitting operations to be done on the this platform.  */
+  bool splitting_ashift[MAX_BITS_PER_WORD];
+  bool splitting_lshiftrt[MAX_BITS_PER_WORD];
+  bool splitting_zext;
+
+  bool something_to_do;
+
+  /* Precomputed cost values used to determine if lowering shift
+     operations is profitable.  */ 
+  int word_mode_move_cost;
+  int move_zero_cost;
+} target_info;
+
+/* See what the move cost is.  */
+static int 
+compute_move_cost (enum machine_mode mode)
+{
+  rtx src = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+  rtx trg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER + 1);
+  rtx pat = gen_rtx_SET (VOIDmode, trg, src);
+
+  return insn_rtx_cost (pat, true);
+}
+
+
+/* Return true if it is profitable to lower and shift by SHIFT_AMT.
+   CODE can be either ASHIFT or LSHIFTRT.  */
+static bool
+profitable_shift_p (enum rtx_code code, int shift_amt)
+{
+  rtx trg = gen_rtx_REG (GET_MODE_WIDER_MODE (word_mode), FIRST_PSEUDO_REGISTER);
+  rtx src = gen_rtx_REG (GET_MODE_WIDER_MODE (word_mode), FIRST_PSEUDO_REGISTER + 1);
+  int wide_cost 
+    = insn_rtx_cost (gen_rtx_SET (VOIDmode, trg, 
+				  gen_rtx_fmt_ee (code, GET_MODE_WIDER_MODE (word_mode), 
+						  src, GEN_INT (shift_amt))),
+		     true);
+#ifdef FORCE_LOWERING
+  return true;
+#endif
+
+#ifdef LOG_COSTS
+  fprintf (stderr, "shift code %d, shift amt %d, wide_cost %d\n", 
+	   code, shift_amt, wide_cost);
+#endif
+  if (shift_amt == BITS_PER_WORD)
+    return wide_cost 
+      >= target_info.word_mode_move_cost + target_info.move_zero_cost;
+  else
+    {
+      int narrow_cost;
+
+      trg = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
+      src = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
+      narrow_cost 
+	= insn_rtx_cost (gen_rtx_SET 
+			 (VOIDmode, trg, 
+			  gen_rtx_fmt_ee (code, word_mode, 
+					  src, 
+					  GEN_INT (shift_amt - BITS_PER_WORD))),
+				   true);
+
+#ifdef LOG_COSTS
+      fprintf (stderr, "narrow_cost %d\n", narrow_cost);
+#endif
+      return wide_cost > narrow_cost + target_info.move_zero_cost;
+    }
+}
+
+
+/* Initialize pass once per execution.  This envolves determining
+   which operations on the machine are profitable.  If none are found,
+   then the pass just returns when called.  */
+
+void
+init_lower_subreg (void)
+{
+  rtx trg = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
+  rtx pat;
+  unsigned int i;
+  int factor;
+
+  memset (&target_info, 0, sizeof target_info);
+
+  target_info.word_mode_move_cost = compute_move_cost (word_mode);
+  target_info.move_zero_cost 
+    = insn_rtx_cost (gen_rtx_SET (VOIDmode, trg, 
+				  CONST0_RTX (word_mode)), true);
+
+#ifdef LOG_COSTS
+  fprintf (stderr, "word mode move cost %d\n", target_info.word_mode_move_cost);
+  fprintf (stderr, "move 0 cost %d\n", target_info.move_zero_cost);
+#endif
+  for (i = 0; i < MAX_MACHINE_MODE; i++) 
+    {
+      int t;
+      factor = GET_MODE_SIZE (i) / UNITS_PER_WORD;
+
+      if (factor <= 1) 
+	continue;
+
+      t = compute_move_cost ((enum machine_mode) i);
+
+#ifdef LOG_COSTS
+      fprintf (stderr, "mode %s, move cost %d, factor %d\n", 
+	       mode_name[i], t, factor);
+#endif
+      if (t >= target_info.word_mode_move_cost * factor)
+	{
+	  target_info.move_modes_to_split[i] = true;
+	  target_info.something_to_do = true;
+	}
+#ifdef FORCE_LOWERING
+      target_info.move_modes_to_split[i] = true;
+      target_info.something_to_do = true;
+#endif
+    }
+
+  /* For the moves and shifts, the only case that is checked is one
+     where the mode of the target is an integer mode twice the width
+     of the word_mode. 
+
+     If it is not profitable to split a double word move then do not
+     even consider the shifts or the zero extension.  */
+  if (target_info.move_modes_to_split[(int) GET_MODE_WIDER_MODE (word_mode)])
+    {
+      /* The only case here to check to see if moving the upper part with a
+	 zero is cheaper than doing the zext itself.  */
+      pat = gen_rtx_SET (VOIDmode, trg, 
+			 gen_rtx_ZERO_EXTEND (GET_MODE_WIDER_MODE (word_mode), 
+					      gen_reg_rtx (word_mode)));
+      
+      if (insn_rtx_cost (pat, true) 
+	  >= target_info.word_mode_move_cost + target_info.move_zero_cost) 
+	{
+	  target_info.splitting_zext = true;
+	  target_info.something_to_do = true;
+	}
+      
+#ifdef FORCE_LOWERING
+      target_info.splitting_zext = true;
+      target_info.something_to_do = true;
+#endif
+      /* For the shifts there are three shift amounts that need to be
+	 checked: word_mode, word_mode + 1 and word_mode * 1.5.  The first
+	 of these can be done without a shift.  The second and third case
+	 are the same on large machines but may be different on small
+	 machines that special case shift 1 and 2.  If any of these are
+	 found to be useful, then we set the flags to consider those cases
+	 and when the actual shift amount is known, redo the cost
+	 calculation.  */
+      
+      for (i = 0; i < BITS_PER_WORD; i++)
+	{
+	  bool p = profitable_shift_p (ASHIFT, BITS_PER_WORD + i);
+
+	  target_info.splitting_ashift[i] = p;
+      	  target_info.something_to_do |= p;
+      
+	  p = profitable_shift_p (LSHIFTRT, BITS_PER_WORD + i);
+
+	  target_info.splitting_lshiftrt[i] = p;
+      	  target_info.something_to_do |= p;
+
+#ifdef FORCE_LOWERING
+	  target_info.splitting_ashift[i] = true;
+	  target_info.splitting_lshiftrt[i] = true;
+	  target_info.something_to_do = true;
+#endif
+	}
+    }
+}
+
 
 static bool
 simple_move_operand (rtx x)
@@ -173,7 +380,7 @@ find_pseudo_copy (rtx set)
   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
     return false;
 
-  if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
+  if (!target_info.move_modes_to_split[(int) GET_MODE (dest)])
     return false;
 
   b = VEC_index (bitmap, reg_copy_graph, rs);
@@ -335,6 +542,11 @@ find_decomposable_subregs (rtx *px, void
 		bitmap_set_bit (decomposable_context, regno);
 	      break;
 	    case SIMPLE_MOVE:
+	      /* If this is marked as a simple move with a mode this
+		 large, it is because find_pseudo_copy returned false
+		 because this was not a mode we wanted to split.  */
+	      if (!target_info.move_modes_to_split[(int) GET_MODE (x)])
+		bitmap_set_bit (non_decomposable_context, regno);
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -668,7 +887,7 @@ resolve_simple_move (rtx set, rtx insn)
   orig_mode = GET_MODE (dest);
 
   words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-  if (words <= 1)
+  if (!target_info.move_modes_to_split[(int) GET_MODE (dest)])
     return insn;
 
   start_sequence ();
@@ -941,17 +1160,29 @@ find_decomposable_shift_zext (rtx insn)
   rtx set;
   rtx op;
   rtx op_operand;
+  enum machine_mode mode;
 
   set = single_set (insn);
   if (!set)
     return 0;
 
   op = SET_SRC (set);
-  if (GET_CODE (op) != ASHIFT
-      && GET_CODE (op) != LSHIFTRT
-      && GET_CODE (op) != ZERO_EXTEND)
+  mode = GET_MODE (op);
+
+  if (mode != GET_MODE_WIDER_MODE (word_mode))
     return 0;
 
+  switch (GET_CODE (op)) 
+    {
+    case ASHIFT:
+    case LSHIFTRT:
+    case ZERO_EXTEND:
+      break;
+      
+    default:
+      return 0;
+    }
+
   op_operand = XEXP (op, 0);
   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
@@ -959,25 +1190,51 @@ find_decomposable_shift_zext (rtx insn)
       || !SCALAR_INT_MODE_P (GET_MODE (op)))
     return 0;
 
-  if (GET_CODE (op) == ZERO_EXTEND)
-    {
-      if (GET_MODE (op_operand) != word_mode
-	  || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
-	return 0;
-    }
-  else /* left or right shift */
+  switch (GET_CODE (op)) 
     {
-      if (!CONST_INT_P (XEXP (op, 1))
-	  || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
-	  || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
+    case ASHIFT:
+      {
+	int shift_val;
+	
+	if (!CONST_INT_P (XEXP (op, 1))
+	    || INTVAL (XEXP (op, 1)) < BITS_PER_WORD)
+	  return 0;
+	
+	shift_val = INTVAL (XEXP (op, 1));
+	if (target_info.splitting_ashift[shift_val - BITS_PER_WORD])
+	  return 0;
+	
+	bitmap_set_bit (decomposable_context, REGNO (op_operand));
+	break;
+      }
+
+    case LSHIFTRT:
+      {
+	int shift_val;
+	
+	if (!CONST_INT_P (XEXP (op, 1))
+	    || INTVAL (XEXP (op, 1)) < BITS_PER_WORD)
+	  return 0;
+	
+	shift_val = INTVAL (XEXP (op, 1));
+	if (target_info.splitting_lshiftrt[shift_val - BITS_PER_WORD])
+	  return 0;
+	
+	bitmap_set_bit (decomposable_context, REGNO (op_operand));
+	break;
+      }
+
+    case ZERO_EXTEND:
+      if (GET_MODE (op_operand) != word_mode || !target_info.splitting_zext)
 	return 0;
+      break;
+
+    default:
+      gcc_unreachable ();
     }
 
   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
 
-  if (GET_CODE (op) != ZERO_EXTEND)
-    bitmap_set_bit (decomposable_context, REGNO (op_operand));
-
   return 1;
 }
 
@@ -1008,6 +1265,8 @@ resolve_shift_zext (rtx insn)
 
   op_operand = XEXP (op, 0);
 
+  /* We can tear this operation apart only if the regs were already
+     torn apart.  */ 
   if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
     return NULL_RTX;
 
@@ -1083,8 +1351,57 @@ decompose_multiword_subregs (void)
   unsigned int max;
   basic_block bb;
 
-  if (df)
-    df_set_flags (DF_DEFER_INSN_RESCAN);
+  if (dump_file)
+    {
+      unsigned int i;
+      const char *sep;
+
+      for (i = 0; i < MAX_MACHINE_MODE; i++) 
+	{
+	  if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
+	    fprintf (dump_file, "%s mode %s for copy lowering.\n", 
+		     target_info.move_modes_to_split[i] ? "Splitting" : "Skipping", mode_name[i]);
+	}
+
+      fprintf (dump_file, "%s mode %s for zero_extend lowering.\n", 
+	       target_info.splitting_zext ? "Splitting" : "Skipping", 
+	       mode_name[GET_MODE_WIDER_MODE (word_mode)]);
+
+      
+      fprintf (dump_file, "Splitting mode %s for ashift lowering with shift amounts = ", 
+	       mode_name[GET_MODE_WIDER_MODE (word_mode)]);
+      sep = "";
+      for (i = 0; i < BITS_PER_WORD; i++)
+	{
+	  if (target_info.splitting_ashift[i])
+	    {
+	      fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
+	      sep = ",";
+	    }
+	}
+
+      fprintf (dump_file, "\nSplitting mode %s for lshiftrt lowering with shift amounts = ", 
+	       mode_name[GET_MODE_WIDER_MODE (word_mode)]);
+      sep = "";
+      for (i = 0; i < BITS_PER_WORD; i++)
+	{
+	  if (target_info.splitting_lshiftrt[i])
+	    {
+	      fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
+	      sep = ",";
+	    }
+	}
+
+      fprintf (dump_file, "\n");
+
+      if (!target_info.something_to_do)
+	fprintf (dump_file, "Nothing to lower for port.\n\n");
+    }
+
+
+  /* Check if this target even has any modes to consider lowering.   */
+  if (!target_info.something_to_do)
+    return;
 
   max = max_reg_num ();
 
@@ -1094,24 +1411,39 @@ decompose_multiword_subregs (void)
      all the insns.  */
   {
     unsigned int i;
+    bool useful_modes_seen = false;
 
     for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
       {
-	if (regno_reg_rtx[i] != NULL
-	    && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD)
-	  break;
+	if (regno_reg_rtx[i] != NULL)
+	  {
+	    enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
+	    if (regno_reg_rtx[i] != NULL && target_info.move_modes_to_split [mode])
+	      {
+		useful_modes_seen = true;
+		break;
+	      }
+	  }
+      }
+
+    if (!useful_modes_seen)
+      {
+	if (dump_file)
+	  fprintf (dump_file, "Nothing to lower in this function.\n");
+	return;
       }
-    if (i == max)
-      return;
   }
 
-  if (df)
-    run_word_dce ();
+  if (df) 
+    {
+      df_set_flags (DF_DEFER_INSN_RESCAN);
+      run_word_dce ();
+    }
 
-  /* FIXME: When the dataflow branch is merged, we can change this
-     code to look for each multi-word pseudo-register and to find each
-     insn which sets or uses that register.  That should be faster
-     than scanning all the insns.  */
+  /* FIXME: It may be possible to change this code to look for each
+     multi-word pseudo-register and to find each insn which sets or
+     uses that register.  That should be faster than scanning all the
+     insns.  */
 
   decomposable_context = BITMAP_ALLOC (NULL);
   non_decomposable_context = BITMAP_ALLOC (NULL);
Index: rtl.h
===================================================================
--- rtl.h	(revision 186034)
+++ rtl.h	(working copy)
@@ -2523,6 +2523,9 @@ extern void init_expmed (void);
 extern void expand_inc (rtx, rtx);
 extern void expand_dec (rtx, rtx);
 
+/* In lower-subreg.c */
+extern void init_lower_subreg (void);
+
 /* In gcse.c */
 extern bool can_copy_p (enum machine_mode);
 extern bool can_assign_to_reg_without_clobbers_p (rtx);

Reply via email to