Hi,
I have committed the patch that fixes the regrename code-size regression on
Thumb2 to the ARM Embedded-4_7 branch as r193980.

Thanks.

gcc/ChangeLog.arm
2012-11-30  Bin Cheng  <bin.ch...@arm.com>

        * config/arm/arm-protos.h (tune_params): Add
        preferred_renaming_class.
        * config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
        (arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune)
        (arm_cortex_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
        (arm_cortex_v7m_tune, arm_cortex_v6m_tune, arm_fa726te_tune): Set
        preferred_renaming_class field.
        (arm_preferred_rename_class): Return preferred renaming register
        class.
        * config/arm/arm.md (*arm_addsi3, *arm_subsi3_insn, *arm_mulsi3_v6)
        (*arm_andsi3_insn, andsi_notsi_si, *iorsi3_insn, *arm_xorsi3)
        (*arm_shiftsi3): Add alternatives for Thumb2 set.
        * regrename.c (find_best_rename_reg): Don't rename preferred
        register to non-preferred register.
Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c     (revision 193979)
+++ gcc/regrename.c     (revision 193980)
@@ -358,8 +358,9 @@
 {
   bool has_preferred_class;
   enum reg_class preferred_class;
-  int pass;
-  int best_new_reg = old_reg;
+  int new_reg;
+  int best_reg = old_reg;
+  int best_preferred_reg = old_reg;
 
   /* Further narrow the set of registers we can use for renaming.
      If the chain needs a call-saved register, mark the call-used
@@ -375,39 +376,36 @@
   preferred_class
     = (enum reg_class) targetm.preferred_rename_class (super_class);
 
-  /* If PREFERRED_CLASS is not NO_REGS, we iterate in the first pass
-     over registers that belong to PREFERRED_CLASS and try to find the
-     best register within the class.  If that failed, we iterate in
-     the second pass over registers that don't belong to the class.
-     If PREFERRED_CLASS is NO_REGS, we iterate over all registers in
-     ascending order without any preference.  */
+  /* If PREFERRED_CLASS is defined as register class other than NO_REGS:
+     we don't rename old_reg into non-preferred register if old_reg is in
+     PREFERRED_CLASS; otherwise we rename old_reg into preferred register
+     whenever possible, and only after that we try to rename it into other
+     registers.  */
   has_preferred_class = (preferred_class != NO_REGS);
-  for (pass = (has_preferred_class ? 0 : 1); pass < 2; pass++)
+  for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
     {
-      int new_reg;
-      for (new_reg = 0; new_reg < FIRST_PSEUDO_REGISTER; new_reg++)
+      /* Don't rename to non-preferred register if old_reg is in
+        PREFERRED_CLASS.  */
+      if (has_preferred_class
+         && TEST_HARD_REG_BIT (reg_class_contents[preferred_class], old_reg)
+         && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class], new_reg))
+       continue;
+
+      if (check_new_reg_p (old_reg, new_reg, this_head, *unavailable))
        {
+         /* Record new_reg in best_preferred_reg if it's in PREFERRED_CLASS,
+            otherwise record it in best_reg.  */
          if (has_preferred_class
-             && (pass == 0)
-             != TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-                                   new_reg))
-           continue;
-
-         /* In the first pass, we force the renaming of registers that
-            don't belong to PREFERRED_CLASS to registers that do, even
-            though the latters were used not very long ago.  */
-         if (check_new_reg_p (old_reg, new_reg, this_head,
-                              *unavailable)
-             && ((pass == 0
-                  && !TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
-                                         best_new_reg))
-                 || tick[best_new_reg] > tick[new_reg]))
-           best_new_reg = new_reg;
+             && TEST_HARD_REG_BIT (reg_class_contents[preferred_class],
+                                   new_reg)
+             && tick[best_preferred_reg] > tick[new_reg])
+           best_preferred_reg = new_reg;
+         else if (tick[best_reg] > tick[new_reg])
+           best_reg = new_reg;
        }
-      if (pass == 0 && best_new_reg != old_reg)
-       break;
     }
-  return best_new_reg;
+
+  return (best_preferred_reg != old_reg) ? best_preferred_reg : best_reg;
 }
 
 /* Perform register renaming on the current function.  */
Index: gcc/ChangeLog.arm
===================================================================
--- gcc/ChangeLog.arm   (revision 193979)
+++ gcc/ChangeLog.arm   (revision 193980)
@@ -1,5 +1,22 @@
 2012-11-30  Bin Cheng  <bin.ch...@arm.com>
 
+       * config/arm/arm-protos.h (tune_params): Add
+       preferred_renaming_class.
+       * config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
+       (arm_strongarm_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune)
+       (arm_cortex_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
+       (arm_cortex_v7m_tune, arm_cortex_v6m_tune, arm_fa726te_tune): Set
+       preferred_renaming_class field.
+       (arm_preferred_rename_class): Return preferred renaming register
+       class.
+       * config/arm/arm.md (*arm_addsi3, *arm_subsi3_insn, *arm_mulsi3_v6)
+       (*arm_andsi3_insn, andsi_notsi_si, *iorsi3_insn, *arm_xorsi3)
+       (*arm_shiftsi3): Add alternatives for Thumb2 set.
+       * regrename.c (find_best_rename_reg): Don't rename preferred
+       register to non-preferred register.
+
+2012-11-30  Bin Cheng  <bin.ch...@arm.com>
+
        * config/arm/arm.c (arm_option_override): Disable option
        -fira-hoist-pressure on Thumb2.
 
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c        (revision 193979)
+++ gcc/config/arm/arm.c        (revision 193980)
@@ -883,6 +883,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -896,6 +897,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -912,6 +914,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -925,6 +928,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -938,6 +942,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -951,6 +956,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -965,6 +971,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -981,6 +988,7 @@
   arm_cortex_a5_branch_cost,
   arm_default_unroll_times,
   {false, false},                              /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -994,6 +1002,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1008,6 +1017,7 @@
   arm_cortex_v7m_branch_cost,
   arm_cortex_m_unroll_times,
   {false, false},                              /* Prefer non short circuit.  */
+  LO_REGS,                                     /* Preferred rename class.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1022,6 +1032,7 @@
   arm_default_branch_cost,
   arm_cortex_m_unroll_times,
   {false, false},                              /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1035,6 +1046,7 @@
   arm_default_branch_cost,
   arm_default_unroll_times,
   {true, true},                                        /* Prefer non short circuit.  */
+  NO_REGS,                                     /* Preferred rename class.  */
 };
 
 
@@ -24834,10 +24846,11 @@
   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
      using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
      and code size can be reduced.  */
-  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
-    return LO_REGS;
+  if (optimize_size)
+    return (TARGET_THUMB2 && reg_class_subset_p (rclass, CORE_REGS))
+           ? LO_REGS : NO_REGS;
   else
-    return NO_REGS;
+    return TARGET_THUMB2 ? current_tune->preferred_renaming_class : NO_REGS;
 }
 
 /* Compute the atrribute "length" of insn "*push_multi".
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h (revision 193979)
+++ gcc/config/arm/arm-protos.h (revision 193980)
@@ -243,6 +243,8 @@
      performance. The first element covers Thumb state and the second one
      is for ARM state.  */
   bool logical_op_non_short_circuit[2];
+  /* Preferred reg class for register renaming.  */
+  enum reg_class preferred_renaming_class;
 };
 
 extern const struct tune_params *current_tune;
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md       (revision 193979)
+++ gcc/config/arm/arm.md       (revision 193980)
@@ -718,18 +718,20 @@
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
-       (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
-                (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l, r, k,r,r, k, l, r, k,r, k, r")
+       (plus:SI (match_operand:SI 1 "s_register_operand" "%0, rk,k,r,rk,k, 0, rk,k,rk,k, rk")
+                (match_operand:SI 2 "reg_or_int_operand" "Py,rI,rI,k,Pj,Pj,Pv,L, L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
    add%?\\t%0, %1, %2
    add%?\\t%0, %1, %2
+   add%?\\t%0, %1, %2
    add%?\\t%0, %2, %1
    addw%?\\t%0, %1, %2
    addw%?\\t%0, %1, %2
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
+   sub%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    #"
@@ -744,9 +746,9 @@
                      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length" "2,4,4,4,4,4,2,4,4,4,4,16")
    (set_attr "predicable" "yes")
-   (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
+   (set_attr "arch" "t2,*,*,*,t2,t2,t2,*,*,t2,t2,*")]
 )
 
 (define_insn_and_split "*thumb1_addsi3"
@@ -1214,14 +1216,15 @@
 
 ; ??? Check Thumb-2 split length
 (define_insn_and_split "*arm_subsi3_insn"
-  [(set (match_operand:SI           0 "s_register_operand" "=r,r,rk,r")
-       (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
-                 (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
+  [(set (match_operand:SI           0 "s_register_operand" "=r,l, r,rk,r")
+       (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,0, r,k,?n")
+                 (match_operand:SI 2 "reg_or_int_operand" "r,Py,rI,r, r")))]
   "TARGET_32BIT"
   "@
    rsb%?\\t%0, %2, %1
    sub%?\\t%0, %1, %2
    sub%?\\t%0, %1, %2
+   sub%?\\t%0, %1, %2
    #"
   "&& (GET_CODE (operands[1]) == CONST_INT
        && !const_ok_for_arm (INTVAL (operands[1])))"
@@ -1231,8 +1234,9 @@
                       INTVAL (operands[1]), operands[0], operands[2], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "4,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "*,t2,*,*,*")]
 )
 
 (define_peephole2
@@ -1351,13 +1355,15 @@
 )
 
 (define_insn "*arm_mulsi3_v6"
-  [(set (match_operand:SI          0 "s_register_operand" "=r")
-       (mult:SI (match_operand:SI 1 "s_register_operand" "r")
-                (match_operand:SI 2 "s_register_operand" "r")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=l,r")
+       (mult:SI (match_operand:SI 1 "s_register_operand" "%l,r")
+                (match_operand:SI 2 "s_register_operand" "0,r")))]
   "TARGET_32BIT && arm_arch6"
   "mul%?\\t%0, %1, %2"
-  [(set_attr "insn" "mul")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "insn" "mul")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
@@ -2187,12 +2193,14 @@
 
 ; ??? Check split length for Thumb-2
 (define_insn_and_split "*arm_andsi3_insn"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r,r")
-       (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r")
-               (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,l,r,r,r")
+       (and:SI (match_operand:SI 1 "s_register_operand" "0, l,r,r,r")
+               (match_operand:SI 2 "reg_or_int_operand" "l, 0,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    and%?\\t%0, %1, %2
+   and%?\\t%0, %2, %1
+   and%?\\t%0, %1, %2
    bic%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2205,8 +2213,9 @@
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,2,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,t2,*,*,*")]
 )
 
 (define_insn "*thumb1_andsi3_insn"
@@ -2783,12 +2792,14 @@
 )
   
 (define_insn "andsi_notsi_si"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-       (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
-               (match_operand:SI 1 "s_register_operand" "r")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+       (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "l,r"))
+               (match_operand:SI 1 "s_register_operand" "0,r")))]
   "TARGET_32BIT"
   "bic%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "thumb1_bicsi3"
@@ -2913,12 +2924,13 @@
 )
 
 (define_insn_and_split "*iorsi3_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
-       (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r")
-               (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r,r,r")
+       (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r,r")
+               (match_operand:SI 2 "reg_or_int_operand" "l,rI,K,?n")))]
   "TARGET_32BIT"
   "@
    orr%?\\t%0, %1, %2
+   orr%?\\t%0, %1, %2
    orn%?\\t%0, %1, #%B2
    #"
   "TARGET_32BIT
@@ -2931,8 +2943,8 @@
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,4,16")
-   (set_attr "arch" "32,t2,32")
+  [(set_attr "length" "2,4,4,16")
+   (set_attr "arch" "t2,32,t2,32")
    (set_attr "predicable" "yes")])
 
 (define_insn "*thumb1_iorsi3_insn"
@@ -3051,12 +3063,13 @@
 )
 
 (define_insn_and_split "*arm_xorsi3"
-  [(set (match_operand:SI         0 "s_register_operand" "=r,r")
-       (xor:SI (match_operand:SI 1 "s_register_operand" "%r,r")
-               (match_operand:SI 2 "reg_or_int_operand" "rI,?n")))]
+  [(set (match_operand:SI         0 "s_register_operand" "=l,r,r")
+       (xor:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
+               (match_operand:SI 2 "reg_or_int_operand" "l,rI,?n")))]
   "TARGET_32BIT"
   "@
    eor%?\\t%0, %1, %2
+   eor%?\\t%0, %1, %2
    #"
   "TARGET_32BIT
    && GET_CODE (operands[2]) == CONST_INT
@@ -3067,8 +3080,9 @@
                       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
-  [(set_attr "length" "4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "2,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "t2,*,*")]
 )
 
 (define_insn "*thumb1_xorsi3_insn"
@@ -3672,17 +3686,19 @@
 )
 
 (define_insn "*arm_shiftsi3"
-  [(set (match_operand:SI   0 "s_register_operand" "=r")
+  [(set (match_operand:SI   0 "s_register_operand" "=l,r")
        (match_operator:SI  3 "shift_operator"
-        [(match_operand:SI 1 "s_register_operand"  "r")
-         (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
+        [(match_operand:SI 1 "s_register_operand"  "0,r")
+         (match_operand:SI 2 "reg_or_int_operand" "l,rM")]))]
   "TARGET_32BIT"
   "* return arm_output_shift(operands, 0);"
-  [(set_attr "predicable" "yes")
+  [(set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
    (set_attr "shift" "1")
    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
                      (const_string "alu_shift")
-                     (const_string "alu_shift_reg")))]
+                     (const_string "alu_shift_reg")))
+   (set_attr "arch" "t2,*")]
 )
 
 (define_insn "*shiftsi3_compare0"

Reply via email to