Hi,

This patch treats the new pseudo-to-pseudo copies that
make_more_copies emits after each hard-reg-to-pseudo copy
as having zero cost.  The justification is that each new
copy immediately follows the corresponding
hard-reg-to-pseudo copy insn, so register allocation should
be able to coalesce the two and eliminate the extra copy.

Currently these copies follow the normal costing scheme,
and the dump below shows the unexpected combination that
results:

``` dump

Trying 3, 2 -> 13:
    3: r119:DI=r132:DI
      REG_DEAD r132:DI
    2: r118:DI=r131:DI
      REG_DEAD r131:DI
   13: r128:DI=r118:DI&0xffffffff|r119:DI<<0x20
      REG_DEAD r119:DI
      REG_DEAD r118:DI

Failed to match this instruction:
(set (reg:DI 128)
    (ior:DI (ashift:DI (reg:DI 132)
            (const_int 32 [0x20]))
        (reg:DI 131)))
Successfully matched this instruction:
(set (reg/v:DI 119 [ f2 ])
    (ashift:DI (reg:DI 132)
        (const_int 32 [0x20])))
Successfully matched this instruction:
(set (reg:DI 128)
    (ior:DI (reg/v:DI 119 [ f2 ])
        (reg:DI 131)))
allowing combination of insns 2, 3 and 13
original costs 4 + 4 + 4 = 12
replacement costs 4 + 4 = 8
deferring deletion of insn with uid = 2.
modifying insn i2     3: r119:DI=r132:DI<<0x20
      REG_DEAD r132:DI
deferring rescan insn with uid = 3.
modifying insn i3    13: r128:DI=r119:DI|r131:DI
      REG_DEAD r131:DI
      REG_DEAD r119:DI
deferring rescan insn with uid = 13.

``` end dump

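The original insn 13 already matches rotldi3_insert_3, so
the shift/or sequence produced by the combination is no
better.  The cost comparison does not reflect that, because
each of the two copies (insns 2 and 3) is counted at cost 4,
which makes the original sequence look more expensive.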

With this patch, we get the following instead:

rejecting combination of insns 2, 3 and 13
original costs 0 + 0 + 4 = 4
replacement costs 4 + 4 = 8


Bootstrapped/regtested on powerpc64le-linux-gnu P9.

Is it reasonable?  Any comments are highly appreciated!

BR,
Kewen
------
gcc/ChangeLog:

        * combine.c (new_copies): New static global variable declare/init.
        (combine_validate_cost): Consider zero costs from new_copies.
        (combine_instructions): Set zero cost for insns in new_copies.
        (make_more_copies): Record new pseudo-to-pseudo copies to new_copies.
        (rest_of_handle_combine): Call bitmap alloc/free for new_copies.
diff --git a/gcc/combine.c b/gcc/combine.c
index ed1ad45de83..6fb2fa82c3f 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -419,6 +419,10 @@ static struct undobuf undobuf;
 
 static int n_occurrences;
 
+/* Record the newly introduced pseudo-to-pseudo copies in function
+   make_more_copies.  */
+static bitmap new_copies = NULL;
+
 static rtx reg_nonzero_bits_for_combine (const_rtx, scalar_int_mode,
                                         scalar_int_mode,
                                         unsigned HOST_WIDE_INT *);
@@ -856,30 +860,38 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   int i0_cost, i1_cost, i2_cost, i3_cost;
   int new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
+  bool i0_cost_ok, i1_cost_ok, i2_cost_ok, i3_cost_ok;
 
   /* Lookup the original insn_costs.  */
   i2_cost = INSN_COST (i2);
   i3_cost = INSN_COST (i3);
+  i2_cost_ok = (i2_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i2));
+  i3_cost_ok = (i3_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i3));
 
   if (i1)
     {
       i1_cost = INSN_COST (i1);
+      i1_cost_ok = (i1_cost > 0) || bitmap_bit_p (new_copies, INSN_UID (i1));
       if (i0)
        {
          i0_cost = INSN_COST (i0);
-         old_cost = (i0_cost > 0 && i1_cost > 0 && i2_cost > 0 && i3_cost > 0
-                     ? i0_cost + i1_cost + i2_cost + i3_cost : 0);
+         i0_cost_ok = (i0_cost > 0)
+                      || bitmap_bit_p (new_copies, INSN_UID (i0));
+         old_cost = (i0_cost_ok && i1_cost_ok && i2_cost_ok && i3_cost_ok
+                       ? i0_cost + i1_cost + i2_cost + i3_cost
+                       : 0);
        }
       else
        {
-         old_cost = (i1_cost > 0 && i2_cost > 0 && i3_cost > 0
-                     ? i1_cost + i2_cost + i3_cost : 0);
+         old_cost = (i1_cost_ok && i2_cost_ok && i3_cost_ok
+                       ? i1_cost + i2_cost + i3_cost
+                       : 0);
          i0_cost = 0;
        }
     }
   else
     {
-      old_cost = (i2_cost > 0 && i3_cost > 0) ? i2_cost + i3_cost : 0;
+      old_cost = (i2_cost_ok && i3_cost_ok) ? i2_cost + i3_cost : 0;
       i1_cost = i0_cost = 0;
     }
 
@@ -1233,7 +1245,12 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
                                                    insn);
 
            /* Record the current insn_cost of this instruction.  */
-           INSN_COST (insn) = insn_cost (insn, optimize_this_for_speed_p);
+           if (bitmap_bit_p (new_copies, INSN_UID (insn)))
+             /* Newly added pseudo-to-pseudo copies should not take any
+                costs since they should be able to be coalesced.  */
+             INSN_COST (insn) = 0;
+           else
+             INSN_COST (insn) = insn_cost (insn, optimize_this_for_speed_p);
            if (dump_file)
              {
                fprintf (dump_file, "insn_cost %d for ", INSN_COST (insn));
@@ -15068,6 +15085,7 @@ make_more_copies (void)
          SET_SRC (set) = new_reg;
          emit_insn_before (new_insn, insn);
          df_insn_rescan (insn);
+         bitmap_set_bit (new_copies, INSN_UID (insn));
        }
     }
 }
@@ -15076,6 +15094,7 @@ make_more_copies (void)
 static unsigned int
 rest_of_handle_combine (void)
 {
+  new_copies = BITMAP_ALLOC (NULL);
   make_more_copies ();
 
   df_set_flags (DF_LR_RUN_DCE + DF_DEFER_INSN_RESCAN);
@@ -15102,6 +15121,7 @@ rest_of_handle_combine (void)
     }
 
   regstat_free_n_sets_and_refs ();
+  BITMAP_FREE (new_copies);
   return 0;
 }
 

Reply via email to