The following patch fixes PR55342:

  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55342

  To solve the problem, new transformations called *optional reloads*
and undoing them have been implemented.

  Here is an example of what the patch does

                       Before               After
 inout pseudo      inout stack_slot     hr <- stack_slot -- optional reload
 ...           =>  ...                  inout hr
                   hr <- stack slot     ...
 use pseudo        use hr               use hr

Instead of 3 memory access, now we have 1 memory insn.

If hr is not inherited, we undo the optional reload and we have the
same code as before (which have less and shorter insns).

The patch was successfully tested and bootstrapped on x86/x86-64.

Committed as rev. 200723.

2013-07-05  Vladimir Makarov  <vmaka...@redhat.com>

        PR rtl-optimization/55342
        * lra-int.h (lra_subreg_reload_pseudos): New.
        * lra.c: Add undoing optional reloads to the block diagram.
        (lra_subreg_reload_pseudos): New.
        (lra_optional_reload_pseudos): Change comments.
        (lra): Init and clear lra_subreg_reload_pseudos.  Clear
        lra_optional_reload_pseudos after undo transformations.
        * lra-assigns.c (pseudo_prefix_title): New.
        (lra_setup_reg_renumber): Use it.
        (spill_for): Ditto.  Check subreg reload pseudos too.
        (assign_by_spills): Consider subreg reload pseudos too.
        * lra-constraints.c (simplify_operand_subreg): Use
        lra_subreg_reload_pseudos instead of lra_optional_reload_pseudos.
        (curr_insn_transform): Recognize and do optional reloads.
        (undo_optional_reloads): New.
        (lra_undo_inheritance): Call undo_optional_reloads.

2013-07-05  Vladimir Makarov  <vmaka...@redhat.com>

        PR rtl-optimization/55342
        * gcc.target/i386/pr55342.c: New.


Index: lra-assigns.c
===================================================================
--- lra-assigns.c       (revision 200675)
+++ lra-assigns.c       (working copy)
@@ -672,6 +672,19 @@ update_hard_regno_preference (int regno,
     }
 }
 
+/* Return prefix title for pseudo REGNO.  */
+static const char *
+pseudo_prefix_title (int regno)
+{
+  return
+    (regno < lra_constraint_new_regno_start ? ""
+     : bitmap_bit_p (&lra_inheritance_pseudos, regno) ? "inheritance "
+     : bitmap_bit_p (&lra_split_regs, regno) ? "split "
+     : bitmap_bit_p (&lra_optional_reload_pseudos, regno) ? "optional reload "
+     : bitmap_bit_p (&lra_subreg_reload_pseudos, regno) ? "subreg reload "
+     : "reload ");
+}
+
 /* Update REG_RENUMBER and other pseudo preferences by assignment of
    HARD_REGNO to pseudo REGNO and print about it if PRINT_P.  */
 void
@@ -692,13 +705,7 @@ lra_setup_reg_renumber (int regno, int h
       lra_hard_reg_usage[hr + i] += lra_reg_info[regno].freq;
   if (print_p && lra_dump_file != NULL)
     fprintf (lra_dump_file, "     Assign %d to %sr%d (freq=%d)\n",
-            reg_renumber[regno],
-            regno < lra_constraint_new_regno_start
-            ? ""
-            : bitmap_bit_p (&lra_inheritance_pseudos, regno) ? "inheritance "
-            : bitmap_bit_p (&lra_split_regs, regno) ? "split "
-            : bitmap_bit_p (&lra_optional_reload_pseudos, regno)
-            ? "optional reload ": "reload ",
+            reg_renumber[regno], pseudo_prefix_title (regno),
             regno, lra_reg_info[regno].freq);
   if (hard_regno >= 0)
     {
@@ -844,6 +851,7 @@ spill_for (int regno, bitmap spilled_pse
        if ((int) spill_regno >= lra_constraint_new_regno_start
            && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
            && ! bitmap_bit_p (&lra_split_regs, spill_regno)
+           && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
            && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
          goto fail;
       insn_pseudos_num = 0;
@@ -953,14 +961,7 @@ spill_for (int regno, bitmap spilled_pse
     {
       if (lra_dump_file != NULL)
        fprintf (lra_dump_file, "      Spill %sr%d(hr=%d, freq=%d) for r%d\n",
-                ((int) spill_regno < lra_constraint_new_regno_start
-                 ? ""
-                 : bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
-                 ? "inheritance "
-                 : bitmap_bit_p (&lra_split_regs, spill_regno)
-                 ? "split "
-                 : bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)
-                 ? "optional reload " : "reload "),
+                pseudo_prefix_title (spill_regno),
                 spill_regno, reg_renumber[spill_regno],
                 lra_reg_info[spill_regno].freq, regno);
       update_lives (spill_regno, true);
@@ -1176,6 +1177,7 @@ assign_by_spills (void)
   bitmap_initialize (&changed_insns, &reg_obstack);
   bitmap_initialize (&non_reload_pseudos, &reg_obstack);
   bitmap_ior (&non_reload_pseudos, &lra_inheritance_pseudos, &lra_split_regs);
+  bitmap_ior_into (&non_reload_pseudos, &lra_subreg_reload_pseudos);
   bitmap_ior_into (&non_reload_pseudos, &lra_optional_reload_pseudos);
   for (iter = 0; iter <= 1; iter++)
     {
@@ -1350,6 +1352,7 @@ assign_by_spills (void)
                 && lra_reg_info[i].restore_regno >= 0)
             || (bitmap_bit_p (&lra_split_regs, i)
                 && lra_reg_info[i].restore_regno >= 0)
+            || bitmap_bit_p (&lra_subreg_reload_pseudos, i)
             || bitmap_bit_p (&lra_optional_reload_pseudos, i))
            && reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
            && regno_allocno_class_array[i] != NO_REGS)
Index: lra.c
===================================================================
--- lra.c       (revision 200675)
+++ lra.c       (working copy)
@@ -43,13 +43,13 @@ along with GCC; see the file COPYING3.      I
 
    Here is block diagram of LRA passes:
 
-                                ---------------------
-           ---------------     | Undo inheritance    |     ---------------
-          | Memory-memory |    | for spilled pseudos)|    | New (and old) |
-          | move coalesce |<---| and splits (for     |<-- |   pseudos     |
-           ---------------     | pseudos got the     |    |  assignment   |
-  Start           |            |  same  hard regs)   |     ---------------
-    |             |             ---------------------               ^
+                                ------------------------
+           ---------------     | Undo inheritance for   |     ---------------
+          | Memory-memory |    | spilled pseudos,       |    | New (and old) |
+          | move coalesce |<---| splits for pseudos got |<-- |   pseudos     |
+           ---------------     | the same hard regs,    |    |  assignment   |
+  Start           |            | and optional reloads   |     ---------------
+    |             |             ------------------------            ^
     V             |              ----------------                   |
  -----------      V             | Update virtual |                  |
 |  Remove   |----> ------------>|    register    |                  |
@@ -2187,10 +2187,16 @@ bitmap_head lra_inheritance_pseudos;
 /* Split regnos before the new spill pass.  */
 bitmap_head lra_split_regs;
 
-/* Reload pseudo regnos before the new assign pass which still can be
-   spilled after the assinment pass.  */
+/* Reload pseudo regnos before the new assignmnet pass which still can
+   be spilled after the assinment pass as memory is also accepted in
+   insns for the reload pseudos.  */
 bitmap_head lra_optional_reload_pseudos;
 
+/* Pseudo regnos used for subreg reloads before the new assignment
+   pass.  Such pseudos still can be spilled after the assinment
+   pass.  */
+bitmap_head lra_subreg_reload_pseudos;
+
 /* First UID of insns generated before a new spill pass.  */
 int lra_constraint_new_insn_uid_start;
 
@@ -2296,6 +2302,7 @@ lra (FILE *f)
   bitmap_initialize (&lra_inheritance_pseudos, &reg_obstack);
   bitmap_initialize (&lra_split_regs, &reg_obstack);
   bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
+  bitmap_initialize (&lra_subreg_reload_pseudos, &reg_obstack);
   live_p = false;
   if (get_frame_size () != 0 && crtl->stack_alignment_needed)
     /* If we have a stack frame, we must align it now.  The stack size
@@ -2356,8 +2363,9 @@ lra (FILE *f)
              if (! live_p)
                lra_clear_live_ranges ();
            }
+         bitmap_clear (&lra_optional_reload_pseudos);
        }
-      bitmap_clear (&lra_optional_reload_pseudos);
+      bitmap_clear (&lra_subreg_reload_pseudos);
       bitmap_clear (&lra_inheritance_pseudos);
       bitmap_clear (&lra_split_regs);
       if (! lra_need_for_spills_p ())
Index: lra-constraints.c
===================================================================
--- lra-constraints.c   (revision 200675)
+++ lra-constraints.c   (working copy)
@@ -1228,7 +1228,7 @@ simplify_operand_subreg (int nop, enum m
       if (get_reload_reg (curr_static_id->operand[nop].type, reg_mode, reg,
                          rclass, "subreg reg", &new_reg))
        {
-         bitmap_set_bit (&lra_optional_reload_pseudos, REGNO (new_reg));
+         bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg));
          if (type != OP_OUT
              || GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode))
            {
@@ -3183,6 +3183,8 @@ curr_insn_transform (void)
 
   for (i = 0; i < n_operands; i++)
     {
+      int regno;
+      bool optional_p = false;
       rtx old, new_reg;
       rtx op = *curr_id->operand_loc[i];
 
@@ -3205,7 +3207,22 @@ curr_insn_transform (void)
                   current one.  */
                reg_renumber[regno] = -1;
            }
-         continue;
+         /* We can do an optional reload.  If the pseudo got a hard
+            reg, we might improve the code through inheritance.  If
+            it does not get a hard register we coalesce memory/memory
+            moves later.  Ignore move insns to avoid cycling.  */
+         if (! lra_simple_p
+             && lra_undo_inheritance_iter < LRA_MAX_INHERITANCE_PASSES
+             && goal_alt[i] != NO_REGS && REG_P (op)
+             && (regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER
+             && reg_renumber[regno] < 0
+             && (curr_insn_set == NULL_RTX
+                 || !(REG_P (SET_SRC (curr_insn_set))
+                      || MEM_P (SET_SRC (curr_insn_set))
+                      || GET_CODE (SET_SRC (curr_insn_set)) == SUBREG)))
+           optional_p = true;
+         else
+           continue;
        }
 
       /* Operands that match previous ones have already been handled.  */
@@ -3328,6 +3345,21 @@ curr_insn_transform (void)
        /* We must generate code in any case when function
           process_alt_operands decides that it is possible.  */
        gcc_unreachable ();
+      if (optional_p)
+       {
+         lra_assert (REG_P (op));
+         regno = REGNO (op);
+         op = *curr_id->operand_loc[i]; /* Substitution.  */
+         if (GET_CODE (op) == SUBREG)
+           op = SUBREG_REG (op);
+         gcc_assert (REG_P (op) && (int) REGNO (op) >= new_regno_start);
+         bitmap_set_bit (&lra_optional_reload_pseudos, REGNO (op));
+         lra_reg_info[REGNO (op)].restore_regno = regno;
+         if (lra_dump_file != NULL)
+           fprintf (lra_dump_file,
+                    "      Making reload reg %d for reg %d optional\n",
+                    REGNO (op), regno);
+       }
     }
   if (before != NULL_RTX || after != NULL_RTX
       || max_regno_before != max_reg_num ())
@@ -5273,6 +5305,100 @@ remove_inheritance_pseudos (bitmap remov
   return change_p;
 }
 
+/* If optional reload pseudos failed to get a hard register or was not
+   inherited, it is better to remove optional reloads.  We do this
+   transformation after undoing inheritance to figure out necessity to
+   remove optional reloads easier.  Return true if we do any
+   change.  */
+static bool
+undo_optional_reloads (void)
+{
+  bool change_p;
+  unsigned int regno, uid;
+  bitmap_iterator bi, bi2;
+  rtx insn, set, src, dest;
+  bitmap_head removed_optional_reload_pseudos, insn_bitmap;
+
+  bitmap_initialize (&removed_optional_reload_pseudos, &reg_obstack);
+  bitmap_copy (&removed_optional_reload_pseudos, &lra_optional_reload_pseudos);
+  EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi)
+    if (reg_renumber[regno] >= 0)
+      EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi2)
+       {
+         insn = lra_insn_recog_data[uid]->insn;
+         if ((set = single_set (insn)) == NULL_RTX)
+           continue;
+         src = SET_SRC (set);
+         dest = SET_DEST (set);
+         if (! REG_P (src) || ! REG_P (dest))
+           continue;
+         if ((REGNO (src) == regno
+              && lra_reg_info[regno].restore_regno != (int) REGNO (dest))
+             || (REGNO (dest) == regno
+                 && lra_reg_info[regno].restore_regno != (int) REGNO (src)))
+           {
+             /* Optional reload was inherited.  Keep it.  */
+             bitmap_clear_bit (&removed_optional_reload_pseudos, regno);
+             if (lra_dump_file != NULL)
+               fprintf (lra_dump_file, "Keep optional reload reg %d\n", regno);
+           }
+       }
+  change_p = ! bitmap_empty_p (&removed_optional_reload_pseudos);
+  bitmap_initialize (&insn_bitmap, &reg_obstack);
+  EXECUTE_IF_SET_IN_BITMAP (&removed_optional_reload_pseudos, 0, regno, bi)
+    {
+      if (lra_dump_file != NULL)
+       fprintf (lra_dump_file, "Remove optional reload reg %d\n", regno);
+      bitmap_copy (&insn_bitmap, &lra_reg_info[regno].insn_bitmap);
+      EXECUTE_IF_SET_IN_BITMAP (&insn_bitmap, 0, uid, bi2)
+       {
+         insn = lra_insn_recog_data[uid]->insn;
+         if ((set = single_set (insn)) != NULL_RTX)
+           {
+             src = SET_SRC (set);
+             dest = SET_DEST (set);
+             if (REG_P (src) && REG_P (dest)
+                 && ((REGNO (src) == regno
+                      && (lra_reg_info[regno].restore_regno
+                          == (int) REGNO (dest)))
+                     || (REGNO (dest) == regno
+                         && (lra_reg_info[regno].restore_regno
+                             == (int) REGNO (src)))))
+               {
+                 if (lra_dump_file != NULL)
+                   {
+                     fprintf (lra_dump_file, "  Deleting move %u\n",
+                              INSN_UID (insn));
+                     dump_insn_slim (lra_dump_file, insn);
+                   }
+                 lra_set_insn_deleted (insn);
+                 continue;
+               }
+             /* We should not worry about generation memory-memory
+                moves here as if the corresponding inheritance did
+                not work (inheritance pseudo did not get a hard reg),
+                we remove the inheritance pseudo and the optional
+                reload.  */
+           }
+         substitute_pseudo (&insn, regno,
+                            regno_reg_rtx[lra_reg_info[regno].restore_regno]);
+         lra_update_insn_regno_info (insn);
+         if (lra_dump_file != NULL)
+           {
+             fprintf (lra_dump_file,
+                      "  Restoring original insn:\n");
+             dump_insn_slim (lra_dump_file, insn);
+           }
+       }
+    }
+  /* Clear restore_regnos.  */
+  EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi)
+    lra_reg_info[regno].restore_regno = -1;
+  bitmap_clear (&insn_bitmap);
+  bitmap_clear (&removed_optional_reload_pseudos);
+  return change_p;
+}
+
 /* Entry function for undoing inheritance/split transformation.         Return 
true
    if we did any RTL change in this pass.  */
 bool
@@ -5335,5 +5461,6 @@ lra_undo_inheritance (void)
     lra_reg_info[regno].restore_regno = -1;
   EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
     lra_reg_info[regno].restore_regno = -1;
+  change_p = undo_optional_reloads () || change_p;
   return change_p;
 }
Index: lra-int.h
===================================================================
--- lra-int.h   (revision 200675)
+++ lra-int.h   (working copy)
@@ -321,6 +321,7 @@ extern int lra_new_regno_start;
 extern int lra_constraint_new_regno_start;
 extern bitmap_head lra_inheritance_pseudos;
 extern bitmap_head lra_split_regs;
+extern bitmap_head lra_subreg_reload_pseudos;
 extern bitmap_head lra_optional_reload_pseudos;
 extern int lra_constraint_new_insn_uid_start;
 
Index: testsuite/gcc.target/i386/pr55342.c
===================================================================
--- testsuite/gcc.target/i386/pr55342.c (revision 0)
+++ testsuite/gcc.target/i386/pr55342.c (working copy)
@@ -0,0 +1,28 @@
+/* PR rtl-optimization/55342 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "notb" } } */
+
+
+void convert_image(unsigned char *in, unsigned char *out, int size) {
+    int i;
+    unsigned char * read = in,
+     * write = out;
+    for(i = 0; i < size; i++) {
+        unsigned char r = *read++;
+        unsigned char g = *read++;
+        unsigned char b = *read++;
+        unsigned char c, m, y, k, tmp;
+        c = 255 - r;
+        m = 255 - g;
+        y = 255 - b;
+       if (c < m)
+         k = ((c) > (y)?(y):(c));
+       else
+          k = ((m) > (y)?(y):(m));
+        *write++ = c - k;
+        *write++ = m - k;
+        *write++ = y - k;
+        *write++ = k;
+    }
+}

Reply via email to