Re: [PATCH] lra: Avoid cycling on certain subreg reloads [PR96796]

Vladimir Makarov via Gcc-patches Fri, 23 Apr 2021 12:04:04 -0700


On 2021-04-23 12:13 p.m., Richard Sandiford wrote:

This is a backport of the PR96796 fix to GCC 10 and GCC 9.  The original
trunk patch was:

    https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552878.html

reviewed here:

    https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553308.html

...

This backport is less aggressive than the trunk version, in that the new
code reuses the test for a reload move from in_class_p.  We will therefore
only narrow OP_OUT classes if the instruction is a register move or memory
load that was generated by LRA itself.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for GCC 10
and GCC 9?

Yes. I think as the previous patch did not introduced new issues andthis patch works in less cases, the patch is ok for GCC10 and GCC9branches. I definitely like this version of the patch more.


Thank you, Richard, for working on this issue.

gcc/
        PR rtl-optimization/96796
        * lra-constraints.c (in_class_p): Add a default-false
        allow_all_reload_class_changes_p parameter.  Do not treat
        reload moves specially when the parameter is true.
        (get_reload_reg): Try to narrow the class of an existing OP_OUT
        reload if we're reloading a reload pseudo in a reload instruction.

gcc/testsuite/
        PR rtl-optimization/96796
        * gcc.c-torture/compile/pr96796.c: New test.
---
  gcc/lra-constraints.c                         | 59 +++++++++++++++----
  gcc/testsuite/gcc.c-torture/compile/pr96796.c | 56 ++++++++++++++++++
  2 files changed, 105 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr96796.c

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 7cc479b3042..29a734e0e10 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -235,12 +235,17 @@ get_reg_class (int regno)
     CL.  Use elimination first if REG is a hard register.  If REG is a
     reload pseudo created by this constraints pass, assume that it will
     be allocated a hard register from its allocno class, but allow that
-   class to be narrowed to CL if it is currently a superset of CL.
+   class to be narrowed to CL if it is currently a superset of CL and
+   if either:
+
+   - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or
+   - the instruction we're processing is not a reload move.

If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of

     REGNO (reg), or NO_REGS if no change in its class was needed.  */
  static bool
-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class)
+in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class,
+           bool allow_all_reload_class_changes_p = false)
  {
    enum reg_class rclass, common_class;
    machine_mode reg_mode;
@@ -267,7 +272,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class 
*new_class)
         typically moves that have many alternatives, and restricting
         reload pseudos for one alternative may lead to situations
         where other reload pseudos are no longer allocatable.  */
-      || (INSN_UID (curr_insn) >= new_insn_uid_start
+      || (!allow_all_reload_class_changes_p
+         && INSN_UID (curr_insn) >= new_insn_uid_start
          && src != NULL
          && ((REG_P (src) || MEM_P (src))
              || (GET_CODE (src) == SUBREG
@@ -570,13 +576,12 @@ init_curr_insn_input_reloads (void)
    curr_insn_input_reloads_num = 0;
  }

-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already

-   created input reload pseudo (only if TYPE is not OP_OUT).  Don't
-   reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
-   wrapped up in SUBREG.  The result pseudo is returned through
-   RESULT_REG.  Return TRUE if we created a new pseudo, FALSE if we
-   reused the already created input reload pseudo.  Use TITLE to
-   describe new registers for debug purposes.  */
+/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing
+   reload pseudo.  Don't reuse an existing reload pseudo if IN_SUBREG_P
+   is true and the reused pseudo should be wrapped up in a SUBREG.
+   The result pseudo is returned through RESULT_REG.  Return TRUE if we
+   created a new pseudo, FALSE if we reused an existing reload pseudo.
+   Use TITLE to describe new registers for debug purposes.  */
  static bool
  get_reload_reg (enum op_type type, machine_mode mode, rtx original,
                enum reg_class rclass, bool in_subreg_p,
@@ -588,6 +593,40 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,

if (type == OP_OUT)

      {
+      /* Output reload registers tend to start out with a conservative
+        choice of register class.  Usually this is ALL_REGS, although
+        a target might narrow it (for performance reasons) through
+        targetm.preferred_reload_class.  It's therefore quite common
+        for a reload instruction to require a more restrictive class
+        than the class that was originally assigned to the reload register.
+
+        In these situations, it's more efficient to refine the choice
+        of register class rather than create a second reload register.
+        This also helps to avoid cycling for registers that are only
+        used by reload instructions.  */
+      rtx src = curr_insn_set != NULL ? SET_SRC (curr_insn_set) : NULL;
+      if (REG_P (original)
+         && (int) REGNO (original) >= new_regno_start
+         && INSN_UID (curr_insn) >= new_insn_uid_start
+         && in_class_p (original, rclass, &new_class, true)
+         && src != NULL
+         && ((REG_P (src) || MEM_P (src))
+             || (GET_CODE (src) == SUBREG
+                 && (REG_P (SUBREG_REG (src)) || MEM_P (SUBREG_REG (src))))))
+       {
+         unsigned int regno = REGNO (original);
+         if (lra_dump_file != NULL)
+           {
+             fprintf (lra_dump_file, "     Reuse r%d for output ", regno);
+             dump_value_slim (lra_dump_file, original, 1);
+           }
+         if (new_class != lra_get_allocno_class (regno))
+           lra_change_class (regno, new_class, ", change to", false);
+         if (lra_dump_file != NULL)
+           fprintf (lra_dump_file, "\n");
+         *result_reg = original;
+         return false;
+       }
        *result_reg
        = lra_create_new_reg_with_unique_value (mode, original, rclass, title);
        return true;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c 
b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
new file mode 100644
index 00000000000..cf917e7277e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c
@@ -0,0 +1,56 @@
+/* { dg-do compile { target { ! nvptx*-*-* } } } */
+/* { dg-additional-options "-fcommon" } */
+
+struct S0 {
+  signed f0 : 8;
+  unsigned f1;
+  unsigned f4;
+};
+struct S1 {
+  long f3;
+  char f4;
+} g_3_4;
+
+int g_5, func_1_l_32, func_50___trans_tmp_31;
+static struct S0 g_144, g_834, g_1255, g_1261;
+
+int g_273[120] = {};
+int *g_555;
+char **g_979;
+static int g_1092_0;
+static int g_1193;
+int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; }
+static struct S0 *func_50();
+int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); }
+void safe_div_func_int64_t_s_s(int *);
+void safe_mod_func_uint32_t_u_u(struct S0);
+struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54,
+                   int p_55) {
+  int __trans_tmp_30;
+  char __trans_tmp_22;
+  short __trans_tmp_19;
+  long l_985_1;
+  long l_1191[8];
+  safe_div_func_int64_t_s_s(g_273);
+  __builtin_printf((char*)g_1261.f4);
+  safe_mod_func_uint32_t_u_u(g_834);
+  g_144.f0 += 1;
+  for (;;) {
+    struct S1 l_1350 = {&l_1350};
+    for (; p_53.f3; p_53.f3 -= 1)
+      for (; g_1193 <= 2; g_1193 += 1) {
+        __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3],
+                                                   p_55 % (**g_979 = 10));
+        __trans_tmp_22 = g_1255.f1 * p_53.f4;
+        __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22;
+        if (__trans_tmp_30)
+          g_1261.f0 = p_51;
+        else {
+          g_1255.f0 = p_53.f3;
+          int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51;
+          g_555 = ~0;
+          g_1092_0 |= func_50___trans_tmp_31;
+        }
+      }
+  }
+}

Re: [PATCH] lra: Avoid cycling on certain subreg reloads [PR96796]

Reply via email to