Am 09.09.24 um 09:08 schrieb Richard Biener:
On Sun, Sep 8, 2024 at 12:22 PM Georg-Johann Lay <a...@gjlay.de> wrote:

The reason for PR116326 is that LRA and reload require different
ELIMINABLE_REGS for a multi-register frame pointer.  As ELIMINABLE_REGS
is used to initialize static const objects, it is not possible to make
ELIMINABLE_REGS dependent on -mlra.

It was also concluded that it is not desirable to adjust reload so that
it behaves like LRA, see

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116326#c8

This patch adds a new macro RELOAD_ELIMINABLE_REGS that takes
precedence over ELIMINABLE_REGS in reload1.cc.

The patch was proposed by H.J. Lu and is only required for trunk.

Can you add documentation to tm.texi please?  Otherwise looks good, but
please give others a chance to comment.

Thanks,
Richard.

Ok, here is a version with documentation.

Btw, currently neither libgcc nor libc will build with LRA.  LRA
should stabilize first before switching avr to LRA.  This still
applies with the current patch applied.

Johann

--

reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.

The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer.  As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.

        PR rtl-optimization/116326
gcc/
        * reload1.cc (reg_eliminate_1): Initialize from
        RELOAD_ELIMINABLE_REGS if defined.
        * config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
        (ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
        * doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
        <RELOAD_ELIMINABLE_REGS>: Add documentation.
        * doc/tm.texi: Rebuild.
/testsuite/
        * gcc.target/avr/torture/lra-pr116324.c: New test.
        * gcc.target/avr/torture/lra-pr116325.c: New test.
    reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
    
    The new macro is required because reload and LRA are using different
    representations for a multi-register frame pointer.  As ELIMINABLE_REGS
    is used to initialize static const objects, it can't depend on -mlra.
    
            PR rtl-optimization/116326
    gcc/
            * reload1.cc (reg_eliminate_1): Initialize from
            RELOAD_ELIMINABLE_REGS if defined.
            * config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
            (ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
            * doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
            <RELOAD_ELIMINABLE_REGS>: Add documentation.
            * doc/tm.texi: Rebuild.
    /testsuite/
            * gcc.target/avr/torture/lra-pr116324.c: New test.
            * gcc.target/avr/torture/lra-pr116325.c: New test.

diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 1cf4180e534..3fa2ee76c43 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -308,12 +308,19 @@ enum reg_class {
 
 #define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
 
-#define ELIMINABLE_REGS {					\
+#define RELOAD_ELIMINABLE_REGS {				\
     { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },               \
     { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },               \
     { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },             \
     { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
 
+#define ELIMINABLE_REGS						\
+  {								\
+    { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },		\
+    { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },		\
+    { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }		\
+  }
+
 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)			\
   OFFSET = avr_initial_elimination_offset (FROM, TO)
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cc33084ed32..9e520429ba9 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4005,6 +4005,14 @@ Note that the elimination of the argument pointer with the stack pointer is
 specified first since that is the preferred elimination.
 @end defmac
 
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}.  This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
 @deftypefn {Target Hook} bool TARGET_CAN_ELIMINATE (const int @var{from_reg}, const int @var{to_reg})
 This target hook should return @code{true} if the compiler is allowed to
 try to replace register number @var{from_reg} with register number
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 8af3f414505..a34674e33c9 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3179,6 +3179,14 @@ Note that the elimination of the argument pointer with the stack pointer is
 specified first since that is the preferred elimination.
 @end defmac
 
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}.  This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
 @hook TARGET_CAN_ELIMINATE
 
 @defmac INITIAL_ELIMINATION_OFFSET (@var{from-reg}, @var{to-reg}, @var{offset-var})
diff --git a/gcc/reload1.cc b/gcc/reload1.cc
index 2e059b09970..b0ae64e10b2 100644
--- a/gcc/reload1.cc
+++ b/gcc/reload1.cc
@@ -283,7 +283,13 @@ static const struct elim_table_1
   const int to;
 } reg_eliminate_1[] =
 
+  // Reload and LRA don't agree on how a multi-register frame pointer
+  // is represented for elimination.  See avr.h for a use case.
+#ifdef RELOAD_ELIMINABLE_REGS
+  RELOAD_ELIMINABLE_REGS;
+#else
   ELIMINABLE_REGS;
+#endif
 
 #define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1)
 
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
new file mode 100644
index 00000000000..b54eb402d8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
@@ -0,0 +1,86 @@
+/* { dg-options { -std=gnu99 } } */
+
+void f7_clr (void *cc)
+{
+  __asm ("%~call __f7_clr_asm" :: "z" (cc) : "memory");
+}
+
+void* f7_copy (void *cc, const void *aa)
+{
+  extern void __f7_copy_asm (void);
+  __asm ("%~call __f7_copy_asm" :: "z" (cc), "x" (aa) : "memory");
+  return cc;
+}
+
+typedef _Bool bool;
+typedef unsigned int uint16_t;
+typedef unsigned char uint8_t;
+typedef int int16_t;
+
+typedef struct f7_t
+{
+  union
+  {
+    struct
+    {
+      uint8_t sign :1;
+      uint8_t reserved1 :1;
+      uint8_t is_nan :1;
+      uint8_t reserved2 :4;
+      uint8_t is_inf :1;
+    };
+    uint8_t flags;
+  };
+
+  uint8_t mant[7];
+  int16_t expo;
+} f7_t;
+
+
+static inline __attribute__((__always_inline__))
+void __f7_clr (f7_t *cc)
+{
+  extern void __f7_clr_asm (void);
+  __asm ("%~call %x[f]"
+  :
+  : [f] "i" (__f7_clr_asm), "z" (cc)
+  : "memory");
+}
+
+static inline __attribute__((__always_inline__))
+bool __f7_signbit (const f7_t *aa)
+{
+  return aa->flags & (1 << 0);
+}
+
+static inline __attribute__((__always_inline__))
+int16_t sub_ssat16 (int16_t a, int16_t b)
+{
+  _Sat _Fract sa = __builtin_avr_rbits (a);
+  _Sat _Fract sb = __builtin_avr_rbits (b);
+  return __builtin_avr_bitsr (sa - sb);
+}
+
+extern void __f7_Iadd (f7_t*, const f7_t*);
+extern void __f7_addsub (f7_t*, const f7_t*, const f7_t*, bool neg_b);
+extern uint8_t __f7_mulx (f7_t*, const f7_t*, const f7_t*, bool);
+extern f7_t* __f7_normalize_asm (f7_t*);
+
+void __f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd,
+                   bool neg_d)
+{
+  f7_t xx7, *xx = &xx7;
+  uint8_t x_lsb = __f7_mulx (xx, aa, bb, 1 );
+  uint8_t x_sign = __f7_signbit (xx);
+  int16_t x_expo = xx->expo;
+  __f7_addsub (xx, xx, dd, neg_d);
+
+
+  __f7_clr (cc);
+  cc->expo = sub_ssat16 (x_expo, (8 * 7));
+  cc->mant[7 - 1] = x_lsb;
+  cc = __f7_normalize_asm (cc);
+  cc->flags = x_sign;
+  __f7_Iadd (cc, xx);
+}
+
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
new file mode 100644
index 00000000000..747e9a0f219
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
@@ -0,0 +1,117 @@
+typedef __SIZE_TYPE__ size_t;
+typedef __UINT8_TYPE__ uint8_t;
+
+void swapfunc (char *a, char *b, int n)
+{
+    do
+    {
+        char t = *a;
+        *a++ = *b;
+        *b++ = t;
+    } while (--n > 0);
+}
+
+
+typedef int cmp_t (const void*, const void*);
+
+#define min(a, b)       ((a) < (b) ? (a) : (b))
+
+#define swap(a, b) \
+    swapfunc (a, b, es)
+
+#define vecswap(a, b, n) \
+    if ((n) > 0) swapfunc (a, b, n)
+
+static char*
+med3 (char *a, char *b, char *c, cmp_t *cmp)
+{
+    return cmp (a, b) < 0
+        ? (cmp (b, c) < 0 ? b : (cmp (a, c) < 0 ? c : a ))
+        : (cmp (b, c) > 0 ? b : (cmp (a, c) < 0 ? a : c ));
+}
+
+void
+qsort (void *a, size_t n, size_t es, cmp_t *cmp)
+{
+    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+    int d, r, swap_cnt;
+
+loop:
+    swap_cnt = 0;
+    if (n < 7)
+    {
+        for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+            for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+                swap (pl, pl - es);
+        return;
+    }
+    pm = (char*) a + (n / 2) * es;
+    if (n > 7)
+    {
+        pl = a;
+        pn = (char*) a + (n - 1) * es;
+        if (n > 40)
+        {
+            d = (n / 8) * es;
+            pl = med3 (pl, pl + d, pl + 2 * d, cmp);
+            pm = med3 (pm - d, pm, pm + d, cmp);
+            pn = med3 (pn - 2 * d, pn - d, pn, cmp);
+        }
+        pm = med3 (pl, pm, pn, cmp);
+    }
+    swap (a, pm);
+    pa = pb = (char*) a + es;
+
+    pc = pd = (char*) a + (n - 1) * es;
+    for (;;)
+    {
+        while (pb <= pc && (r = cmp (pb, a)) <= 0)
+        {
+            if (r == 0)
+            {
+                swap_cnt = 1;
+                swap (pa, pb);
+                pa += es;
+            }
+            pb += es;
+        }
+        while (pb <= pc && (r = cmp (pc, a)) >= 0)
+        {
+            if (r == 0)
+            {
+                swap_cnt = 1;
+                swap (pc, pd);
+                pd -= es;
+            }
+            pc -= es;
+        }
+        if (pb > pc)
+            break;
+        swap (pb, pc);
+        swap_cnt = 1;
+        pb += es;
+        pc -= es;
+    }
+    if (swap_cnt == 0)
+    {
+        for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+            for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+                swap (pl, pl - es);
+        return;
+    }
+
+    pn = (char*) a + n * es;
+    r = min (pa - (char*) a, pb - pa);
+    vecswap (a, pb - r, r);
+    r = min (pd - pc, (int) (pn - pd - es));
+    vecswap (pb, pn - r, r);
+    if ((r = pb - pa) > (int) es)
+        qsort(a, r / es, es, cmp);
+    if ((r = pd - pc) > (int) es)
+    {
+        /* Iterate rather than recurse to save stack space */
+        a = pn - r;
+        n = r / es;
+        goto loop;
+    }
+}

Reply via email to