The reason for PR116326 is that LRA and reload require different
ELIMINABLE_REGS for a multi-register frame pointer.  As ELIMINABLE_REGS
is used to initialize static const objects, it is not possible to make
ELIMINABLE_REGS dependent on -mlra.

It was also concluded that it is not desirable to adjust reload so that
it behaves like LRA, see

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116326#c8

This patch adds a new macro RELOAD_ELIMINABLE_REGS that takes
precedence over ELIMINABLE_REGS in reload1.cc.

The patch was proposed by H.J. Lu and is only required for trunk.

Johann

--

reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.

The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer.  As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.

        PR rtl-optimization/116326
gcc/
        * reload1.cc (reg_eliminate_1): Initialize from
        RELOAD_ELIMINABLE_REGS if defined.
        * config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
        (ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
gcc/testsuite/
        * gcc.target/avr/torture/lra-pr116324.c: New test.
        * gcc.target/avr/torture/lra-pr116325.c: New test.
    reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
    
    The new macro is required because reload and LRA are using different
    representations for a multi-register frame pointer.  As ELIMINABLE_REGS
    is used to initialize static const objects, it can't depend on -mlra.
    
            PR rtl-optimization/116326
    gcc/
            * reload1.cc (reg_eliminate_1): Initialize from
            RELOAD_ELIMINABLE_REGS if defined.
            * config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
            (ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
    gcc/testsuite/
            * gcc.target/avr/torture/lra-pr116324.c: New test.
            * gcc.target/avr/torture/lra-pr116325.c: New test.

diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 1cf4180e534..3fa2ee76c43 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -308,12 +308,19 @@ enum reg_class {
 
 #define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
 
-#define ELIMINABLE_REGS {					\
+#define RELOAD_ELIMINABLE_REGS {				\
     { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },               \
     { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },               \
     { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },             \
     { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
 
+#define ELIMINABLE_REGS						\
+  {								\
+    { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },		\
+    { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },		\
+    { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }		\
+  }
+
 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)			\
   OFFSET = avr_initial_elimination_offset (FROM, TO)
 
diff --git a/gcc/reload1.cc b/gcc/reload1.cc
index 2e059b09970..b0ae64e10b2 100644
--- a/gcc/reload1.cc
+++ b/gcc/reload1.cc
@@ -283,7 +283,13 @@ static const struct elim_table_1
   const int to;
 } reg_eliminate_1[] =
 
+  // Reload and LRA don't agree on how a multi-register frame pointer
+  // is represented for elimination.  See avr.h for a use case.
+#ifdef RELOAD_ELIMINABLE_REGS
+  RELOAD_ELIMINABLE_REGS;
+#else
   ELIMINABLE_REGS;
+#endif
 
 #define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1)
 
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
new file mode 100644
index 00000000000..b54eb402d8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
@@ -0,0 +1,86 @@
+/* { dg-options { -std=gnu99 } } */
+
+void f7_clr (void *cc)
+{
+  __asm ("%~call __f7_clr_asm" :: "z" (cc) : "memory");
+}
+
+void* f7_copy (void *cc, const void *aa)
+{
+  extern void __f7_copy_asm (void);
+  __asm ("%~call __f7_copy_asm" :: "z" (cc), "x" (aa) : "memory");
+  return cc;
+}
+
+typedef _Bool bool;
+typedef unsigned int uint16_t;
+typedef unsigned char uint8_t;
+typedef int int16_t;
+
+typedef struct f7_t
+{
+  union
+  {
+    struct
+    {
+      uint8_t sign :1;
+      uint8_t reserved1 :1;
+      uint8_t is_nan :1;
+      uint8_t reserved2 :4;
+      uint8_t is_inf :1;
+    };
+    uint8_t flags;
+  };
+
+  uint8_t mant[7];
+  int16_t expo;
+} f7_t;
+
+
+static inline __attribute__((__always_inline__))
+void __f7_clr (f7_t *cc)
+{
+  extern void __f7_clr_asm (void);
+  __asm ("%~call %x[f]"
+  :
+  : [f] "i" (__f7_clr_asm), "z" (cc)
+  : "memory");
+}
+
+static inline __attribute__((__always_inline__))
+bool __f7_signbit (const f7_t *aa)
+{
+  return aa->flags & (1 << 0);
+}
+
+static inline __attribute__((__always_inline__))
+int16_t sub_ssat16 (int16_t a, int16_t b)
+{
+  _Sat _Fract sa = __builtin_avr_rbits (a);
+  _Sat _Fract sb = __builtin_avr_rbits (b);
+  return __builtin_avr_bitsr (sa - sb);
+}
+
+extern void __f7_Iadd (f7_t*, const f7_t*);
+extern void __f7_addsub (f7_t*, const f7_t*, const f7_t*, bool neg_b);
+extern uint8_t __f7_mulx (f7_t*, const f7_t*, const f7_t*, bool);
+extern f7_t* __f7_normalize_asm (f7_t*);
+
+void __f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd,
+                   bool neg_d)
+{
+  f7_t xx7, *xx = &xx7;
+  uint8_t x_lsb = __f7_mulx (xx, aa, bb, 1 );
+  uint8_t x_sign = __f7_signbit (xx);
+  int16_t x_expo = xx->expo;
+  __f7_addsub (xx, xx, dd, neg_d);
+
+
+  __f7_clr (cc);
+  cc->expo = sub_ssat16 (x_expo, (8 * 7));
+  cc->mant[7 - 1] = x_lsb;
+  cc = __f7_normalize_asm (cc);
+  cc->flags = x_sign;
+  __f7_Iadd (cc, xx);
+}
+
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
new file mode 100644
index 00000000000..747e9a0f219
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
@@ -0,0 +1,117 @@
+typedef __SIZE_TYPE__ size_t;
+typedef __UINT8_TYPE__ uint8_t;
+
+void swapfunc (char *a, char *b, int n)
+{
+    do
+    {
+        char t = *a;
+        *a++ = *b;
+        *b++ = t;
+    } while (--n > 0);
+}
+
+
+typedef int cmp_t (const void*, const void*);
+
+#define min(a, b)       ((a) < (b) ? (a) : (b))
+
+#define swap(a, b) \
+    swapfunc (a, b, es)
+
+#define vecswap(a, b, n) \
+    if ((n) > 0) swapfunc (a, b, n)
+
+static char*
+med3 (char *a, char *b, char *c, cmp_t *cmp)
+{
+    return cmp (a, b) < 0
+        ? (cmp (b, c) < 0 ? b : (cmp (a, c) < 0 ? c : a ))
+        : (cmp (b, c) > 0 ? b : (cmp (a, c) < 0 ? a : c ));
+}
+
+void
+qsort (void *a, size_t n, size_t es, cmp_t *cmp)
+{
+    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+    int d, r, swap_cnt;
+
+loop:
+    swap_cnt = 0;
+    if (n < 7)
+    {
+        for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+            for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+                swap (pl, pl - es);
+        return;
+    }
+    pm = (char*) a + (n / 2) * es;
+    if (n > 7)
+    {
+        pl = a;
+        pn = (char*) a + (n - 1) * es;
+        if (n > 40)
+        {
+            d = (n / 8) * es;
+            pl = med3 (pl, pl + d, pl + 2 * d, cmp);
+            pm = med3 (pm - d, pm, pm + d, cmp);
+            pn = med3 (pn - 2 * d, pn - d, pn, cmp);
+        }
+        pm = med3 (pl, pm, pn, cmp);
+    }
+    swap (a, pm);
+    pa = pb = (char*) a + es;
+
+    pc = pd = (char*) a + (n - 1) * es;
+    for (;;)
+    {
+        while (pb <= pc && (r = cmp (pb, a)) <= 0)
+        {
+            if (r == 0)
+            {
+                swap_cnt = 1;
+                swap (pa, pb);
+                pa += es;
+            }
+            pb += es;
+        }
+        while (pb <= pc && (r = cmp (pc, a)) >= 0)
+        {
+            if (r == 0)
+            {
+                swap_cnt = 1;
+                swap (pc, pd);
+                pd -= es;
+            }
+            pc -= es;
+        }
+        if (pb > pc)
+            break;
+        swap (pb, pc);
+        swap_cnt = 1;
+        pb += es;
+        pc -= es;
+    }
+    if (swap_cnt == 0)
+    {
+        for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+            for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+                swap (pl, pl - es);
+        return;
+    }
+
+    pn = (char*) a + n * es;
+    r = min (pa - (char*) a, pb - pa);
+    vecswap (a, pb - r, r);
+    r = min (pd - pc, (int) (pn - pd - es));
+    vecswap (pb, pn - r, r);
+    if ((r = pb - pa) > (int) es)
+        qsort(a, r / es, es, cmp);
+    if ((r = pd - pc) > (int) es)
+    {
+        /* Iterate rather than recurse to save stack space */
+        a = pn - r;
+        n = r / es;
+        goto loop;
+    }
+}

Reply via email to