The is patch adds the new target macro RELOAD_ELIMINABLE_REGS
that's needed during the reload -> LRA transition because reload
and LRA disagree on how ELIMINABLE_REGS should represent multi-register
frame pointer eliminations. See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116326#c2
As ELIMINABLE_REGS is used to initialize static const objects, it is not
possible to use options like -mlra in ELIMINABLE_REGS.
The reload1.cc hunk is just
diff --git a/gcc/reload1.cc b/gcc/reload1.cc
index 2e059b09970..b0ae64e10b2 100644
--- a/gcc/reload1.cc
+++ b/gcc/reload1.cc
@@ -283,7 +283,13 @@ static const struct elim_table_1
const int to;
} reg_eliminate_1[] =
+ // Reload and LRA don't agree on how a multi-register frame pointer
+ // is represented for elimination. See avr.h for a use case.
+#ifdef RELOAD_ELIMINABLE_REGS
+ RELOAD_ELIMINABLE_REGS;
+#else
ELIMINABLE_REGS;
+#endif
#define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1)
so that RELOAD_ELIMINABLE_REGS takes precedence over ELIMINABLE_REGS
in reload1.cc. This is the only change to the middle-end.
On the avr side, the patch fixes PR116326, PR116325 and PR116324.
Though even with this patch in place, neither libgcc nor libc
will build for avr due to LRA issues like PR116550.
Bootstrapped + regtested on x86_64.
Ok for trunk?
Johann
reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer. As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.
PR rtl-optimization/116326
gcc/
* reload1.cc (reg_eliminate_1): Initialize from
RELOAD_ELIMINABLE_REGS if defined.
* config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from
ELIMINABLE_REGS.
(ELIMINABLE_REGS): Don't mention sub-regnos of the frame
pointer.
* doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
<RELOAD_ELIMINABLE_REGS>: Add documentation.
* doc/tm.texi: Rebuild.
gcc/testsuite/
* gcc.target/avr/torture/lra-pr116324.c: New test.
* gcc.target/avr/torture/lra-pr116325.c: New test.
reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer. As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.
PR rtl-optimization/116326
gcc/
* reload1.cc (reg_eliminate_1): Initialize from
RELOAD_ELIMINABLE_REGS if defined.
* config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
(ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
* doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
<RELOAD_ELIMINABLE_REGS>: Add documentation.
* doc/tm.texi: Rebuild.
/testsuite/
* gcc.target/avr/torture/lra-pr116324.c: New test.
* gcc.target/avr/torture/lra-pr116325.c: New test.
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 1cf4180e534..3fa2ee76c43 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -308,12 +308,19 @@ enum reg_class {
#define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
-#define ELIMINABLE_REGS { \
+#define RELOAD_ELIMINABLE_REGS { \
{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
{ ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
{ FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
+#define ELIMINABLE_REGS \
+ { \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM } \
+ }
+
#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
OFFSET = avr_initial_elimination_offset (FROM, TO)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cc33084ed32..9e520429ba9 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4005,6 +4005,14 @@ Note that the elimination of the argument pointer with the stack pointer is
specified first since that is the preferred elimination.
@end defmac
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}. This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
@deftypefn {Target Hook} bool TARGET_CAN_ELIMINATE (const int @var{from_reg}, const int @var{to_reg})
This target hook should return @code{true} if the compiler is allowed to
try to replace register number @var{from_reg} with register number
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 8af3f414505..a34674e33c9 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3179,6 +3179,14 @@ Note that the elimination of the argument pointer with the stack pointer is
specified first since that is the preferred elimination.
@end defmac
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}. This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
@hook TARGET_CAN_ELIMINATE
@defmac INITIAL_ELIMINATION_OFFSET (@var{from-reg}, @var{to-reg}, @var{offset-var})
diff --git a/gcc/reload1.cc b/gcc/reload1.cc
index 2e059b09970..b0ae64e10b2 100644
--- a/gcc/reload1.cc
+++ b/gcc/reload1.cc
@@ -283,7 +283,13 @@ static const struct elim_table_1
const int to;
} reg_eliminate_1[] =
+ // Reload and LRA don't agree on how a multi-register frame pointer
+ // is represented for elimination. See avr.h for a use case.
+#ifdef RELOAD_ELIMINABLE_REGS
+ RELOAD_ELIMINABLE_REGS;
+#else
ELIMINABLE_REGS;
+#endif
#define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1)
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
new file mode 100644
index 00000000000..b54eb402d8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c
@@ -0,0 +1,86 @@
+/* { dg-options { -std=gnu99 } } */
+
+void f7_clr (void *cc)
+{
+ __asm ("%~call __f7_clr_asm" :: "z" (cc) : "memory");
+}
+
+void* f7_copy (void *cc, const void *aa)
+{
+ extern void __f7_copy_asm (void);
+ __asm ("%~call __f7_copy_asm" :: "z" (cc), "x" (aa) : "memory");
+ return cc;
+}
+
+typedef _Bool bool;
+typedef unsigned int uint16_t;
+typedef unsigned char uint8_t;
+typedef int int16_t;
+
+typedef struct f7_t
+{
+ union
+ {
+ struct
+ {
+ uint8_t sign :1;
+ uint8_t reserved1 :1;
+ uint8_t is_nan :1;
+ uint8_t reserved2 :4;
+ uint8_t is_inf :1;
+ };
+ uint8_t flags;
+ };
+
+ uint8_t mant[7];
+ int16_t expo;
+} f7_t;
+
+
+static inline __attribute__((__always_inline__))
+void __f7_clr (f7_t *cc)
+{
+ extern void __f7_clr_asm (void);
+ __asm ("%~call %x[f]"
+ :
+ : [f] "i" (__f7_clr_asm), "z" (cc)
+ : "memory");
+}
+
+static inline __attribute__((__always_inline__))
+bool __f7_signbit (const f7_t *aa)
+{
+ return aa->flags & (1 << 0);
+}
+
+static inline __attribute__((__always_inline__))
+int16_t sub_ssat16 (int16_t a, int16_t b)
+{
+ _Sat _Fract sa = __builtin_avr_rbits (a);
+ _Sat _Fract sb = __builtin_avr_rbits (b);
+ return __builtin_avr_bitsr (sa - sb);
+}
+
+extern void __f7_Iadd (f7_t*, const f7_t*);
+extern void __f7_addsub (f7_t*, const f7_t*, const f7_t*, bool neg_b);
+extern uint8_t __f7_mulx (f7_t*, const f7_t*, const f7_t*, bool);
+extern f7_t* __f7_normalize_asm (f7_t*);
+
+void __f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd,
+ bool neg_d)
+{
+ f7_t xx7, *xx = &xx7;
+ uint8_t x_lsb = __f7_mulx (xx, aa, bb, 1 );
+ uint8_t x_sign = __f7_signbit (xx);
+ int16_t x_expo = xx->expo;
+ __f7_addsub (xx, xx, dd, neg_d);
+
+
+ __f7_clr (cc);
+ cc->expo = sub_ssat16 (x_expo, (8 * 7));
+ cc->mant[7 - 1] = x_lsb;
+ cc = __f7_normalize_asm (cc);
+ cc->flags = x_sign;
+ __f7_Iadd (cc, xx);
+}
+
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
new file mode 100644
index 00000000000..747e9a0f219
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c
@@ -0,0 +1,117 @@
+typedef __SIZE_TYPE__ size_t;
+typedef __UINT8_TYPE__ uint8_t;
+
+void swapfunc (char *a, char *b, int n)
+{
+ do
+ {
+ char t = *a;
+ *a++ = *b;
+ *b++ = t;
+ } while (--n > 0);
+}
+
+
+typedef int cmp_t (const void*, const void*);
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#define swap(a, b) \
+ swapfunc (a, b, es)
+
+#define vecswap(a, b, n) \
+ if ((n) > 0) swapfunc (a, b, n)
+
+static char*
+med3 (char *a, char *b, char *c, cmp_t *cmp)
+{
+ return cmp (a, b) < 0
+ ? (cmp (b, c) < 0 ? b : (cmp (a, c) < 0 ? c : a ))
+ : (cmp (b, c) > 0 ? b : (cmp (a, c) < 0 ? a : c ));
+}
+
+void
+qsort (void *a, size_t n, size_t es, cmp_t *cmp)
+{
+ char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+ int d, r, swap_cnt;
+
+loop:
+ swap_cnt = 0;
+ if (n < 7)
+ {
+ for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+ for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+ swap (pl, pl - es);
+ return;
+ }
+ pm = (char*) a + (n / 2) * es;
+ if (n > 7)
+ {
+ pl = a;
+ pn = (char*) a + (n - 1) * es;
+ if (n > 40)
+ {
+ d = (n / 8) * es;
+ pl = med3 (pl, pl + d, pl + 2 * d, cmp);
+ pm = med3 (pm - d, pm, pm + d, cmp);
+ pn = med3 (pn - 2 * d, pn - d, pn, cmp);
+ }
+ pm = med3 (pl, pm, pn, cmp);
+ }
+ swap (a, pm);
+ pa = pb = (char*) a + es;
+
+ pc = pd = (char*) a + (n - 1) * es;
+ for (;;)
+ {
+ while (pb <= pc && (r = cmp (pb, a)) <= 0)
+ {
+ if (r == 0)
+ {
+ swap_cnt = 1;
+ swap (pa, pb);
+ pa += es;
+ }
+ pb += es;
+ }
+ while (pb <= pc && (r = cmp (pc, a)) >= 0)
+ {
+ if (r == 0)
+ {
+ swap_cnt = 1;
+ swap (pc, pd);
+ pd -= es;
+ }
+ pc -= es;
+ }
+ if (pb > pc)
+ break;
+ swap (pb, pc);
+ swap_cnt = 1;
+ pb += es;
+ pc -= es;
+ }
+ if (swap_cnt == 0)
+ {
+ for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es)
+ for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es)
+ swap (pl, pl - es);
+ return;
+ }
+
+ pn = (char*) a + n * es;
+ r = min (pa - (char*) a, pb - pa);
+ vecswap (a, pb - r, r);
+ r = min (pd - pc, (int) (pn - pd - es));
+ vecswap (pb, pn - r, r);
+ if ((r = pb - pa) > (int) es)
+ qsort(a, r / es, es, cmp);
+ if ((r = pd - pc) > (int) es)
+ {
+ /* Iterate rather than recurse to save stack space */
+ a = pn - r;
+ n = r / es;
+ goto loop;
+ }
+}