From: Stefan Schulze Frielinghaus <[email protected]>
This fixes
asm-hard-reg-3.c:10:1: error: unrecognizable insn:
10 | }
| ^
(insn 9 18 14 2 (parallel [
(set (reg:DI 0 ax [orig:99 x ] [99])
(asm_operands:DI ("") ("=r") 0 [
(reg:SI 0 ax [100])
(reg:DI 1 dx [105]) repeated x2
]
[
(asm_input:SI ("0") asm-hard-reg-3.c:8)
(asm_input:DI ("r") asm-hard-reg-3.c:8)
(asm_input:DI ("{r8}") asm-hard-reg-3.c:8)
]
[] asm-hard-reg-3.c:8))
(clobber (reg:CC 17 flags))
]) "asm-hard-reg-3.c":8:3 -1
(nil))
during RTL pass: reload
During get_reload_reg() a reload register may be reused, and so far the
exclude start hard registers were not taken into account when doing so.
For the test case this means operands 2 and 3 use the same reload
register, which gets dx assigned although the constraint of operand 3
refers to register r8. That in turn renders the insn unsatisfiable.
A conservative approach would be to simply not reuse any reload register
whenever the set of exclude start hard regs is non-empty. However, this
would lead to some missed optimizations, as in this example, where
operands 2 and 3 would then land in different registers. Therefore, if
the existing reload and the new operand still have at least one allowed
start hard register in common, reuse the reload and extend its set of
exclude start hard regs accordingly.
I only have a test case for inputs. However, I expect an analogous
problem for outputs, which is why I adapted that case, too.
gcc/ChangeLog:
* lra-constraints.cc (get_reload_reg): Honor exclude start regs
while reusing reloads.
gcc/testsuite/ChangeLog:
* gcc.dg/asm-hard-reg-9.c: New test.
---
Bootstrapped and regtested on s390 and x86_64. Ok for mainline?
gcc/lra-constraints.cc | 21 ++++++++++++++++++++-
gcc/testsuite/gcc.dg/asm-hard-reg-9.c | 15 +++++++++++++++
2 files changed, 35 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-9.c
diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 48ce75781d4..f4223dd8e51 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -686,7 +686,11 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
&& (int) REGNO (original) >= new_regno_start
&& (INSN_UID (curr_insn) >= new_insn_uid_start
|| ira_former_scratch_p (REGNO (original)))
- && in_class_p (original, rclass, &new_class, true))
+ && in_class_p (original, rclass, &new_class, true)
+ && (exclude_start_hard_regs == nullptr
+ || hard_reg_set_intersect_p (
+ ~lra_reg_info[REGNO (original)].exclude_start_hard_regs,
+ ~*exclude_start_hard_regs)))
{
unsigned int regno = REGNO (original);
if (lra_dump_file != NULL)
@@ -698,6 +702,9 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
lra_change_class (regno, new_class, ", change to", false);
if (lra_dump_file != NULL)
fprintf (lra_dump_file, "\n");
+ if (exclude_start_hard_regs)
+ lra_reg_info[regno].exclude_start_hard_regs
+ |= *exclude_start_hard_regs;
*result_reg = original;
return false;
}
@@ -734,6 +741,18 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
if (reg == NULL_RTX || GET_CODE (reg) != SUBREG)
continue;
}
+ /* If the existing reload and this have no start hard register in
+ common, then skip. Otherwise update exclude_start_hard_regs. */
+ if (exclude_start_hard_regs
+ && ! hard_reg_set_empty_p (*exclude_start_hard_regs))
+ {
+ HARD_REG_SET r = lra_reg_info[regno].exclude_start_hard_regs
+ | *exclude_start_hard_regs;
+ if (hard_reg_set_empty_p (~r))
+ continue;
+ else
+ lra_reg_info[regno].exclude_start_hard_regs = r;
+ }
*result_reg = reg;
if (lra_dump_file != NULL)
{
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-9.c b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c
new file mode 100644
index 00000000000..0866cb4554a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Ensure that if the reload register for operand 2 is reused for operand 3,
+ that exclude start hard regs coming from operand 3 are taken into account.
+ Otherwise a different register than r8 may be chosen rendering the insn
+ after LRA unsatisfiable. */
+
+long
+test ()
+{
+ long x;
+ __asm__ ("" : "=r" (x) : "0" (1000), "r" (0l), "{r8}" (0l));
+ return x;
+}
--
2.49.0