REGMODE_NATURAL_SIZE is set to 64 bits for everything except VLA SVE modes. This means that it's possible to modify (say) the highpart of a TI pseudo or a V2DI pseudo independently of the lowpart. Modifying such highparts requires a reload if the highpart ends up in the upper 64 bits of an FPR, since RTL semantics do not allow the highpart of a single hard register to be modified independently of the lowpart.
early-ra missed a check for this case, which meant that it effectively
treated an assignment to (subreg:DI (reg:TI R) 0) as an assignment to
the whole of R.

Tested on aarch64-linux-gnu & pushed to trunk. I'll backport to GCC 14
after a grace period.

Richard


gcc/
	PR target/118184
	* config/aarch64/aarch64-early-ra.cc (allocno_assignment_is_rmw): New
	function.
	(early_ra::record_insn_defs): Mark the live range information as
	untrustworthy if an assignment would change part of an allocno but
	preserve the rest.

gcc/testsuite/
	* gcc.dg/torture/pr118184.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc  | 51 ++++++++++++++++++++++++-
 gcc/testsuite/gcc.dg/torture/pr118184.c | 36 +++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr118184.c

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 660a47195d2..479fe56b4d8 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -2033,6 +2033,43 @@ early_ra::record_artificial_refs (unsigned int flags)
   m_current_point += 1;
 }
 
+// Return true if:
+//
+// - X is a SUBREG, in which case it is a SUBREG of some REG Y
+//
+// - one 64-bit word of Y can be modified while preserving all other words
+//
+// - X refers to no more than one 64-bit word of Y
+//
+// - assigning FPRs to Y would put more than one 64-bit word in each FPR
+//
+// For example, this is true of:
+//
+// - (subreg:DI (reg:TI R) 0) and
+// - (subreg:DI (reg:TI R) 8)
+//
+// but is not true of:
+//
+// - (subreg:V2SI (reg:V2x2SI R) 0) or
+// - (subreg:V2SI (reg:V2x2SI R) 8).
+static bool +allocno_assignment_is_rmw (rtx x) +{ + if (partial_subreg_p (x)) + { + auto outer_mode = GET_MODE (x); + auto inner_mode = GET_MODE (SUBREG_REG (x)); + if (known_eq (REGMODE_NATURAL_SIZE (inner_mode), 0U + UNITS_PER_WORD) + && known_lt (GET_MODE_SIZE (outer_mode), UNITS_PER_VREG)) + { + auto nregs = targetm.hard_regno_nregs (V0_REGNUM, inner_mode); + if (maybe_ne (nregs * UNITS_PER_WORD, GET_MODE_SIZE (inner_mode))) + return true; + } + } + return false; +} + // Called as part of a backwards walk over a block. Model the definitions // in INSN, excluding partial call clobbers. void @@ -2045,9 +2082,21 @@ early_ra::record_insn_defs (rtx_insn *insn) record_fpr_def (DF_REF_REGNO (ref)); else { - auto range = get_allocno_subgroup (DF_REF_REG (ref)); + rtx reg = DF_REF_REG (ref); + auto range = get_allocno_subgroup (reg); for (auto &allocno : range.allocnos ()) { + // Make sure that assigning to the DF_REF_REG clobbers the + // whole of this allocno, not just some of it. + if (allocno_assignment_is_rmw (reg)) + { + record_live_range_failure ([&](){ + fprintf (dump_file, "read-modify-write of allocno %d", + allocno.id); + }); + break; + } + // If the destination is unused, record a momentary blip // in its live range. 
if (!bitmap_bit_p (m_live_allocnos, allocno.id)) diff --git a/gcc/testsuite/gcc.dg/torture/pr118184.c b/gcc/testsuite/gcc.dg/torture/pr118184.c new file mode 100644 index 00000000000..20f567af11f --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr118184.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { longdouble128 && lp64 } } } */ + +union u1 +{ + long double ld; + unsigned long l[2]; +}; + +[[gnu::noipa]] +unsigned long m() +{ + return 1000; +} + +[[gnu::noinline]] +long double f(void) +{ + union u1 u; + u.ld = __builtin_nanf128(""); + u.l[0] = m(); + return u.ld; +} + +int main() +{ + union u1 u; + u.ld = f(); + union u1 u2; + u2.ld = __builtin_nanf128(""); + u2.l[0] = m(); + if (u.l[0] != u2.l[0]) + __builtin_abort(); + if (u.l[1] != u2.l[1]) + __builtin_abort(); + return 0; +} -- 2.25.1