https://gcc.gnu.org/g:06c41504bd4a23c3f5848793fda503c30fe51353
commit r16-2207-g06c41504bd4a23c3f5848793fda503c30fe51353 Author: Vladimir N. Makarov <vmaka...@redhat.com> Date: Fri Jul 11 11:27:54 2025 -0400 [PR121007, LRA]: Fall back to reload of whole inner address in PR case and constrain iteration number of address reloads gcc/ChangeLog: * lra-constraints.cc (process_address_1): When changing base reg on a reg of the base class, fall back to reload of whole inner address. (process_address): Constrain the iteration number. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr121007.c: New. Diff: --- gcc/lra-constraints.cc | 41 ++++++++++++++++++++--------- gcc/testsuite/gcc.target/powerpc/pr121007.c | 40 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index 68aaf863a97f..274b52cd6176 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -3930,6 +3930,16 @@ process_address_1 (int nop, bool check_only_p, enum reg_class cl; rtx set; rtx_insn *insns, *last_insn; + + cl = base_reg_class (ad.mode, ad.as, ad.base_outer_code, + get_index_code (&ad), curr_insn); + + if (REG_P (*ad.base_term) + && ira_class_subset_p[get_reg_class (REGNO (*ad.base_term))][cl]) + /* It seems base reg is already in the base reg class and changing it + does not make a progress. So reload the whole inner address. */ + goto reload_inner_addr; + /* Try to reload base into register only if the base is invalid for the address but with valid offset, case (4) above. */ start_sequence (); @@ -3975,8 +3985,6 @@ process_address_1 (int nop, bool check_only_p, { *ad.base_term = XEXP (SET_SRC (set), 0); *ad.disp_term = XEXP (SET_SRC (set), 1); - cl = base_reg_class (ad.mode, ad.as, ad.base_outer_code, - get_index_code (&ad), curr_insn); regno = REGNO (*ad.base_term); if (regno >= FIRST_PSEUDO_REGISTER && cl != lra_get_allocno_class (regno)) @@ -4019,11 +4027,11 @@ process_address_1 (int nop, bool check_only_p, } else { - enum reg_class cl = base_reg_class (ad.mode, ad.as, - SCRATCH, SCRATCH, - curr_insn); - rtx addr = *ad.inner; - + enum reg_class cl; + rtx addr; + reload_inner_addr: + cl = base_reg_class (ad.mode, ad.as, SCRATCH, SCRATCH, curr_insn); + addr = *ad.inner; new_reg = lra_create_new_reg (Pmode, NULL_RTX, cl, NULL, "addr"); /* addr => new_base. */ lra_emit_move (new_reg, addr); @@ -4044,14 +4052,21 @@ process_address (int nop, bool check_only_p, rtx_insn **before, rtx_insn **after) { bool res = false; - - while (process_address_1 (nop, check_only_p, before, after)) + /* Use enough iterations to process all address parts: */ + for (int i = 0; i < 10; i++) { - if (check_only_p) - return true; - res = true; + if (!process_address_1 (nop, check_only_p, before, after)) + { + return res; + } + else + { + if (check_only_p) + return true; + res = true; + } } - return res; + fatal_insn ("unable to reload address in ", curr_insn); } /* Override the generic address_reload_context in order to diff --git a/gcc/testsuite/gcc.target/powerpc/pr121007.c b/gcc/testsuite/gcc.target/powerpc/pr121007.c new file mode 100644 index 000000000000..9e6b1be7911f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr121007.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */ + +typedef struct { int a; } A; +unsigned char *a; +char b; +int c; +void foo (vector char, vector char, vector char); + +void +bar (long stride) +{ + vector char v0, v1, v2, v3, v5; + vector char r0 = __builtin_vec_vsx_ld (0, a); + vector char r2 = __builtin_vec_vsx_ld (2 * stride, a - 3); + vector char r3 = __builtin_vec_vsx_ld (3 * stride, a - 3); + vector char r4; + vector char r6 = __builtin_vec_vsx_ld (6 * stride, a - 3); + vector char r7 = __builtin_vec_vsx_ld (7 * stride, a - 3); + vector char r14, h, i, j; + if (b) + return; + v1 = __builtin_vec_vsx_ld (9 * stride, a); + v2 = __builtin_vec_vsx_ld (10 * stride, a - 3); + v3 = __builtin_vec_vsx_ld (11 * stride, a - 3); + r3 = __builtin_vec_mergeh (r3, v3); + v5 = __builtin_vec_mergel (r2, r6); + r14 = __builtin_vec_mergeh (r3, r7); + r4 = __builtin_vec_mergeh (v2, r14); + v0 = __builtin_vec_mergeh (r0, r4); + union { unsigned char a[16]; A b; } temp; + vector signed char k; + h = __builtin_vec_ld (0, temp.a); + i = __builtin_vec_splat (h, 1); + temp.b.a = c; + k = __builtin_vec_ld (0, (signed char *) temp.a); + j = __builtin_vec_and (i, (vector char) k); + foo (v1, v0, j); + foo (v1, v5, j); +}