https://gcc.gnu.org/g:83332e3f808b146ca06dbc6a91d15bd3e5650658
commit r14-10398-g83332e3f808b146ca06dbc6a91d15bd3e5650658 Author: Wilco Dijkstra <wilco.dijks...@arm.com> Date: Fri Jul 5 17:31:25 2024 +0100 Arm: Fix ldrd offset range [PR115153] The valid offset range of LDRD in arm_legitimate_index_p is increased to -1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode. Fix this by moving the LDRD check earlier. gcc: PR target/115153 * config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before NEON. (thumb2_legitimate_index_p): Update comments. (output_move_neon): Use DFmode for vldr/vstr and non-checking adjust_address. gcc/testsuite: PR target/115153 * gcc.target/arm/pr115153.c: Add new test. * lib/target-supports.exp: Add arm_arch_v7ve_neon target support. (cherry picked from commit 44e5ecfd261afe72aa04eba4bf1a9ec782579cab) Diff: --- gcc/config/arm/arm.cc | 59 +++++++++++++++++---------------- gcc/testsuite/gcc.target/arm/pr115153.c | 16 +++++++++ gcc/testsuite/lib/target-supports.exp | 2 ++ 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index b8c32db0a1d7..912f2c315769 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -8852,6 +8852,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer, && INTVAL (index) > -1024 && (INTVAL (index) & 3) == 0); + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + /* This handles DFmode only if !TARGET_HARD_FLOAT. */ + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + + /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. + If vldr is selected it uses arm_coproc_mem_operand. */ + if (TARGET_LDRD) + return val > -256 && val < 256; + else + return val > -4096 && val < 4092; + } + + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); + } + /* For quad modes, we restrict the constant offset to be slightly less than what the instruction format permits. We do this because for quad mode moves, we will actually decompose them into two separate @@ -8864,7 +8886,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer, && (INTVAL (index) & 3) == 0); /* We have no such constraint on double mode offsets, so we permit the - full range of the instruction format. */ + full range of the instruction format. Note DImode is included here. */ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) return (code == CONST_INT && INTVAL (index) < 1024 @@ -8877,27 +8899,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer, && INTVAL (index) > -1024 && (INTVAL (index) & 3) == 0); - if (arm_address_register_rtx_p (index, strict_p) - && (GET_MODE_SIZE (mode) <= 4)) - return 1; - - if (mode == DImode || mode == DFmode) - { - if (code == CONST_INT) - { - HOST_WIDE_INT val = INTVAL (index); - - /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD. - If vldr is selected it uses arm_coproc_mem_operand. */ - if (TARGET_LDRD) - return val > -256 && val < 256; - else - return val > -4096 && val < 4092; - } - - return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); - } - if (GET_MODE_SIZE (mode) <= 4 && ! (arm_arch4 && (mode == HImode @@ -9000,7 +9001,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p) && (INTVAL (index) & 3) == 0); /* We have no such constraint on double mode offsets, so we permit the - full range of the instruction format. */ + full range of the instruction format. Note DImode is included here. */ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) return (code == CONST_INT && INTVAL (index) < 1024 @@ -9011,6 +9012,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p) && (GET_MODE_SIZE (mode) <= 4)) return 1; + /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */ if (mode == DImode || mode == DFmode) { if (code == CONST_INT) @@ -20859,10 +20861,9 @@ output_move_neon (rtx *operands) int overlap = -1; for (i = 0; i < nregs; i++) { - /* We're only using DImode here because it's a convenient - size. */ - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); - ops[1] = adjust_address (mem, DImode, 8 * i); + /* Use DFmode for vldr/vstr. */ + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i); + ops[1] = adjust_address_nv (mem, DFmode, 8 * i); if (reg_overlap_mentioned_p (ops[0], mem)) { gcc_assert (overlap == -1); @@ -20879,8 +20880,8 @@ output_move_neon (rtx *operands) } if (overlap != -1) { - ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); - ops[1] = adjust_address (mem, SImode, 8 * overlap); + ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap); + ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap); if (TARGET_HAVE_MVE && LABEL_REF_P (addr)) sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st"); else diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c new file mode 100644 index 000000000000..80b57acf87ec --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr115153.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -marm" } */ +/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */ +/* { dg-add-options arm_arch_v7ve_neon } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** f1: +** add r0, r0, #256 +** ldrd r0, r1, \[r0\] +** bx lr +*/ +long long f1 (long long *p) +{ + return __atomic_load_n (p + 32, __ATOMIC_RELAXED); +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3a55b2a4159c..fb9191d6e73b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5529,6 +5529,8 @@ foreach { armfunc armflag armdefs } { v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__ v7ve "-march=armv7ve+fp -marm" "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV" + v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp" + "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__" v8a "-march=armv8-a+simd" __ARM_ARCH_8A__ v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__ v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__