Hi all, this is to fix PR98931, where the LE (loop end, Armv8.1-M low overhead loops) instruction cannot cover sufficiently long branches.
In this case we emit the following as an alternative: "subs lr, #1" followed by "bne label". arm-none-eabi regtested, arm-none-linux-gnueabihf bootstrapped. Okay for trunk? Regards, Andrea
>From c8216ed1313d670e79b28141dadd644e698c83cf Mon Sep 17 00:00:00 2001 From: Andrea Corallo <andrea.cora...@arm.com> Date: Wed, 3 Feb 2021 15:21:54 +0100 Subject: [PATCH] arm: Low overhead loop handle long range branches [PR98931] gcc/ChangeLog 2021-02-05 Andrea Corallo <andrea.cora...@arm.com> * config/arm/thumb2.md: Generate alternative sequence to handle long range branches. gcc/testsuite/Changelog 2021-02-08 Andrea Corallo <andrea.cora...@arm.com> * gcc.target/arm/pr98931.c: New testcase. --- gcc/config/arm/thumb2.md | 13 ++++++++++++- gcc/testsuite/gcc.target/arm/pr98931.c | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/arm/pr98931.c diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index bd53bf320de..2646926d3c1 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1719,7 +1719,18 @@ (set (reg:SI LR_REGNUM) (plus:SI (reg:SI LR_REGNUM) (const_int -1)))])] "TARGET_32BIT && TARGET_HAVE_LOB" - "le\t%|lr, %l0") + "* + if (get_attr_length (insn) == 4) + return \"le\\t%|lr, %l0\"; + else + return \"subs\\t%|lr, #1\;bne\\t%l0\"; + " + [(set (attr "length") + (if_then_else + (lt (minus (pc) (match_dup 0)) (const_int 1024)) + (const_int 4) + (const_int 6))) + (set_attr "type" "branch")]) (define_expand "doloop_begin" [(match_operand 0 "" "") diff --git a/gcc/testsuite/gcc.target/arm/pr98931.c b/gcc/testsuite/gcc.target/arm/pr98931.c new file mode 100644 index 00000000000..313876a3912 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr98931.c @@ -0,0 +1,17 @@ +/* { dg-do assemble } */ +/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ +/* { dg-options "-march=armv8.1-m.main -O3 --param=max-completely-peeled-insns=1300 --save-temps" } */ + +extern long long a[][20][26][26][22]; + +void +foo () +{ + for (short d = 0; d + 1; d++) + for (unsigned e = 0; e < 25; e += 4) + for (unsigned f = 0; f < 25; f += 4) + for (int g = 0; g < 21; 
g += 4) + a[4][d][e][f][g] = 0; +} + +/* { dg-final { scan-assembler-not {le\slr,\s\S*} } } */ -- 2.20.1