This patch refactors and fixes an issue where
arm_mve_dlstp_check_dec_counter
was making an assumption about the form of what a candidate for a
dec_insn
should be, which caused an ICE.
This dec_insn is the instruction that decreases the loop counter
inside a
decrementing loop and we expect it to have the following form:
(set (reg CONDCOUNT)
(plus (reg CONDCOUNT)
(const_int)))
Where CONDCOUNT is the loop counter, and const int is the negative
constant
used to decrement it.
This patch also improves our search for a valid dec_insn. Before
this patch
we'd only look for a dec_insn inside the loop header if the loop
latch was
empty. We now also search the loop header if the loop latch is not
empty but
the last instruction is not a valid dec_insn. This could
potentially be improved
to search all instructions inside the loop latch.
gcc/ChangeLog:
* config/arm/arm.cc (check_dec_insn): New helper function
containing
code hoisted from...
(arm_mve_dlstp_check_dec_counter): ... here. Use
check_dec_insn to
check the validity of the candidate dec_insn.
gcc/testsuite/ChangeLog:
* gcc.targer/arm/mve/dlstp-loop-form.c: New test.
On 31/07/2024 15:15, Christophe Lyon wrote:
Because I tested with a toolchain configured for cortex-m85, which has
mve.fp enabled by default, which means I didn't realize the testcase
required arm_v8_1m_mve_fp_ok instead of arm_v8_1m_mve_ok.
Addressed that now.
Thanks, I thought you meant you ran the testsuite with -mcpu=cortex-m85
in RUNTESTFLAGS.
To be fair, that's not a terrible assumption. But what I did was I
configured the toolchain (and single multilib) for I ran them in a build
configured for armv8.1-m.main+mve.fp+fp.dp and fpu=auto (and
float-abi=hard).
Regarding the patch, did you consider making the new check_dec_insn
helper return an rtx (NULL or dec_set) instead of bool?
I think it would save a call to single_set when computing decrementnum,
but that's nitpicking.
Yeah I had also contemplated that, I'm OK either way, doesn't look too
bad with the rtx return. See attached.
Thanks,
Christophe
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index
92cd168e65937ef7350477464e8b0becf85bceed..363a972170b37275372bb8bf30d510876021c8c0
100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35214,6 +35214,32 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn*
vctp_insn,
return vctp_insn;
}
+/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
+ is of the expected form:
+ (set (reg a) (plus (reg a) (const_int)))
+ where (reg a) is the same as CONDCOUNT.
+ Return a rtx with the set if it is in the right format or NULL_RTX
+ otherwise. */
+
+static rtx
+check_dec_insn (rtx_insn *dec_insn, rtx condcount)
+{
+ if (!NONDEBUG_INSN_P (dec_insn))
+ return NULL_RTX;
+ rtx dec_set = single_set (dec_insn);
+ if (!dec_set
+ || !REG_P (SET_DEST (dec_set))
+ || GET_CODE (SET_SRC (dec_set)) != PLUS
+ || !REG_P (XEXP (SET_SRC (dec_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+ || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+ || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+ return NULL_RTX;
+
+ return dec_set;
+}
+
/* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
counter that is decrementing, ensure that it is decrementing by the
right amount in each iteration and that the target condition is what
@@ -35230,30 +35256,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop,
rtx_insn* vctp_insn,
loop latch. Here we simply need to verify that this counter is the same
reg that is also used in the vctp_insn and that it is not otherwise
modified. */
- rtx_insn *dec_insn = BB_END (loop->latch);
+ rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
/* If not in the loop latch, try to find the decrement in the loop header.
*/
- if (!NONDEBUG_INSN_P (dec_insn))
+ if (dec_set == NULL_RTX)
{
df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
/* If we haven't been able to find the decrement, bail out. */
if (!temp)
return NULL;
- dec_insn = DF_REF_INSN (temp);
- }
-
- rtx dec_set = single_set (dec_insn);
+ dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
- /* Next, ensure that it is a PLUS of the form:
- (set (reg a) (plus (reg a) (const_int)))
- where (reg a) is the same as condcount. */
- if (!dec_set
- || !REG_P (SET_DEST (dec_set))
- || !REG_P (XEXP (SET_SRC (dec_set), 0))
- || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
- || REGNO (SET_DEST (dec_set))
- != REGNO (XEXP (SET_SRC (dec_set), 0))
- || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
- return NULL;
+ if (dec_set == NULL_RTX)
+ return NULL;
+ }
decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
new file mode 100644
index
0000000000000000000000000000000000000000..a1b26873d7908035c726e3724c91b186c697bc60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-options "-Ofast" } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+#pragma GCC arm "arm_mve_types.h"
+#pragma GCC arm "arm_mve.h" false
+typedef __attribute__((aligned(2))) float16x8_t e;
+mve_pred16_t c(long d) { return __builtin_mve_vctp16qv8bi(d); }
+int f();
+void n() {
+ int g, h, *i, j;
+ mve_pred16_t k;
+ e acc;
+ e l;
+ e m;
+ for (;;) {
+ j = g;
+ acc[g];
+ for (; h < g; h += 8) {
+ k = c(j);
+ acc = vfmsq_m(acc, l, m, k);
+ j -= 8;
+ }
+ i[g] = f(acc);
+ }
+}
+