Hi Christophe,
Thanks for the comments, attached new version for testcase, see below
new cover letter:
This patch refactors and fixes an issue where
arm_mve_dlstp_check_dec_counter
was making an assumption about the form of what a candidate for a dec_insn.
This dec_insn is the instruction that decreases the loop counter inside a
decrementing loop and we expect it to have the following form:
(set (reg CONDCOUNT)
(plus (reg CONDCOUNT)
(const_int)))
Where CONDCOUNT is the loop counter, and const int is the negative constant
used to decrement it.
This patch also improves our search for a valid dec_insn. Before this patch
we'd only look for a dec_insn inside the loop header if the loop latch was
empty. We now also search the loop header if the loop latch is not
empty but
the last instruction is not a valid dec_insn. This could potentially be
improved
to search all instructions inside the loop latch.
gcc/ChangeLog:
* config/arm/arm.cc (check_dec_insn): New helper function containing
code hoisted from...
(arm_mve_dlstp_check_dec_counter): ... here. Use check_dec_insn to
check the validity of the candidate dec_insn.
gcc/testsuite/ChangeLog:
* gcc.targer/arm/mve/dlstp-loop-form.c: New test.
On 30/07/2024 21:31, Christophe Lyon wrote:
I manually tried to exercise the testcase with a cross-compiler, and
found the same issue as the Linaro CI should have reported (there was a
temporary breakage).
You can find detailed logs from Linaro in gcc.log.1.xz under
https://ci.linaro.org/job/tcwg_gcc_check--master-arm-precommit/8357/artifact/artifacts/artifacts.precommit/00-sumfiles/
Basically the testcase fails to compile with loads of
dlstp-loop-form.c:6:9: warning: 'pure' attribute on function returning
'void' [-Wattributes]
then
dlstp-loop-form.c:7:37: error: unknown type name 'float16x8_t'; did you
mean 'int16x8_t'?
dlstp-loop-form.c: In function 'n':
dlstp-loop-form.c:18:8: error: subscripted value is neither array nor
pointer nor vector
dlstp-loop-form.c:21:13: error: passing 'e' {aka 'int'} to argument 2 of
'vfmsq_m', which expects an MVE vector type
Why would the test pass for you?
Because I tested with a toolchain configured for cortex-m85, which has
mve.fp enabled by default, which means I didn't realize the testcase
required arm_v8_1m_mve_fp_ok instead of arm_v8_1m_mve_ok.
Addressed that now.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index
92cd168e65937ef7350477464e8b0becf85bceed..e3c3db5c816bfaedf3afb775a0436d4b7c984b51
100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -35214,6 +35214,30 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn*
vctp_insn,
return vctp_insn;
}
+/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
+ is of the expected form:
+ (set (reg a) (plus (reg a) (const_int)))
+ where (reg a) is the same as CONDCOUNT. */
+
+static bool
+check_dec_insn (rtx_insn *dec_insn, rtx condcount)
+{
+ if (!NONDEBUG_INSN_P (dec_insn))
+ return false;
+ rtx dec_set = single_set (dec_insn);
+ if (!dec_set
+ || !REG_P (SET_DEST (dec_set))
+ || GET_CODE (SET_SRC (dec_set)) != PLUS
+ || !REG_P (XEXP (SET_SRC (dec_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+ || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+ || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+ return false;
+
+ return true;
+}
+
/* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
counter that is decrementing, ensure that it is decrementing by the
right amount in each iteration and that the target condition is what
@@ -35232,30 +35256,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop,
rtx_insn* vctp_insn,
modified. */
rtx_insn *dec_insn = BB_END (loop->latch);
/* If not in the loop latch, try to find the decrement in the loop header.
*/
- if (!NONDEBUG_INSN_P (dec_insn))
+ if (!check_dec_insn (dec_insn, condcount))
{
df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
/* If we haven't been able to find the decrement, bail out. */
if (!temp)
return NULL;
dec_insn = DF_REF_INSN (temp);
- }
- rtx dec_set = single_set (dec_insn);
-
- /* Next, ensure that it is a PLUS of the form:
- (set (reg a) (plus (reg a) (const_int)))
- where (reg a) is the same as condcount. */
- if (!dec_set
- || !REG_P (SET_DEST (dec_set))
- || !REG_P (XEXP (SET_SRC (dec_set), 0))
- || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
- || REGNO (SET_DEST (dec_set))
- != REGNO (XEXP (SET_SRC (dec_set), 0))
- || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
- return NULL;
+ if (!check_dec_insn (dec_insn, condcount))
+ return NULL;
+ }
- decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
+ decrementnum = INTVAL (XEXP (SET_SRC (single_set (dec_insn)), 1));
/* This decrementnum is the number of lanes/elements it decrements from the
remaining number of lanes/elements to process in the loop, for this reason
diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
new file mode 100644
index
0000000000000000000000000000000000000000..a1b26873d7908035c726e3724c91b186c697bc60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-options "-Ofast" } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+#pragma GCC arm "arm_mve_types.h"
+#pragma GCC arm "arm_mve.h" false
+typedef __attribute__((aligned(2))) float16x8_t e;
+mve_pred16_t c(long d) { return __builtin_mve_vctp16qv8bi(d); }
+int f();
+void n() {
+ int g, h, *i, j;
+ mve_pred16_t k;
+ e acc;
+ e l;
+ e m;
+ for (;;) {
+ j = g;
+ acc[g];
+ for (; h < g; h += 8) {
+ k = c(j);
+ acc = vfmsq_m(acc, l, m, k);
+ j -= 8;
+ }
+ i[g] = f(acc);
+ }
+}
+