https://gcc.gnu.org/g:61de759f943ce0a5981affeee19debc82ec43744
commit r14-11162-g61de759f943ce0a5981affeee19debc82ec43744 Author: Christophe Lyon <christophe.l...@linaro.org> Date: Sun Nov 24 18:08:48 2024 +0000 arm: [MVE intrinsics] Fix support for predicate constants [PR target/114801] This backport is a cherry pick of commit 2089009210a1774c37e527ead8bbcaaa1a7a9d2d, with a small change needed because force_lowpart_subreg does not exist in gcc-14: the patch replaces it with the equivalent: - x = force_lowpart_subreg (mode, x, GET_MODE (x)); + { + auto byte = subreg_lowpart_offset (mode, GET_MODE (x)); + x = force_subreg (mode, x, GET_MODE (x), byte); + } In this PR, we have to handle a case where MVE predicates are supplied as a const_int, where individual predicates have illegal boolean values (such as 0xc for a 4-bit boolean predicate). To avoid the ICE, fix the constant (any non-zero value is converted to all 1s) and emit a warning. On MVE, V8BI and V4BI multi-bit masks are interpreted byte-by-byte at instruction level, but end-users should describe lanes rather than bytes (so all bytes of a true-predicated lane should be '1'), see the section on MVE intrinsics in the Arm ACLE specification. Since force_lowpart_subreg cannot handle const_int (because they have VOID mode), use gen_lowpart on them, force_lowpart_subreg otherwise. 2024-11-20 Christophe Lyon <christophe.l...@linaro.org> Jakub Jelinek <ja...@redhat.com> PR target/114801 gcc/ * config/arm/arm-mve-builtins.cc (function_expander::add_input_operand): Handle CONST_INT predicates. gcc/testsuite/ * gcc.target/arm/mve/pr108443.c: Update predicate constant. * gcc.target/arm/mve/pr108443-run.c: Likewise. * gcc.target/arm/mve/pr114801.c: New test. (cherry picked from commit 2089009210a1774c37e527ead8bbcaaa1a7a9d2d) Diff: --- gcc/config/arm/arm-mve-builtins.cc | 35 +++++++++++++++++++++- gcc/testsuite/gcc.target/arm/mve/pr108443-run.c | 2 +- gcc/testsuite/gcc.target/arm/mve/pr108443.c | 4 +-- gcc/testsuite/gcc.target/arm/mve/pr114801.c | 39 +++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index e1826ae40527..c57bf0844b05 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -2107,7 +2107,40 @@ function_expander::add_input_operand (insn_code icode, rtx x) mode = GET_MODE (x); } else if (VALID_MVE_PRED_MODE (mode)) - x = gen_lowpart (mode, x); + { + if (CONST_INT_P (x)) + { + if (mode == V8BImode || mode == V4BImode) + { + /* In V8BI or V4BI each element has 2 or 4 bits, if those bits + aren't all the same, gen_lowpart might ICE. Canonicalize all + the 2 or 4 bits to all ones if any of them is non-zero. V8BI + and V4BI multi-bit masks are interpreted byte-by-byte at + instruction level, but such constants should describe lanes, + rather than bytes. See the section on MVE intrinsics in the + Arm ACLE specification. */ + unsigned HOST_WIDE_INT xi = UINTVAL (x); + xi |= ((xi & 0x5555) << 1) | ((xi & 0xaaaa) >> 1); + if (mode == V4BImode) + xi |= ((xi & 0x3333) << 2) | ((xi & 0xcccc) >> 2); + if (xi != UINTVAL (x)) + warning_at (location, 0, "constant predicate argument %d" + " (%wx) does not map to %d lane numbers," + " converted to %wx", + opno, UINTVAL (x) & 0xffff, + mode == V8BImode ? 8 : 4, + xi & 0xffff); + + x = gen_int_mode (xi, HImode); + } + x = gen_lowpart (mode, x); + } + else + { + auto byte = subreg_lowpart_offset (mode, GET_MODE (x)); + x = force_subreg (mode, x, GET_MODE (x), byte); + } + } m_ops.safe_grow (m_ops.length () + 1, true); create_input_operand (&m_ops.last (), x, mode); diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c index cb4b45bd3056..b894f019b8bb 100644 --- a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c @@ -16,7 +16,7 @@ __attribute__ ((noipa)) partial_write (uint32_t *a, uint32x4_t v, unsigned short int main (void) { - unsigned short p = 0x00CC; + unsigned short p = 0x00FF; uint32_t a[] = {0, 0, 0, 0}; uint32_t b[] = {0, 0, 0, 0}; uint32x4_t v = vdupq_n_u32 (0xFFFFFFFFU); diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443.c b/gcc/testsuite/gcc.target/arm/mve/pr108443.c index c5fbfa4a1bb7..0c0e2dd6eb8f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/pr108443.c +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443.c @@ -7,8 +7,8 @@ void __attribute__ ((noipa)) partial_write_cst (uint32_t *a, uint32x4_t v) { - vstrwq_p_u32 (a, v, 0x00CC); + vstrwq_p_u32 (a, v, 0x00FF); } -/* { dg-final { scan-assembler {mov\tr[0-9]+, #204} } } */ +/* { dg-final { scan-assembler {mov\tr[0-9]+, #255} } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/pr114801.c b/gcc/testsuite/gcc.target/arm/mve/pr114801.c new file mode 100644 index 000000000000..ab3130fd4ce8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr114801.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_mve.h> + +/* +** test_32: +**... +** mov r[0-9]+, #65295 @ movhi +**... +*/ +uint32x4_t test_32() { + /* V4BI predicate converted to 0xff0f. */ + return vdupq_m_n_u32(vdupq_n_u32(0xffffffff), 0, 0x4f02); /* { dg-warning {constant predicate argument 3 \(0x4f02\) does not map to 4 lane numbers, converted to 0xff0f} } */ +} + +/* +** test_16: +**... +** mov r[0-9]+, #12339 @ movhi +**... +*/ +uint16x8_t test_16() { + /* V8BI predicate converted to 0x3033. */ + return vdupq_m_n_u16(vdupq_n_u16(0xffff), 0, 0x3021); /* { dg-warning {constant predicate argument 3 \(0x3021\) does not map to 8 lane numbers, converted to 0x3033} } */ +} + +/* +** test_8: +**... +** mov r[0-9]+, #23055 @ movhi +**... +*/ +uint8x16_t test_8() { + return vdupq_m_n_u8(vdupq_n_u8(0xff), 0, 0x5a0f); +}