On 18/10/2024 13:57, Andre Vieira (lists) wrote: > Hi, > > This looks like an acceptable work around. We special case behavior that I'm > not sure we can express in ways GCC can understand or will make use of, > whilst at the same time we keep expressing behavior it does understand and > can optimize. > > Nice idea! > > LGTM, needs maintainer approval though. >
As Jakub said in the PR, this is really just papering over a general problem in the compiler. As such, it's OK for the gcc-14 branch, if we really need it, but not really suitable for trunk. I think the problem is related to failing to correclty canonicalize non-standard bool types in trunk_int_for_mode. The code there has: /* Canonicalize BImode to 0 and STORE_FLAG_VALUE. */ if (smode == BImode) return c & 1 ? STORE_FLAG_VALUE : 0; and that means we fail to generate canonical boolean values, so later on the compiler asserts when trying to pick non-standard values out. Perhaps instead we need something like: /* Canonicalize scalar boolean modes to 0 and STORE_FLAG_VALUE. */ if (smode >= MIN_MODE_BOOL && smode <= MAX_MODE_BOOL) return c & 1 ? STORE_FLAG_VALUE : 0; Jakub? R. > Kind regards, > Andre > > On 07/05/2024 17:19, Christophe Lyon wrote: >> In this PR, we have to handle a case where MVE predicates are supplied >> as a const_int, where individual predicates have illegal boolean >> values (such as 0xc for a 4-bit boolean predicate). To avoid the ICE, >> we hide the constant behind an unspec. >> >> On MVE V8BI and V4BI multi-bit masks are interpreted byte-by-byte at >> instruction level, see >> https://developer.arm.com/documentation/101028/0012/14--M-profile-Vector-Extension--MVE--intrinsics. >> >> This is a workaround until we change such predicates representation to >> V16BImode. >> >> 2024-05-06 Christophe Lyon <christophe.l...@linaro.org> >> Jakub Jelinek <ja...@redhat.com> >> >> PR target/114801 >> gcc/ >> * config/arm/arm-mve-builtins.cc >> (function_expander::add_input_operand): Handle CONST_INT >> predicates. >> * mve.md (set_mve_const_pred): New pattern. >> * unspec.md (MVE_PRED): New unspec. >> >> gcc/testsuite/ >> * gcc.target/arm/mve/pr114801.c: New test. >> --- >> gcc/config/arm/arm-mve-builtins.cc | 27 ++++++++++++++- >> gcc/config/arm/mve.md | 12 +++++++ >> gcc/config/arm/unspecs.md | 1 + >> gcc/testsuite/gcc.target/arm/mve/pr114801.c | 37 +++++++++++++++++++++ >> 4 files changed, 76 insertions(+), 1 deletion(-) >> create mode 100644 gcc/testsuite/gcc.target/arm/mve/pr114801.c >> >> diff --git a/gcc/config/arm/arm-mve-builtins.cc >> b/gcc/config/arm/arm-mve-builtins.cc >> index 6a5775c67e5..7d5af649857 100644 >> --- a/gcc/config/arm/arm-mve-builtins.cc >> +++ b/gcc/config/arm/arm-mve-builtins.cc >> @@ -2205,7 +2205,32 @@ function_expander::add_input_operand (insn_code >> icode, rtx x) >> mode = GET_MODE (x); >> } >> else if (VALID_MVE_PRED_MODE (mode)) >> - x = gen_lowpart (mode, x); >> + { >> + if (CONST_INT_P (x) && (mode == V8BImode || mode == V4BImode)) >> + { >> + /* In V8BI or V4BI each element has 2 or 4 bits, if those >> + bits aren't all the same, gen_lowpart might ICE. Hide >> + the move behind an unspec to avoid this. >> + V8BI and V4BI multi-bit masks are interpreted >> + byte-by-byte at instruction level, see >> + >> https://developer.arm.com/documentation/101028/0012/14--M-profile-Vector-Extension--MVE--intrinsics. >> */ >> + unsigned HOST_WIDE_INT xi = UINTVAL (x); >> + if ((xi & 0x5555) != ((xi >> 1) & 0x5555) >> + || (mode == V4BImode >> + && (xi & 0x3333) != ((xi >> 2) & 0x3333))) >> + { >> + rtx unspec_x; >> + unspec_x = gen_rtx_UNSPEC (HImode, gen_rtvec (1, x), MVE_PRED); >> + x = force_reg (HImode, unspec_x); >> + } >> + >> + } >> + else if (SUBREG_P (x)) >> + /* gen_lowpart on a SUBREG can ICE. */ >> + x = force_reg (GET_MODE (x), x); >> + >> + x = gen_lowpart (mode, x); >> + } >> m_ops.safe_grow (m_ops.length () + 1, true); >> create_input_operand (&m_ops.last (), x, mode); >> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md >> index 35916f62604..d337422d695 100644 >> --- a/gcc/config/arm/mve.md >> +++ b/gcc/config/arm/mve.md >> @@ -6621,3 +6621,15 @@ (define_expand "@arm_mve_reinterpret<mode>" >> } >> } >> ) >> + >> +;; Hide predicate constants from optimizers >> +(define_insn "set_mve_const_pred" >> + [(set >> + (match_operand:HI 0 "s_register_operand" "=r") >> + (unspec:HI [(match_operand:HI 1 "general_operand" "n")] MVE_PRED))] >> + "TARGET_HAVE_MVE" >> +{ >> + return "movw%?\t%0, %L1\t%@ set_mve_const_pred"; >> +} >> + [(set_attr "type" "mov_imm")] >> +) >> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md >> index 4713ec840ab..336f2fe08e6 100644 >> --- a/gcc/config/arm/unspecs.md >> +++ b/gcc/config/arm/unspecs.md >> @@ -1256,4 +1256,5 @@ (define_c_enum "unspec" [ >> SQRSHRL_48 >> VSHLCQ_M_ >> REINTERPRET >> + MVE_PRED >> ]) >> diff --git a/gcc/testsuite/gcc.target/arm/mve/pr114801.c >> b/gcc/testsuite/gcc.target/arm/mve/pr114801.c >> new file mode 100644 >> index 00000000000..fb3e4d855f9 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/arm/mve/pr114801.c >> @@ -0,0 +1,37 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-add-options arm_v8_1m_mve } */ >> +/* { dg-final { check-function-bodies "**" "" "" } } */ >> + >> +#include <arm_mve.h> >> + >> +/* >> +** test_32: >> +**... >> +** movw r[0-9]+, 52428 @ set_mve_const_pred >> +**... >> +*/ >> +uint32x4_t test_32() { >> + return vdupq_m_n_u32(vdupq_n_u32(0xffffffff), 0, 0xcccc); >> +} >> + >> +/* >> +** test_16: >> +**... >> +** movw r[0-9]+, 6927 @ set_mve_const_pred >> +**... >> +*/ >> +uint16x8_t test_16() { >> + return vdupq_m_n_u16(vdupq_n_u16(0xffff), 0, 0x1b0f); >> +} >> + >> +/* >> +** test_8: >> +**... >> +** mov r[0-9]+, #23055 @ movhi >> +**... >> +*/ >> +uint8x16_t test_8() { >> + return vdupq_m_n_u8(vdupq_n_u8(0xff), 0, 0x5a0f); >> +} >