Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Tamar Christina <tamar.christ...@arm.com> writes: >> Hi All, >> >> The new partial and full vector types added to AArch64, e.g. >> >> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short >> vectors and not being composite types. >> >> This causes the layout code to incorrectly conclude that the registers are >> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers. >> >> Because of this the code under !aarch64_composite_type_p is unreachable but >> also >> lacked any extra checks to see that nregs is what we expected it to be. >> >> I have also updated aarch64_advsimd_full_struct_mode_p and >> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct >> modes. Otherwise types such as OImode and friends would qualify leading to >> incorrect results. > > How easy would it be to fix the bug without doing this last bit? > The idea was that OI, CI and XI should continue to be structure > modes until we remove them. aarch64_advsimd_partial_struct_mode_p > and aarch64_advsimd_full_struct_mode_p are meant to be convenience > wrappers and so they shouldn't make different decisions from the > underlying aarch64_classify_vector_mode. > >> >> This patch fixes up the issues and we now generate correct code. >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> Ok for master? >> >> Thanks, >> Tamar >> >> >> >> gcc/ChangeLog: >> >> PR target/103094 >> * config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg): >> Fix unreachable code for partial vectors and re-order switch to perform >> the simplest test first. >> (aarch64_short_vector_p): Mark as not short vectors. >> (aarch64_composite_type_p): Mark as composite types. >> (aarch64_advsimd_partial_struct_mode_p, >> aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types. >> >> gcc/testsuite/ChangeLog: >> >> PR target/103094 >> * gcc.target/aarch64/pr103094.c: New test. >> >> --- inline copy of patch -- >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index >> fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f >> 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode) >> static bool >> aarch64_advsimd_partial_struct_mode_p (machine_mode mode) >> { >> - return (aarch64_classify_vector_mode (mode) >> - == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)); >> + return VECTOR_MODE_P (mode) >> + && (aarch64_classify_vector_mode (mode) >> + == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)); >> } >> >> /* Return true if MODE is an Advanced SIMD Q-register structure mode. */ >> static bool >> aarch64_advsimd_full_struct_mode_p (machine_mode mode) >> { >> - return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | >> VEC_STRUCT)); >> + return VECTOR_MODE_P (mode) >> + && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT)); >> } >> >> /* Return true if MODE is any of the data vector modes, including >> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree >> func, >> NULL, false)) >> { >> gcc_assert (!sve_p); >> - if (!aarch64_composite_type_p (type, mode)) >> + if (aarch64_advsimd_full_struct_mode_p (mode)) >> + { >> + gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count)); >> + return gen_rtx_REG (mode, V0_REGNUM); >> + } >> + else if (aarch64_advsimd_partial_struct_mode_p (mode)) >> + { >> + gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count)); >> + return gen_rtx_REG (mode, V0_REGNUM); >> + } >> + else if (!aarch64_composite_type_p (type, mode)) >> { >> gcc_assert (count == 1 && mode == ag_mode); >> return gen_rtx_REG (mode, V0_REGNUM); >> } >> - else if (aarch64_advsimd_full_struct_mode_p (mode) >> - && known_eq (GET_MODE_SIZE (ag_mode), 16)) >> - return gen_rtx_REG (mode, V0_REGNUM); >> - else if (aarch64_advsimd_partial_struct_mode_p (mode) >> - && known_eq (GET_MODE_SIZE (ag_mode), 8)) >> - return gen_rtx_REG (mode, V0_REGNUM); >> else >> { >> int i; >> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const >> function_arg_info &arg) >> /* No frontends can create types with variable-sized modes, so we >> shouldn't be asked to pass or return them. */ >> size = GET_MODE_SIZE (mode).to_constant (); >> + >> size = ROUND_UP (size, UNITS_PER_WORD); >> >> allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); >> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const >> function_arg_info &arg) >> if (nvrn + nregs <= NUM_FP_ARG_REGS) >> { >> pcum->aapcs_nextnvrn = nvrn + nregs; >> - if (!aarch64_composite_type_p (type, mode)) >> + if (aarch64_advsimd_full_struct_mode_p (mode)) >> + { >> + gcc_assert (nregs == size / 16); >> + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); >> + } >> + else if (aarch64_advsimd_partial_struct_mode_p (mode)) >> + { >> + gcc_assert (nregs == size / 8); >> + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); >> + } >> + else if (!aarch64_composite_type_p (type, mode)) >> { >> gcc_assert (nregs == 1); >> pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); >> } >> - else if (aarch64_advsimd_full_struct_mode_p (mode) >> - && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16)) >> - pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); >> - else if (aarch64_advsimd_partial_struct_mode_p (mode) >> - && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8)) >> - pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); >> else >> { >> rtx par; >> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type, >> else >> size = GET_MODE_SIZE (mode); >> } >> + >> + /* If a Advanced SIMD partial or full aggregate vector type we aren't a >> short >> + type. */ >> + if (aarch64_advsimd_partial_struct_mode_p (mode) >> + || aarch64_advsimd_full_struct_mode_p (mode)) >> + return false; >> + >> if (known_eq (size, 8) || known_eq (size, 16)) >> { >> /* 64-bit and 128-bit vectors should only acquire an SVE mode if > > I think the bug here is that we trust the mode even if we're > given a conflicting type. In principle it would be OK to use, > say, V4SI for an array of 4 ints, but that shouldn't suddenly > make aarch64_short_vector_p true. > > Unfortunately that ship has sailed, so we e.g. treat: > > struct wrapper { int32x4_t x; int :0; }; > > as a short vector too. > > So it feels like this a case of limiting the contagion and > that the check should go in here: > > else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT > || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) > { > /* Rely only on the type, not the mode, when processing SVE types. */ > if (type && aarch64_some_values_include_pst_objects_p (type)) > /* Leave later code to report an error if SVE is disabled. */ > gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode)); > else > size = GET_MODE_SIZE (mode); > } > > where we needed similar protection for SVE. E.g. we could change the > inner else to: > > else if (!aarch64_advsimd_struct_mode_p (mode)) > > or keep it is an early-out (but within the outer “else if”) > if that seems clearer.
Following some off-line discussion, I've committed the following combined patch after testing on aarch64-linux-gnu. Thanks, Richard In this PR we were wrongly classifying a pair of 8-byte vectors as a 16-byte “short vector” (in the AAPCS64 sense). As the comment in the patch says, this stems from an old condition in aarch64_short_vector_p that is too loose, but that would be difficult to tighten now. We can still do the right thing for the newly-added modes though, since there are no backwards compatibility concerns there. Co-authored-by: Tamar Christina <tamar.christ...@arm.com> gcc/ PR target/103094 * config/aarch64/aarch64.c (aarch64_short_vector_p): Return false for structure modes, rather than ignoring the type in that case. gcc/testsuite/ PR target/103094 * gcc.target/aarch64/pr103094.c: New test. --- gcc/config/aarch64/aarch64.c | 19 ++++++++++++++++-- gcc/testsuite/gcc.target/aarch64/pr103094.c | 22 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr103094.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f07330cff4f..ff4a808629b 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -19299,7 +19299,21 @@ aarch64_short_vector_p (const_tree type, else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - /* Rely only on the type, not the mode, when processing SVE types. */ + /* The containing "else if" is too loose: it means that we look at TYPE + if the type is a vector type (good), but that we otherwise ignore TYPE + and look only at the mode. This is wrong because the type describes + the language-level information whereas the mode is purely an internal + GCC concept. We can therefore reach here for types that are not + vectors in the AAPCS64 sense. + + We can't "fix" that for the traditional Advanced SIMD vector modes + without breaking backwards compatibility. However, there's no such + baggage for the structure modes, which were introduced in GCC 12. */ + if (aarch64_advsimd_struct_mode_p (mode)) + return false; + + /* For similar reasons, rely only on the type, not the mode, when + processing SVE types. */ if (type && aarch64_some_values_include_pst_objects_p (type)) /* Leave later code to report an error if SVE is disabled. */ gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode)); @@ -19310,7 +19324,8 @@ aarch64_short_vector_p (const_tree type, { /* 64-bit and 128-bit vectors should only acquire an SVE mode if they are being treated as scalable AAPCS64 types. */ - gcc_assert (!aarch64_sve_mode_p (mode)); + gcc_assert (!aarch64_sve_mode_p (mode) + && !aarch64_advsimd_struct_mode_p (mode)); return true; } return false; diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c new file mode 100644 index 00000000000..beda99dc1f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fdump-rtl-expand -w" } */ + +#include <arm_neon.h> + +void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* +outptr0) { + uint16x4x4_t cols_01_23_45_67 = { { + vreinterpret_u16_u8(cols_01_23.val[0]), + vreinterpret_u16_u8(cols_01_23.val[1]), + vreinterpret_u16_u8(cols_45_67.val[0]), + vreinterpret_u16_u8(cols_45_67.val[1]) + } }; + + vst4_lane_u16(outptr0, cols_01_23_45_67, 0); } + +/* Check that we expand to v0 and v2 from the function arguments. */ +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 +\]\)} expand } } */ +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 +\]\)} expand } } */ + -- 2.25.1