Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Tamar Christina <tamar.christ...@arm.com> writes:
>> Hi All,
>>
>> The new partial and full vector types added to AArch64, e.g.
>>
>> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short
>> vectors and not being composite types.
>>
>> This causes the layout code to incorrectly conclude that the registers are
>> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>>
>> Because of this the code under !aarch64_composite_type_p is unreachable but 
>> also
>> lacked any extra checks to see that nregs is what we expected it to be.
>>
>> I have also updated aarch64_advsimd_full_struct_mode_p and 
>> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
>> modes.  Otherwise types such as OImode and friends would qualify leading to
>> incorrect results.
>
> How easy would it be to fix the bug without doing this last bit?
> The idea was that OI, CI and XI should continue to be structure
> modes until we remove them.  aarch64_advsimd_partial_struct_mode_p
> and aarch64_advsimd_full_struct_mode_p are meant to be convenience
> wrappers and so they shouldn't make different decisions from the
> underlying aarch64_classify_vector_mode.
>
>>
>> This patch fixes up the issues and we now generate correct code.
>>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>>
>> Ok for master?
>>
>> Thanks,
>> Tamar
>>
>>
>>
>> gcc/ChangeLog:
>>
>>      PR target/103094
>>      * config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
>>      Fix unreachable code for partial vectors and re-order switch to perform
>>      the simplest test first.
>>      (aarch64_short_vector_p): Mark as not short vectors.
>>      (aarch64_composite_type_p): Mark as composite types.
>>      (aarch64_advsimd_partial_struct_mode_p,
>>      aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>>
>> gcc/testsuite/ChangeLog:
>>
>>      PR target/103094
>>      * gcc.target/aarch64/pr103094.c: New test.
>>
>> --- inline copy of patch -- 
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index 
>> fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f
>>  100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
>>  static bool
>>  aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
>>  {
>> -  return (aarch64_classify_vector_mode (mode)
>> -      == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> +  return VECTOR_MODE_P (mode)
>> +     && (aarch64_classify_vector_mode (mode)
>> +            == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>>  }
>>  
>>  /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
>>  static bool
>>  aarch64_advsimd_full_struct_mode_p (machine_mode mode)
>>  {
>> -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | 
>> VEC_STRUCT));
>> +  return VECTOR_MODE_P (mode)
>> +     && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>>  }
>>  
>>  /* Return true if MODE is any of the data vector modes, including
>> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree 
>> func,
>>                                             NULL, false))
>>      {
>>        gcc_assert (!sve_p);
>> -      if (!aarch64_composite_type_p (type, mode))
>> +      if (aarch64_advsimd_full_struct_mode_p (mode))
>> +    {
>> +      gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
>> +      return gen_rtx_REG (mode, V0_REGNUM);
>> +    }
>> +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> +    {
>> +      gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
>> +      return gen_rtx_REG (mode, V0_REGNUM);
>> +    }
>> +      else if (!aarch64_composite_type_p (type, mode))
>>      {
>>        gcc_assert (count == 1 && mode == ag_mode);
>>        return gen_rtx_REG (mode, V0_REGNUM);
>>      }
>> -      else if (aarch64_advsimd_full_struct_mode_p (mode)
>> -           && known_eq (GET_MODE_SIZE (ag_mode), 16))
>> -    return gen_rtx_REG (mode, V0_REGNUM);
>> -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> -           && known_eq (GET_MODE_SIZE (ag_mode), 8))
>> -    return gen_rtx_REG (mode, V0_REGNUM);
>>        else
>>      {
>>        int i;
>> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
>> function_arg_info &arg)
>>      /* No frontends can create types with variable-sized modes, so we
>>         shouldn't be asked to pass or return them.  */
>>      size = GET_MODE_SIZE (mode).to_constant ();
>> +
>>    size = ROUND_UP (size, UNITS_PER_WORD);
>>  
>>    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
>> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
>> function_arg_info &arg)
>>        if (nvrn + nregs <= NUM_FP_ARG_REGS)
>>      {
>>        pcum->aapcs_nextnvrn = nvrn + nregs;
>> -      if (!aarch64_composite_type_p (type, mode))
>> +      if (aarch64_advsimd_full_struct_mode_p (mode))
>> +        {
>> +          gcc_assert (nregs == size / 16);
>> +          pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> +        }
>> +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> +        {
>> +          gcc_assert (nregs == size / 8);
>> +          pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> +        }
>> +      else if (!aarch64_composite_type_p (type, mode))
>>          {
>>            gcc_assert (nregs == 1);
>>            pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>>          }
>> -      else if (aarch64_advsimd_full_struct_mode_p (mode)
>> -               && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
>> -        pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> -               && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
>> -        pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>>        else
>>          {
>>            rtx par;
>> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>>        else
>>      size = GET_MODE_SIZE (mode);
>>      }
>> +
>> +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a 
>> short
>> +     type.  */
>> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
>> +      || aarch64_advsimd_full_struct_mode_p (mode))
>> +    return false;
>> +
>>    if (known_eq (size, 8) || known_eq (size, 16))
>>      {
>>        /* 64-bit and 128-bit vectors should only acquire an SVE mode if
>
> I think the bug here is that we trust the mode even if we're
> given a conflicting type.  In principle it would be OK to use,
> say, V4SI for an array of 4 ints, but that shouldn't suddenly
> make aarch64_short_vector_p true.
>
> Unfortunately that ship has sailed, so we e.g. treat:
>
>   struct wrapper { int32x4_t x; int :0; };
>
> as a short vector too.
>
> So it feels like this a case of limiting the contagion and
> that the check should go in here:
>
>   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
>          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
>     {
>       /* Rely only on the type, not the mode, when processing SVE types.  */
>       if (type && aarch64_some_values_include_pst_objects_p (type))
>       /* Leave later code to report an error if SVE is disabled.  */
>       gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
>       else
>       size = GET_MODE_SIZE (mode);
>     }
>
> where we needed similar protection for SVE.  E.g. we could change the
> inner else to:
>
>       else if (!aarch64_advsimd_struct_mode_p (mode))
>
> or keep it is an early-out (but within the outer “else if”)
> if that seems clearer.

Following some off-line discussion, I've committed the following
combined patch after testing on aarch64-linux-gnu.

Thanks,
Richard


In this PR we were wrongly classifying a pair of 8-byte vectors
as a 16-byte “short vector” (in the AAPCS64 sense).  As the
comment in the patch says, this stems from an old condition
in aarch64_short_vector_p that is too loose, but that would
be difficult to tighten now.

We can still do the right thing for the newly-added modes though,
since there are no backwards compatibility concerns there.

Co-authored-by: Tamar Christina <tamar.christ...@arm.com>

gcc/
        PR target/103094
        * config/aarch64/aarch64.c (aarch64_short_vector_p): Return false
        for structure modes, rather than ignoring the type in that case.

gcc/testsuite/
        PR target/103094
        * gcc.target/aarch64/pr103094.c: New test.
---
 gcc/config/aarch64/aarch64.c                | 19 ++++++++++++++++--
 gcc/testsuite/gcc.target/aarch64/pr103094.c | 22 +++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr103094.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..ff4a808629b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -19299,7 +19299,21 @@ aarch64_short_vector_p (const_tree type,
   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
     {
-      /* Rely only on the type, not the mode, when processing SVE types.  */
+      /* The containing "else if" is too loose: it means that we look at TYPE
+        if the type is a vector type (good), but that we otherwise ignore TYPE
+        and look only at the mode.  This is wrong because the type describes
+        the language-level information whereas the mode is purely an internal
+        GCC concept.  We can therefore reach here for types that are not
+        vectors in the AAPCS64 sense.
+
+        We can't "fix" that for the traditional Advanced SIMD vector modes
+        without breaking backwards compatibility.  However, there's no such
+        baggage for the structure modes, which were introduced in GCC 12.  */
+      if (aarch64_advsimd_struct_mode_p (mode))
+       return false;
+
+      /* For similar reasons, rely only on the type, not the mode, when
+        processing SVE types.  */
       if (type && aarch64_some_values_include_pst_objects_p (type))
        /* Leave later code to report an error if SVE is disabled.  */
        gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
@@ -19310,7 +19324,8 @@ aarch64_short_vector_p (const_tree type,
     {
       /* 64-bit and 128-bit vectors should only acquire an SVE mode if
         they are being treated as scalable AAPCS64 types.  */
-      gcc_assert (!aarch64_sve_mode_p (mode));
+      gcc_assert (!aarch64_sve_mode_p (mode)
+                 && !aarch64_advsimd_struct_mode_p (mode));
       return true;
     }
   return false;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c 
b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 00000000000..beda99dc1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t*
+outptr0) {
+  uint16x4x4_t cols_01_23_45_67 = { {
+    vreinterpret_u16_u8(cols_01_23.val[0]),
+    vreinterpret_u16_u8(cols_01_23.val[1]),
+    vreinterpret_u16_u8(cols_45_67.val[0]),
+    vreinterpret_u16_u8(cols_45_67.val[1])
+  } };
+
+  vst4_lane_u16(outptr0, cols_01_23_45_67, 0); }
+
+/* Check that we expand to v0 and v2 from the function arguments.  */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23
+\]\)} expand } } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67
+\]\)} expand } } */
+
-- 
2.25.1

Reply via email to