Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> The new partial and full vector types added to AArch64, e.g.
>
> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short
> vectors and not being composite types.
>
> This causes the layout code to incorrectly conclude that the registers are
> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>
> Because of this the code under !aarch64_composite_type_p is unreachable but 
> also
> lacked any extra checks to see that nregs is what we expected it to be.
>
> I have also updated aarch64_advsimd_full_struct_mode_p and 
> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
> modes.  Otherwise types such as OImode and friends would qualify leading to
> incorrect results.

How easy would it be to fix the bug without doing this last bit?
The idea was that OI, CI and XI should continue to be structure
modes until we remove them.  aarch64_advsimd_partial_struct_mode_p
and aarch64_advsimd_full_struct_mode_p are meant to be convenience
wrappers and so they shouldn't make different decisions from the
underlying aarch64_classify_vector_mode.

>
> This patch fixes up the issues and we now generate correct code.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
>
>
> gcc/ChangeLog:
>
>       PR target/103094
>       * config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
>       Fix unreachable code for partial vectors and re-order switch to perform
>       the simplest test first.
>       (aarch64_short_vector_p): Mark as not short vectors.
>       (aarch64_composite_type_p): Mark as composite types.
>       (aarch64_advsimd_partial_struct_mode_p,
>       aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>
> gcc/testsuite/ChangeLog:
>
>       PR target/103094
>       * gcc.target/aarch64/pr103094.c: New test.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
>  static bool
>  aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
>  {
> -  return (aarch64_classify_vector_mode (mode)
> -       == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
> +  return VECTOR_MODE_P (mode)
> +      && (aarch64_classify_vector_mode (mode)
> +             == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>  }
>  
>  /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
>  static bool
>  aarch64_advsimd_full_struct_mode_p (machine_mode mode)
>  {
> -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
> +  return VECTOR_MODE_P (mode)
> +      && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>  }
>  
>  /* Return true if MODE is any of the data vector modes, including
> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree 
> func,
>                                              NULL, false))
>      {
>        gcc_assert (!sve_p);
> -      if (!aarch64_composite_type_p (type, mode))
> +      if (aarch64_advsimd_full_struct_mode_p (mode))
> +     {
> +       gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
> +       return gen_rtx_REG (mode, V0_REGNUM);
> +     }
> +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
> +     {
> +       gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
> +       return gen_rtx_REG (mode, V0_REGNUM);
> +     }
> +      else if (!aarch64_composite_type_p (type, mode))
>       {
>         gcc_assert (count == 1 && mode == ag_mode);
>         return gen_rtx_REG (mode, V0_REGNUM);
>       }
> -      else if (aarch64_advsimd_full_struct_mode_p (mode)
> -            && known_eq (GET_MODE_SIZE (ag_mode), 16))
> -     return gen_rtx_REG (mode, V0_REGNUM);
> -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
> -            && known_eq (GET_MODE_SIZE (ag_mode), 8))
> -     return gen_rtx_REG (mode, V0_REGNUM);
>        else
>       {
>         int i;
> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
> function_arg_info &arg)
>      /* No frontends can create types with variable-sized modes, so we
>         shouldn't be asked to pass or return them.  */
>      size = GET_MODE_SIZE (mode).to_constant ();
> +
>    size = ROUND_UP (size, UNITS_PER_WORD);
>  
>    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
> function_arg_info &arg)
>        if (nvrn + nregs <= NUM_FP_ARG_REGS)
>       {
>         pcum->aapcs_nextnvrn = nvrn + nregs;
> -       if (!aarch64_composite_type_p (type, mode))
> +       if (aarch64_advsimd_full_struct_mode_p (mode))
> +         {
> +           gcc_assert (nregs == size / 16);
> +           pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> +         }
> +       else if (aarch64_advsimd_partial_struct_mode_p (mode))
> +         {
> +           gcc_assert (nregs == size / 8);
> +           pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> +         }
> +       else if (!aarch64_composite_type_p (type, mode))
>           {
>             gcc_assert (nregs == 1);
>             pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>           }
> -       else if (aarch64_advsimd_full_struct_mode_p (mode)
> -                && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
> -         pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> -       else if (aarch64_advsimd_partial_struct_mode_p (mode)
> -                && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
> -         pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>         else
>           {
>             rtx par;
> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>        else
>       size = GET_MODE_SIZE (mode);
>      }
> +
> +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a 
> short
> +     type.  */
> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> +      || aarch64_advsimd_full_struct_mode_p (mode))
> +    return false;
> +
>    if (known_eq (size, 8) || known_eq (size, 16))
>      {
>        /* 64-bit and 128-bit vectors should only acquire an SVE mode if

I think the bug here is that we trust the mode even if we're
given a conflicting type.  In principle it would be OK to use,
say, V4SI for an array of 4 ints, but that shouldn't suddenly
make aarch64_short_vector_p true.

Unfortunately that ship has sailed, so we e.g. treat:

  struct wrapper { int32x4_t x; int :0; };

as a short vector too.

So it feels like this a case of limiting the contagion and
that the check should go in here:

  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      /* Rely only on the type, not the mode, when processing SVE types.  */
      if (type && aarch64_some_values_include_pst_objects_p (type))
        /* Leave later code to report an error if SVE is disabled.  */
        gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
      else
        size = GET_MODE_SIZE (mode);
    }

where we needed similar protection for SVE.  E.g. we could change the
inner else to:

      else if (!aarch64_advsimd_struct_mode_p (mode))

or keep it is an early-out (but within the outer “else if”)
if that seems clearer.

> @@ -19316,6 +19334,12 @@ static bool
>  aarch64_composite_type_p (const_tree type,
>                         machine_mode mode)
>  {
> +  /* If a Advanced SIMD partial or full aggregate vector type we are a
> +     composite type.  */
> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> +      || aarch64_advsimd_full_struct_mode_p (mode))
> +    return true;
> +

Isn't this naturally true after the fix to aarch64_short_vector_p?
It would be good to avoid adding new “mode only” tests if we can
help it.

Also, the old code didn't handle OI, CI or XI specially here,
so doing something different now might be dangerous.

Thanks,
Richard

>    if (aarch64_short_vector_p (type, mode))
>      return false;
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c 
> b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a1376acbc25671e284d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fdump-rtl-expand -w" } */
> +
> +#include <arm_neon.h>
> +
> +void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
> +{
> +  uint16x4x4_t cols_01_23_45_67 = { {
> +    vreinterpret_u16_u8(cols_01_23.val[0]),
> +    vreinterpret_u16_u8(cols_01_23.val[1]),
> +    vreinterpret_u16_u8(cols_45_67.val[0]),
> +    vreinterpret_u16_u8(cols_45_67.val[1])
> +  } };
> +
> +  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
> +}
> +
> +/* Check that we expand to v0 and v2 from the function arguments.  */
> +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} 
> expand } } */
> +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} 
> expand } } */
> +

Reply via email to