> -----Original Message-----
> From: Srinath Parvathaneni <srinath.parvathan...@arm.com>
> Sent: 04 June 2020 17:57
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Subject: [PATCH][GCC] arm: Fix the MVE ACLE vaddq_m polymorphic variants.
> 
> Hello,
> 
> This patch fixes the MVE ACLE vaddq_m polymorphic variants by modifying
> the corresponding
> intrinsic parameters and vaddq_m polymorphic variant's _Generic case
> entries in "arm_mve.h"
> header file.
> 
> Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more
> details.
> [1] https://developer.arm.com/architectures/instruction-sets/simd-
> isas/helium/mve-intrinsics
> 
> Regression tested on arm-none-eabi and found no regressions.
> 
> Ok for master and gcc-10 branch?
> 

Ok.
Thanks,
Kyrill

> Thanks,
> Srinath.
> 
> gcc/ChangeLog:
> 
> 2020-06-04  Srinath Parvathaneni  <srinath.parvathan...@arm.com>
> 
>       * config/arm/arm_mve.h (__arm_vaddq_m_n_s8): Correct the
> intrinsic
>       arguments.
>       (__arm_vaddq_m_n_s32): Likewise.
>       (__arm_vaddq_m_n_s16): Likewise.
>       (__arm_vaddq_m_n_u8): Likewise.
>       (__arm_vaddq_m_n_u32): Likewise.
>       (__arm_vaddq_m_n_u16): Likewise.
>       (__arm_vaddq_m): Modify polymorphic variant.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-06-04  Srinath Parvathaneni  <srinath.parvathan...@arm.com>
> 
>       * gcc.target/arm/mve/intrinsics/mve_vaddq_m.c: New test.
> 
> 
> 
> ###############     Attachment also inlined for ease of reply
> ###############
> 
> 
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index
> 1002512a98f9364403f66eba0e320fe5070bdc3a..9ea146189883c09c71842f10
> d25dc9924b5ae7e3 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -9713,42 +9713,42 @@ __arm_vabdq_m_u16 (uint16x8_t __inactive,
> uint16x8_t __a, uint16x8_t __b, mve_pr
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_sv16qi (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_sv4si (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_sv8hi (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_uv16qi (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32_t
> __b, mve_pred16_t __p)
> +__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_uv4si (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16_t
> __b, mve_pred16_t __p)
> +__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int __b,
> mve_pred16_t __p)
>  {
>    return __builtin_mve_vaddq_m_n_uv8hi (__inactive, __a, __b, __p);
>  }
> @@ -26493,42 +26493,42 @@ __arm_vabdq_m (uint16x8_t __inactive,
> uint16x8_t __a, uint16x8_t __b, mve_pred16
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_s8 (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_s32 (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_s16 (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_u8 (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_u32 (__inactive, __a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b,
> mve_pred16_t __p)
> +__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, int __b,
> mve_pred16_t __p)
>  {
>   return __arm_vaddq_m_n_u16 (__inactive, __a, __b, __p);
>  }
> @@ -37383,12 +37383,12 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3), \
>    int
> (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_
> mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0,
> float16x8_t), __ARM_mve_coerce(__p1, float16x8_t),
> __ARM_mve_coerce(__p2, float16x8_t), p3), \
>    int
> (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_
> mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0,
> float32x4_t), __ARM_mve_coerce(__p1, float32x4_t),
> __ARM_mve_coerce(__p2, float32x4_t), p3), \
> -  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3),
> \
> -  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t),
> p3), \
> -  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t),
> p3), \
> -  int
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0,
> uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8_t), p3), \
> -  int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16_t), p3), \
> -  int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32_t), p3), \
> +  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0,
> uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int), p3), \
>    int
> (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_
> mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0,
> float16x8_t), __ARM_mve_coerce(__p1, float16x8_t),
> __ARM_mve_coerce(__p2, float16_t), p3), \
>    int
> (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_
> mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0,
> float32x4_t), __ARM_mve_coerce(__p1, float32x4_t),
> __ARM_mve_coerce(__p2, float32_t), p3));})
> 
> @@ -39749,12 +39749,12 @@ extern void *__ARM_undef;
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
>    _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> -  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3),
> \
> -  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t),
> p3), \
> -  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t),
> p3), \
> -  int
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0,
> uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8_t), p3), \
> -  int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16_t), p3), \
> -  int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32_t), p3), \
> +  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve
> _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0,
> uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int), p3), \
> +  int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int), p3), \
>    int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve
> _type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0,
> int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2,
> int8x16_t), p3), \
>    int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve
> _type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0,
> int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> int16x8_t), p3), \
>    int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve
> _type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0,
> int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32x4_t), p3), \
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..719b95d902088c82970383e
> eee367d30ed112417
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c
> @@ -0,0 +1,48 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include <arm_mve.h>
> +mve_pred16_t p;
> +
> +int32x4_t fn1 (int32x4_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_s32(), vecIdx, 1, p);
> +}
> +
> +int16x8_t fn2 (int16x8_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_s16(), vecIdx, 1, p);
> +}
> +
> +int8x16_t fn3 (int8x16_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_s8(), vecIdx, 1, p);
> +}
> +
> +uint32x4_t fn4 (uint32x4_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_u32(), vecIdx, 1, p);
> +}
> +
> +uint16x8_t fn5 (uint16x8_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_u16(), vecIdx, 1, p);
> +}
> +
> +uint8x16_t fn6 (uint8x16_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_u8(), vecIdx, 1, p);
> +}
> +
> +float32x4_t fn7 (float32x4_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_f32(), vecIdx, (float32_t) 1.23, p);
> +}
> +
> +float16x8_t fn8 (float16x8_t vecIdx)
> +{
> +  return vaddq_m(vuninitializedq_f16(), vecIdx, (float16_t) 1.40, p);
> +}
> +
> +/* { dg-final { scan-assembler-not "__ARM_undef" } } */

Reply via email to