> -----Original Message----- > From: Srinath Parvathaneni <srinath.parvathan...@arm.com> > Sent: 04 June 2020 17:57 > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <kyrylo.tkac...@arm.com> > Subject: [PATCH][GCC] arm: Fix the MVE ACLE vaddq_m polymorphic variants. > > Hello, > > This patch fixes the MVE ACLE vaddq_m polymorphic variants by modifying the corresponding intrinsic parameters and the vaddq_m polymorphic variant's _Generic case entries in the "arm_mve.h" header file. > > Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. > [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics > > Regression tested on arm-none-eabi and found no regressions. > > Ok for master and gcc-10 branch? > Ok. Thanks, Kyrill > Thanks, > Srinath. > > gcc/ChangeLog: > > 2020-06-04 Srinath Parvathaneni <srinath.parvathan...@arm.com> > > * config/arm/arm_mve.h (__arm_vaddq_m_n_s8): Correct the intrinsic arguments. > (__arm_vaddq_m_n_s32): Likewise. > (__arm_vaddq_m_n_s16): Likewise. > (__arm_vaddq_m_n_u8): Likewise. > (__arm_vaddq_m_n_u32): Likewise. > (__arm_vaddq_m_n_u16): Likewise. > (__arm_vaddq_m): Modify polymorphic variant. > > gcc/testsuite/ChangeLog: > > 2020-06-04 Srinath Parvathaneni <srinath.parvathan...@arm.com> > > * gcc.target/arm/mve/intrinsics/mve_vaddq_m.c: New test. 
> > > > ############### Attachment also inlined for ease of reply > ############### > > > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index > 1002512a98f9364403f66eba0e320fe5070bdc3a..9ea146189883c09c71842f10 > d25dc9924b5ae7e3 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -9713,42 +9713,42 @@ __arm_vabdq_m_u16 (uint16x8_t __inactive, > uint16x8_t __a, uint16x8_t __b, mve_pr > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv16qi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv4si (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv8hi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv16qi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint32x4_t > __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32_t > __b, mve_pred16_t __p) > +__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv4si (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16_t > __b, mve_pred16_t __p) > +__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv8hi (__inactive, __a, __b, __p); > } > @@ -26493,42 +26493,42 @@ __arm_vabdq_m (uint16x8_t __inactive, > uint16x8_t __a, uint16x8_t __b, mve_pred16 > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s8 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s32 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s16 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m 
(uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u8 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u32 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, int __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u16 (__inactive, __a, __b, __p); > } > @@ -37383,12 +37383,12 @@ extern void *__ARM_undef; > int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32x4_t), p3), \ > int > (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_ > mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, > float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), > __ARM_mve_coerce(__p2, float16x8_t), p3), \ > int > (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_ > mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, > float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), > __ARM_mve_coerce(__p2, float32x4_t), p3), \ > - int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), > \ > - int > 
(*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), > p3), \ > - int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), > p3), \ > - int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce(__p2, uint8_t), p3), \ > - int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, uint16_t), p3), \ > - int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32_t), p3), \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > 
__ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, int), p3), \ > int > (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_ > mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, > float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), > __ARM_mve_coerce(__p2, float16_t), p3), \ > int > (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_ > mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, > float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), > __ARM_mve_coerce(__p2, float32_t), p3));}) > > @@ -39749,12 +39749,12 @@ extern void *__ARM_undef; > __typeof(p1) __p1 = (p1); \ > __typeof(p2) __p2 = (p2); \ > _Generic( (int > (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ > eid(__p2)])0, \ > - int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), > \ > - int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), > p3), \ > - int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), > p3), \ > - int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > 
uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce(__p2, uint8_t), p3), \ > - int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, uint16_t), p3), \ > - int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32_t), p3), \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, int), p3), \ > int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int8x16_t]: 
__arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, > int8x16_t), p3), \ > int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16x8_t), p3), \ > int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32x4_t), p3), \ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..719b95d902088c82970383e > eee367d30ed112417 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c > @@ -0,0 +1,48 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ > +/* { dg-add-options arm_v8_1m_mve_fp } */ > +/* { dg-additional-options "-O2" } */ > + > +#include <arm_mve.h> > +mve_pred16_t p; > + > +int32x4_t fn1 (int32x4_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_s32(), vecIdx, 1, p); > +} > + > +int16x8_t fn2 (int16x8_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_s16(), vecIdx, 1, p); > +} > + > +int8x16_t fn3 (int8x16_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_s8(), vecIdx, 1, p); > +} > + > +uint32x4_t fn4 (uint32x4_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_u32(), vecIdx, 1, p); > +} > + > +uint16x8_t fn5 (uint16x8_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_u16(), vecIdx, 1, p); > +} > + > +uint8x16_t fn6 (uint8x16_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_u8(), vecIdx, 1, p); > +} > + > +float32x4_t fn7 (float32x4_t vecIdx) > +{ > + return vaddq_m(vuninitializedq_f32(), vecIdx, (float32_t) 1.23, p); > +} > + > +float16x8_t fn8 (float16x8_t vecIdx) > +{ > + return 
vaddq_m(vuninitializedq_f16(), vecIdx, (float16_t) 1.40, p); > +} > + > +/* { dg-final { scan-assembler-not "__ARM_undef" } } */