> -----Original Message----- > From: Andrea Corallo <andrea.cora...@arm.com> > Sent: Thursday, November 17, 2022 4:38 PM > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <kyrylo.tkac...@arm.com>; Richard Earnshaw > <richard.earns...@arm.com>; Stam Markianos-Wright <Stam.Markianos- > wri...@arm.com> > Subject: [PATCH 13/35] arm: further fix overloading of MVE vaddq[_m]_n > intrinsic > > From: Stam Markianos-Wright <stam.markianos-wri...@arm.com> > > It was observed that in tests `vaddq_m_n_[s/u][8/16/32].c`, the _Generic > resolution would fall back to the `__ARM_undef` failure state. > > This is a regression since `dc39db873670bea8d8e655444387ceaa53a01a79` > and > `6bd4ce64eb48a72eca300cb52773e6101d646004`, but it previously wasn't > identified, because the tests were not checking for this kind of failure. > > The above commits changed the definitions of the intrinsics from using > `[u]int[8/16/32]_t` types for the scalar argument to using `int`. This > allowed `int` to be supported in user code through the overloaded > `#defines`, but seems to have broken the `[u]int[8/16/32]_t` types. > > The solution implemented by this patch is to explicitly use a new > _Generic mapping from all the `[u]int[8/16/32]_t` types, in addition to `int`. With this > change, both `int` and `[u]int[8/16/32]_t` parameters are supported from > user code and are handled by the overloading mechanism correctly. > > gcc/ChangeLog: > > * config/arm/arm_mve.h (__arm_vaddq_m_n_s8): Change types. > (__arm_vaddq_m_n_s32): Likewise. > (__arm_vaddq_m_n_s16): Likewise. > (__arm_vaddq_m_n_u8): Likewise. > (__arm_vaddq_m_n_u32): Likewise. > (__arm_vaddq_m_n_u16): Likewise. > (__arm_vaddq_m): Fix Overloading. > (__ARM_mve_coerce3): New. Ok. Wasn't there a PR in Bugzilla about this that we can cite in the commit message? 
Thanks, Kyrill > --- > gcc/config/arm/arm_mve.h | 78 ++++++++++++++++++++-------------------- > 1 file changed, 40 insertions(+), 38 deletions(-) > > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index 684f997520f..951dc25374b 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -9675,42 +9675,42 @@ __arm_vabdq_m_u16 (uint16x8_t __inactive, > uint16x8_t __a, uint16x8_t __b, mve_pr > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv16qi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv4si (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_sv8hi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, > mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv16qi (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint32x4_t > __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32_t > __b, mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv4si (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16_t > __b, mve_pred16_t __p) > { > return __builtin_mve_vaddq_m_n_uv8hi (__inactive, __a, __b, __p); > } > @@ -26417,42 +26417,42 @@ __arm_vabdq_m (uint16x8_t __inactive, > uint16x8_t __a, uint16x8_t __b, mve_pred16 > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s8 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s32 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_s16 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m 
(uint8x16_t __inactive, uint8x16_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u8 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u32 (__inactive, __a, __b, __p); > } > > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, int __b, > mve_pred16_t __p) > +__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, > mve_pred16_t __p) > { > return __arm_vaddq_m_n_u16 (__inactive, __a, __b, __p); > } > @@ -35657,6 +35657,8 @@ extern void *__ARM_undef; > _Generic(param, type: param, const type: param, default: *(type > *)__ARM_undef) > #define __ARM_mve_coerce2(param, type) \ > _Generic(param, type: param, float16_t: param, float32_t: param, default: > *(type *)__ARM_undef) > +#define __ARM_mve_coerce3(param, type) \ > + _Generic(param, type: param, int8_t: param, int16_t: param, int32_t: > param, int64_t: param, uint8_t: param, uint16_t: param, uint32_t: param, > uint64_t: param, default: *(type *)__ARM_undef) > > #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ > > @@ -35871,14 +35873,14 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: > __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: > __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: > __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: > __arm_vaddq_f16 (__ARM_mve_coerce(p0, float16x8_t), > __ARM_mve_coerce(p1, float16x8_t)), \ > - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: > __arm_vaddq_f32 (__ARM_mve_coerce(p0, float32x4_t), > __ARM_mve_coerce(p1, float32x4_t)), \ > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int)), \ > + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: > __arm_vaddq_f16 (__ARM_mve_coerce(__p0, float16x8_t), > __ARM_mve_coerce(__p1, float16x8_t)), \ > + int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: > __arm_vaddq_f32 (__ARM_mve_coerce(__p0, float32x4_t), > __ARM_mve_coerce(__p1, float32x4_t)), \ > + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce3(p1, int)), \ > int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: > __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), > __ARM_mve_coerce2(__p1, double)), \ > int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: > __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), > __ARM_mve_coerce2(__p1, double)));}) > > @@ -37316,12 +37318,12 @@ extern void *__ARM_undef; > int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32x4_t), p3), \ > int > (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_ > mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, > float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), > __ARM_mve_coerce(__p2, float16x8_t), p3), \ > int > (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_ > 
mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, > float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), > __ARM_mve_coerce(__p2, float32x4_t), p3), \ > - int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int), p3), \ > - int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int), p3), \ > - int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, > int), p3), \ > + int > 
(*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, > int), p3), \ > + int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce3(p2, int), p3), \ > int > (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_ > mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, > float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), > __ARM_mve_coerce2(__p2, double), p3), \ > int > (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_ > mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, > float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), > __ARM_mve_coerce2(__p2, double), p3));}) > > @@ -38820,12 +38822,12 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: > __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: > __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: > __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, 
uint8x16_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce(__p1, int)), \ > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce(__p1, int)));}) > + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), > __ARM_mve_coerce3(p1, int)), \ > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: > __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), > __ARM_mve_coerce3(p1, int)));}) > > #define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > @@ -39641,12 +39643,12 @@ extern void *__ARM_undef; > __typeof(p1) __p1 = (p1); \ > __typeof(p2) __p2 = (p2); \ > _Generic( (int > 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ > eid(__p2)])0, \ > - int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int), p3), \ > - int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int), p3), \ > - int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, int), p3), \ > - int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, int), p3), \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), > __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, > int), p3), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int_n]: __arm_vaddq_m_n_s32 
(__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, > int), p3), \ > + int > (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), > __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce3(p2, int), p3), \ > + int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m > ve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce3(p2, int), p3), \ > int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve > _type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, > int8x16_t), p3), \ > int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve > _type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16x8_t), p3), \ > int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve > _type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32x4_t), p3), \ > -- > 2.25.1
RE: [PATCH 13/35] arm: further fix overloading of MVE vaddq[_m]_n intrinsic
Kyrylo Tkachov via Gcc-patches Fri, 18 Nov 2022 08:49:38 -0800
- [PATCH 17/35] arm: improve tests and... Andrea Corallo via Gcc-patches
- RE: [PATCH 17/35] arm: improve ... Kyrylo Tkachov via Gcc-patches
- [PATCH 01/35] arm: improve vcreateq*... Andrea Corallo via Gcc-patches
- RE: [PATCH 01/35] arm: improve ... Kyrylo Tkachov via Gcc-patches
- [PATCH 15/35] arm: Explicitly specif... Andrea Corallo via Gcc-patches
- RE: [PATCH 15/35] arm: Explicit... Kyrylo Tkachov via Gcc-patches
- Re: [PATCH 15/35] arm: Expl... Ramana Radhakrishnan via Gcc-patches
- [PATCH 15/35] arm: Expl... Stam Markianos-Wright via Gcc-patches
- Re: [PATCH 15/35] arm: Expl... Stam Markianos-Wright via Gcc-patches
- [PATCH 13/35] arm: further fix overl... Andrea Corallo via Gcc-patches
- RE: [PATCH 13/35] arm: further ... Kyrylo Tkachov via Gcc-patches
- Re: [PATCH 13/35] arm: furt... Stam Markianos-Wright via Gcc-patches
- Re: [PATCH 00/35] arm: rework MVE te... Andrea Corallo via Gcc-patches