Hi Christophe > -----Original Message----- > From: Gcc-patches <gcc-patches-boun...@gcc.gnu.org> On Behalf Of > Christophe Lyon via Gcc-patches > Sent: 06 October 2020 16:59 > To: gcc-patches@gcc.gnu.org > Subject: [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics > > This patch adds: > vqdmlashq_m_n_s16 > vqdmlashq_m_n_s32 > vqdmlashq_m_n_s8 > vqdmlashq_n_s16 > vqdmlashq_n_s32 > vqdmlashq_n_s8 > > v2: rebased after Srinath's reorganization patch >
Ok. Thanks, Kyrill > 2020-10-05 Christophe Lyon <christophe.l...@linaro.org> > > gcc/ > PR target/96914 > * config/arm/arm_mve.h (vqdmlashq, vqdmlashq_m): Define. > * config/arm/arm_mve_builtins.def (vqdmlashq_n_s) > (vqdmlashq_m_n_s): New. > * config/arm/unspecs.md (VQDMLASHQ_N_S, VQDMLASHQ_M_N_S): > New > unspecs. > * config/arm/iterators.md (VQDMLASHQ_N_S, > VQDMLASHQ_M_N_S): New > attributes. > (VQDMLASHQ_N): New iterator. > * config/arm/mve.md (mve_vqdmlashq_n_, mve_vqdmlashq_m_n_s): > New > patterns. > > gcc/testsuite/ > PR target/96914 > * gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c: New test. > * gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c: New test. > * gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c: New test. > * gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c: New test. > * gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c: New test. > * gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c: New test. > --- > gcc/config/arm/arm_mve.h | 116 > +++++++++++++++++++++ > gcc/config/arm/arm_mve_builtins.def | 2 + > gcc/config/arm/iterators.md | 3 + > gcc/config/arm/mve.md | 33 ++++++ > gcc/config/arm/unspecs.md | 2 + > .../arm/mve/intrinsics/vqdmlashq_m_n_s16.c | 23 ++++ > .../arm/mve/intrinsics/vqdmlashq_m_n_s32.c | 23 ++++ > .../arm/mve/intrinsics/vqdmlashq_m_n_s8.c | 23 ++++ > .../arm/mve/intrinsics/vqdmlashq_n_s16.c | 21 ++++ > .../arm/mve/intrinsics/vqdmlashq_n_s32.c | 21 ++++ > .../gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c | 21 ++++ > 11 files changed, 288 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c > create mode 100644 > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c > create mode 100644 > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c > create mode 100644 > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c > create mode 100644 > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c > create mode 100644 > 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c > > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index d9bfb203..7626ad1 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -141,6 +141,7 @@ > #define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a, > __p) > #define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c) > #define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c) > +#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c) > #define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c) > #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, > __p) > #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c) > @@ -260,6 +261,7 @@ > #define vorrq_m(__inactive, __a, __b, __p) __arm_vorrq_m(__inactive, __a, > __b, __p) > #define vqaddq_m(__inactive, __a, __b, __p) __arm_vqaddq_m(__inactive, > __a, __b, __p) > #define vqdmladhq_m(__inactive, __a, __b, __p) > __arm_vqdmladhq_m(__inactive, __a, __b, __p) > +#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, > __c, __p) > #define vqdmladhxq_m(__inactive, __a, __b, __p) > __arm_vqdmladhxq_m(__inactive, __a, __b, __p) > #define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c, > __p) > #define vqdmlsdhq_m(__inactive, __a, __b, __p) > __arm_vqdmlsdhq_m(__inactive, __a, __b, __p) > @@ -1307,6 +1309,7 @@ > #define vqdmlsdhxq_s8(__inactive, __a, __b) > __arm_vqdmlsdhxq_s8(__inactive, __a, __b) > #define vqdmlsdhq_s8(__inactive, __a, __b) > __arm_vqdmlsdhq_s8(__inactive, __a, __b) > #define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c) > +#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, > __c) > #define vqdmladhxq_s8(__inactive, __a, __b) > __arm_vqdmladhxq_s8(__inactive, __a, __b) > #define vqdmladhq_s8(__inactive, __a, __b) > __arm_vqdmladhq_s8(__inactive, __a, __b) > #define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c) > 
@@ -1391,6 +1394,7 @@ > #define vqrdmladhq_s16(__inactive, __a, __b) > __arm_vqrdmladhq_s16(__inactive, __a, __b) > #define vqdmlsdhxq_s16(__inactive, __a, __b) > __arm_vqdmlsdhxq_s16(__inactive, __a, __b) > #define vqdmlsdhq_s16(__inactive, __a, __b) > __arm_vqdmlsdhq_s16(__inactive, __a, __b) > +#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, > __b, __c) > #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, > __c) > #define vqdmladhxq_s16(__inactive, __a, __b) > __arm_vqdmladhxq_s16(__inactive, __a, __b) > #define vqdmladhq_s16(__inactive, __a, __b) > __arm_vqdmladhq_s16(__inactive, __a, __b) > @@ -1476,6 +1480,7 @@ > #define vqrdmladhq_s32(__inactive, __a, __b) > __arm_vqrdmladhq_s32(__inactive, __a, __b) > #define vqdmlsdhxq_s32(__inactive, __a, __b) > __arm_vqdmlsdhxq_s32(__inactive, __a, __b) > #define vqdmlsdhq_s32(__inactive, __a, __b) > __arm_vqdmlsdhq_s32(__inactive, __a, __b) > +#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, > __b, __c) > #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, > __c) > #define vqdmladhxq_s32(__inactive, __a, __b) > __arm_vqdmladhxq_s32(__inactive, __a, __b) > #define vqdmladhq_s32(__inactive, __a, __b) > __arm_vqdmladhq_s32(__inactive, __a, __b) > @@ -1902,6 +1907,9 @@ > #define vqdmladhxq_m_s8(__inactive, __a, __b, __p) > __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p) > #define vqdmladhxq_m_s32(__inactive, __a, __b, __p) > __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p) > #define vqdmladhxq_m_s16(__inactive, __a, __b, __p) > __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p) > +#define vqdmlashq_m_n_s8(__a, __b, __c, __p) > __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p) > +#define vqdmlashq_m_n_s32(__a, __b, __c, __p) > __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p) > +#define vqdmlashq_m_n_s16(__a, __b, __c, __p) > __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p) > #define vqdmlahq_m_n_s8(__a, __b, __c, __p) > __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p) > #define 
vqdmlahq_m_n_s32(__a, __b, __c, __p) > __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p) > #define vqdmlahq_m_n_s16(__a, __b, __c, __p) > __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p) > @@ -7425,6 +7433,13 @@ __arm_vqrdmlashq_n_s8 (int8x16_t __a, > int8x16_t __b, int8_t __c) > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c) > +{ > + return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c); > +} > + > +__extension__ extern __inline int8x16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c) > { > return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c); > @@ -8020,6 +8035,13 @@ __arm_vqrdmlashq_n_s16 (int16x8_t __a, > int16x8_t __b, int16_t __c) > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) > +{ > + return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c); > +} > + > +__extension__ extern __inline int16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) > { > return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c); > @@ -8615,6 +8637,13 @@ __arm_vqrdmlashq_n_s32 (int32x4_t __a, > int32x4_t __b, int32_t __c) > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) > +{ > + return __builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c); > +} > + > +__extension__ extern __inline int32x4_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) > { > return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c); > @@ -11142,6 +11171,27 @@ 
__arm_vqrdmlashq_m_n_s16 (int16x8_t __a, > int16x8_t __b, int16_t __c, mve_pred16_ > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, > mve_pred16_t __p) > +{ > + return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, > mve_pred16_t __p) > +{ > + return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int32x4_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, > mve_pred16_t __p) > +{ > + return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int8x16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t > __b, mve_pred16_t __p) > { > return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p); > @@ -24212,6 +24262,13 @@ __arm_vqrdmlashq (int8x16_t __a, int8x16_t > __b, int8_t __c) > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c) > +{ > + return __arm_vqdmlashq_n_s8 (__a, __b, __c); > +} > + > +__extension__ extern __inline int8x16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c) > { > return __arm_vqrdmlahq_n_s8 (__a, __b, __c); > @@ -24807,6 +24864,13 @@ __arm_vqrdmlashq (int16x8_t __a, int16x8_t > __b, int16_t __c) > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > 
+__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c) > +{ > + return __arm_vqdmlashq_n_s16 (__a, __b, __c); > +} > + > +__extension__ extern __inline int16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c) > { > return __arm_vqrdmlahq_n_s16 (__a, __b, __c); > @@ -25402,6 +25466,13 @@ __arm_vqrdmlashq (int32x4_t __a, int32x4_t > __b, int32_t __c) > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c) > +{ > + return __arm_vqdmlashq_n_s32 (__a, __b, __c); > +} > + > +__extension__ extern __inline int32x4_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c) > { > return __arm_vqrdmlahq_n_s32 (__a, __b, __c); > @@ -27929,6 +28000,27 @@ __arm_vqrdmlashq_m (int16x8_t __a, > int16x8_t __b, int16_t __c, mve_pred16_t __p) > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, > mve_pred16_t __p) > +{ > + return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, > mve_pred16_t __p) > +{ > + return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int32x4_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, > mve_pred16_t __p) > +{ > + return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p); > +} > + > +__extension__ extern __inline int8x16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t 
__a, int8x16_t __b, > mve_pred16_t __p) > { > return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p); > @@ -36798,6 +36890,14 @@ extern void *__ARM_undef; > int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_ > mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, uint16_t)), \ > int > (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_ > mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32_t)));}) > > +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > + __typeof(p1) __p1 = (p1); \ > + __typeof(p2) __p2 = (p2); \ > + _Generic( (int > (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ > eid(__p2)])0, \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, > int8_t)), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16_t)), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32_t)));}) > + > #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > __typeof(p2) __p2 = (p2); \ > @@ -39270,6 +39370,14 @@ extern void *__ARM_undef; > int > (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_ > mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), > __ARM_mve_coerce(__p2, uint16_t)), \ > int > 
(*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_ > mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), > __ARM_mve_coerce(__p2, uint32_t)));}) > > +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > + __typeof(p1) __p1 = (p1); \ > + __typeof(p2) __p2 = (p2); \ > + _Generic( (int > (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ > eid(__p2)])0, \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, > int8_t)), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16_t)), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32_t)));}) > + > #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > __typeof(p2) __p2 = (p2); \ > @@ -40811,6 +40919,14 @@ extern void *__ARM_undef; > int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv > e_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16_t), p3), \ > int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv > e_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32_t), p3));}) > > +#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ > + __typeof(p1) __p1 = (p1); \ > + __typeof(p2) __p2 = (p2); \ > + _Generic( (int > 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ > eid(__p2)])0, \ > + int > (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, > int8_t), p3), \ > + int > (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, > int16_t), p3), \ > + int > (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv > e_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, > int32_t), p3));}) > + > #define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > __typeof(p2) __p2 = (p2); \ > diff --git a/gcc/config/arm/arm_mve_builtins.def > b/gcc/config/arm/arm_mve_builtins.def > index 753e40a..9f3ecfe 100644 > --- a/gcc/config/arm/arm_mve_builtins.def > +++ b/gcc/config/arm/arm_mve_builtins.def > @@ -384,6 +384,7 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE, > vqrdmladhq_s, v16qi, v8hi, v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhxq_s, v16qi, v8hi, > v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhq_s, v16qi, v8hi, v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlahq_n_s, v16qi, v8hi, > v4si) > +VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlashq_n_s, v16qi, v8hi, > v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhxq_s, v16qi, v8hi, > v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhq_s, v16qi, v8hi, v4si) > VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmlsdavaxq_s, v16qi, v8hi, v4si) > @@ -574,6 +575,7 @@ VAR3 > (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi, > v8hi, v4si) > VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s, > v16qi, v8hi, v4si) > VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s, > v16qi, v8hi, v4si) > VAR3 
(QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s, > v16qi, v8hi, v4si) > +VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s, > v16qi, v8hi, v4si) > VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s, > v16qi, v8hi, v4si) > VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s, > v16qi, v8hi, v4si) > VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi, > v8hi, v4si) > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md > index 7f8c235..0dbf1b2 100644 > --- a/gcc/config/arm/iterators.md > +++ b/gcc/config/arm/iterators.md > @@ -1285,6 +1285,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") > (VCVTQ_TO_F_U "u") (VREV16Q_S "s") > (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u") > (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s") > (VPSELQ_U "u") (VQDMLAHQ_N_S "s") > (VQDMLAHQ_N_U "u") > + (VQDMLASHQ_N_S "s") > (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u") > (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u") > (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u") > @@ -1326,6 +1327,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") > (VCVTQ_TO_F_U "u") (VREV16Q_S "s") > (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U > "u") > (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u") > (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") > (VQSUBQ_M_U "u") > + (VQDMLASHQ_M_N_S "s") > (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s") > (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") > (VRHADDQ_M_U "u") > (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") > (VHSUBQ_M_U "u") > @@ -1577,6 +1579,7 @@ (define_int_iterator VMLASQ_N [VMLASQ_N_S > VMLASQ_N_U]) > (define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U]) > (define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U]) > (define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U]) > +(define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S]) > (define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S > VQRDMLAHQ_N_U]) > (define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S > VQRDMLASHQ_N_U]) > (define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U]) > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > 
index 4322adf..d406ab1 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -3678,6 +3678,22 @@ (define_insn "mve_vqdmlahq_n_<supf><mode>" > ]) > > ;; > +;; [vqdmlashq_n_s]) > +;; > +(define_insn "mve_vqdmlashq_n_<supf><mode>" > + [ > + (set (match_operand:MVE_2 0 "s_register_operand" "=w") > + (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") > + (match_operand:MVE_2 2 "s_register_operand" "w") > + (match_operand:<V_elem> 3 "s_register_operand" "r")] > + VQDMLASHQ_N)) > + ] > + "TARGET_HAVE_MVE" > + "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3" > + [(set_attr "type" "mve_move") > +]) > + > +;; > ;; [vqnegq_m_s]) > ;; > (define_insn "mve_vqnegq_m_s<mode>" > @@ -5904,6 +5920,23 @@ (define_insn "mve_vqdmlahq_m_n_s<mode>" > (set_attr "length""8")]) > > ;; > +;; [vqdmlashq_m_n_s]) > +;; > +(define_insn "mve_vqdmlashq_m_n_s<mode>" > + [ > + (set (match_operand:MVE_2 0 "s_register_operand" "=w") > + (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") > + (match_operand:MVE_2 2 "s_register_operand" "w") > + (match_operand:<V_elem> 3 "s_register_operand" "r") > + (match_operand:HI 4 "vpr_register_operand" "Up")] > + VQDMLASHQ_M_N_S)) > + ] > + "TARGET_HAVE_MVE" > + "vpst\;vqdmlasht.s%#<V_sz_elem>\t%q0, %q2, %3" > + [(set_attr "type" "mve_move") > + (set_attr "length""8")]) > + > +;; > ;; [vqrdmlahq_m_n_s]) > ;; > (define_insn "mve_vqrdmlahq_m_n_s<mode>" > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md > index caee18a..a98ac09 100644 > --- a/gcc/config/arm/unspecs.md > +++ b/gcc/config/arm/unspecs.md > @@ -877,6 +877,7 @@ (define_c_enum "unspec" [ > VQABSQ_M_S > VQDMLAHQ_N_S > VQDMLAHQ_N_U > + VQDMLASHQ_N_S > VQNEGQ_M_S > VQRDMLADHQ_S > VQRDMLADHXQ_S > @@ -1069,6 +1070,7 @@ (define_c_enum "unspec" [ > VRHADDQ_M_S > VMULQ_M_S > VMULQ_M_U > + VQDMLASHQ_M_N_S > VQRDMLASHQ_M_N_S > VRSHLQ_M_S > VRSHLQ_M_U > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c > 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c > new file mode 100644 > index 0000000..7c2e5cf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c > @@ -0,0 +1,23 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int16x8_t > +foo (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m_n_s16 (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s16" } } */ > + > +int16x8_t > +foo1 (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s16" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c > new file mode 100644 > index 0000000..cea9d9b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c > @@ -0,0 +1,23 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int32x4_t > +foo (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m_n_s32 (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s32" } } */ > + > +int32x4_t > +foo1 (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s32" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c > new file mode 100644 > index 
0000000..83ee258 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c > @@ -0,0 +1,23 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int8x16_t > +foo (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m_n_s8 (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s8" } } */ > + > +int8x16_t > +foo1 (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p) > +{ > + return vqdmlashq_m (a, b, c, p); > +} > + > +/* { dg-final { scan-assembler "vpst" } } */ > +/* { dg-final { scan-assembler "vqdmlasht.s8" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c > new file mode 100644 > index 0000000..c71a61c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c > @@ -0,0 +1,21 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int16x8_t > +foo (int16x8_t a, int16x8_t b, int16_t c) > +{ > + return vqdmlashq_n_s16 (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s16" } } */ > + > +int16x8_t > +foo1 (int16x8_t a, int16x8_t b, int16_t c) > +{ > + return vqdmlashq (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s16" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c > new file mode 100644 > index 0000000..61f6c66 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c > @@ -0,0 +1,21 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ 
> + > +#include "arm_mve.h" > + > +int32x4_t > +foo (int32x4_t a, int32x4_t b, int32_t c) > +{ > + return vqdmlashq_n_s32 (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s32" } } */ > + > +int32x4_t > +foo1 (int32x4_t a, int32x4_t b, int32_t c) > +{ > + return vqdmlashq (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s32" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c > new file mode 100644 > index 0000000..a078928 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c > @@ -0,0 +1,21 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +int8x16_t > +foo (int8x16_t a, int8x16_t b, int8_t c) > +{ > + return vqdmlashq_n_s8 (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s8" } } */ > + > +int8x16_t > +foo1 (int8x16_t a, int8x16_t b, int8_t c) > +{ > + return vqdmlashq (a, b, c); > +} > + > +/* { dg-final { scan-assembler "vqdmlash.s8" } } */ > -- > 2.7.4