Hello Kyrill, The following patch is the rebased version of v1. (version v1) https://gcc.gnu.org/pipermail/gcc-patches/2019-November/534357.html
#### Hello, This patch supports following MVE ACLE intrinsics with writeback. vldrdq_gather_base_wb_s64, vldrdq_gather_base_wb_u64, vldrdq_gather_base_wb_z_s64, vldrdq_gather_base_wb_z_u64, vldrwq_gather_base_wb_f32, vldrwq_gather_base_wb_s32, vldrwq_gather_base_wb_u32, vldrwq_gather_base_wb_z_f32, vldrwq_gather_base_wb_z_s32, vldrwq_gather_base_wb_z_u32, vstrdq_scatter_base_wb_p_s64, vstrdq_scatter_base_wb_p_u64, vstrdq_scatter_base_wb_s64, vstrdq_scatter_base_wb_u64, vstrwq_scatter_base_wb_p_s32, vstrwq_scatter_base_wb_p_f32, vstrwq_scatter_base_wb_p_u32, vstrwq_scatter_base_wb_s32, vstrwq_scatter_base_wb_u32, vstrwq_scatter_base_wb_f32. Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics Regression tested on arm-none-eabi and found no regressions. Ok for trunk? Thanks, Srinath. gcc/ChangeLog: 2020-03-20 Srinath Parvathaneni <srinath.parvathan...@arm.com> Andre Vieira <andre.simoesdiasvie...@arm.com> Mihail Ionescu <mihail.ione...@arm.com> * config/arm/arm-builtins.c (LDRGBWBS_QUALIFIERS): Define builtin qualifier. (LDRGBWBU_QUALIFIERS): Likewise. (LDRGBWBS_Z_QUALIFIERS): Likewise. (LDRGBWBU_Z_QUALIFIERS): Likewise. (STRSBWBS_QUALIFIERS): Likewise. (STRSBWBU_QUALIFIERS): Likewise. (STRSBWBS_P_QUALIFIERS): Likewise. (STRSBWBU_P_QUALIFIERS): Likewise. * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Define macro. (vldrdq_gather_base_wb_u64): Likewise. (vldrdq_gather_base_wb_z_s64): Likewise. (vldrdq_gather_base_wb_z_u64): Likewise. (vldrwq_gather_base_wb_f32): Likewise. (vldrwq_gather_base_wb_s32): Likewise. (vldrwq_gather_base_wb_u32): Likewise. (vldrwq_gather_base_wb_z_f32): Likewise. (vldrwq_gather_base_wb_z_s32): Likewise. (vldrwq_gather_base_wb_z_u32): Likewise. (vstrdq_scatter_base_wb_p_s64): Likewise. (vstrdq_scatter_base_wb_p_u64): Likewise. (vstrdq_scatter_base_wb_s64): Likewise. (vstrdq_scatter_base_wb_u64): Likewise. 
(vstrwq_scatter_base_wb_p_s32): Likewise. (vstrwq_scatter_base_wb_p_f32): Likewise. (vstrwq_scatter_base_wb_p_u32): Likewise. (vstrwq_scatter_base_wb_s32): Likewise. (vstrwq_scatter_base_wb_u32): Likewise. (vstrwq_scatter_base_wb_f32): Likewise. (__arm_vldrdq_gather_base_wb_s64): Define intrinsic. (__arm_vldrdq_gather_base_wb_u64): Likewise. (__arm_vldrdq_gather_base_wb_z_s64): Likewise. (__arm_vldrdq_gather_base_wb_z_u64): Likewise. (__arm_vldrwq_gather_base_wb_s32): Likewise. (__arm_vldrwq_gather_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_z_s32): Likewise. (__arm_vldrwq_gather_base_wb_z_u32): Likewise. (__arm_vstrdq_scatter_base_wb_s64): Likewise. (__arm_vstrdq_scatter_base_wb_u64): Likewise. (__arm_vstrdq_scatter_base_wb_p_s64): Likewise. (__arm_vstrdq_scatter_base_wb_p_u64): Likewise. (__arm_vstrwq_scatter_base_wb_p_s32): Likewise. (__arm_vstrwq_scatter_base_wb_p_u32): Likewise. (__arm_vstrwq_scatter_base_wb_s32): Likewise. (__arm_vstrwq_scatter_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_f32): Likewise. (__arm_vldrwq_gather_base_wb_z_f32): Likewise. (__arm_vstrwq_scatter_base_wb_f32): Likewise. (__arm_vstrwq_scatter_base_wb_p_f32): Likewise. (vstrwq_scatter_base_wb): Define polymorphic variant. (vstrwq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb): Likewise. * config/arm/arm_mve_builtins.def (LDRGBWBS_QUALIFIERS): Use builtin qualifier. * config/arm/mve.md (mve_vstrwq_scatter_base_wb_<supf>v4si): Define RTL pattern. (mve_vstrwq_scatter_base_wb_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_<supf>v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf): Likewise. 
(mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Likewise. (mve_vstrdq_scatter_base_wb_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_<supf>v2di_insn): Likewise. (mve_vstrdq_scatter_base_wb_p_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn): Likewise. (mve_vldrwq_gather_base_wb_<supf>v4si): Likewise. (mve_vldrwq_gather_base_wb_<supf>v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_z_<supf>v4si): Likewise. (mve_vldrwq_gather_base_wb_z_<supf>v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_fv4sf_insn): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Likewise. (mve_vldrdq_gather_base_wb_<supf>v2di): Likewise. (mve_vldrdq_gather_base_wb_<supf>v2di_insn): Likewise. (mve_vldrdq_gather_base_wb_z_<supf>v2di): Likewise. (mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Likewise. gcc/testsuite/ChangeLog: 2020-03-20 Srinath Parvathaneni <srinath.parvathan...@arm.com> Andre Vieira <andre.simoesdiasvie...@arm.com> Mihail Ionescu <mihail.ione...@arm.com> * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: New test. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise. ############### Attachment also inlined for ease of reply ############### diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index cefc144e46d1781c8b05507ab49afe8be0fabcf3..ecdd95fdb753be0c53f568b036df1396a8d8f485 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -718,6 +718,50 @@ arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ (arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers) +static enum arm_type_qualifiers +arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBS_QUALIFIERS (arm_ldrgbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBU_QUALIFIERS (arm_ldrgbwbu_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { 
qualifier_unsigned, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none}; +#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned}; +#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_none, qualifier_unsigned}; +#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_unsigned, qualifier_unsigned}; +#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) + /* End of Qualifier for MVE builtins. */ /* void ([T element type] *, T, immediate). 
*/ diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 00f2242a6e9cfdd2db15c9e545446f1f2ab7afb9..969908b60f30184b0879d8e47d1fa01e12a7e092 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -2054,6 +2054,26 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define viwdupq_wb_u8( __a, __b, __imm) __arm_viwdupq_wb_u8( __a, __b, __imm) #define viwdupq_wb_u32( __a, __b, __imm) __arm_viwdupq_wb_u32( __a, __b, __imm) #define viwdupq_wb_u16( __a, __b, __imm) __arm_viwdupq_wb_u16( __a, __b, __imm) +#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset) +#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset) +#define vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) +#define vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) +#define vldrwq_gather_base_wb_f32(__addr, __offset) __arm_vldrwq_gather_base_wb_f32(__addr, __offset) +#define vldrwq_gather_base_wb_s32(__addr, __offset) __arm_vldrwq_gather_base_wb_s32(__addr, __offset) +#define vldrwq_gather_base_wb_u32(__addr, __offset) __arm_vldrwq_gather_base_wb_u32(__addr, __offset) +#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) +#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) +#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) +#define vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_s64(__addr, __offset, __value) 
__arm_vstrdq_scatter_base_wb_s64(__addr, __offset, __value) +#define vstrdq_scatter_base_wb_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_u64(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value) #endif __extension__ extern __inline void @@ -13388,6 +13408,150 @@ __arm_viwdupq_wb_u16 (uint32_t * __a, uint32_t __b, const int __imm) return __res; } +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) +{ + int64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + int64x2_t + result = 
__builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value) +{ + 
__builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value) +{ + __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, 
__value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr); +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -16024,6 +16188,42 @@ __arm_vreinterpretq_f32_u8 (uint8x16_t __a) return (float32x4_t) __a; } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value); + 
__builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p); +} + #endif enum { @@ -18940,8 +19140,34 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint64x2_t]: __arm_vreinterpretq_u8_u64 (__ARM_mve_coerce(__p0, uint64x2_t)), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vreinterpretq_u8_f32 (__ARM_mve_coerce(__p0, float32x4_t)));}) +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) + #else /* MVE Integer. 
*/ +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + #define vst4q(p0,p1) __arm_vst4q(p0,p1) #define __arm_vst4q(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -21424,6 +21650,18 @@ extern void *__ARM_undef; #endif /* MVE Integer. 
*/ +#define vstrdq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));}) + +#define vstrdq_scatter_base_wb(p0,p1,p2) __arm_vstrdq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrdq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));}) + #define vldrdq_gather_offset(p0,p1) __arm_vldrdq_gather_offset(p0,p1) #define __arm_vldrdq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 2ed7886a6d08f896c840693430112f12fc3b4ab0..9fc0a8a0c62b22cfd6d37658831cd91704f79885 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -827,3 +827,33 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vddupq_m_n_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, 
v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf) +VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf) +VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di) +VAR1 (LDRGBWBU_Z, vldrwq_gather_base_wb_z_u, v4si) +VAR1 (LDRGBWBU_Z, vldrdq_gather_base_wb_z_u, v2di) +VAR1 (LDRGBWBU, vldrwq_gather_base_wb_u, v4si) +VAR1 (LDRGBWBU, vldrdq_gather_base_wb_u, v2di) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_s, v4si) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_f, v4sf) +VAR1 (LDRGBWBS_Z, vldrdq_gather_base_wb_z_s, v2di) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_s, v4si) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_f, v4sf) +VAR1 (LDRGBWBS, vldrdq_gather_base_wb_s, v2di) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index ce58213d5f84686f60c4ea43a3220db86232c93e..2573cbb719e24f257ac5c8cde4c3aafee6c527a6 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -208,7 +208,10 @@ VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F VSTRWQSO_F VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ - VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M]) + VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M + VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U + VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U + VLDRDQGBWB_S VLDRDQGBWB_U]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -377,7 +380,10 @@ (VSTRDQSB_S "s") (VSTRDQSB_U "u") 
(VSTRDQSO_S "s") (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") - (VSTRWQSSO_S "s")]) + (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") + (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") + (VSTRDQSBWB_U "u")]) (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") @@ -626,6 +632,10 @@ (define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) (define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) (define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) +(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) +(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) +(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) +(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) (define_insn "*mve_mov<mode>" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us") @@ -10042,3 +10052,572 @@ "vpst\;\tviwdupt.u%#<V_sz_elem>\t%q2, %3, %4, %5" [(set_attr "type" "mve_move") (set_attr "length""8")]) +(define_expand "mve_vstrwq_scatter_base_wb_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (operands[0], operands[2], + operands[1], 
ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 3 "s_register_operand" "w")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + 
(match_operand:V4SI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrwq_scatter_base_wb_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (ignore_wb,operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); 
+ return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrdq_scatter_base_wb_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + 
(match_operand:V2DI 2 "s_register_operand" "w") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_add_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrdq_scatter_base_wb_p_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_p_add_<supf>v2di" + 
[(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 "s_register_operand" "=w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_s vldrwq_gather_base_wb_u] +;; +(define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") 
+ (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] +;; +(define_insn "mve_vldrwq_gather_base_wb_z_<supf>v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_f] +;; 
+(define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_f] +;; +(define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 
"s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + 
rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrdt.u64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..763a72e27ea8a5705b093883670d11f29c8ee5a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_s64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..df719f9bb6e40cf2ca989019c673ff30e57c0fa3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_u64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..c22adfc9e702fcb49eb8a0bf408513c3059d628b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c @@ -0,0 +1,12 @@ +/* { dg-do 
compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ +#include "arm_mve.h" + +int64x2_t foo (uint64x2_t * addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_s64 (addr, 1016, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..385c0d92e4017b1d4ccff475286d0a0b0cac4dfd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ +#include "arm_mve.h" + +uint64x2_t foo (uint64x2_t * addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_u64 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..12473c3770444158f9583e6fbd24308099445cf5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_f32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c new file mode 100644 index 
0000000000000000000000000000000000000000..619e41a3111def6f184ef06f000e51c3bae8f203 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_s32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..144e7f4b0a733322850054ebde7e4c8511365b5b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_u32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..d69f9bd7f4abb956a96692b7c1f88115d55e7d3a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_f32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..620dec6480a0ac10e9a89d3d49c2cdb78b0b6b14 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_s32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..409ecf46c63a8e6a4a528281b1fdb33f9fb35478 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_u32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..9fc4e3418b3ce226950d922c754d43ee9932fadf --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } 
*/ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_s64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..0434f6ded9c95f95c22ad14d95e41338ade5e2a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_u64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..9989564d2432913c068c73716edf03276bd93cdb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, 
int64x2_t value) +{ + vstrdq_scatter_base_wb_s64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..60c71d5cb05cf8533151e2424b5f2c9f835a7290 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb_u64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..2bae3801eb0ecf26478fb1bfd3639ef10a066207 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, float32x4_t value) +{ + vstrwq_scatter_base_wb_f32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int 
offset, float32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..dee941349743df37b1dcfa6152da6fac0b38cc2a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_f32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..3a0423aa71308634cd40d74a1f7dada19660c0a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_s32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, 
p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..32eb757be5ee523edad96a80a5bb6af4026b9057 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_u32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..4c232e148a8c77f34a380e3a8d3106c5bdc28eb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb_s32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..7171a9f7c25abcb7ebcc0b250ae29e08424aa4b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb_u32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */
rb12717.patch.gz
Description: application/gzip