Implement vldr?q_gather_base_wb using the new MVE builtins framework. gcc/ChangeLog:
* config/arm/arm-builtins.cc (arm_ldrgbwbxu_qualifiers) (arm_ldrgbwbxu_z_qualifiers, arm_ldrgbwbs_qualifiers) (arm_ldrgbwbu_qualifiers, arm_ldrgbwbs_z_qualifiers) (arm_ldrgbwbu_z_qualifiers): Delete. * config/arm/arm-mve-builtins-base.cc (vldrq_gather_base_impl): Add support for MODE_wb. * config/arm/arm-mve-builtins-shapes.cc (struct load_gather_base_def): Likewise. * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Delete. (vldrdq_gather_base_wb_u64): Delete. (vldrdq_gather_base_wb_z_s64): Delete. (vldrdq_gather_base_wb_z_u64): Delete. (vldrwq_gather_base_wb_f32): Delete. (vldrwq_gather_base_wb_s32): Delete. (vldrwq_gather_base_wb_u32): Delete. (vldrwq_gather_base_wb_z_f32): Delete. (vldrwq_gather_base_wb_z_s32): Delete. (vldrwq_gather_base_wb_z_u32): Delete. (__arm_vldrdq_gather_base_wb_s64): Delete. (__arm_vldrdq_gather_base_wb_u64): Delete. (__arm_vldrdq_gather_base_wb_z_s64): Delete. (__arm_vldrdq_gather_base_wb_z_u64): Delete. (__arm_vldrwq_gather_base_wb_s32): Delete. (__arm_vldrwq_gather_base_wb_u32): Delete. (__arm_vldrwq_gather_base_wb_z_s32): Delete. (__arm_vldrwq_gather_base_wb_z_u32): Delete. (__arm_vldrwq_gather_base_wb_f32): Delete. (__arm_vldrwq_gather_base_wb_z_f32): Delete. * config/arm/arm_mve_builtins.def (vldrwq_gather_base_nowb_z_u) (vldrdq_gather_base_nowb_z_u, vldrwq_gather_base_nowb_u) (vldrdq_gather_base_nowb_u, vldrwq_gather_base_nowb_z_s) (vldrwq_gather_base_nowb_z_f, vldrdq_gather_base_nowb_z_s) (vldrwq_gather_base_nowb_s, vldrwq_gather_base_nowb_f) (vldrdq_gather_base_nowb_s, vldrdq_gather_base_wb_z_s) (vldrdq_gather_base_wb_z_u, vldrdq_gather_base_wb_s) (vldrdq_gather_base_wb_u, vldrwq_gather_base_wb_z_s) (vldrwq_gather_base_wb_z_f, vldrwq_gather_base_wb_z_u) (vldrwq_gather_base_wb_s, vldrwq_gather_base_wb_f) (vldrwq_gather_base_wb_u): Delete. * config/arm/iterators.md (supf): Remove VLDRWQGBWB_S, VLDRWQGBWB_U, VLDRDQGBWB_S, VLDRDQGBWB_U. (VLDRWGBWBQ, VLDRDGBWBQ): Delete. 
* config/arm/mve.md (mve_vldrwq_gather_base_wb_<supf>v4si): Delete. (mve_vldrwq_gather_base_nowb_<supf>v4si): Delete. (mve_vldrwq_gather_base_wb_<supf>v4si_insn): Delete. (mve_vldrwq_gather_base_wb_z_<supf>v4si): Delete. (mve_vldrwq_gather_base_nowb_z_<supf>v4si): Delete. (mve_vldrwq_gather_base_wb_z_<supf>v4si_insn): Delete. (mve_vldrwq_gather_base_wb_fv4sf): Delete. (mve_vldrwq_gather_base_nowb_fv4sf): Delete. (mve_vldrwq_gather_base_wb_fv4sf_insn): Delete. (mve_vldrwq_gather_base_wb_z_fv4sf): Delete. (mve_vldrwq_gather_base_nowb_z_fv4sf): Delete. (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Delete. (mve_vldrdq_gather_base_wb_<supf>v2di): Delete. (mve_vldrdq_gather_base_nowb_<supf>v2di): Delete. (mve_vldrdq_gather_base_wb_<supf>v2di_insn): Delete. (mve_vldrdq_gather_base_wb_z_<supf>v2di): Delete. (mve_vldrdq_gather_base_nowb_z_<supf>v2di): Delete. (mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Delete. (@mve_vldrq_gather_base_wb_<mode>): New. (@mve_vldrq_gather_base_wb_z_<mode>): New. * config/arm/unspecs.md (VLDRWQGBWB_S, VLDRWQGBWB_U, VLDRWQGBWB_F) (VLDRDQGBWB_S, VLDRDQGBWB_U): Delete. (VLDRGBWBQ, VLDRGBWBQ_Z): New. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: Update expected output. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. 
--- gcc/config/arm/arm-builtins.cc | 33 -- gcc/config/arm/arm-mve-builtins-base.cc | 39 +- gcc/config/arm/arm-mve-builtins-shapes.cc | 4 +- gcc/config/arm/arm_mve.h | 110 ------ gcc/config/arm/arm_mve_builtins.def | 20 - gcc/config/arm/iterators.md | 5 +- gcc/config/arm/mve.md | 352 ++---------------- gcc/config/arm/unspecs.md | 7 +- .../intrinsics/vldrdq_gather_base_wb_s64.c | 4 +- .../intrinsics/vldrdq_gather_base_wb_u64.c | 4 +- 10 files changed, 78 insertions(+), 500 deletions(-) diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 60ee12839fb..01bdbbf943d 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -610,39 +610,6 @@ arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \ (arm_quadop_unone_unone_unone_none_pred_qualifiers) -static enum arm_type_qualifiers -arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBXU_QUALIFIERS (arm_ldrgbwbxu_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBS_QUALIFIERS (arm_ldrgbwbs_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBU_QUALIFIERS (arm_ldrgbwbu_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) - -static enum arm_type_qualifiers 
-arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) - static enum arm_type_qualifiers arm_lsll_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_none}; diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 3a1a8af4fcd..7938efcdf68 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -483,19 +483,48 @@ class vldrq_gather_base_impl : public load_extending public: using load_extending::load_extending; + machine_mode memory_vector_mode (const function_instance &fi) const override + { + unsigned int element_bits = fi.type_suffix (0).element_bits; + type_suffix_index suffix = find_type_suffix (TYPE_unsigned, element_bits); + return type_suffixes[suffix].vector_mode; + } + rtx expand (function_expander &e) const override { insn_code icode; - rtx insns; + rtx insns, base_ptr, new_base; + machine_mode base_mode; + + if ((e.mode_suffix_id != MODE_none) + && (e.mode_suffix_id != MODE_wb)) + gcc_unreachable (); + + /* In _wb mode, the start offset is passed via a pointer, + dereference it. */ + if (e.mode_suffix_id == MODE_wb) + { + base_mode = e.memory_vector_mode (); + rtx base = gen_reg_rtx (base_mode); + base_ptr = e.args[0]; + emit_insn (gen_rtx_SET (base, gen_rtx_MEM (base_mode, base_ptr))); + e.args[0] = base; + new_base = gen_reg_rtx (base_mode); + e.args.quick_insert (0, new_base); + } switch (e.pred) { case PRED_none: - icode = code_for_mve_vldrq_gather_base (e.vector_mode (0)); + icode = (e.mode_suffix_id == MODE_none) + ? code_for_mve_vldrq_gather_base (e.vector_mode (0)) + : code_for_mve_vldrq_gather_base_wb (e.vector_mode (0)); break; case PRED_z: - icode = code_for_mve_vldrq_gather_base_z (e.vector_mode (0)); + icode = (e.mode_suffix_id == MODE_none) + ? 
code_for_mve_vldrq_gather_base_z (e.vector_mode (0)) + : code_for_mve_vldrq_gather_base_wb_z (e.vector_mode (0)); break; default: @@ -503,6 +532,10 @@ public: } insns = e.use_exact_insn (icode); + /* Update offset as appropriate. */ + if (e.mode_suffix_id == MODE_wb) + emit_insn (gen_rtx_SET (gen_rtx_MEM (base_mode, base_ptr), new_base)); + return insns; } }; diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc index fa4fee072e6..d7cfdca3acd 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.cc +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc @@ -1558,7 +1558,8 @@ struct load_ext_gather : public overloaded_base<0> Example: vldrwq_gather_base int32x4_t [__arm_]vldrwq_gather_base_s32(uint32x4_t addr, const int offset) - float32x4_t [__arm_]vldrwq_gather_base_z_f32(uint32x4_t addr, const int offset, mve_pred16_t p) */ + float32x4_t [__arm_]vldrwq_gather_base_z_f32(uint32x4_t addr, const int offset, mve_pred16_t p) + int64x2_t [__arm_]vldrdq_gather_base_wb_s64(uint64x2_t *addr, const int offset) */ struct load_gather_base_def : public nonoverloaded_base { bool @@ -1578,6 +1579,7 @@ struct load_gather_base_def : public nonoverloaded_base bool preserve_user_namespace) const override { build_all (b, "v0,vu0,ss64", group, MODE_none, preserve_user_namespace); + build_all (b, "v0,b,ss64", group, MODE_wb, preserve_user_namespace); } bool diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 4862ac9e491..21d6159b5ff 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -69,16 +69,6 @@ #define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void) #define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void) #define vuninitializedq_f32(void) __arm_vuninitializedq_f32(void) -#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset) -#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset) -#define 
vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) -#define vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) -#define vldrwq_gather_base_wb_f32(__addr, __offset) __arm_vldrwq_gather_base_wb_f32(__addr, __offset) -#define vldrwq_gather_base_wb_s32(__addr, __offset) __arm_vldrwq_gather_base_wb_s32(__addr, __offset) -#define vldrwq_gather_base_wb_u32(__addr, __offset) __arm_vldrwq_gather_base_wb_u32(__addr, __offset) -#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) -#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) -#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) #define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value) #define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value) #define vld2q_s8(__addr) __arm_vld2q_s8(__addr) @@ -214,86 +204,6 @@ __arm_vpnot (mve_pred16_t __a) return __builtin_mve_vpnotv16bi (__a); } -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) -{ - int64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_sv2di (*__addr, __offset); - *__addr = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); - return result; -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) -{ - uint64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_uv2di (*__addr, __offset); - *__addr = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); - return result; -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -__arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) -{ - int64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_z_sv2di (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) -{ - uint64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_z_uv2di (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) -{ - int32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_sv4si (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); - return result; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) -{ - uint32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_uv4si (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); - return result; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - int32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_z_sv4si (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-__arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - uint32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_z_uv4si (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); - return result; -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value) @@ -724,26 +634,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) __builtin_mve_vst4qv4sf (__addr, __rv.__o); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) -{ - float32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_fv4sf (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); - return result; -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - float32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_z_fv4sf (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); - return result; -} - __extension__ extern __inline float16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vld4q_f16 (float16_t const * __addr) diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 44428104d3e..b85b334a81e 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -663,26 +663,6 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, 
v4sf) -VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si) -VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di) -VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si) -VAR1 (LDRGBWBU, vldrdq_gather_base_nowb_u, v2di) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_s, v4si) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_f, v4sf) -VAR1 (LDRGBWBS_Z, vldrdq_gather_base_nowb_z_s, v2di) -VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_s, v4si) -VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_f, v4sf) -VAR1 (LDRGBWBS, vldrdq_gather_base_nowb_s, v2di) -VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_s, v2di) -VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_u, v2di) -VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_s, v2di) -VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_u, v2di) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_s, v4si) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_f, v4sf) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_u, v4si) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_s, v4si) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_f, v4sf) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadciq_s, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vadciq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadcq_s, v4si) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 69c457c8d76..834c81da56e 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -2538,8 +2538,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VMLALDAVAXQ_P_S "s") (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") (VSTRDQSB_S "s") (VSTRDQSB_U "u") - (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") - (VLDRDQGBWB_U "u") (VADCQ_M_S "s") + (VADCQ_M_S "s") (VSBCQ_U "u") (VSBCQ_M_U "u") (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s") @@ -2938,8 +2937,6 @@ (define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) (define_int_iterator VSHRNBQ_M_N 
[VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) -(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) (define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S]) (define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S]) (define_int_iterator VxCQ [VADCQ_U VADCQ_S VSBCQ_U VSBCQ_S]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index ef4448ef65a..a0a59da4040 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3917,313 +3917,51 @@ (define_insn "@mve_vstrq_scatter_base_wb_p_<mode>" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>")) (set_attr "length" "8")]) -(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - +;; Vector gather loads with base and write-back ;; ;; [vldrwq_gather_base_wb_s vldrwq_gather_base_wb_u] -;; -(define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK 
(scratch))] - VLDRWGBWBQ)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn")) - (set_attr "length" "4")]) - -(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) -(define_expand "mve_vldrwq_gather_base_nowb_z_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - -;; -;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] -;; -(define_insn "mve_vldrwq_gather_base_wb_z_<supf>v4si_insn" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V4BI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRWGBWBQ)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWGBWBQ)) - ] - 
"TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn")) - (set_attr "length" "8")]) - -(define_expand "mve_vldrwq_gather_base_wb_fv4sf" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_result = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_fv4sf_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_fv4sf" - [(match_operand:V4SF 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_fv4sf_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - -;; ;; [vldrwq_gather_base_wb_f] +;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] ;; -(define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK (scratch))] - VLDRWQGBWB_F)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWQGBWB_F)) +(define_insn "@mve_vldrq_gather_base_wb_<mode>" + [(set (match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRGBWBQ)) + (set 
(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "=&w") + (unspec:<MVE_scatter_offset> [(match_dup 2) (match_dup 3)] + VLDRGBWBQ)) ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vldr<MVE_elem_ch>.u<V_sz_elem>\t%q0, [%q1, %3]!" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_wb_<mode>")) (set_attr "length" "4")]) -(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_result = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_z_fv4sf" - [(match_operand:V4SF 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - +;; Predicated vector gather loads with base and write-back ;; +;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] ;; [vldrwq_gather_base_wb_z_f] +;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] ;; 
-(define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V4BI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRWQGBWB_F)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWQGBWB_F)) +(define_insn "@mve_vldrq_gather_base_wb_z_<mode>" + [(set (match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRGBWBQ_Z)) + (set (match_operand:<MVE_scatter_offset> 1 "s_register_operand" "=&w") + (unspec:<MVE_scatter_offset> [(match_dup 2) (match_dup 3)] + VLDRGBWBQ_Z)) ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vpst\;vldr<MVE_elem_ch>t.u<V_sz_elem>\t%q0, [%q1, %3]!" 
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_wb_<mode>")) (set_attr "length" "8")]) -(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrdq_gather_base_nowb_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - - -;; -;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] -;; -(define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK (scratch))] - VLDRDGBWBQ)) - (set (match_operand:V2DI 1 "s_register_operand" "=&w") - (unspec:V2DI [(match_dup 2) (match_dup 3)] - VLDRDGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn")) - (set_attr "length" "4")]) - -(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V2QI 3 
"vpr_register_operand") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vldrdq_gather_base_nowb_z_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V2QI 3 "vpr_register_operand") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - (define_insn "get_fpscr_nzcvqc" [(set (match_operand:SI 0 "register_operand" "=r") (unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))] @@ -4239,32 +3977,6 @@ (define_insn "set_fpscr_nzcvqc" "vmsr\\tFPSCR_nzcvqc, %0" [(set_attr "type" "mve_move")]) -;; -;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] -;; -(define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V2QI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRDGBWBQ)) - (set (match_operand:V2DI 1 "s_register_operand" "=&w") - (unspec:V2DI [(match_dup 2) (match_dup 3)] - VLDRDGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrdt.u64\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn")) - (set_attr "length" "8")]) - ;; ;; [vadciq_u, vadciq_s] ;; [vsbciq_s, vsbciq_u] diff --git a/gcc/config/arm/unspecs.md 
b/gcc/config/arm/unspecs.md index 63a0168ea19..866e659938e 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1186,11 +1186,8 @@ (define_c_enum "unspec" [ VIWDUPQ_M VSTRSBWBQ VSTRSBWBQ_P - VLDRWQGBWB_S - VLDRWQGBWB_U - VLDRWQGBWB_F - VLDRDQGBWB_S - VLDRDQGBWB_U + VLDRGBWBQ + VLDRGBWBQ_Z VADCQ_U VADCQ_M_U VADCQ_S diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c index e3fd7f16a31..5fb9510d64b 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) ** ... */ int64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t *addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c index 161cf00b65e..2eb36f4d3d8 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) ** ... */ uint64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t *addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ -- 2.34.1