Implement vldr?q_gather_base using the new MVE builtins framework. Rather than using different iterators for the predicated and non-predicated versions, the patch updates the expected output of two testcases. According to ACLE, vldrdq_gather_base_s64 is expected to generate VLDRD.64, whereas vldrdq_gather_base_z_s64 is expected to generate VLDRDT.U64.
Both are equally valid, however. gcc/ChangeLog: * config/arm/arm-builtins.cc (arm_ldrgbs_qualifiers) (arm_ldrgbu_qualifiers, arm_ldrgbs_z_qualifiers) (arm_ldrgbu_z_qualifiers): Delete. * config/arm/arm-mve-builtins-base.cc (class vldrq_gather_base_impl): New. (vldrdq_gather_base, vldrwq_gather_base): New. * config/arm/arm-mve-builtins-base.def (vldrdq_gather_base) (vldrwq_gather_base): New. * config/arm/arm-mve-builtins-base.h (vldrdq_gather_base) (vldrwq_gather_base): New. * config/arm/arm_mve.h (vldrwq_gather_base_s32): Delete. (vldrwq_gather_base_u32): Delete. (vldrwq_gather_base_z_u32): Delete. (vldrwq_gather_base_z_s32): Delete. (vldrdq_gather_base_s64): Delete. (vldrdq_gather_base_u64): Delete. (vldrdq_gather_base_z_s64): Delete. (vldrdq_gather_base_z_u64): Delete. (vldrwq_gather_base_f32): Delete. (vldrwq_gather_base_z_f32): Delete. (__arm_vldrwq_gather_base_s32): Delete. (__arm_vldrwq_gather_base_u32): Delete. (__arm_vldrwq_gather_base_z_s32): Delete. (__arm_vldrwq_gather_base_z_u32): Delete. (__arm_vldrdq_gather_base_s64): Delete. (__arm_vldrdq_gather_base_u64): Delete. (__arm_vldrdq_gather_base_z_s64): Delete. (__arm_vldrdq_gather_base_z_u64): Delete. (__arm_vldrwq_gather_base_f32): Delete. (__arm_vldrwq_gather_base_z_f32): Delete. * config/arm/arm_mve_builtins.def (vldrwq_gather_base_s) (vldrwq_gather_base_u, vldrwq_gather_base_z_s) (vldrwq_gather_base_z_u, vldrdq_gather_base_s) (vldrwq_gather_base_f, vldrdq_gather_base_z_s) (vldrwq_gather_base_z_f, vldrdq_gather_base_u) (vldrdq_gather_base_z_u): Delete. * config/arm/iterators.md (supf): Remove VLDRWQGB_S, VLDRWQGB_U, VLDRDQGB_S, VLDRDQGB_U. (VLDRWGBQ, VLDRDGBQ): Delete. * config/arm/mve.md (mve_vldrwq_gather_base_<supf>v4si): Delete. (mve_vldrwq_gather_base_z_<supf>v4si): Delete. (mve_vldrdq_gather_base_<supf>v2di): Delete. (mve_vldrdq_gather_base_z_<supf>v2di): Delete. (mve_vldrwq_gather_base_fv4sf): Delete. (mve_vldrwq_gather_base_z_fv4sf): Delete. (@mve_vldrq_gather_base_<mode>): New. 
(@mve_vldrq_gather_base_z_<mode>): New. * config/arm/unspecs.md (VLDRWQGB_S, VLDRWQGB_U, VLDRDQGB_S) (VLDRDQGB_U, VLDRWQGB_F): Delete. (VLDRGBQ, VLDRGBQ_Z): New. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c: Update expected output. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c: Likewise. --- gcc/config/arm/arm-builtins.cc | 22 --- gcc/config/arm/arm-mve-builtins-base.cc | 32 ++++ gcc/config/arm/arm-mve-builtins-base.def | 3 + gcc/config/arm/arm-mve-builtins-base.h | 2 + gcc/config/arm/arm_mve.h | 80 --------- gcc/config/arm/arm_mve_builtins.def | 10 -- gcc/config/arm/iterators.md | 5 - gcc/config/arm/mve.md | 155 ++++-------------- gcc/config/arm/unspecs.md | 7 +- .../mve/intrinsics/vldrdq_gather_base_s64.c | 4 +- .../mve/intrinsics/vldrdq_gather_base_u64.c | 4 +- 11 files changed, 75 insertions(+), 249 deletions(-) diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 40056f14981..60ee12839fb 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -610,28 +610,6 @@ arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \ (arm_quadop_unone_unone_unone_none_pred_qualifiers) -static enum arm_type_qualifiers -arm_ldrgbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate}; -#define LDRGBS_QUALIFIERS (arm_ldrgbs_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; -#define LDRGBU_QUALIFIERS (arm_ldrgbu_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, 
qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers) - static enum arm_type_qualifiers arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 42843f1f5fc..3a1a8af4fcd 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -477,6 +477,36 @@ public: } }; + /* Builds the vldrq_gather_base intrinsics. */ +class vldrq_gather_base_impl : public load_extending +{ +public: + using load_extending::load_extending; + + rtx expand (function_expander &e) const override + { + insn_code icode; + rtx insns; + + switch (e.pred) + { + case PRED_none: + icode = code_for_mve_vldrq_gather_base (e.vector_mode (0)); + break; + + case PRED_z: + icode = code_for_mve_vldrq_gather_base_z (e.vector_mode (0)); + break; + + default: + gcc_unreachable (); + } + insns = e.use_exact_insn (icode); + + return insns; + } +}; + /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics. 
*/ class vctpq_impl : public function_base { @@ -1276,12 +1306,14 @@ FUNCTION (vld1q, vld1_impl,) FUNCTION (vldrbq, vldrq_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8)) FUNCTION (vldrbq_gather, vldrq_gather_impl, (false, TYPE_SUFFIX_s8, TYPE_SUFFIX_u8)) FUNCTION (vldrdq_gather, vldrq_gather_impl, (false, TYPE_SUFFIX_s64, TYPE_SUFFIX_u64, NUM_TYPE_SUFFIXES)) +FUNCTION (vldrdq_gather_base, vldrq_gather_base_impl, (TYPE_SUFFIX_s64, TYPE_SUFFIX_u64)) FUNCTION (vldrdq_gather_shifted, vldrq_gather_impl, (true, TYPE_SUFFIX_s64, TYPE_SUFFIX_u64, NUM_TYPE_SUFFIXES)) FUNCTION (vldrhq, vldrq_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16)) FUNCTION (vldrhq_gather, vldrq_gather_impl, (false, TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16)) FUNCTION (vldrhq_gather_shifted, vldrq_gather_impl, (true, TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16)) FUNCTION (vldrwq, vldrq_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) FUNCTION (vldrwq_gather, vldrq_gather_impl, (false, TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) +FUNCTION (vldrwq_gather_base, vldrq_gather_base_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) FUNCTION (vldrwq_gather_shifted, vldrq_gather_impl, (true, TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ) FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index ad2cc8bef7e..223d20436e0 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -62,12 +62,14 @@ DEF_MVE_FUNCTION (vld1q, load, all_integer, z_or_none) DEF_MVE_FUNCTION (vldrbq, load_ext, all_integer, z_or_none) DEF_MVE_FUNCTION (vldrbq_gather, load_ext_gather_offset, all_integer, z_or_none) DEF_MVE_FUNCTION (vldrdq_gather, load_ext_gather_offset, integer_64, z_or_none) +DEF_MVE_FUNCTION (vldrdq_gather_base, load_gather_base, integer_64, z_or_none) DEF_MVE_FUNCTION (vldrdq_gather_shifted, 
load_ext_gather_offset, integer_64, z_or_none) DEF_MVE_FUNCTION (vldrhq, load_ext, integer_16_32, z_or_none) DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, integer_16_32, z_or_none) DEF_MVE_FUNCTION (vldrhq_gather_shifted, load_ext_gather_offset, integer_16_32, z_or_none) DEF_MVE_FUNCTION (vldrwq, load_ext, integer_32, z_or_none) DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, integer_32, z_or_none) +DEF_MVE_FUNCTION (vldrwq_gather_base, load_gather_base, integer_32, z_or_none) DEF_MVE_FUNCTION (vldrwq_gather_shifted, load_ext_gather_offset, integer_32, z_or_none) DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none) DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none) @@ -237,6 +239,7 @@ DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, float_16, z_or_none) DEF_MVE_FUNCTION (vldrhq_gather_shifted, load_ext_gather_offset, float_16, z_or_none) DEF_MVE_FUNCTION (vldrwq, load_ext, float_32, z_or_none) DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, float_32, z_or_none) +DEF_MVE_FUNCTION (vldrwq_gather_base, load_gather_base, float_32, z_or_none) DEF_MVE_FUNCTION (vldrwq_gather_shifted, load_ext_gather_offset, float_32, z_or_none) DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none) DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index b45308a6c1f..3bc1e933bfc 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -85,12 +85,14 @@ extern const function_base *const vld1q; extern const function_base *const vldrbq; extern const function_base *const vldrbq_gather; extern const function_base *const vldrdq_gather; +extern const function_base *const vldrdq_gather_base; extern const function_base *const vldrdq_gather_shifted; extern const function_base *const vldrhq; extern const function_base *const vldrhq_gather; extern const function_base *const 
vldrhq_gather_shifted; extern const function_base *const vldrwq; extern const function_base *const vldrwq_gather; +extern const function_base *const vldrwq_gather_base; extern const function_base *const vldrwq_gather_shifted; extern const function_base *const vmaxaq; extern const function_base *const vmaxavq; diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 7ec4491fffd..4862ac9e491 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -59,16 +59,6 @@ #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) #define vpnot(__a) __arm_vpnot(__a) -#define vldrwq_gather_base_s32(__addr, __offset) __arm_vldrwq_gather_base_s32(__addr, __offset) -#define vldrwq_gather_base_u32(__addr, __offset) __arm_vldrwq_gather_base_u32(__addr, __offset) -#define vldrwq_gather_base_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_u32(__addr, __offset, __p) -#define vldrwq_gather_base_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_s32(__addr, __offset, __p) -#define vldrdq_gather_base_s64(__addr, __offset) __arm_vldrdq_gather_base_s64(__addr, __offset) -#define vldrdq_gather_base_u64(__addr, __offset) __arm_vldrdq_gather_base_u64(__addr, __offset) -#define vldrdq_gather_base_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_s64(__addr, __offset, __p) -#define vldrdq_gather_base_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_u64(__addr, __offset, __p) -#define vldrwq_gather_base_f32(__addr, __offset) __arm_vldrwq_gather_base_f32(__addr, __offset) -#define vldrwq_gather_base_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_f32(__addr, __offset, __p) #define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void) #define vuninitializedq_u16(void) __arm_vuninitializedq_u16(void) #define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void) @@ -224,62 +214,6 @@ __arm_vpnot (mve_pred16_t __a) return __builtin_mve_vpnotv16bi (__a); } 
-__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_s32 (uint32x4_t __addr, const int __offset) -{ - return __builtin_mve_vldrwq_gather_base_sv4si (__addr, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset) -{ - return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_z_s32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_base_z_sv4si (__addr, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_z_u32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p); -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_s64 (uint64x2_t __addr, const int __offset) -{ - return __builtin_mve_vldrdq_gather_base_sv2di (__addr, __offset); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_u64 (uint64x2_t __addr, const int __offset) -{ - return __builtin_mve_vldrdq_gather_base_uv2di (__addr, __offset); -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_z_s64 (uint64x2_t __addr, const int __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrdq_gather_base_z_sv2di (__addr, __offset, __p); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-__arm_vldrdq_gather_base_z_u64 (uint64x2_t __addr, const int __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrdq_gather_base_z_uv2di (__addr, __offset, __p); -} - __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) @@ -790,20 +724,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) __builtin_mve_vst4qv4sf (__addr, __rv.__o); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_f32 (uint32x4_t __addr, const int __offset) -{ - return __builtin_mve_vldrwq_gather_base_fv4sf (__addr, __offset); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_z_f32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_base_z_fv4sf (__addr, __offset, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 5f859466dbe..44428104d3e 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -663,16 +663,6 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf) -VAR1 (LDRGBS, vldrwq_gather_base_s, v4si) -VAR1 (LDRGBU, vldrwq_gather_base_u, v4si) -VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si) -VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si) -VAR1 (LDRGBS, vldrdq_gather_base_s, v2di) -VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf) -VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, 
v2di) -VAR1 (LDRGBS_Z, vldrwq_gather_base_z_f, v4sf) -VAR1 (LDRGBU, vldrdq_gather_base_u, v2di) -VAR1 (LDRGBU_Z, vldrdq_gather_base_z_u, v2di) VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si) VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di) VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index b0e6a79921e..69c457c8d76 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -2537,9 +2537,6 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") (VMLALDAVAXQ_P_S "s") (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") - (VLDRWQGB_S "s") - (VLDRWQGB_U "u") - (VLDRDQGB_S "s") (VLDRDQGB_U "u") (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") (VLDRDQGBWB_U "u") (VADCQ_M_S "s") @@ -2941,8 +2938,6 @@ (define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) -(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) (define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) (define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 4158d5e5a1c..ef4448ef65a 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3502,27 +3502,6 @@ (define_insn "@mve_vldrq_gather_offset_z_extend_<mode><US>" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_extend_<mode><US>")) (set_attr "length" "8")]) -;; -;; [vldrwq_gather_base_s vldrwq_gather_base_u] -;; -(define_insn "mve_vldrwq_gather_base_<supf>v4si" - [(set (match_operand:V4SI 0 
"s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - VLDRWGBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si")) - (set_attr "length" "4")]) - ;; Predicated vector scatter stores with base ;; ;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u] @@ -3544,28 +3523,44 @@ (define_insn "@mve_vstrq_scatter_base_p_<mode>" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>")) (set_attr "length" "8")]) +;; Vector gather loads with base +;; +;; [vldrwq_gather_base_s vldrwq_gather_base_u] +;; [vldrwq_gather_base_f] +;; [vldrdq_gather_base_s vldrdq_gather_base_u] +;; +(define_insn "@mve_vldrq_gather_base_<mode>" + [(set (match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VLDRGBQ)) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vldr<MVE_elem_ch>.u<V_sz_elem>\t%q0, [%q1, %2]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_<mode>")) + (set_attr "length" "4")]) + +;; Predicated vector gather loads with base ;; ;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u] +;; [vldrwq_gather_base_z_f] +;; [vldrdq_gather_base_z_s vldrdq_gather_base_z_u] ;; -(define_insn "mve_vldrwq_gather_base_z_<supf>v4si" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VLDRWGBQ)) +(define_insn "@mve_vldrq_gather_base_z_<mode>" + [(set 
(match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] + VLDRGBQ_Z)) ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vpst\n\tvldr<MVE_elem_ch>t.u<V_sz_elem>\t%q0, [%q1, %2]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_<mode>")) (set_attr "length" "8")]) + ;; Gather loads with shifted offset ;; ;; [vldrhq_gather_shifted_offset_s vldrhq_gather_shifted_offset_u] @@ -3648,92 +3643,6 @@ (define_insn "@mve_vldrq_gather_shifted_offset_z_extend_v4si<US>" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_shifted_offset_extend_v4si<US>")) (set_attr "length" "4")]) -;; -;; [vldrdq_gather_base_s vldrdq_gather_base_u] -;; -(define_insn "mve_vldrdq_gather_base_<supf>v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - VLDRDGBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrd.64\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di")) - (set_attr "length" "4")]) - -;; -;; [vldrdq_gather_base_z_s vldrdq_gather_base_z_u] -;; -(define_insn "mve_vldrdq_gather_base_z_<supf>v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" 
"w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:V2QI 3 "vpr_register_operand" "Up")] - VLDRDGBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di")) - (set_attr "length" "8")]) - -;; -;; [vldrwq_gather_base_f] -;; -(define_insn "mve_vldrwq_gather_base_fv4sf" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - VLDRWQGB_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf")) - (set_attr "length" "4")]) - -;; -;; [vldrwq_gather_base_z_f] -;; -(define_insn "mve_vldrwq_gather_base_z_fv4sf" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VLDRWQGB_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%q1, %2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf")) - (set_attr "length" "8")]) - ;; Vector scatter stores with shifted offset ;; ;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 40d0ef73204..63a0168ea19 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1160,15 
+1160,12 @@ (define_c_enum "unspec" [ VLDRGOQ_Z VLDRGOQ_EXT VLDRGOQ_EXT_Z - VLDRWQGB_S - VLDRWQGB_U + VLDRGBQ + VLDRGBQ_Z VLDRGSOQ VLDRGSOQ_Z VLDRGSOQ_EXT VLDRGSOQ_EXT_Z - VLDRDQGB_S - VLDRDQGB_U - VLDRWQGB_F VSTRQ VSTRQ_P VSTRQ_TRUNC diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c index 3539c1e40ba..344e0d4eeb6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_s64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\](?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\](?: @.*|) ** ... */ int64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c index 2245df61a4e..42620ab5377 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_u64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\](?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\](?: @.*|) ** ... */ uint64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ -- 2.34.1