On 04/09/2024 14:26, Christophe Lyon wrote:
> Implement vctp using the new MVE builtins framework.
>
> 2024-08-21 Christophe Lyon <christophe.l...@linaro.org>
>
> gcc/ChangeLog:
>
> * config/arm/arm-mve-builtins-base.cc (class vctpq_impl): New.
> (vctp16q): New.
> (vctp32q): New.
> (vctp64q): New.
> (vctp8q): New.
> * config/arm/arm-mve-builtins-base.def (vctp16q): New.
> (vctp32q): New.
> (vctp64q): New.
> (vctp8q): New.
> * config/arm/arm-mve-builtins-base.h (vctp16q): New.
> (vctp32q): New.
> (vctp64q): New.
> (vctp8q): New.
> * config/arm/arm-mve-builtins-shapes.cc (vctp): New.
> * config/arm/arm-mve-builtins-shapes.h (vctp): New.
> * config/arm/arm-mve-builtins.cc
> (function_instance::has_inactive_argument): Add support for vctp.
> * config/arm/arm_mve.h (vctp16q): Delete.
> (vctp32q): Delete.
> (vctp64q): Delete.
> (vctp8q): Delete.
> (vctp8q_m): Delete.
> (vctp64q_m): Delete.
> (vctp32q_m): Delete.
> (vctp16q_m): Delete.
> (__arm_vctp16q): Delete.
> (__arm_vctp32q): Delete.
> (__arm_vctp64q): Delete.
> (__arm_vctp8q): Delete.
> (__arm_vctp8q_m): Delete.
> (__arm_vctp64q_m): Delete.
> (__arm_vctp32q_m): Delete.
> (__arm_vctp16q_m): Delete.
> * config/arm/mve.md (mve_vctp<MVE_vctp>q<MVE_vpred>): Add '@'
> prefix.
> (mve_vctp<MVE_vctp>q_m<MVE_vpred>): Likewise.
OK. R. > --- > gcc/config/arm/arm-mve-builtins-base.cc | 48 +++++++++++++++++ > gcc/config/arm/arm-mve-builtins-base.def | 4 ++ > gcc/config/arm/arm-mve-builtins-base.h | 4 ++ > gcc/config/arm/arm-mve-builtins-shapes.cc | 16 ++++++ > gcc/config/arm/arm-mve-builtins-shapes.h | 1 + > gcc/config/arm/arm-mve-builtins.cc | 4 ++ > gcc/config/arm/arm_mve.h | 64 ----------------------- > gcc/config/arm/mve.md | 4 +- > 8 files changed, 79 insertions(+), 66 deletions(-) > > diff --git a/gcc/config/arm/arm-mve-builtins-base.cc > b/gcc/config/arm/arm-mve-builtins-base.cc > index f8260f5f483..89724320d43 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.cc > +++ b/gcc/config/arm/arm-mve-builtins-base.cc > @@ -139,6 +139,50 @@ public: > } > }; > > + /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics. */ > +class vctpq_impl : public function_base > +{ > +public: > + CONSTEXPR vctpq_impl (machine_mode mode) > + : m_mode (mode) > + {} > + > + /* Mode this intrinsic operates on. */ > + machine_mode m_mode; > + > + rtx > + expand (function_expander &e) const override > + { > + insn_code code; > + rtx target; > + > + if (e.mode_suffix_id != MODE_none) > + gcc_unreachable (); > + > + switch (e.pred) > + { > + case PRED_none: > + /* No predicate, no suffix. */ > + code = code_for_mve_vctpq (m_mode, m_mode); > + target = e.use_exact_insn (code); > + break; > + > + case PRED_m: > + /* No suffix, "m" predicate. */ > + code = code_for_mve_vctpq_m (m_mode, m_mode); > + target = e.use_cond_insn (code, 0); > + break; > + > + default: > + gcc_unreachable (); > + } > + > + rtx HItarget = gen_reg_rtx (HImode); > + emit_move_insn (HItarget, gen_lowpart (HImode, target)); > + return HItarget; > + } > +}; > + > /* Implements vcvtq intrinsics. 
*/ > class vcvtq_impl : public function_base > { > @@ -506,6 +550,10 @@ FUNCTION (vcmpltq, > unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT, > FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, > UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN)) > FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, > UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN)) > FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ) > +FUNCTION (vctp8q, vctpq_impl, (V16BImode)) > +FUNCTION (vctp16q, vctpq_impl, (V8BImode)) > +FUNCTION (vctp32q, vctpq_impl, (V4BImode)) > +FUNCTION (vctp64q, vctpq_impl, (V2QImode)) > FUNCTION (vcvtq, vcvtq_impl,) > FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ) > FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ) > diff --git a/gcc/config/arm/arm-mve-builtins-base.def > b/gcc/config/arm/arm-mve-builtins-base.def > index cc76db3e0b9..dd46d882882 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.def > +++ b/gcc/config/arm/arm-mve-builtins-base.def > @@ -42,6 +42,10 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none) > DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none) > DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none) > DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none) > +DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none) > +DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none) > +DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none) > +DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none) > DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none) > DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none) > DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none) > diff --git a/gcc/config/arm/arm-mve-builtins-base.h > b/gcc/config/arm/arm-mve-builtins-base.h > index ad2647b6758..41fcf666b11 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.h > +++ b/gcc/config/arm/arm-mve-builtins-base.h > @@ -55,6 +55,10 @@ extern const function_base *const 
vcmulq_rot180; > extern const function_base *const vcmulq_rot270; > extern const function_base *const vcmulq_rot90; > extern const function_base *const vcreateq; > +extern const function_base *const vctp16q; > +extern const function_base *const vctp32q; > +extern const function_base *const vctp64q; > +extern const function_base *const vctp8q; > extern const function_base *const vcvtaq; > extern const function_base *const vcvtbq; > extern const function_base *const vcvtmq; > diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc > b/gcc/config/arm/arm-mve-builtins-shapes.cc > index 6632ee49067..8a849c2bc02 100644 > --- a/gcc/config/arm/arm-mve-builtins-shapes.cc > +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc > @@ -1981,6 +1981,22 @@ struct unary_widen_acc_def : public overloaded_base<0> > }; > SHAPE (unary_widen_acc) > > +/* mve_pred16_t foo_t0(uint32_t) > + > + Example: vctp16q. > + mve_pred16_t [__arm_]vctp16q(uint32_t a) > + mve_pred16_t [__arm_]vctp16q_m(uint32_t a, mve_pred16_t p) */ > +struct vctp_def : public nonoverloaded_base > +{ > + void > + build (function_builder &b, const function_group_info &group, > + bool preserve_user_namespace) const override > + { > + build_all (b, "p,su32", group, MODE_none, preserve_user_namespace); > + } > +}; > +SHAPE (vctp) > + > /* <T0>_t foo_t0[_t1](<T1>_t) > <T0>_t foo_t0_n[_t1](<T1>_t, const int) > > diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h > b/gcc/config/arm/arm-mve-builtins-shapes.h > index ef497b6c97a..80340dc33ec 100644 > --- a/gcc/config/arm/arm-mve-builtins-shapes.h > +++ b/gcc/config/arm/arm-mve-builtins-shapes.h > @@ -77,6 +77,7 @@ namespace arm_mve > extern const function_shape *const unary_n; > extern const function_shape *const unary_widen; > extern const function_shape *const unary_widen_acc; > + extern const function_shape *const vctp; > extern const function_shape *const vcvt; > extern const function_shape *const vcvt_f16_f32; > extern const function_shape *const vcvt_f32_f16; > diff --git 
a/gcc/config/arm/arm-mve-builtins.cc > b/gcc/config/arm/arm-mve-builtins.cc > index 13c666b8f6a..84d94bb634f 100644 > --- a/gcc/config/arm/arm-mve-builtins.cc > +++ b/gcc/config/arm/arm-mve-builtins.cc > @@ -750,6 +750,10 @@ function_instance::has_inactive_argument () const > || base == functions::vcmpltq > || base == functions::vcmpcsq > || base == functions::vcmphiq > + || base == functions::vctp16q > + || base == functions::vctp32q > + || base == functions::vctp64q > + || base == functions::vctp8q > || (base == functions::vcvtbq && type_suffix (0).element_bits == 16) > || (base == functions::vcvttq && type_suffix (0).element_bits == 16) > || base == functions::vfmaq > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index 7aa61103a7d..49c4ea9afee 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -140,15 +140,7 @@ > #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value) > #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) > #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) > -#define vctp16q(__a) __arm_vctp16q(__a) > -#define vctp32q(__a) __arm_vctp32q(__a) > -#define vctp64q(__a) __arm_vctp64q(__a) > -#define vctp8q(__a) __arm_vctp8q(__a) > #define vpnot(__a) __arm_vpnot(__a) > -#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p) > -#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p) > -#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p) > -#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p) > #define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm) > #define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm) > #define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm) > @@ -603,34 +595,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value) > __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o); > } > > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) > -__arm_vctp16q (uint32_t __a) > -{ > - return __builtin_mve_vctp16qv8bi (__a); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp32q (uint32_t __a) > -{ > - return __builtin_mve_vctp32qv4bi (__a); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp64q (uint32_t __a) > -{ > - return __builtin_mve_vctp64qv2qi (__a); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp8q (uint32_t __a) > -{ > - return __builtin_mve_vctp8qv16bi (__a); > -} > - > __extension__ extern __inline mve_pred16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vpnot (mve_pred16_t __a) > @@ -638,34 +602,6 @@ __arm_vpnot (mve_pred16_t __a) > return __builtin_mve_vpnotv16bi (__a); > } > > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vctp8q_mv16bi (__a, __p); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vctp64q_mv2qi (__a, __p); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vctp32q_mv4bi (__a, __p); > -} > - > -__extension__ extern __inline mve_pred16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vctp16q_mv8bi (__a, __p); > -} > - > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) > __arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm) > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 3d8b199d9d6..62cffebd6ed 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -482,7 +482,7 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si" > ;; > ;; [vctp8q vctp16q vctp32q vctp64q]) > ;; > -(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>" > +(define_insn "@mve_vctp<MVE_vctp>q<MVE_vpred>" > [ > (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") > (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")] > @@ -1272,7 +1272,7 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>" > ;; > ;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m]) > ;; > -(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>" > +(define_insn "@mve_vctp<MVE_vctp>q_m<MVE_vpred>" > [ > (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") > (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")