Hi Christophe,

> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces+kyrylo.tkachov=arm....@gcc.gnu.org> On Behalf Of Christophe Lyon via Gcc-patches
> Sent: Thursday, January 13, 2022 2:56 PM
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 08/15] arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates
>
> We make use of qualifier_predicate to describe MVE builtins
> prototypes, restricting to auto-vectorizable vcmp* and vpsel builtins,
> as they are exercised by the tests added earlier in the series.
>
> Special handling is needed for mve_vpselq because it has a v2di
> variant, which has no natural VPR.P0 representation: we keep HImode
> for it.
>
> The vector_compare expansion code is updated to use the right VxBI
> mode instead of HI for the result.
>
> We extend the existing thumb2_movhi_vfp and thumb2_movhi_fp16 patterns
> to use the new MVE_7_HI iterator which covers HI and the new VxBI
> modes, in conjunction with the new DB constraint for a constant vector
> of booleans.
>
> 2022-01-13  Christophe Lyon  <christophe.l...@foss.st.com>
>             Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>         PR target/100757
>         PR target/101325
>         * config/arm/arm-builtins.c (BINOP_PRED_UNONE_UNONE_QUALIFIERS)
>         (BINOP_PRED_NONE_NONE_QUALIFIERS)
>         (TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS)
>         (TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS): New.
>         * config/arm/arm-protos.h (mve_const_bool_vec_to_hi): New.
>         * config/arm/arm.c (arm_hard_regno_mode_ok): Handle new VxBI modes.
>         (arm_mode_to_pred_mode): New.
>         (arm_expand_vector_compare): Use the right VxBI mode instead of HI.
>         (arm_expand_vcond): Likewise.
>         (simd_valid_immediate): Handle MODE_VECTOR_BOOL.
>         (mve_const_bool_vec_to_hi): New.
>         (neon_make_constant): Call mve_const_bool_vec_to_hi when needed.
>         * config/arm/arm_mve_builtins.def (vcmpneq_, vcmphiq_, vcmpcsq_)
>         (vcmpltq_, vcmpleq_, vcmpgtq_, vcmpgeq_, vcmpeqq_, vcmpneq_f)
>         (vcmpltq_f, vcmpleq_f, vcmpgtq_f, vcmpgeq_f, vcmpeqq_f, vpselq_u)
>         (vpselq_s, vpselq_f): Use new predicated qualifiers.
>         * config/arm/constraints.md (DB): New.
>         * config/arm/iterators.md (MVE_7, MVE_7_HI): New mode iterators.
>         (MVE_VPRED, MVE_vpred): New attribute iterators.
>         * config/arm/mve.md (@mve_vcmp<mve_cmp_op>q_<mode>)
>         (@mve_vcmp<mve_cmp_op>q_f<mode>, @mve_vpselq_<supf><mode>)
>         (@mve_vpselq_f<mode>): Use MVE_VPRED instead of HI.
>         (@mve_vpselq_<supf>v2di): Define separately.
>         (mov<mode>): New expander for VxBI modes.
>         * config/arm/vfp.md (thumb2_movhi_vfp, thumb2_movhi_fp16): Use
>         MVE_7_HI iterator and add support for DB constraint.
>
> gcc/testsuite/
>         PR target/100757
>         PR target/101325
>         * gcc.dg/rtl/arm/mve-vxbi.c: New test.
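A side note for readers following the VPR.P0 mapping: each VxBI lane is replicated across 16/N of the 16 predicate bits, which is what the new mve_const_bool_vec_to_hi helper in the diff below does for a CONST_VECTOR. The following standalone C sketch illustrates just that bit-replication scheme (pack_vxbi is a made-up name for illustration, not code from the patch):

  #include <stdint.h>
  #include <stdio.h>

  /* Pack an N-lane boolean vector into the 16 predicate bits of VPR.P0,
     replicating each lane 16/N times: a V4BI lane owns 4 consecutive bits,
     a V8BI lane owns 2, a V16BI lane owns 1.  */
  static uint16_t
  pack_vxbi (const int *lanes, int n_elts)
  {
    int repeat = 16 / n_elts;
    uint16_t bits = 0;

    for (int i = 0; i < n_elts; i++)
      if (lanes[i])
        for (int j = 0; j < repeat; j++)
          bits |= (uint16_t) (1u << (i * repeat + j));

    return bits;
  }

  int
  main (void)
  {
    int v4bi[] = { 1, 0, 0, 1 };  /* the V4BI constant used in the new RTL test */
    printf ("0x%04x\n", (unsigned) pack_vxbi (v4bi, 4));  /* prints 0xf00f */
    return 0;
  }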
>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 2ccfa37c302..36d71ab1a13 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -420,6 +420,12 @@ arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> #define BINOP_UNONE_UNONE_UNONE_QUALIFIERS \
>   (arm_binop_unone_unone_unone_qualifiers)
>
> +static enum arm_type_qualifiers
> +arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned };
> +#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \
> +  (arm_binop_pred_unone_unone_qualifiers)
> +
> static enum arm_type_qualifiers
> arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>   = { qualifier_unsigned, qualifier_none, qualifier_immediate };
> @@ -438,6 +444,12 @@ arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> #define BINOP_UNONE_NONE_NONE_QUALIFIERS \
>   (arm_binop_unone_none_none_qualifiers)
>
> +static enum arm_type_qualifiers
> +arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_none, qualifier_none };
> +#define BINOP_PRED_NONE_NONE_QUALIFIERS \
> +  (arm_binop_pred_none_none_qualifiers)
> +
> static enum arm_type_qualifiers
> arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>   = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
> @@ -509,6 +521,12 @@ arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> #define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \
>   (arm_ternop_none_none_none_unone_qualifiers)
>
> +static enum arm_type_qualifiers
> +arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate };
> +#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \
> +  (arm_ternop_none_none_none_pred_qualifiers)
> +
> static enum arm_type_qualifiers
> arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>   = { qualifier_none, qualifier_none, qualifier_immediate, qualifier_unsigned };
> @@ -528,6 +546,13 @@ arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> #define TERNOP_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \
>   (arm_ternop_unone_unone_unone_unone_qualifiers)
>
> +static enum arm_type_qualifiers
> +arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
> +      qualifier_predicate };
> +#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \
> +  (arm_ternop_unone_unone_unone_pred_qualifiers)
> +
> static enum arm_type_qualifiers
> arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>   = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index fb365ac5268..b978adf2038 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -101,6 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *,
>                                            machine_mode, int, bool);
> extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
>                                   rtx (*) (rtx, rtx, rtx));
> +extern rtx mve_const_bool_vec_to_hi (rtx const_vec);
> extern rtx neon_make_constant (rtx, bool generate = true);
> extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
> extern void neon_expand_vector_init (rtx, rtx);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 64a8f2dc7de..fa18c7bd3fe 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -12750,7 +12750,10 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse,
>   innersize = GET_MODE_UNIT_SIZE (mode);
>
>   /* Only support 128-bit vectors for MVE. */
> -  if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
> +  if (TARGET_HAVE_MVE
> +      && (!vector
> +          || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
> +          || n_elts * innersize != 16))
>     return -1;
>
>   /* Vectors of float constants. */
> @@ -13115,6 +13118,29 @@ neon_vdup_constant (rtx vals, bool generate)
>   return gen_vec_duplicate (mode, x);
> }
>
> +/* Return a HI representation of CONST_VEC suitable for MVE predicates. */
> +rtx
> +mve_const_bool_vec_to_hi (rtx const_vec)
I was a bit confused by the "hi" in the name. I guess it means HImode rather than "high" (part). Maybe name it something like mve_bool_vec_to_const. The HImode part is implied by the fact that it's an MVE bool vector.

> +{
> +  int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
> +  int repeat = 16 / n_elts;
> +  int i;
> +  int hi_val = 0;
> +
> +  for (i = 0; i < n_elts; i++)
> +    {
> +      rtx el = CONST_VECTOR_ELT (const_vec, i);
> +      unsigned HOST_WIDE_INT elpart;
> +
> +      gcc_assert (CONST_INT_P (el));
> +      elpart = INTVAL (el);
> +
> +      for (int j = 0; j < repeat; j++)
> +        hi_val |= elpart << (i * repeat + j);
> +    }
> +  return GEN_INT (hi_val);

I think it's better to use gen_int_mode (hi_val, HImode) to ensure the HOST_WIDE_INT representation inside the CONST_INT has the right sign/zero extensions.

Ok with those changes.
Thanks,
Kyrill

> +}
> +
> /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
>    constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
>    into a register.
> @@ -13155,6 +13181,8 @@ neon_make_constant (rtx vals, bool generate)
>       && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
>     /* Load using VMOV. On Cortex-A8 this takes one cycle. */
>     return const_vec;
> +  else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
> +    return mve_const_bool_vec_to_hi (const_vec);
>   else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
>     /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
>        pipeline cycle; creating the constant takes one or two ARM
> @@ -25313,7 +25341,10 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
>     return false;
>
>   if (IS_VPR_REGNUM (regno))
> -    return mode == HImode;
> +    return mode == HImode
> +           || mode == V16BImode
> +           || mode == V8BImode
> +           || mode == V4BImode;
>
>   if (TARGET_THUMB1)
>     /* For the Thumb we only allow values bigger than SImode in
> @@ -31001,6 +31032,19 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
>     arm_post_atomic_barrier (model);
> }
>
>
> +/* Return the mode for the MVE vector of predicates corresponding to MODE. */
> +machine_mode
> +arm_mode_to_pred_mode (machine_mode mode)
> +{
> +  switch (GET_MODE_NUNITS (mode))
> +    {
> +    case 16: return V16BImode;
> +    case 8: return V8BImode;
> +    case 4: return V4BImode;
> +    }
> +  gcc_unreachable ();
> +}
> +
> /* Expand code to compare vectors OP0 and OP1 using condition CODE.
>    If CAN_INVERT, store either the result or its inverse in TARGET
>    and return true if TARGET contains the inverse. If !CAN_INVERT,
> @@ -31084,7 +31128,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
>       if (vcond_mve)
>         vpr_p0 = target;
>       else
> -        vpr_p0 = gen_reg_rtx (HImode);
> +        vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
>
>       switch (GET_MODE_CLASS (cmp_mode))
>         {
> @@ -31126,7 +31170,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
>       if (vcond_mve)
>         vpr_p0 = target;
>       else
> -        vpr_p0 = gen_reg_rtx (HImode);
> +        vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
>
>       emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0,
>                                 force_reg (cmp_mode, op1)));
>       if (!vcond_mve)
> @@ -31153,7 +31197,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
>       if (vcond_mve)
>         vpr_p0 = target;
>       else
> -        vpr_p0 = gen_reg_rtx (HImode);
> +        vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
>
>       emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode,
>                                 vpr_p0, force_reg (cmp_mode, op1), op0));
>       if (!vcond_mve)
> @@ -31206,7 +31250,7 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
>   if (TARGET_HAVE_MVE)
>     {
>       vcond_mve=true;
> -      mask = gen_reg_rtx (HImode);
> +      mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
>     }
>   else
>     mask = gen_reg_rtx (cmp_result_mode);
> diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
> index c3ae40765fe..44b41eab4c5 100644
> --- a/gcc/config/arm/arm_mve_builtins.def
> +++ b/gcc/config/arm/arm_mve_builtins.def
> @@ -89,7 +89,7 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si)
> VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si)
> VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si)
> @@ -117,9 +117,9 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
> @@ -143,15 +143,15 @@ VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_, v16qi, v8hi, v4si)
> VAR3 (BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si)
> VAR3 (BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si)
> @@ -219,17 +219,17 @@ VAR2 (BINOP_UNONE_UNONE_IMM, vshllbq_n_u, v16qi, v8hi)
> VAR2 (BINOP_UNONE_UNONE_IMM, vorrq_n_u, v8hi, v4si)
> VAR2 (BINOP_UNONE_UNONE_IMM, vbicq_n_u, v8hi, v4si)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpneq_f, v8hf, v4sf)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpltq_f, v8hf, v4sf)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpleq_f, v8hf, v4sf)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpgtq_f, v8hf, v4sf)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpgeq_f, v8hf, v4sf)
> VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf)
> -VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
> +VAR2 (BINOP_PRED_NONE_NONE, vcmpeqq_f, v8hf, v4sf)
> VAR2 (BINOP_NONE_NONE_NONE, vsubq_f, v8hf, v4sf)
> VAR2 (BINOP_NONE_NONE_NONE, vqmovntq_s, v8hi, v4si)
> VAR2 (BINOP_NONE_NONE_NONE, vqmovnbq_s, v8hi, v4si)
> @@ -295,8 +295,8 @@ VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtaq_m_u, v8hi, v4si)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtaq_m_s, v8hi, v4si)
> VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si)
> VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
> -VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
> -VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
> +VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di)
> +VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di)
> VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
> VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
> VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
> @@ -426,7 +426,7 @@ VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev64q_m_f, v8hf, v4sf)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev32q_m_s, v16qi, v8hi)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovntq_m_s, v8hi, v4si)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovnbq_m_s, v8hi, v4si)
> -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_f, v8hf, v4sf)
> +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vpselq_f, v8hf, v4sf)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vnegq_m_f, v8hf, v4sf)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovntq_m_s, v8hi, v4si)
> VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovnbq_m_s, v8hi, v4si)
> diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
> index 1920004b450..2b411b0cb0f 100644
> --- a/gcc/config/arm/constraints.md
> +++ b/gcc/config/arm/constraints.md
> @@ -312,6 +312,12 @@ (define_constraint "Dz"
>   (and (match_code "const_vector")
>        (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
>
> +(define_constraint "DB"
> +  "@internal
> +   In ARM/Thumb-2 state with MVE a constant vector of booleans."
> +  (and (match_code "const_vector")
> +       (match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL")))
> +
> (define_constraint "Da"
>   "@internal
>    In ARM/Thumb-2 state a const_int, const_double or const_vector that can
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 8202c27cc82..37cf7971be8 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -272,6 +272,8 @@ (define_mode_iterator MVE_3 [V16QI V8HI])
> (define_mode_iterator MVE_2 [V16QI V8HI V4SI])
> (define_mode_iterator MVE_5 [V8HI V4SI])
> (define_mode_iterator MVE_6 [V8HI V4SI])
> +(define_mode_iterator MVE_7 [V16BI V8BI V4BI])
> +(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI])
>
> ;;----------------------------------------------------------------------------
> ;; Code iterators
> @@ -946,6 +948,10 @@ (define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
>                                (V8HF "u16") (V4SF "32")])
> (define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
>                                    (V8HF "=w") (V4SF "=&w")])
> +(define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI")
> +                             (V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")])
> +(define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi")
> +                             (V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")])
>
> ;;----------------------------------------------------------------------------
> ;; Code attributes
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 5c3b34dce3a..983aa10e652 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -839,8 +839,8 @@ (define_insn "mve_vaddlvq_p_<supf>v4si"
> ;;
> (define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
>   [
> -   (set (match_operand:HI 0 "vpr_register_operand" "=Up")
> -        (MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w")
> +   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> +        (MVE_COMPARISONS:<MVE_VPRED> (match_operand:MVE_2 1 "s_register_operand" "w")
>                             (match_operand:MVE_2 2 "s_register_operand" "w")))
>   ]
>   "TARGET_HAVE_MVE"
> @@ -1929,8 +1929,8 @@ (define_insn "mve_vcaddq<mve_rot><mode>"
> ;;
> (define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
>   [
> -   (set (match_operand:HI 0 "vpr_register_operand" "=Up")
> -        (MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w")
> +   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> +        (MVE_FP_COMPARISONS:<MVE_VPRED> (match_operand:MVE_0 1 "s_register_operand" "w")
>                                (match_operand:MVE_0 2 "s_register_operand" "w")))
>   ]
>   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> @@ -3324,7 +3324,7 @@ (define_insn "@mve_vpselq_<supf><mode>"
>    (set (match_operand:MVE_1 0 "s_register_operand" "=w")
>         (unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
>                        (match_operand:MVE_1 2 "s_register_operand" "w")
> -                      (match_operand:HI 3 "vpr_register_operand" "Up")]
> +                      (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
>          VPSELQ))
>   ]
>   "TARGET_HAVE_MVE"
> @@ -4419,7 +4419,7 @@ (define_insn "@mve_vpselq_f<mode>"
>    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
>         (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
>                        (match_operand:MVE_0 2 "s_register_operand" "w")
> -                      (match_operand:HI 3 "vpr_register_operand" "Up")]
> +                      (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
>          VPSELQ_F))
>   ]
>   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> @@ -10516,3 +10516,14 @@ (define_insn "*movmisalign<mode>_mve_load"
>   "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
>   [(set_attr "type" "mve_load")]
> )
> +
> +;; Expander for VxBI moves
> +(define_expand "mov<mode>"
> +  [(set (match_operand:MVE_7 0 "nonimmediate_operand")
> +        (match_operand:MVE_7 1 "general_operand"))]
> +  "TARGET_HAVE_MVE"
> +  {
> +    if (!register_operand (operands[0], <MODE>mode))
> +      operands[1] = force_reg (<MODE>mode, operands[1]);
> +  }
> +)
> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
> index f5ccb92d097..f00d1cad3e9 100644
> --- a/gcc/config/arm/vfp.md
> +++ b/gcc/config/arm/vfp.md
> @@ -73,21 +73,26 @@ (define_insn "*arm_movhi_vfp"
>
> (define_insn "*thumb2_movhi_vfp"
>   [(set
> -    (match_operand:HI 0 "nonimmediate_operand"
> +    (match_operand:MVE_7_HI 0 "nonimmediate_operand"
>      "=rk, r, l, r, m, r, *t, r, *t, Up, r")
> -    (match_operand:HI 1 "general_operand"
> -     "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
> +    (match_operand:MVE_7_HI 1 "general_operand"
> +     "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
>   "TARGET_THUMB2 && TARGET_VFP_BASE
>    && !TARGET_VFP_FP16INST
> -   && (register_operand (operands[0], HImode)
> -       || register_operand (operands[1], HImode))"
> +   && (register_operand (operands[0], <MODE>mode)
> +       || register_operand (operands[1], <MODE>mode))"
> {
>   switch (which_alternative)
>     {
>     case 0:
> -    case 1:
>     case 2:
>       return "mov%?\t%0, %1\t%@ movhi";
> +    case 1:
> +      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
> +        operands[1] = mve_const_bool_vec_to_hi (operands[1]);
> +      else
> +        operands[1] = gen_lowpart (HImode, operands[1]);
> +      return "mov%?\t%0, %1\t%@ movhi";
>     case 3:
>       return "movw%?\t%0, %L1\t%@ movhi";
>     case 4:
> @@ -173,20 +178,25 @@ (define_insn "*arm_movhi_fp16"
>
> (define_insn "*thumb2_movhi_fp16"
>   [(set
> -    (match_operand:HI 0 "nonimmediate_operand"
> +    (match_operand:MVE_7_HI 0 "nonimmediate_operand"
>      "=rk, r, l, r, m, r, *t, r, *t, Up, r")
> -    (match_operand:HI 1 "general_operand"
> -     "rk, I, Py, n, r, m, r, *t, *t, r, Up"))]
> +    (match_operand:MVE_7_HI 1 "general_operand"
> +     "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))]
>   "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
> -   && (register_operand (operands[0], HImode)
> -       || register_operand (operands[1], HImode))"
> +   && (register_operand (operands[0], <MODE>mode)
> +       || register_operand (operands[1], <MODE>mode))"
> {
>   switch (which_alternative)
>     {
>     case 0:
> -    case 1:
>     case 2:
>       return "mov%?\t%0, %1\t%@ movhi";
> +    case 1:
> +      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL)
> +        operands[1] = mve_const_bool_vec_to_hi (operands[1]);
> +      else
> +        operands[1] = gen_lowpart (HImode, operands[1]);
> +      return "mov%?\t%0, %1\t%@ movhi";
>     case 3:
>       return "movw%?\t%0, %L1\t%@ movhi";
>     case 4:
> diff --git a/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c b/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c
> new file mode 100644
> index 00000000000..093283ed43c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/arm/mve-vxbi.c
> @@ -0,0 +1,89 @@
> +/* { dg-do compile { target arm*-*-* } } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +void __RTL (startwith ("ira")) foo (void *ptr)
> +{
> +  (function "foo"
> +    (param "ptr"
> +      (DECL_RTL (reg/v:SI <0> [ ptr ]))
> +      (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
> +    ) ;; param "n"
> +    (insn-chain
> +      (block 2
> +        (edge-from entry (flags "FALLTHRU"))
> +        (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +        (insn 7 (set (reg:V4BI <1>)
> +                     (const_vector:V4BI [(const_int 1)
> +                                         (const_int 0)
> +                                         (const_int 0)
> +                                         (const_int 1)])) (nil))
> +        (insn 8 (set (mem:V4BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V4BI <1>)))
> +        (edge-to exit (flags "FALLTHRU"))
> +      ) ;; block 2
> +    ) ;; insn-chain
> +  ) ;; function
> +}
> +
> +void __RTL (startwith ("ira")) foo2 (void *ptr)
> +{
> +  (function "foo"
> +    (param "ptr"
> +      (DECL_RTL (reg/v:SI <0> [ ptr ]))
> +      (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
> +    ) ;; param "n"
> +    (insn-chain
> +      (block 2
> +        (edge-from entry (flags "FALLTHRU"))
> +        (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +        (insn 7 (set (reg:V8BI <1>)
> +                     (const_vector:V8BI [(const_int 1)
> +                                         (const_int 0)
> +                                         (const_int 1)
> +                                         (const_int 1)
> +                                         (const_int 1)
> +                                         (const_int 1)
> +                                         (const_int 0)
> +                                         (const_int 1)])) (nil))
> +        (insn 8 (set (mem:V8BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V8BI <1>)))
> +        (edge-to exit (flags "FALLTHRU"))
> +      ) ;; block 2
> +    ) ;; insn-chain
> +  ) ;; function
> +}
> +
> +void __RTL (startwith ("ira")) foo3 (void *ptr)
> +{
> +  (function "foo"
> +    (param "ptr"
> +      (DECL_RTL (reg/v:SI <0> [ ptr ]))
> +      (DECL_RTL_INCOMING (reg:SI r0 [ ptr ]))
> +    ) ;; param "n"
> +    (insn-chain
> +      (block 2
> +        (edge-from entry (flags "FALLTHRU"))
> +        (cnote 5 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +        (insn 7 (set (reg:V16BI <1>)
> +                     (const_vector:V16BI [(const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)
> +                                          (const_int 0)])) (nil))
> +        (insn 8 (set (mem:V16BI (reg:SI <0>) [1 ptr+0 S2 A16]) (reg:V16BI <1>)))
> +        (edge-to exit (flags "FALLTHRU"))
> +      ) ;; block 2
> +    ) ;; insn-chain
> +  ) ;; function
> +}
> --
> 2.25.1
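P.S. To make the gen_int_mode remark above concrete, here is a small standalone C sketch of the canonicalization gen_int_mode (hi_val, HImode) performs and a bare GEN_INT does not (trunc16 is a made-up helper name mirroring what trunc_int_for_mode does for HImode; it is not GCC code):

  #include <stdint.h>
  #include <stdio.h>

  /* Keep the low 16 bits and sign-extend from bit 15, as
     trunc_int_for_mode (c, HImode) does.  */
  static long long
  trunc16 (long long c)
  {
    return (int16_t) c;
  }

  int
  main (void)
  {
    long long hi_val = 0xffff;  /* an all-true V16BI predicate */
    printf ("as GEN_INT would store it:      %lld\n", hi_val);            /* 65535 */
    printf ("as gen_int_mode would store it: %lld\n", trunc16 (hi_val));  /* -1 */
    return 0;
  }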