Hi Christophe, > -----Original Message----- > From: Gcc-patches <gcc-patches- > bounces+kyrylo.tkachov=arm....@gcc.gnu.org> On Behalf Of Christophe > Lyon via Gcc-patches > Sent: Thursday, January 13, 2022 2:56 PM > To: gcc-patches@gcc.gnu.org > Subject: [PATCH v3 09/15] arm: Fix vcond_mask expander for MVE (PR > target/100757) > > The problem in this PR is that we call VPSEL with a mask of vector > type instead of HImode. This happens because operand 3 in vcond_mask > is the pre-computed vector comparison and has vector type. > > This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE, > returning the appropriate VxBI mode when targeting MVE. In turn, this > implies implementing vec_cmp<mode><MVE_vpred>, > vec_cmpu<mode><MVE_vpred> and vcond_mask_<mode><MVE_vpred>, > and we can > move vec_cmp<mode><v_cmp_result>, vec_cmpu<mode><mode> and > vcond_mask_<mode><v_cmp_result> back to neon.md since they are not > used by MVE anymore. The new *<MVE_vpred> patterns listed above are > implemented in mve.md since they are only valid for MVE. However this > may make maintenance/comparison more painful than having all of them > in vec-common.md. > > In the process, we can get rid of the recently added vcond_mve > parameter of arm_expand_vector_compare. > > Compared to neon.md's vcond_mask_<mode><v_cmp_result> before my > "arm: > Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH > iterator added in r12-835 (to have V4HF/V8HF support), as well as the > (!<Is_float_mode> || flag_unsafe_math_optimizations) condition which > was not present before r12-834 although SF modes were enabled by VDQW > (I think this was a bug). > > Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no > longer need to generate vpsel with vectors of 0 and 1: the masks are > now merged via scalar 'ands' instructions operating on 16-bit masks > after converting the boolean vectors. 
> > In addition, this patch fixes a problem in arm_expand_vcond() where > the result would be a vector of 0 or 1 instead of operand 1 or 2. > > Since we want to skip gcc.dg/signbit-2.c for MVE, we also add a new > arm_mve effective target. > > Reducing the number of iterations in pr100757-3.c from 32 to 8, we > generate the code below: > > float a[32]; > float fn1(int d) { > float c = 4.0f; > for (int b = 0; b < 8; b++) > if (a[b] != 2.0f) > c = 5.0f; > return c; > } > > fn1: > ldr r3, .L3+48 > vldr.64 d4, .L3 // q2=(2.0,2.0,2.0,2.0) > vldr.64 d5, .L3+8 > vldrw.32 q0, [r3] // q0=a(0..3) > adds r3, r3, #16 > vcmp.f32 eq, q0, q2 // cmp a(0..3) == (2.0,2.0,2.0,2.0) > vldrw.32 q1, [r3] // q1=a(4..7) > vmrs r3, P0 > vcmp.f32 eq, q1, q2 // cmp a(4..7) == (2.0,2.0,2.0,2.0) > vmrs r2, P0 @ movhi > ands r3, r3, r2 // r3=select(a(0..3)) & select(a(4..7)) > vldr.64 d4, .L3+16 // q2=(5.0,5.0,5.0,5.0) > vldr.64 d5, .L3+24 > vmsr P0, r3 > vldr.64 d6, .L3+32 // q3=(4.0,4.0,4.0,4.0) > vldr.64 d7, .L3+40 > vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0) > vmov.32 r2, q3[1] // keep the scalar max > vmov.32 r0, q3[3] > vmov.32 r3, q3[2] > vmov.f32 s11, s12 > vmov s15, r2 > vmov s14, r3 > vmaxnm.f32 s15, s11, s15 > vmaxnm.f32 s15, s15, s14 > vmov s14, r0 > vmaxnm.f32 s15, s15, s14 > vmov r0, s15 > bx lr > .L4: > .align 3 > .L3: > .word 1073741824 // 2.0f > .word 1073741824 > .word 1073741824 > .word 1073741824 > .word 1084227584 // 5.0f > .word 1084227584 > .word 1084227584 > .word 1084227584 > .word 1082130432 // 4.0f > .word 1082130432 > .word 1082130432 > .word 1082130432 > > 2022-01-13 Christophe Lyon <christophe.l...@foss.st.com> > > PR target/100757 > gcc/ > * config/arm/arm-protos.h (arm_get_mask_mode): New prototype. > (arm_expand_vector_compare): Update prototype. > * config/arm/arm.c (TARGET_VECTORIZE_GET_MASK_MODE): New. > (arm_vector_mode_supported_p): Add support for VxBI modes. > (arm_expand_vector_compare): Remove useless generation of vpsel. 
> (arm_expand_vcond): Fix select operands. > (arm_get_mask_mode): New. > * config/arm/mve.md (vec_cmp<mode><MVE_vpred>): New. > (vec_cmpu<mode><MVE_vpred>): New. > (vcond_mask_<mode><MVE_vpred>): New. > * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>) > (vec_cmpu<mode><mode>, vcond_mask_<mode><v_cmp_result>): > Move to ... > * config/arm/neon.md (vec_cmp<mode><v_cmp_result>) > (vec_cmpu<mode><mode>, vcond_mask_<mode><v_cmp_result>): ... > here > and disable for MVE. > * doc/sourcebuild.texi (arm_mve): Document new effective-target. > > gcc/testsuite/ > * gcc.dg/signbit-2.c: Skip when targeting ARM/MVE. > * lib/target-supports.exp (check_effective_target_arm_mve): New. > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index b978adf2038..a84613104b1 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -202,6 +202,7 @@ extern void arm_init_cumulative_args > (CUMULATIVE_ARGS *, tree, rtx, tree); > extern bool arm_pad_reg_upward (machine_mode, tree, int); > #endif > extern int arm_apply_result_size (void); > +extern opt_machine_mode arm_get_mask_mode (machine_mode mode); > > #endif /* RTX_CODE */ > > @@ -378,7 +379,7 @@ extern void arm_emit_coreregs_64bit_shift (enum > rtx_code, rtx, rtx, rtx, rtx, > extern bool arm_fusion_enabled_p (tune_params::fuse_ops); > extern bool arm_valid_symbolic_address_p (rtx); > extern bool arm_validize_comparison (rtx *, rtx *, rtx *); > -extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, > bool); > +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); > #endif /* RTX_CODE */ > > extern bool arm_gen_setmem (rtx *); > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index fa18c7bd3fe..7d56fa71806 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -829,6 +829,10 @@ static const struct attribute_spec > arm_attribute_table[] = > > #undef TARGET_MD_ASM_ADJUST > #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust > + > 
+#undef TARGET_VECTORIZE_GET_MASK_MODE > +#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode > + > > > > /* Obstack for minipool constant handling. */ > static struct obstack minipool_obstack; > @@ -29234,7 +29238,8 @@ arm_vector_mode_supported_p > (machine_mode mode) > > if (TARGET_HAVE_MVE > && (mode == V2DImode || mode == V4SImode || mode == V8HImode > - || mode == V16QImode)) > + || mode == V16QImode > + || mode == V16BImode || mode == V8BImode || mode == > V4BImode)) > return true; > > if (TARGET_HAVE_MVE_FLOAT > @@ -31033,7 +31038,7 @@ arm_split_atomic_op (enum rtx_code code, rtx > old_out, rtx new_out, rtx mem, > } > > > > /* Return the mode for the MVE vector of predicates corresponding to > MODE. */ > -machine_mode > +opt_machine_mode > arm_mode_to_pred_mode (machine_mode mode) > { > switch (GET_MODE_NUNITS (mode)) > @@ -31042,7 +31047,7 @@ arm_mode_to_pred_mode (machine_mode > mode) > case 8: return V8BImode; > case 4: return V4BImode; > } > - gcc_unreachable (); > + return opt_machine_mode (); > } > > /* Expand code to compare vectors OP0 and OP1 using condition CODE. > @@ -31050,16 +31055,12 @@ arm_mode_to_pred_mode (machine_mode > mode) > and return true if TARGET contains the inverse. If !CAN_INVERT, > always store the result in TARGET, never its inverse. > > - If VCOND_MVE, do not emit the vpsel instruction here, let > arm_expand_vcond do > - it with the right destination type to avoid emiting two vpsel, one here > and > - one in arm_expand_vcond. > - > Note that the handling of floating-point comparisons is not > IEEE compliant. */ > > bool > arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, > - bool can_invert, bool vcond_mve) > + bool can_invert) > { > machine_mode cmp_result_mode = GET_MODE (target); > machine_mode cmp_mode = GET_MODE (op0); > @@ -31088,7 +31089,7 @@ arm_expand_vector_compare (rtx target, > rtx_code code, rtx op0, rtx op1, > and then store its inverse in TARGET. 
This avoids reusing > TARGET (which for integer NE could be one of the inputs). */ > rtx tmp = gen_reg_rtx (cmp_result_mode); > - if (arm_expand_vector_compare (tmp, code, op0, op1, true, > vcond_mve)) > + if (arm_expand_vector_compare (tmp, code, op0, op1, true)) > gcc_unreachable (); > emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, > tmp))); > return false; > @@ -31124,36 +31125,22 @@ arm_expand_vector_compare (rtx target, > rtx_code code, rtx op0, rtx op1, > case NE: > if (TARGET_HAVE_MVE) > { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, > force_reg (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, > + op0, force_reg (cmp_mode, op1))); > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, > op0, force_reg (cmp_mode, op1))); > + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target, > + op0, force_reg (cmp_mode, op1))); > else > gcc_unreachable (); > break; > default: > gcc_unreachable (); > } > - > - /* If we are not expanding a vcond, build the result here. 
*/ > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, > target, one, zero, vpr_p0)); > - } > } > else > emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); > @@ -31165,23 +31152,8 @@ arm_expand_vector_compare (rtx target, > rtx_code code, rtx op0, rtx op1, > case GEU: > case GTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, > force_reg (cmp_mode, op1))); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, > target, one, zero, vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, > + op0, force_reg (cmp_mode, op1))); > else > emit_insn (gen_neon_vc (code, cmp_mode, target, > op0, force_reg (cmp_mode, op1))); > @@ -31192,23 +31164,8 @@ arm_expand_vector_compare (rtx target, > rtx_code code, rtx op0, rtx op1, > case LEU: > case LTU: > if (TARGET_HAVE_MVE) > - { > - rtx vpr_p0; > - if (vcond_mve) > - vpr_p0 = target; > - else > - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode)); > - > - emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, > vpr_p0, force_reg (cmp_mode, op1), op0)); > - if (!vcond_mve) > - { > - rtx zero = gen_reg_rtx (cmp_result_mode); > - rtx one = gen_reg_rtx (cmp_result_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, > target, one, zero, 
vpr_p0)); > - } > - } > + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, > target, > + force_reg (cmp_mode, op1), op0)); > else > emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, > target, force_reg (cmp_mode, op1), op0)); > @@ -31223,8 +31180,8 @@ arm_expand_vector_compare (rtx target, > rtx_code code, rtx op0, rtx op1, > rtx gt_res = gen_reg_rtx (cmp_result_mode); > rtx alt_res = gen_reg_rtx (cmp_result_mode); > rtx_code alt_code = (code == LTGT ? LT : LE); > - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, > vcond_mve) > - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, > vcond_mve)) > + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) > + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, > true)) > gcc_unreachable (); > emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, > gt_res, alt_res))); > @@ -31244,19 +31201,15 @@ arm_expand_vcond (rtx *operands, > machine_mode cmp_result_mode) > { > /* When expanding for MVE, we do not want to emit a (useless) vpsel in > arm_expand_vector_compare, and another one here. 
*/ > - bool vcond_mve=false; > rtx mask; > > if (TARGET_HAVE_MVE) > - { > - vcond_mve=true; > - mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode)); > - } > + mask = gen_reg_rtx (arm_mode_to_pred_mode > (cmp_result_mode).require ()); > else > mask = gen_reg_rtx (cmp_result_mode); > > bool inverted = arm_expand_vector_compare (mask, GET_CODE > (operands[3]), > - operands[4], operands[5], true, > vcond_mve); > + operands[4], operands[5], true); > if (inverted) > std::swap (operands[1], operands[2]); > if (TARGET_NEON) > @@ -31264,20 +31217,20 @@ arm_expand_vcond (rtx *operands, > machine_mode cmp_result_mode) > mask, operands[1], operands[2])); > else > { > - machine_mode cmp_mode = GET_MODE (operands[4]); > - rtx vpr_p0 = mask; > - rtx zero = gen_reg_rtx (cmp_mode); > - rtx one = gen_reg_rtx (cmp_mode); > - emit_move_insn (zero, CONST0_RTX (cmp_mode)); > - emit_move_insn (one, CONST1_RTX (cmp_mode)); > + machine_mode cmp_mode = GET_MODE (operands[0]); > + > switch (GET_MODE_CLASS (cmp_mode)) > { > case MODE_VECTOR_INT: > - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, > operands[0], one, zero, vpr_p0)); > + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0], > + operands[1], operands[2], mask)); > break; > case MODE_VECTOR_FLOAT: > if (TARGET_HAVE_MVE_FLOAT) > - emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, > zero, vpr_p0)); > + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], > + operands[1], operands[2], mask)); > + else > + gcc_unreachable (); > break; > default: > gcc_unreachable (); > @@ -34187,4 +34140,15 @@ arm_mode_base_reg_class (machine_mode > mode) > > struct gcc_target targetm = TARGET_INITIALIZER; > > +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. 
*/ > + > +opt_machine_mode > +arm_get_mask_mode (machine_mode mode) > +{ > + if (TARGET_HAVE_MVE) > + return arm_mode_to_pred_mode (mode); > + > + return default_get_mask_mode (mode); > +} > + > #include "gt-arm.h" > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 983aa10e652..35564e870bc 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -10527,3 +10527,57 @@ (define_expand "mov<mode>" > operands[1] = force_reg (<MODE>mode, operands[1]); > } > ) > + > +;; Expanders for vec_cmp and vcond > + > +(define_expand "vec_cmp<mode><MVE_vpred>" > + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") > + (match_operator:<MVE_VPRED> 1 "comparison_operator" > + [(match_operand:MVE_VLD_ST 2 "s_register_operand") > + (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu<mode><MVE_vpred>" > + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") > + (match_operator:<MVE_VPRED> 1 "comparison_operator" > + [(match_operand:MVE_2 2 "s_register_operand") > + (match_operand:MVE_2 3 "reg_or_zero_operand")]))] > + "TARGET_HAVE_MVE" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_<mode><MVE_vpred>" > + [(set (match_operand:MVE_VLD_ST 0 "s_register_operand") > + (if_then_else:MVE_VLD_ST > + (match_operand:<MVE_VPRED> 3 "s_register_operand") > + (match_operand:MVE_VLD_ST 1 "s_register_operand") > + (match_operand:MVE_VLD_ST 2 "s_register_operand")))] > + "TARGET_HAVE_MVE" > +{ > + switch (GET_MODE_CLASS (<MODE>mode)) > + { > + case MODE_VECTOR_INT: > + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], > + operands[1], operands[2], operands[3])); > + break; > + case MODE_VECTOR_FLOAT: 
> + if (TARGET_HAVE_MVE_FLOAT) > + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], > + operands[1], operands[2], operands[3])); > + else > + gcc_unreachable ();
I think this logic is a bit too complicated. The vpselq_f pattern is already guarded on TARGET_HAVE_MVE_FLOAT so the compiler will ICE if it gets generated without MVE float. So there's no need for this "if (TARGET_HAVE_MVE_FLOAT)" and gcc_unreachable (). Ok with that change. Thanks, Kyrill > + break; > + default: > + gcc_unreachable (); > + } > + DONE; > +}) > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index e06c8245672..20e9f11ec81 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1394,6 +1394,45 @@ (define_insn "*us_sub<mode>_neon" > [(set_attr "type" "neon_qsub<q>")] > ) > > +(define_expand "vec_cmp<mode><v_cmp_result>" > + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") > + (match_operator:<V_cmp_result> 1 "comparison_operator" > + [(match_operand:VDQWH 2 "s_register_operand") > + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > + "TARGET_NEON > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vec_cmpu<mode><mode>" > + [(set (match_operand:VDQIW 0 "s_register_operand") > + (match_operator:VDQIW 1 "comparison_operator" > + [(match_operand:VDQIW 2 "s_register_operand") > + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > + "TARGET_NEON" > +{ > + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > + operands[2], operands[3], false); > + DONE; > +}) > + > +(define_expand "vcond_mask_<mode><v_cmp_result>" > + [(set (match_operand:VDQWH 0 "s_register_operand") > + (if_then_else:VDQWH > + (match_operand:<V_cmp_result> 3 "s_register_operand") > + (match_operand:VDQWH 1 "s_register_operand") > + (match_operand:VDQWH 2 "s_register_operand")))] > + "TARGET_NEON > + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > +{ > + emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], > operands[1], > + operands[2])); > + DONE; > +}) > 
+ > ;; Patterns for builtins. > > ; good for plain vadd, vaddq. > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec- > common.md > index cef358e44f5..20586973ed9 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -363,33 +363,6 @@ (define_expand "vlshr<mode>3" > } > }) > > -(define_expand "vec_cmp<mode><v_cmp_result>" > - [(set (match_operand:<V_cmp_result> 0 "s_register_operand") > - (match_operator:<V_cmp_result> 1 "comparison_operator" > - [(match_operand:VDQWH 2 "s_register_operand") > - (match_operand:VDQWH 3 "reg_or_zero_operand")]))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT > - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > -(define_expand "vec_cmpu<mode><mode>" > - [(set (match_operand:VDQIW 0 "s_register_operand") > - (match_operator:VDQIW 1 "comparison_operator" > - [(match_operand:VDQIW 2 "s_register_operand") > - (match_operand:VDQIW 3 "reg_or_zero_operand")]))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT" > -{ > - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), > - operands[2], operands[3], false, false); > - DONE; > -}) > - > ;; Conditional instructions. These are comparisons with conditional moves > for > ;; vectors. 
They perform the assignment: > ;; > @@ -461,31 +434,6 @@ (define_expand "vcondu<mode><v_cmp_result>" > DONE; > }) > > -(define_expand "vcond_mask_<mode><v_cmp_result>" > - [(set (match_operand:VDQWH 0 "s_register_operand") > - (if_then_else:VDQWH > - (match_operand:<V_cmp_result> 3 "s_register_operand") > - (match_operand:VDQWH 1 "s_register_operand") > - (match_operand:VDQWH 2 "s_register_operand")))] > - "ARM_HAVE_<MODE>_ARITH > - && !TARGET_REALLY_IWMMXT > - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > -{ > - if (TARGET_NEON) > - { > - emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3], > - operands[1], operands[2])); > - } > - else if (TARGET_HAVE_MVE) > - { > - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], > - operands[1], operands[2], operands[3])); > - } > - else > - gcc_unreachable (); > - DONE; > -}) > - > (define_expand "vec_load_lanesoi<mode>" > [(set (match_operand:OI 0 "s_register_operand") > (unspec:OI [(match_operand:OI 1 "neon_struct_operand") > diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi > index 6095a35cd45..8d369935396 100644 > --- a/gcc/doc/sourcebuild.texi > +++ b/gcc/doc/sourcebuild.texi > @@ -2236,6 +2236,10 @@ ARM target supports the @code{-mfloat- > abi=softfp} option. > @anchor{arm_hard_ok} > ARM target supports the @code{-mfloat-abi=hard} option. > > +@item arm_mve > +@anchor{arm_mve} > +ARM target supports generating MVE instructions. > + > @item arm_v8_1_lob_ok > @anchor{arm_v8_1_lob_ok} > ARM Target supports executing the Armv8.1-M Mainline Low Overhead Loop > diff --git a/gcc/testsuite/gcc.dg/signbit-2.c > b/gcc/testsuite/gcc.dg/signbit-2.c > index b609f67dc9f..2f2dc448286 100644 > --- a/gcc/testsuite/gcc.dg/signbit-2.c > +++ b/gcc/testsuite/gcc.dg/signbit-2.c > @@ -4,6 +4,7 @@ > /* This test does not work when the truth type does not match vector type. 
> */ > /* { dg-additional-options "-mno-avx512f" { target { i?86-*-* x86_64-*-* } } > } > */ > /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */ > +/* { dg-skip-if "no fallback for MVE" { arm_mve } } */ > > #include <stdint.h> > > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target- > supports.exp > index 0fe1e1e077a..8dac516ec12 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -5234,6 +5234,18 @@ proc check_effective_target_arm_hard_ok { } { > } "-mfloat-abi=hard"] > } > > +# Return 1 if this is an ARM target supporting MVE. > +proc check_effective_target_arm_mve { } { > + if { ![istarget arm*-*-*] } { > + return 0 > + } > + return [check_no_compiler_messages arm_mve assembly { > + #if !defined (__ARM_FEATURE_MVE) > + #error FOO > + #endif > + }] > +} > + > # Return 1 if the target supports ARMv8.1-M MVE with floating point > # instructions, 0 otherwise. The test is valid for ARM. > # Record the command line options needed. > -- > 2.25.1