Hi,

Sorry for the slow review.  I'd initially held off from reviewing because
it sounded like you were trying to treat predicates as MODE_VECTOR_BOOL
instead.  Is that right?  If so, how did that go?
It does feel like the right long-term direction.  Treating masks as
integers for AVX seems to make some things more difficult than they
should be.  Also, RTL like:

> +(define_expand "vec_cmp<mode>hi"
> +  [(set (match_operand:HI 0 "s_register_operand")
> +	(match_operator:HI 1 "comparison_operator"
> +	  [(match_operand:MVE_VLD_ST 2 "s_register_operand")
> +	   (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
> +  "TARGET_HAVE_MVE
> +   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> +{
> +  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> +			     operands[2], operands[3], false, false);
> +  DONE;
> +})

seems kind-of suspect, since (as I think you said in a PR) (eq:HI X Y)
would normally be a single boolean.  Having MODE_VECTOR_BOOL means that
we can represent the comparisons "properly", even in define_insns.

But since this usage is confined to define_expands, I guess it doesn't
matter much for the purposes of this patch.  Any switch to
MODE_VECTOR_BOOL would leave most of the patch intact (contrary to what
I'd initially assumed).

> @@ -31061,13 +31065,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
>
>    /* If we are not expanding a vcond, build the result here.  */
>    if (!vcond_mve)
> -    {
> -      rtx zero = gen_reg_rtx (cmp_result_mode);
> -      rtx one = gen_reg_rtx (cmp_result_mode);
> -      emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
> -      emit_move_insn (one, CONST1_RTX (cmp_result_mode));
> -      emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
> -    }
> +    emit_move_insn (target, vpr_p0);

The code above this is:

  if (vcond_mve)
    vpr_p0 = target;
  else
    vpr_p0 = gen_reg_rtx (HImode);

so couldn't we simply use vpr_p0 = target unconditionally (or drop
vpr_p0 altogether)?  Same for the other cases.

> @@ -31178,20 +31164,21 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
> 				  mask, operands[1], operands[2]));
>    else
>      {
> -      machine_mode cmp_mode = GET_MODE (operands[4]);
> +      machine_mode cmp_mode = GET_MODE (operands[0]);
>        rtx vpr_p0 = mask;
> -      rtx zero = gen_reg_rtx (cmp_mode);
> -      rtx one = gen_reg_rtx (cmp_mode);
> -      emit_move_insn (zero, CONST0_RTX (cmp_mode));
> -      emit_move_insn (one, CONST1_RTX (cmp_mode));
> +
>        switch (GET_MODE_CLASS (cmp_mode))
> 	{
> 	case MODE_VECTOR_INT:
> -	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0));
> +	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
> +				     operands[1], operands[2], vpr_p0));
> 	  break;
> 	case MODE_VECTOR_FLOAT:
> 	  if (TARGET_HAVE_MVE_FLOAT)
> -	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0));
> +	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
> +					 operands[1], operands[2], vpr_p0));
> +	  else
> +	    gcc_unreachable ();
> 	  break;
> 	default:
> 	  gcc_unreachable ();

Here too vpr_p0 feels a bit redundant now.
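I.e. in arm_expand_vcond I was thinking of something like the following
(completely untested, just to show the shape -- it is the same code as
in your hunk, with vpr_p0 replaced by mask):

      machine_mode cmp_mode = GET_MODE (operands[0]);

      /* Use MASK directly as the VPR.P0 operand of the VPSEL.  */
      switch (GET_MODE_CLASS (cmp_mode))
	{
	case MODE_VECTOR_INT:
	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
				     operands[1], operands[2], mask));
	  break;
	case MODE_VECTOR_FLOAT:
	  if (TARGET_HAVE_MVE_FLOAT)
	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
					 operands[1], operands[2], mask));
	  else
	    gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}

The same kind of simplification ought to work in
arm_expand_vector_compare, by having the comparison write to TARGET
directly.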
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index e393518ea88..a9840408bdd 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -10516,3 +10516,58 @@ (define_insn "*movmisalign<mode>_mve_load"
>    "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
>    [(set_attr "type" "mve_load")]
> )
> +
> +;; Expanders for vec_cmp and vcond
> +
> +(define_expand "vec_cmp<mode>hi"
> +  [(set (match_operand:HI 0 "s_register_operand")
> +	(match_operator:HI 1 "comparison_operator"
> +	  [(match_operand:MVE_VLD_ST 2 "s_register_operand")
> +	   (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
> +  "TARGET_HAVE_MVE
> +   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"

Is flag_unsafe_math_optimizations needed for MVE?  For Neon we had it
because of flush to zero (at least, I think that was the only reason).

> +{
> +  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> +			     operands[2], operands[3], false, false);
> +  DONE;
> +})

The (snipped) tests look good, but if we do support
!flag_unsafe_math_optimizations, it would be good to have some tests
for unordered comparisons too.  E.g.:

#define ordered(A, B) (!__builtin_isunordered (A, B))
#define unordered(A, B) (__builtin_isunordered (A, B))
#define ueq(A, B) (!__builtin_islessgreater (A, B))
#define ult(A, B) (__builtin_isless (A, B))
#define ule(A, B) (__builtin_islessequal (A, B))
#define uge(A, B) (__builtin_isgreaterequal (A, B))
#define ugt(A, B) (__builtin_isgreater (A, B))
#define nueq(A, B) (__builtin_islessgreater (A, B))
#define nult(A, B) (!__builtin_isless (A, B))
#define nule(A, B) (!__builtin_islessequal (A, B))
#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
#define nugt(A, B) (!__builtin_isgreater (A, B))

The main thing is just to check that they don't ICE.  It might be
difficult to check the assembly for correctness.

Thanks,
Richard
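P.S. In case it helps, below is a rough sketch of the kind of test I had
in mind (uncompiled; the function names and the float/int array types
are just placeholders, and it would want whatever dg-options the
existing MVE vcmp tests use).  The macros are the ones above:

/* Instantiate one loop per comparison so that each unordered comparison
   goes through the new vec_cmp expander; the test only needs to show
   that nothing ICEs.  */
#define TEST(NAME)					\
  void							\
  test_##NAME (int *dst, float *a, float *b, int n)	\
  {							\
    for (int i = 0; i < n; i++)				\
      dst[i] = NAME (a[i], b[i]);			\
  }

TEST (ordered)
TEST (unordered)
TEST (ueq)
TEST (ult)
TEST (ule)
TEST (uge)
TEST (ugt)
TEST (nueq)
TEST (nult)
TEST (nule)
TEST (nuge)
TEST (nugt)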