Hi,

Sorry for the slow review.  I'd initially held off from reviewing
because it sounded like you were trying to treat predicates as
MODE_VECTOR_BOOL instead.  Is that right?  If so, how did that go?

It does feel like the right long-term direction.  Treating masks as
integers for AVX seems to make some things more difficult than they
should be.  Also, RTL like:

> +(define_expand "vec_cmp<mode>hi"
> +  [(set (match_operand:HI 0 "s_register_operand")
> +     (match_operator:HI 1 "comparison_operator"
> +       [(match_operand:MVE_VLD_ST 2 "s_register_operand")
> +        (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
> +  "TARGET_HAVE_MVE
> +   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> +{
> +  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> +                          operands[2], operands[3], false, false);
> +  DONE;
> +})

seems kind-of suspect, since (as I think you said in a PR),
(eq:HI X Y) would normally be a single boolean.  Having MODE_VECTOR_BOOL
means that we can represent the comparisons “properly”, even in
define_insns.

But since this usage is confined to define_expands, I guess it
doesn't matter much for the purposes of this patch.  Any switch
to MODE_VECTOR_BOOL would leave most of the patch intact (contrary
to what I'd initially assumed).

> @@ -31061,13 +31065,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
>  
>         /* If we are not expanding a vcond, build the result here.  */
>         if (!vcond_mve)
> -         {
> -           rtx zero = gen_reg_rtx (cmp_result_mode);
> -           rtx one = gen_reg_rtx (cmp_result_mode);
> -           emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
> -           emit_move_insn (one, CONST1_RTX (cmp_result_mode));
> -           emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
> -         }
> +         emit_move_insn (target, vpr_p0);

The code above this is:

          if (vcond_mve)
            vpr_p0 = target;
          else
            vpr_p0 = gen_reg_rtx (HImode);

so couldn't we simply use vpr_p0 = target unconditionally (or drop
vpr_p0 altogether)?  Same for the other cases.
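
For example (just a sketch of the simplification, assuming nothing else
in the function still needs a separate pseudo):

          /* The comparison result can go straight into TARGET; the
             vcond_mve distinction no longer matters here.  */
          vpr_p0 = target;

with the final "if (!vcond_mve) emit_move_insn (target, vpr_p0);"
dropped, or with vpr_p0 removed entirely and target used in its place.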

> @@ -31178,20 +31164,21 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
>                           mask, operands[1], operands[2]));
>    else
>      {
> -      machine_mode cmp_mode = GET_MODE (operands[4]);
> +      machine_mode cmp_mode = GET_MODE (operands[0]);
>        rtx vpr_p0 = mask;
> -      rtx zero = gen_reg_rtx (cmp_mode);
> -      rtx one = gen_reg_rtx (cmp_mode);
> -      emit_move_insn (zero, CONST0_RTX (cmp_mode));
> -      emit_move_insn (one, CONST1_RTX (cmp_mode));
> +
>        switch (GET_MODE_CLASS (cmp_mode))
>       {
>       case MODE_VECTOR_INT:
> -       emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0));
> +       emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
> +                                  operands[1], operands[2], vpr_p0));
>         break;
>       case MODE_VECTOR_FLOAT:
>         if (TARGET_HAVE_MVE_FLOAT)
> -         emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0));
> +         emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
> +                                      operands[1], operands[2], vpr_p0));
> +       else
> +         gcc_unreachable ();
>         break;
>       default:
>         gcc_unreachable ();

Here too vpr_p0 feels a bit redundant now.
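
I.e., something along these lines (sketch only, based just on the hunk
above), with the mask passed directly:

      switch (GET_MODE_CLASS (cmp_mode))
        {
        case MODE_VECTOR_INT:
          emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
                                     operands[1], operands[2], mask));
          break;
          ...
        }

and likewise gen_mve_vpselq_f taking mask in the MODE_VECTOR_FLOAT case.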

> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index e393518ea88..a9840408bdd 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -10516,3 +10516,58 @@ (define_insn "*movmisalign<mode>_mve_load"
>    "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
>    [(set_attr "type" "mve_load")]
>  )
> +
> +;; Expanders for vec_cmp and vcond
> +
> +(define_expand "vec_cmp<mode>hi"
> +  [(set (match_operand:HI 0 "s_register_operand")
> +     (match_operator:HI 1 "comparison_operator"
> +       [(match_operand:MVE_VLD_ST 2 "s_register_operand")
> +        (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
> +  "TARGET_HAVE_MVE
> +   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"

Is flag_unsafe_math_optimizations needed for MVE?  For Neon we had
it because of flush to zero (at least, I think that was the only reason).

> +{
> +  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> +                          operands[2], operands[3], false, false);
> +  DONE;
> +})

The (snipped) tests look good, but if we do support
!flag_unsafe_math_optimizations, it would be good to have some tests
for unordered comparisons too.  E.g.:

#define ordered(A, B) (!__builtin_isunordered (A, B))
#define unordered(A, B) (__builtin_isunordered (A, B))
#define ueq(A, B) (!__builtin_islessgreater (A, B))
#define ult(A, B) (__builtin_isless (A, B))
#define ule(A, B) (__builtin_islessequal (A, B))
#define uge(A, B) (__builtin_isgreaterequal (A, B))
#define ugt(A, B) (__builtin_isgreater (A, B))
#define nueq(A, B) (__builtin_islessgreater (A, B))
#define nult(A, B) (!__builtin_isless (A, B))
#define nule(A, B) (!__builtin_islessequal (A, B))
#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
#define nugt(A, B) (!__builtin_isgreater (A, B))
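
For instance, one shape such a test could take (the function and the
constants here are purely illustrative):

  void
  test_ult (float *dst, float *a, float *b, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = ult (a[i], b[i]) ? 1.0f : 2.0f;
  }

compiled with MVE enabled but without -funsafe-math-optimizations,
and similarly for each of the other macros.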

The main thing is just to check that they don't ICE.  It might be
difficult to check the assembly for correctness.

Thanks,
Richard
