On Sat, Jan 15, 2022 at 10:56 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Sat, Jan 15, 2022 at 09:29:05AM +0100, Uros Bizjak wrote:
> > > --- gcc/config/i386/i386.md.jj  2022-01-14 11:51:34.432384170 +0100
> > > +++ gcc/config/i386/i386.md     2022-01-14 18:22:41.140906449 +0100
> > > @@ -23886,6 +23886,18 @@ (define_insn "hreset"
> > >    [(set_attr "type" "other")
> > >     (set_attr "length" "4")])
> > >
> > > +;; Spaceship optimization
> > > +(define_expand "spaceship<mode>3"
> > > +  [(match_operand:SI 0 "register_operand")
> > > +   (match_operand:MODEF 1 "cmp_fp_expander_operand")
> > > +   (match_operand:MODEF 2 "cmp_fp_expander_operand")]
> > > +  "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> > > +   && TARGET_CMOVE && TARGET_IEEE_FP"
> >
> > Is there a reason that this pattern is limited to TARGET_IEEE_FP?
> > During the expansion in ix86_expand_fp_spaceship, we can just skip
> > jump on unordered, while ix86_expand_fp_compare will emit the correct
> > comparison mode depending on TARGET_IEEE_FP.
>
> For -ffast-math I thought <=> expands to just x == y ? 0 : x < y ? -1 : 1
> but apparently not, it is still x == y ? 0 : x < y ? -1 : x > y ? 1 : 2
> but it is still optimized much better than the non-fast-math case
> without the patch:
>         comisd  %xmm1, %xmm0
>         je      .L12
>         jb      .L13
>         movl    $1, %edx
>         movl    $2, %eax
>         cmova   %edx, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L12:
>         xorl    %eax, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L13:
>         movl    $-1, %eax
>         ret
> so just one comparison but admittedly the
>         movl    $1, %edx
>         movl    $2, %eax
>         cmova   %edx, %eax
> part is unnecessary.
> So below is an incremental patch that handles even the !HONORS_NANS
> case at the gimple pattern matching (while for HONOR_NANS we must
> obey that for NaN operand(s) >/>=/</<= are false and so we need to make sure
> we look for the third comparison on the false edge of the second one,
> for !HONOR_NANS that is not the case.  With the incremental patch we get:
>         comisd  %xmm1, %xmm0
>         je      .L2
>         seta    %al
>         leal    -1(%rax,%rax), %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         xorl    %eax, %eax
>         ret
> for -O2 -ffast-math.
> Also, I've added || (TARGET_SAHF && TARGET_USE_SAHF), because apparently
> we can handle that case nicely too, it is just the IX86_FPCMP_ARITH
> case where fp_compare already emits very specific code (and we can't
> call ix86_expand_fp_compare 3 times because that would for IEEE_FP
> emit different comparisons which couldn't be CSEd).
> I'll add also -ffast-math testsuite coverage and retest.
>
> Also, I wonder if I shouldn't handle XFmode the same, thoughts on that?

Yes, that would be nice. XFmode is used for long double, and is not obsolete.

>
> > > +  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == 
> > > IX86_FPCMP_COMI);
> > > +  rtx gt = ix86_expand_fp_compare (GT, op0, op1);
> > > +  rtx l0 = gen_label_rtx ();
> > > +  rtx l1 = gen_label_rtx ();
> > > +  rtx l2 = gen_label_rtx ();
> > > +  rtx lend = gen_label_rtx ();
> > > +  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> > > +                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > > +  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> > > +                                 gen_rtx_LABEL_REF (VOIDmode, l2), 
> > > pc_rtx);
> > > +  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
> >
> > Please also add JUMP_LABEL to the insn.
> >
> > > +  rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
> > > +                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> > > +                             gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
> > > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability::unlikely ());
> > > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
> > > +                             gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
> > > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability::even ());
> > > +  emit_move_insn (dest, constm1_rtx);
> > > +  emit_jump (lend);
> > > +  emit_label (l0);
> >
> > and LABEL_NUSES label.
>
> Why?  That seems to be a waste of time to me, unless something uses them
> already during expansion.  Because pass_expand::execute
> runs:
>   /* We need JUMP_LABEL be set in order to redirect jumps, and hence
>      split edges which edge insertions might do.  */
>   rebuild_jump_labels (get_insns ());
> which resets all LABEL_NUSES to 0 (well, to:
>       if (LABEL_P (insn))
>         LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0);
> and then recomputes them and adds JUMP_LABEL if needed:
>               JUMP_LABEL (insn) = label;

I was not aware of that detail. Thanks for sharing (and I wonder if
all other cases should be removed from the source).

Uros.

> --- gcc/config/i386/i386.md.jj  2022-01-15 09:51:25.404468342 +0100
> +++ gcc/config/i386/i386.md     2022-01-15 09:56:31.602109421 +0100
> @@ -23892,7 +23892,7 @@ (define_expand "spaceship<mode>3"
>     (match_operand:MODEF 1 "cmp_fp_expander_operand")
>     (match_operand:MODEF 2 "cmp_fp_expander_operand")]
>    "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> -   && TARGET_CMOVE && TARGET_IEEE_FP"
> +   && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
>  {
>    ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
>    DONE;
> --- gcc/config/i386/i386-expand.c.jj    2022-01-15 09:51:25.411468242 +0100
> +++ gcc/config/i386/i386-expand.c       2022-01-15 10:38:26.924333651 +0100
> @@ -2884,18 +2884,23 @@ ix86_expand_setcc (rtx dest, enum rtx_co
>  void
>  ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
>  {
> -  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
> +  gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
>    rtx gt = ix86_expand_fp_compare (GT, op0, op1);
>    rtx l0 = gen_label_rtx ();
>    rtx l1 = gen_label_rtx ();
> -  rtx l2 = gen_label_rtx ();
> +  rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX;
>    rtx lend = gen_label_rtx ();
> -  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> -                          gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> -  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> +  rtx tmp;
> +  rtx_insn *jmp;
> +  if (l2)
> +    {
> +      rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> +                              gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> +      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
>                                   gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
> -  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> -  add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
> +      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> +      add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
> +    }
>    rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
>                            gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
>    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> @@ -2914,8 +2919,11 @@ ix86_expand_fp_spaceship (rtx dest, rtx
>    emit_label (l1);
>    emit_move_insn (dest, const1_rtx);
>    emit_jump (lend);
> -  emit_label (l2);
> -  emit_move_insn (dest, const2_rtx);
> +  if (l2)
> +    {
> +      emit_label (l2);
> +      emit_move_insn (dest, const2_rtx);
> +    }
>    emit_label (lend);
>  }
>
> --- gcc/tree-ssa-math-opts.c.jj 2022-01-15 09:51:25.402468370 +0100
> +++ gcc/tree-ssa-math-opts.c    2022-01-15 10:35:52.366533951 +0100
> @@ -4694,7 +4694,6 @@ optimize_spaceship (gimple *stmt)
>    tree arg1 = gimple_cond_lhs (stmt);
>    tree arg2 = gimple_cond_rhs (stmt);
>    if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
> -      || !HONOR_NANS (TREE_TYPE (arg1))
>        || optab_handler (spaceship_optab,
>                         TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
>        || operand_equal_p (arg1, arg2, 0))
> @@ -4732,56 +4731,67 @@ optimize_spaceship (gimple *stmt)
>        return;
>      }
>
> -  /* With NaNs, </<=/>/>= are false, so we need to look for the
> -     third comparison on the false edge from whatever non-equality
> -     comparison the second comparison is.  */
> -  int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0;
> -  bb2 = EDGE_SUCC (bb1, i)->dest;
> -  g = last_stmt (bb2);
> -  if (g == NULL
> -      || gimple_code (g) != GIMPLE_COND
> -      || !single_pred_p (bb2)
> -      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
> -         ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
> -         : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
> -            || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
> -      || !cond_only_block_p (bb2)
> -      || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
> -    return;
> -
> -  enum tree_code ccode2
> -    = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
> -  switch (gimple_cond_code (g))
> +  for (int i = 0; i < 2; ++i)
>      {
> -    case LT_EXPR:
> -    case LE_EXPR:
> +      /* With NaNs, </<=/>/>= are false, so we need to look for the
> +        third comparison on the false edge from whatever non-equality
> +        comparison the second comparison is.  */
> +      if (HONOR_NANS (TREE_TYPE (arg1))
> +         && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
> +       continue;
> +
> +      bb2 = EDGE_SUCC (bb1, i)->dest;
> +      g = last_stmt (bb2);
> +      if (g == NULL
> +         || gimple_code (g) != GIMPLE_COND
> +         || !single_pred_p (bb2)
> +         || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
> +             ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
> +             : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
> +                || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
> +         || !cond_only_block_p (bb2)
> +         || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
> +       continue;
> +
> +      enum tree_code ccode2
> +       = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : 
> GT_EXPR);
> +      switch (gimple_cond_code (g))
> +       {
> +       case LT_EXPR:
> +       case LE_EXPR:
> +         break;
> +       case GT_EXPR:
> +       case GE_EXPR:
> +         ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
> +         break;
> +       default:
> +         continue;
> +       }
> +      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
> +       return;
> +
> +      if ((ccode == LT_EXPR)
> +         ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
> +       {
> +         em1 = EDGE_SUCC (bb1, 1 - i);
> +         e1 = EDGE_SUCC (bb2, 0);
> +         e2 = EDGE_SUCC (bb2, 1);
> +         if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
> +           std::swap (e1, e2);
> +       }
> +      else
> +       {
> +         e1 = EDGE_SUCC (bb1, 1 - i);
> +         em1 = EDGE_SUCC (bb2, 0);
> +         e2 = EDGE_SUCC (bb2, 1);
> +         if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
> +           std::swap (em1, e2);
> +       }
>        break;
> -    case GT_EXPR:
> -    case GE_EXPR:
> -      ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
> -      break;
> -    default:
> -      return;
>      }
> -  if (ccode == ccode2)
> -    return;
>
> -  if (ccode == LT_EXPR)
> -    {
> -      em1 = EDGE_SUCC (bb1, 1 - i);
> -      e1 = EDGE_SUCC (bb2, 0);
> -      e2 = EDGE_SUCC (bb2, 1);
> -      if ((e1->flags & EDGE_TRUE_VALUE) == 0)
> -       std::swap (e1, e2);
> -    }
> -  else
> -    {
> -      e1 = EDGE_SUCC (bb1, 1 - i);
> -      em1 = EDGE_SUCC (bb2, 0);
> -      e2 = EDGE_SUCC (bb2, 1);
> -      if ((em1->flags & EDGE_TRUE_VALUE) == 0)
> -       std::swap (em1, e2);
> -    }
> +  if (em1 == NULL)
> +    return;
>
>    g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
>    tree lhs = make_ssa_name (integer_type_node);
> @@ -4796,14 +4806,19 @@ optimize_spaceship (gimple *stmt)
>
>    g = last_stmt (bb1);
>    cond = as_a <gcond *> (g);
> -  gimple_cond_set_code (cond, EQ_EXPR);
>    gimple_cond_set_lhs (cond, lhs);
>    if (em1->src == bb1)
> -    gimple_cond_set_rhs (cond, integer_minus_one_node);
> +    {
> +      gimple_cond_set_rhs (cond, integer_minus_one_node);
> +      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
> +                                 ? EQ_EXPR : NE_EXPR);
> +    }
>    else
>      {
>        gcc_assert (e1->src == bb1);
>        gimple_cond_set_rhs (cond, integer_one_node);
> +      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
> +                                 ? EQ_EXPR : NE_EXPR);
>      }
>    update_stmt (g);
>
>
>
>         Jakub
>

Reply via email to