On Sat, Jan 15, 2022 at 10:56 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Sat, Jan 15, 2022 at 09:29:05AM +0100, Uros Bizjak wrote:
> > > --- gcc/config/i386/i386.md.jj 2022-01-14 11:51:34.432384170 +0100
> > > +++ gcc/config/i386/i386.md 2022-01-14 18:22:41.140906449 +0100
> > > @@ -23886,6 +23886,18 @@ (define_insn "hreset"
> > >    [(set_attr "type" "other")
> > >     (set_attr "length" "4")])
> > >
> > > +;; Spaceship optimization
> > > +(define_expand "spaceship<mode>3"
> > > +  [(match_operand:SI 0 "register_operand")
> > > +   (match_operand:MODEF 1 "cmp_fp_expander_operand")
> > > +   (match_operand:MODEF 2 "cmp_fp_expander_operand")]
> > > +  "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> > > +   && TARGET_CMOVE && TARGET_IEEE_FP"
> >
> > Is there a reason that this pattern is limited to TARGET_IEEE_FP?
> > During the expansion in ix86_expand_fp_spaceship, we can just skip the
> > jump on unordered, while ix86_expand_fp_compare will emit the correct
> > comparison mode depending on TARGET_IEEE_FP.
>
> For -ffast-math I thought <=> expands to just x == y ? 0 : x < y ? -1 : 1,
> but apparently not; it is still x == y ? 0 : x < y ? -1 : x > y ? 1 : 2.
> Still, it is optimized much better than the non-fast-math case without
> the patch:
>         comisd  %xmm1, %xmm0
>         je      .L12
>         jb      .L13
>         movl    $1, %edx
>         movl    $2, %eax
>         cmova   %edx, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L12:
>         xorl    %eax, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L13:
>         movl    $-1, %eax
>         ret
> so just one comparison, but admittedly the
>         movl    $1, %edx
>         movl    $2, %eax
>         cmova   %edx, %eax
> part is unnecessary.
> So below is an incremental patch that handles even the !HONOR_NANS case
> at the gimple pattern matching (while for HONOR_NANS we must obey that
> for NaN operand(s) >/>=/</<= are false, and so need to make sure we look
> for the third comparison on the false edge of the second one; for
> !HONOR_NANS that is not the case).  With the incremental patch we get:
>         comisd  %xmm1, %xmm0
>         je      .L2
>         seta    %al
>         leal    -1(%rax,%rax), %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         xorl    %eax, %eax
>         ret
> for -O2 -ffast-math.
> Also, I've added || (TARGET_SAHF && TARGET_USE_SAHF), because apparently
> we can handle that case nicely too; it is just the IX86_FPCMP_ARITH case
> where fp_compare already emits very specific code (and we can't call
> ix86_expand_fp_compare 3 times, because for IEEE_FP that would emit
> different comparisons which couldn't be CSEd).
> I'll also add -ffast-math testsuite coverage and retest.
>
> Also, I wonder if I shouldn't handle XFmode the same, thoughts on that?
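[For context, the construct being optimized is, in source form, roughly the
following -- just a sketch; the -1/0/1/2 encoding assumes libstdc++'s
std::partial_ordering representation, where 2 stands for the unordered (NaN)
result:

  #include <compare>

  std::partial_ordering
  cmp (double x, double y)
  {
    /* Lowered to roughly
         x == y ? 0 : x < y ? -1 : x > y ? 1 : 2
       even with -ffast-math, as noted above.  */
    return x <=> y;
  }

This is the three-comparison shape that the spaceship<mode>3 expander and
optimize_spaceship recognize.]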
Yes, that would be nice.  XFmode is used for long double, and is not obsolete.

>
> > > +  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
> > > +  rtx gt = ix86_expand_fp_compare (GT, op0, op1);
> > > +  rtx l0 = gen_label_rtx ();
> > > +  rtx l1 = gen_label_rtx ();
> > > +  rtx l2 = gen_label_rtx ();
> > > +  rtx lend = gen_label_rtx ();
> > > +  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> > > +                           gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > > +  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> > > +                                  gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
> > > +  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability::very_unlikely ());
> >
> > Please also add JUMP_LABEL to the insn.
> >
> > > +  rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
> > > +                           gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> > > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> > > +                              gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
> > > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability::unlikely ());
> > > +  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
> > > +                              gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
> > > +  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> > > +  add_reg_br_prob_note (jmp, profile_probability::even ());
> > > +  emit_move_insn (dest, constm1_rtx);
> > > +  emit_jump (lend);
> > > +  emit_label (l0);
> >
> > and LABEL_NUSES label.
>
> Why?  That seems to be a waste of time to me, unless something uses them
> already during expansion.  Because pass_expand::execute runs:
>   /* We need JUMP_LABEL be set in order to redirect jumps, and hence
>      split edges which edge insertions might do.  */
>   rebuild_jump_labels (get_insns ());
> which resets all LABEL_NUSES to 0 (well, to
>     if (LABEL_P (insn))
>       LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0);
> ) and then recomputes them and adds JUMP_LABEL if needed:
>       JUMP_LABEL (insn) = label;

I was not aware of that detail.  Thanks for sharing (and I wonder if all
other cases should be removed from the source).

Uros.

> --- gcc/config/i386/i386.md.jj 2022-01-15 09:51:25.404468342 +0100
> +++ gcc/config/i386/i386.md 2022-01-15 09:56:31.602109421 +0100
> @@ -23892,7 +23892,7 @@ (define_expand "spaceship<mode>3"
>     (match_operand:MODEF 1 "cmp_fp_expander_operand")
>     (match_operand:MODEF 2 "cmp_fp_expander_operand")]
>    "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
> -   && TARGET_CMOVE && TARGET_IEEE_FP"
> +   && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
>  {
>    ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
>    DONE;
> --- gcc/config/i386/i386-expand.c.jj 2022-01-15 09:51:25.411468242 +0100
> +++ gcc/config/i386/i386-expand.c 2022-01-15 10:38:26.924333651 +0100
> @@ -2884,18 +2884,23 @@ ix86_expand_setcc (rtx dest, enum rtx_co
>  void
>  ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
>  {
> -  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
> +  gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
>    rtx gt = ix86_expand_fp_compare (GT, op0, op1);
>    rtx l0 = gen_label_rtx ();
>    rtx l1 = gen_label_rtx ();
> -  rtx l2 = gen_label_rtx ();
> +  rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX;
>    rtx lend = gen_label_rtx ();
> -  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> -                           gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> -  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
> +  rtx tmp;
> +  rtx_insn *jmp;
> +  if (l2)
> +    {
> +      rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
> +                               gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
> +      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
>                                   gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
> -  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> -  add_reg_br_prob_note (jmp, profile_probability::very_unlikely ());
> +      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> +      add_reg_br_prob_note (jmp, profile_probability::very_unlikely ());
> +    }
>    rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
>                             gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
>    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
> @@ -2914,8 +2919,11 @@ ix86_expand_fp_spaceship (rtx dest, rtx
>    emit_label (l1);
>    emit_move_insn (dest, const1_rtx);
>    emit_jump (lend);
> -  emit_label (l2);
> -  emit_move_insn (dest, const2_rtx);
> +  if (l2)
> +    {
> +      emit_label (l2);
> +      emit_move_insn (dest, const2_rtx);
> +    }
>    emit_label (lend);
>  }
>
> --- gcc/tree-ssa-math-opts.c.jj 2022-01-15 09:51:25.402468370 +0100
> +++ gcc/tree-ssa-math-opts.c 2022-01-15 10:35:52.366533951 +0100
> @@ -4694,7 +4694,6 @@ optimize_spaceship (gimple *stmt)
>    tree arg1 = gimple_cond_lhs (stmt);
>    tree arg2 = gimple_cond_rhs (stmt);
>    if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
> -      || !HONOR_NANS (TREE_TYPE (arg1))
>        || optab_handler (spaceship_optab,
>                          TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
>        || operand_equal_p (arg1, arg2, 0))
> @@ -4732,56 +4731,67 @@ optimize_spaceship (gimple *stmt)
>        return;
>      }
>
> -  /* With NaNs, </<=/>/>= are false, so we need to look for the
> -     third comparison on the false edge from whatever non-equality
> -     comparison the second comparison is.  */
> -  int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0;
> -  bb2 = EDGE_SUCC (bb1, i)->dest;
> -  g = last_stmt (bb2);
> -  if (g == NULL
> -      || gimple_code (g) != GIMPLE_COND
> -      || !single_pred_p (bb2)
> -      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
> -          ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
> -          : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
> -             || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
> -      || !cond_only_block_p (bb2)
> -      || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
> -    return;
> -
> -  enum tree_code ccode2
> -    = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
> -  switch (gimple_cond_code (g))
> +  for (int i = 0; i < 2; ++i)
>      {
> -    case LT_EXPR:
> -    case LE_EXPR:
> +      /* With NaNs, </<=/>/>= are false, so we need to look for the
> +         third comparison on the false edge from whatever non-equality
> +         comparison the second comparison is.  */
> +      if (HONOR_NANS (TREE_TYPE (arg1))
> +          && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
> +        continue;
> +
> +      bb2 = EDGE_SUCC (bb1, i)->dest;
> +      g = last_stmt (bb2);
> +      if (g == NULL
> +          || gimple_code (g) != GIMPLE_COND
> +          || !single_pred_p (bb2)
> +          || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
> +              ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
> +              : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
> +                 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
> +          || !cond_only_block_p (bb2)
> +          || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
> +        continue;
> +
> +      enum tree_code ccode2
> +        = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
> +      switch (gimple_cond_code (g))
> +        {
> +        case LT_EXPR:
> +        case LE_EXPR:
> +          break;
> +        case GT_EXPR:
> +        case GE_EXPR:
> +          ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
> +          break;
> +        default:
> +          continue;
> +        }
> +      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
> +        return;
> +
> +      if ((ccode == LT_EXPR)
> +          ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
> +        {
> +          em1 = EDGE_SUCC (bb1, 1 - i);
> +          e1 = EDGE_SUCC (bb2, 0);
> +          e2 = EDGE_SUCC (bb2, 1);
> +          if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
> +            std::swap (e1, e2);
> +        }
> +      else
> +        {
> +          e1 = EDGE_SUCC (bb1, 1 - i);
> +          em1 = EDGE_SUCC (bb2, 0);
> +          e2 = EDGE_SUCC (bb2, 1);
> +          if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
> +            std::swap (em1, e2);
> +        }
>        break;
> -    case GT_EXPR:
> -    case GE_EXPR:
> -      ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
> -      break;
> -    default:
> -      return;
>      }
> -  if (ccode == ccode2)
> -    return;
>
> -  if (ccode == LT_EXPR)
> -    {
> -      em1 = EDGE_SUCC (bb1, 1 - i);
> -      e1 = EDGE_SUCC (bb2, 0);
> -      e2 = EDGE_SUCC (bb2, 1);
> -      if ((e1->flags & EDGE_TRUE_VALUE) == 0)
> -        std::swap (e1, e2);
> -    }
> -  else
> -    {
> -      e1 = EDGE_SUCC (bb1, 1 - i);
> -      em1 = EDGE_SUCC (bb2, 0);
> -      e2 = EDGE_SUCC (bb2, 1);
> -      if ((em1->flags & EDGE_TRUE_VALUE) == 0)
> -        std::swap (em1, e2);
> -    }
> +  if (em1 == NULL)
> +    return;
>
>    g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
>    tree lhs = make_ssa_name (integer_type_node);
> @@ -4796,14 +4806,19 @@ optimize_spaceship (gimple *stmt)
>
>    g = last_stmt (bb1);
>    cond = as_a <gcond *> (g);
> -  gimple_cond_set_code (cond, EQ_EXPR);
>    gimple_cond_set_lhs (cond, lhs);
>    if (em1->src == bb1)
> -    gimple_cond_set_rhs (cond, integer_minus_one_node);
> +    {
> +      gimple_cond_set_rhs (cond, integer_minus_one_node);
> +      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
> +                                  ? EQ_EXPR : NE_EXPR);
> +    }
>    else
>      {
>        gcc_assert (e1->src == bb1);
>        gimple_cond_set_rhs (cond, integer_one_node);
> +      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
> +                                  ? EQ_EXPR : NE_EXPR);
>      }
>    update_stmt (g);
>
>
>       Jakub
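P.S. For the tree-ssa-math-opts.c change above, the control-flow shape that
optimize_spaceship matches corresponds roughly to this sketch (the function
name and comments are illustrative only; the real input is GIMPLE, not C++):

  int
  spaceship_like (double x, double y)
  {
    if (x == y)   /* first comparison, the gcond passed in as "stmt" */
      return 0;
    if (x < y)    /* second comparison, ends bb1 */
      return -1;
    if (x > y)    /* third comparison, ends bb2; with HONOR_NANS it must be
                     reachable only through bb1's false edge, with
                     !HONOR_NANS it may hang off either edge, hence the new
                     loop over both successors of bb1 */
      return 1;
    return 2;     /* unordered, i.e. at least one NaN operand */
  }

Once matched, the three comparisons are rewritten into a single
IFN_SPACESHIP call plus integer tests of its -1/0/1/2 result.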