On April 12, 2017 6:33:19 PM GMT+02:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >As mentioned in the PR, for LU benchmark we generate worse code with >-Ofast >compared to -O3, because in the former we don't use a conditional move. > >The problem is during emit_conditional_move, while in both cases >swap_commutative_operands_p (op2, op3) tells us it might be better to >swap >them, the difference between -Ofast and -O3 is that >reversed_comparison_code_parts >returns in -Ofast case LE, but in -O3 UNKNOWN. For -O3 this means we >don't >swap o2 and o3, emit a GT and successfully emit a conditional move. >For -Ofast, we swap the arguments and attempt to emit a LE, but the >predicate on the cbranchdf4 expander fails in that case, because "LE" >is not >considered a trivial comparison operator, is more expensive, thus the >only >attempt we try fails and we don't emit a cmov at all. > >The following patch will do the same thing as previously, but if we >were to >return NULL_RTX, it will try to swap op2/op3 (either back, if we >swapped >them first, effectively returning to the original comparison, or if we >haven't swapped them first, trying to reverse the comparison) and see >if >that leads to a usable sequence. > >The patch caused a FAIL in the pr70465-2.c testcase, where previously >in ce1 >we weren't able to emit any cmov and now are, which tiny bit changes >the >IL that gets through regstack and thus the testcase is not testing >anymore >what it wanted - the %st(1) and %st comparison arguments are swapped >and >the fcmov operands too and thus a fxchg is seen as needed (before >regstack >it is impossible to find out which ordering is more beneficial >actually). >We could teach regstack to attempt to reverse the comparisons and swap >corresponding fcmov arguments, but that does look like a GCC8 material >to >me. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. >2017-04-12 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/79390 > * optabs.c (emit_conditional_move): If the preferred op2/op3 operand > order does not result in usable sequence, retry with reversed operand > order. > > * gcc.target/i386/pr70465-2.c: Xfail the scan-assembler-not test. > >--- gcc/optabs.c.jj 2017-02-02 08:44:12.000000000 +0100 >+++ gcc/optabs.c 2017-04-12 14:30:14.905433771 +0200 >@@ -4258,12 +4258,15 @@ emit_conditional_move (rtx target, enum > if (cmode == VOIDmode) > cmode = GET_MODE (op0); > >+ enum rtx_code orig_code = code; >+ bool swapped = false; > if (swap_commutative_operands_p (op2, op3) > && ((reversed = reversed_comparison_code_parts (code, op0, op1, NULL)) > != UNKNOWN)) > { > std::swap (op2, op3); > code = reversed; >+ swapped = true; > } > > if (mode == VOIDmode) >@@ -4272,45 +4275,62 @@ emit_conditional_move (rtx target, enum > icode = direct_optab_handler (movcc_optab, mode); > > if (icode == CODE_FOR_nothing) >- return 0; >+ return NULL_RTX; > > if (!target) > target = gen_reg_rtx (mode); > >- code = unsignedp ? unsigned_condition (code) : code; >- comparison = simplify_gen_relational (code, VOIDmode, cmode, op0, >op1); >- >- /* We can get const0_rtx or const_true_rtx in some circumstances. >Just >- return NULL and let the caller figure out how best to deal with >this >- situation. */ >- if (!COMPARISON_P (comparison)) >- return NULL_RTX; >- >- saved_pending_stack_adjust save; >- save_pending_stack_adjust (&save); >- last = get_last_insn (); >- do_pending_stack_adjust (); >- prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1), >- GET_CODE (comparison), NULL_RTX, unsignedp, OPTAB_WIDEN, >- &comparison, &cmode); >- if (comparison) >+ for (int pass = 0; ; pass++) > { >- struct expand_operand ops[4]; >+ code = unsignedp ? unsigned_condition (code) : code; >+ comparison = simplify_gen_relational (code, VOIDmode, cmode, >op0, op1); > >- create_output_operand (&ops[0], target, mode); >- create_fixed_operand (&ops[1], comparison); >- create_input_operand (&ops[2], op2, mode); >- create_input_operand (&ops[3], op3, mode); >- if (maybe_expand_insn (icode, 4, ops)) >+ /* We can get const0_rtx or const_true_rtx in some >circumstances. Just >+ punt and let the caller figure out how best to deal with this >+ situation. */ >+ if (COMPARISON_P (comparison)) > { >- if (ops[0].value != target) >- convert_move (target, ops[0].value, false); >- return target; >+ saved_pending_stack_adjust save; >+ save_pending_stack_adjust (&save); >+ last = get_last_insn (); >+ do_pending_stack_adjust (); >+ prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1), >+ GET_CODE (comparison), NULL_RTX, unsignedp, >+ OPTAB_WIDEN, &comparison, &cmode); >+ if (comparison) >+ { >+ struct expand_operand ops[4]; >+ >+ create_output_operand (&ops[0], target, mode); >+ create_fixed_operand (&ops[1], comparison); >+ create_input_operand (&ops[2], op2, mode); >+ create_input_operand (&ops[3], op3, mode); >+ if (maybe_expand_insn (icode, 4, ops)) >+ { >+ if (ops[0].value != target) >+ convert_move (target, ops[0].value, false); >+ return target; >+ } >+ } >+ delete_insns_since (last); >+ restore_pending_stack_adjust (&save); > } >+ >+ if (pass == 1) >+ return NULL_RTX; >+ >+ /* If the preferred op2/op3 order is not usable, retry with >other >+ operand order, perhaps it will expand successfully. */ >+ if (swapped) >+ code = orig_code; >+ else if ((reversed = reversed_comparison_code_parts (orig_code, >op0, op1, >+ NULL)) >+ != UNKNOWN) >+ code = reversed; >+ else >+ return NULL_RTX; >+ std::swap (op2, op3); > } >- delete_insns_since (last); >- restore_pending_stack_adjust (&save); >- return NULL_RTX; > } > > >--- gcc/testsuite/gcc.target/i386/pr70465-2.c.jj 2017-02-22 >18:15:13.000000000 +0100 >+++ gcc/testsuite/gcc.target/i386/pr70465-2.c 2017-04-12 >18:07:00.784761399 +0200 >@@ -1,7 +1,7 @@ > /* PR target/70465 */ > /* { dg-do compile } */ > /* { dg-options "-Ofast -mfpmath=387 -fomit-frame-pointer" } */ >-/* { dg-final { scan-assembler-not "fxch\t%st.1" } } */ >+/* { dg-final { scan-assembler-not "fxch\t%st.1" { xfail *-*-* } } } >*/ > > extern float d[1024]; > > > Jakub