This adds ordered counterparts for most of the unordered compare patterns in rs6000. It does not handle the XL_COMPAT double-double compares yet (that is the pattern with 16 operands). It also does not handle the vector compare instructions; those only exist as unordered, for the equality comparisons, or as ordered, for the inequality comparisons.
The *cmpo machine instructions do exactly the same thing as the *cmpu instructions do, but they trigger an invalid operation exception (or just set the sticky flag for it) if any of the inputs is a NaN. This patch models *cmpo as a parallel of the comparison with an unspec UNSPEC_CMPO of the comparison inputs. This means an ordered compare will never be deleted. Multiple comparisons can still be combined (including with unordered compares). Questions: 1) Is this *correct*? 2) Is it *required*, or can we delete ordered compares in some cases? 2a) Like, if we test a<b and a>b, we only need one compare instruction, not the two that are generated right now. 2b) How can we model things so this happens automatically? Without having to write new passes ;-) Bootstrapped and regression tested on powerpc64-linux {-m32,-m64} (a Power7) so far. Segher 2019-08-08 Segher Boessenkool <seg...@kernel.crashing.org> PR target/58684 * config/rs6000/dfp.md (*cmp<mode>_internal1 for DDTD): Rename to ... (*cmp<mode>_cmpu for DDTD): ... this. (*cmp<mode>_cmpo for DDTD): New define_insn. * config/rs6000/rs6000.c (rs6000_generate_compare): Handle scalar floating point ordered compares, by generating a parallel with an unspec UNSPEC_CMPO. * config/rs6000/rs6000.md (unspec): Add UNSPEC_CMPO. (*cmp<mode>_fpr for SFDF): Rename to ... (*cmp<mode>_cmpu for SFDF): ... this. (*cmp<mode>_cmpo for SFDF): New define_insn. (*cmp<mode>_internal1 for IBM128): Rename to ... (*cmp<mode>_cmpu for IBM128): ... this. (*cmp<mode>_cmpo for IBM128): New define_insn. (*cmp<mode>_hw for IEEE128): Rename to ... (*cmp<mode>_cmpu for IEEE128): ... this. (*cmp<mode>_cmpo for IEEE128): New define_insn. gcc/testsuite/ * gcc.dg/torture/inf-compare-1.c: Remove powerpc xfail. * gcc.dg/torture/inf-compare-2.c: Ditto. * gcc.dg/torture/inf-compare-3.c: Ditto. * gcc.dg/torture/inf-compare-4.c: Ditto. * gcc.target/powerpc/dfp-dd.c: Expect 2 unordered and 4 ordered comparisons, instead of 6 unordered ones. 
* gcc.target/powerpc/dfp-td.c: Ditto. --- gcc/config/rs6000/dfp.md | 11 +++++++- gcc/config/rs6000/rs6000.c | 19 ++++++++++++-- gcc/config/rs6000/rs6000.md | 39 +++++++++++++++++++++++++--- gcc/testsuite/gcc.dg/torture/inf-compare-1.c | 2 -- gcc/testsuite/gcc.dg/torture/inf-compare-2.c | 2 -- gcc/testsuite/gcc.dg/torture/inf-compare-3.c | 2 -- gcc/testsuite/gcc.dg/torture/inf-compare-4.c | 2 -- gcc/testsuite/gcc.target/powerpc/dfp-dd.c | 3 ++- gcc/testsuite/gcc.target/powerpc/dfp-td.c | 3 ++- 9 files changed, 67 insertions(+), 16 deletions(-) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 659b3c9..55a8665 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -187,7 +187,7 @@ (define_insn "div<mode>3" "ddiv<q> %0,%1,%2" [(set_attr "type" "dfp")]) -(define_insn "*cmp<mode>_internal1" +(define_insn "*cmp<mode>_cmpu" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") (compare:CCFP (match_operand:DDTD 1 "gpc_reg_operand" "d") (match_operand:DDTD 2 "gpc_reg_operand" "d")))] @@ -195,6 +195,15 @@ (define_insn "*cmp<mode>_internal1" "dcmpu<q> %0,%1,%2" [(set_attr "type" "dfp")]) +(define_insn "*cmp<mode>_cmpo" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:DDTD 1 "gpc_reg_operand" "d") + (match_operand:DDTD 2 "gpc_reg_operand" "d"))) + (unspec [(match_dup 1) (match_dup 2)] UNSPEC_CMPO)] + "TARGET_DFP" + "dcmpo<q> %0,%1,%2" + [(set_attr "type" "dfp")]) + (define_insn "floatdidd2" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") (float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))] diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4080c82..c2299fe 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13878,8 +13878,23 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b)); } else - emit_insn (gen_rtx_SET (compare_result, - gen_rtx_COMPARE (comp_mode, op0, op1))); + { + rtx compare = 
gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, op0, op1)); + + /* If this FP compare should be an ordered compare, mark it. */ + if (SCALAR_FLOAT_MODE_P (mode) + && HONOR_NANS (mode) + && (code == LT || code == GT || code == LE || code == GE)) + { + rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (2, op0, op1), + UNSPEC_CMPO); + compare = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, compare, unspec)); + } + + emit_insn (compare); + } } /* Some kinds of FP comparisons need an OR operation; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 0ef3c2c..111b652 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -71,6 +71,7 @@ (define_c_enum "unspec" UNSPEC_FRIP UNSPEC_FRIZ UNSPEC_XSRDPI + UNSPEC_CMPO UNSPEC_LD_MPIC ; load_macho_picbase UNSPEC_RELD_MPIC ; re-load_macho_picbase UNSPEC_MPIC_CORRECT ; macho_correct_pic @@ -4763,7 +4764,7 @@ (define_insn "*rsqrt<mode>2" (set_attr "isa" "*,<Fisa>")]) ;; Floating point comparisons -(define_insn "*cmp<mode>_fpr" +(define_insn "*cmp<mode>_cmpu" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y") (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,wa") (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,wa")))] @@ -4774,6 +4775,18 @@ (define_insn "*cmp<mode>_fpr" [(set_attr "type" "fpcompare") (set_attr "isa" "*,<Fisa>")]) +(define_insn "*cmp<mode>_cmpo" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y") + (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,wa") + (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,wa"))) + (unspec [(match_dup 1) (match_dup 2)] UNSPEC_CMPO)] + "TARGET_HARD_FLOAT" + "@ + fcmpo %0,%1,%2 + xscmpodp %0,%x1,%x2" + [(set_attr "type" "fpcompare") + (set_attr "isa" "*,<Fisa>")]) + ;; Floating point conversions (define_expand "extendsfdf2" [(set (match_operand:DF 0 "gpc_reg_operand") @@ -11545,7 +11558,7 @@ (define_peephole2 }) ;; Only need to compare second words if first words equal -(define_insn "*cmp<mode>_internal1" 
+(define_insn "*cmp<mode>_cmpu" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") (compare:CCFP (match_operand:IBM128 1 "gpc_reg_operand" "d") (match_operand:IBM128 2 "gpc_reg_operand" "d")))] @@ -11555,6 +11568,17 @@ (define_insn "*cmp<mode>_internal1" [(set_attr "type" "fpcompare") (set_attr "length" "12")]) +(define_insn "*cmp<mode>_cmpo" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IBM128 1 "gpc_reg_operand" "d") + (match_operand:IBM128 2 "gpc_reg_operand" "d"))) + (unspec [(match_dup 1) (match_dup 2)] UNSPEC_CMPO)] + "!TARGET_XL_COMPAT && FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "fcmpo %0,%1,%2\;bne %0,$+8\;fcmpu %0,%L1,%L2" + [(set_attr "type" "fpcompare") + (set_attr "length" "12")]) + (define_insn_and_split "*cmp<IBM128:mode>_internal2" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") (compare:CCFP (match_operand:IBM128 1 "gpc_reg_operand" "d") @@ -14365,7 +14389,7 @@ (define_insn "trunc<mode>df2_odd" (set_attr "size" "128")]) ;; IEEE 128-bit comparisons -(define_insn "*cmp<mode>_hw" +(define_insn "*cmp<mode>_cmpu" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") (compare:CCFP (match_operand:IEEE128 1 "altivec_register_operand" "v") (match_operand:IEEE128 2 "altivec_register_operand" "v")))] @@ -14374,6 +14398,15 @@ (define_insn "*cmp<mode>_hw" [(set_attr "type" "veccmp") (set_attr "size" "128")]) +(define_insn "*cmp<mode>_cmpo" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v"))) + (unspec [(match_dup 1) (match_dup 2)] UNSPEC_CMPO)] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscmpoqp %0,%1,%2" + [(set_attr "type" "veccmp") + (set_attr "size" "128")]) (include "sync.md") diff --git a/gcc/testsuite/gcc.dg/torture/inf-compare-1.c b/gcc/testsuite/gcc.dg/torture/inf-compare-1.c index a4b44d6..4c8d218 100644 --- 
a/gcc/testsuite/gcc.dg/torture/inf-compare-1.c +++ b/gcc/testsuite/gcc.dg/torture/inf-compare-1.c @@ -1,5 +1,3 @@ -/* { dg-do run { xfail { powerpc*-*-* } } } */ -/* remove the xfail for powerpc when pr58684 is fixed */ /* { dg-add-options ieee } */ /* { dg-require-effective-target fenv_exceptions } */ diff --git a/gcc/testsuite/gcc.dg/torture/inf-compare-2.c b/gcc/testsuite/gcc.dg/torture/inf-compare-2.c index 8ee932c..e6d1eb2 100644 --- a/gcc/testsuite/gcc.dg/torture/inf-compare-2.c +++ b/gcc/testsuite/gcc.dg/torture/inf-compare-2.c @@ -1,5 +1,3 @@ -/* { dg-do run { xfail { powerpc*-*-* } } } */ -/* remove the xfail for powerpc when pr58684 is fixed */ /* { dg-add-options ieee } */ /* { dg-require-effective-target fenv_exceptions } */ diff --git a/gcc/testsuite/gcc.dg/torture/inf-compare-3.c b/gcc/testsuite/gcc.dg/torture/inf-compare-3.c index c8605ad..a7676d5 100644 --- a/gcc/testsuite/gcc.dg/torture/inf-compare-3.c +++ b/gcc/testsuite/gcc.dg/torture/inf-compare-3.c @@ -1,5 +1,3 @@ -/* { dg-do run { xfail { powerpc*-*-* } } } */ -/* remove the xfail for powerpc when pr58684 is fixed */ /* { dg-add-options ieee } */ /* { dg-require-effective-target fenv_exceptions } */ diff --git a/gcc/testsuite/gcc.dg/torture/inf-compare-4.c b/gcc/testsuite/gcc.dg/torture/inf-compare-4.c index 55a0dfc..b804a66 100644 --- a/gcc/testsuite/gcc.dg/torture/inf-compare-4.c +++ b/gcc/testsuite/gcc.dg/torture/inf-compare-4.c @@ -1,5 +1,3 @@ -/* { dg-do run { xfail { powerpc*-*-* } } } */ -/* remove the xfail for powerpc when pr58684 is fixed */ /* { dg-add-options ieee } */ /* { dg-require-effective-target fenv_exceptions } */ diff --git a/gcc/testsuite/gcc.target/powerpc/dfp-dd.c b/gcc/testsuite/gcc.target/powerpc/dfp-dd.c index 2c2a10c..1462bec 100644 --- a/gcc/testsuite/gcc.target/powerpc/dfp-dd.c +++ b/gcc/testsuite/gcc.target/powerpc/dfp-dd.c @@ -7,7 +7,8 @@ /* { dg-final { scan-assembler "ddiv" } } */ /* { dg-final { scan-assembler "dmul" } } */ /* { dg-final { scan-assembler 
"dsub" } } */ -/* { dg-final { scan-assembler-times "dcmpu" 6 } } */ +/* { dg-final { scan-assembler-times "dcmpu" 2 } } */ +/* { dg-final { scan-assembler-times "dcmpo" 4 } } */ /* { dg-final { scan-assembler-times "dctfix" 2 } } */ /* { dg-final { scan-assembler-times "drintn" 2 } } */ /* { dg-final { scan-assembler-times "dcffixq" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/dfp-td.c b/gcc/testsuite/gcc.target/powerpc/dfp-td.c index 1760804..2590772 100644 --- a/gcc/testsuite/gcc.target/powerpc/dfp-td.c +++ b/gcc/testsuite/gcc.target/powerpc/dfp-td.c @@ -7,7 +7,8 @@ /* { dg-final { scan-assembler "ddivq" } } */ /* { dg-final { scan-assembler "dmulq" } } */ /* { dg-final { scan-assembler "dsubq" } } */ -/* { dg-final { scan-assembler-times "dcmpuq" 6 } } */ +/* { dg-final { scan-assembler-times "dcmpuq" 2 } } */ +/* { dg-final { scan-assembler-times "dcmpoq" 4 } } */ /* { dg-final { scan-assembler-times "dctfixq" 2 } } */ /* { dg-final { scan-assembler-times "drintnq" 2 } } */ /* { dg-final { scan-assembler-times "dcffixq" 2 } } */ -- 1.8.3.1