This patch adds support for the IEEE 128-bit floating point minimum, maximum, and compare instructions generating a mask. These instructions were added in ISA 3.1 (i.e. power10).
Compared to the last time I submitted the patches, I changed from using -mcpu=future to -mcpu=power10. Along with the previous patch, I have done bootstrap compilers with/without the patches and there were no regressions. I did this on a little endian power9 system and a big endian power8 system. The power8 system had support for both 32/64-bit. Can I check these patches into the master branch? gcc/ 2020-06-30 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.c (emit_fp_min_max_insn): Update comment. (emit_fp_cmove_with_mask_xxsel): Update comment. (rs6000_emit_cmove): Add support for IEEE 128-bit min, max, and comparisons on ISA 3.1. (rs6000_emit_minmax): Add support for IEEE 128-bit min/max on ISA 3.1. * config/rs6000/rs6000.md (s<minmax><mode>3, IEEE128 iterator): New insns for IEEE 128-bit min/max. (mov<mode>cc, IEEE128 iterator): New insns for IEEE 128-bit conditional move. (mov<mode>cc_hardware, IEEE128 iterator): New insns for IEEE 128-bit conditional move. (mov<mode>cc_invert_hardware, IEEE128 iterator): New insns for IEEE 128-bit conditional move. (fpmask<mode>, IEEE128 iterator): New insns for IEEE 128-bit conditional move. (xxsel<mode>, IEEE128 iterator): New insns for IEEE 128-bit conditional move. gcc/testsuite/ 2020-06-30 Michael Meissner <meiss...@linux.ibm.com> * gcc.target/powerpc/float128-minmax-2.c: New test. --- gcc/config/rs6000/rs6000.c | 23 +++- gcc/config/rs6000/rs6000.md | 121 +++++++++++++++++++++ .../gcc.target/powerpc/float128-minmax-2.c | 70 ++++++++++++ 3 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-2.c diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 754431f..1c8d7c3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -14965,7 +14965,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, } /* Min/max subcase to emit an appropriate instruction for SF/DF scalars on ISA - 3.0. + 3.0 and for IEEE 128-bit scalars on ISA 3.1. 
Move TRUE_COND to DEST if OP of the operands of the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the hardware has @@ -15009,7 +15009,8 @@ emit_fp_min_max_insn (rtx dest, rtx op, rtx true_cond, rtx false_cond) } /* Conditional move subcase to emit a floating point compare setting a mask - instruction and a XXSEL select instruction for SF/DF scalars on ISA 3.0. + instruction and a XXSEL select instruction for SF/DF scalars on ISA 3.0 and + for IEEE 128-bit scalars on ISA 3.1. Move TRUE_COND to DEST if OP of the operands of the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the hardware has @@ -15105,6 +15106,21 @@ rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) return 1; } + /* See if we can use the ISA 3.1 min/max/compare instructions for IEEE + 128-bit floating point. At present, don't worry about doing conditional + moves with different types for the comparison and movement (unlike SF/DF, + where you can do a conditional test between double and use float as the + if/then parts). */ + if (TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (compare_mode) + && compare_mode == result_mode) + { + if (emit_fp_min_max_insn (dest, op, true_cond, false_cond)) + return 1; + + if (emit_fp_cmove_with_mask_xxsel (dest, op, true_cond, false_cond)) + return 1; + } + /* Don't allow using floating point comparisons for integer results for now. */ if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode)) @@ -15328,7 +15344,8 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) /* VSX/altivec have direct min/max insns. 
*/ if ((code == SMAX || code == SMIN) && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) - || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode)))) + || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode)) + || (TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (mode)))) { emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1))); return; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 86c8c02..0964891 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -14646,6 +14646,127 @@ (define_insn "*cmp<mode>_hw" "xscmpuqp %0,%1,%2" [(set_attr "type" "veccmp") (set_attr "size" "128")]) + +;; IEEE 128-bit min/max +(define_insn "s<minmax><mode>3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (fp_minmax:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (<MODE>mode)" + "xs<minmax>cqp %0,%1,%2" + [(set_attr "type" "fp") + (set_attr "size" "128")]) + +;; IEEE 128-bit conditional move. At present, don't worry about doing +;; conditional moves with different types for the comparison and movement +;; (unlike SF/DF, where you can do a conditional test between double and use +;; float as the if/then parts). 
+(define_expand "mov<mode>cc" + [(set (match_operand:IEEE128 0 "gpc_reg_operand") + (if_then_else:IEEE128 (match_operand 1 "comparison_operator") + (match_operand:IEEE128 2 "gpc_reg_operand") + (match_operand:IEEE128 3 "gpc_reg_operand")))] + "TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (<MODE>mode)" +{ + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*mov<mode>cc_hardware" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "vsx_register_operand" "wa,wa") + (match_operand:IEEE128 5 "vsx_register_operand" "wa,wa"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_FLOAT128_HW && TARGET_POWER10" + "#" + "" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 1) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 4) + (match_dup 5)))] +{ + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Handle inverting the fpmask comparisons. 
+(define_insn_and_split "*mov<mode>cc_invert_hardware" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "vsx_register_operand" "wa,wa") + (match_operand:IEEE128 5 "vsx_register_operand" "wa,wa"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_FLOAT128_HW && TARGET_POWER10" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +(define_insn "*fpmask<mode>" + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") + (if_then_else:V2DI + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "v")]) + (match_operand:V2DI 4 "all_ones_constant" "") + (match_operand:V2DI 5 "zero_constant" "")))] + "TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (<MODE>mode)" + "xscmp%V1qp %0,%2,%3" + [(set_attr "type" "fpcompare") + (set_attr "size" "128")]) + +(define_insn "*xxsel<mode>" + [(set (match_operand:IEEE128 0 "vsx_register_operand" "=wa") + (if_then_else:IEEE128 + (ne (match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:IEEE128 3 "vsx_register_operand" "wa") + 
(match_operand:IEEE128 4 "vsx_register_operand" "wa")))] + "TARGET_FLOAT128_HW && TARGET_POWER10 && FLOAT128_IEEE_P (<MODE>mode)" + "xxsel %x0,%x4,%x3,%x1" + [(set_attr "type" "vecmove") + (set_attr "size" "128")]) ;; Miscellaneous ISA 3.0 (power9) instructions diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-2.c b/gcc/testsuite/gcc.target/powerpc/float128-minmax-2.c new file mode 100644 index 0000000..e689678 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-2.c @@ -0,0 +1,70 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math" } */ +/* { dg-final { scan-assembler-not "xscmpuqp" } } */ +/* { dg-final { scan-assembler "xscmpeqqp" } } */ +/* { dg-final { scan-assembler "xscmpgtqp" } } */ +/* { dg-final { scan-assembler "xscmpgeqp" } } */ +/* { dg-final { scan-assembler "xsmaxcqp" } } */ +/* { dg-final { scan-assembler "xsmincqp" } } */ +/* { dg-final { scan-assembler "xxsel" } } */ + +__float128 +f128_max1 (__float128 a, __float128 b) +{ + return (a >= b) ? a : b; +} + +__float128 +f128_max2 (__float128 a, __float128 b) +{ + return (a > b) ? a : b; +} + +__float128 +f128_min1 (__float128 a, __float128 b) +{ + return (a < b) ? a : b; +} + +__float128 +f128_min2 (__float128 a, __float128 b) +{ + return (a <= b) ? a : b; +} + +__float128 +f128_cmp_eq (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a == b) ? c : d; +} + +__float128 +f128_cmp_ne (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a != b) ? c : d; +} + +__float128 +f128_cmp_gt (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a > b) ? c : d; +} + +__float128 +f128_cmp_ge (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a >= b) ? c : d; +} + +__float128 +f128_cmp_lt (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a < b) ? 
c : d; +} + +__float128 +f128_cmp_le (__float128 a, __float128 b, __float128 c, __float128 d) +{ + return (a <= b) ? c : d; +} -- 1.8.3.1