PowerPC: Add power10 xscmp{eq,gt,ge}qp support. This patch adds the conditional move support. In adding the conditional move support, the optimizers will be able to convert things like:
a = (b > c) ? b : c; into the instructions. This patch merges together the scalar SF/DF conditional move with the scalar KF/TF conditional move. It extends the optimization that was previously used for SFmode and DFmode to allow the comparison to be a different scalar floating point mode than the move. I.e. __float128 a, b, c; float x, y; /* ... */ a = (x == y) ? b : c; I did have to add an XXPERMDI if the comparison mode was SFmode or DFmode, and the move mode is KFmode or TFmode (the XSCMP{EQ,GT,GE}DP instructions explicitly set the bottom 64 bits of the vector register to 0). I have built compilers on a little endian power9 Linux system with all 4 patches applied. I did bootstrap builds and ran the testsuite, with no regressions. Previous versions of the patch was also tested on a little endian power8 Linux system. I would like to check this patch into the master branch for GCC 11. At this time, I do not anticipate needing to backport these changes to GCC 10.3. gcc/ 2020-08-26 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/predicates.md (fpmask_normal_or_invert_operator): New predicate. * config/rs6000/rs6000.c (have_compare_and_set_mask): Add IEEE 128-bit floating point types. * config/rs6000/rs6000.md (FSCALAR2): New iterator for floating point conditional moves. (mov<SFDF:mode><SFDF2:mode>cc_p9): Replace with mov<FSCALAR:mode><FSCALAR2:mode>. (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Replace with mov<FSCALAR:mode><FSCALAR2:mode>. (mov<FSCALAR:mode><FSCALAR2:mode>): Combine both mov<SFDF:mode><SFDF2:mode>cc_p9 and mov<SFDF:mode><SFDF2:mode>cc_invert_p9 patterns. Add ISA 3.1 support for IEEE 128-bit conditional moves. Always use an earlyclobber register for the mask. Use XXPERMDI to extend the mask if we have a 64-bit comparison and 128-bit move. register for the mask. (fpmask<mode>, xxsel<mode>): Add ISA 3.1 support for IEEE 128-bit conditional moves. Enable the generator functionality so mov<FSCALAR:mode><FSCALAR2:mode> can call it. Update constraints for 128-bit operations. gcc/testsuite/ 2020-08-26 Michael Meissner <meiss...@linux.ibm.com> * gcc.target/powerpc/float128-cmove.c: New test. * gcc.target/powerpc/float128-minmax-3.c: New test. --- gcc/config/rs6000/predicates.md | 5 + gcc/config/rs6000/rs6000.c | 4 + gcc/config/rs6000/rs6000.md | 154 ++++++++++-------- .../gcc.target/powerpc/float128-cmove.c | 93 +++++++++++ .../gcc.target/powerpc/float128-minmax-3.c | 15 ++ 5 files changed, 200 insertions(+), 71 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 2709e46f7e5..60b45601e9b 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1225,6 +1225,11 @@ (define_predicate "fpmask_comparison_operator" (define_predicate "invert_fpmask_comparison_operator" (match_code "ne,unlt,unle")) +;; Return 1 if OP is either a fpmask_comparison_operator or +;; invert_fpmask_comparsion_operator. +(define_predicate "fpmask_normal_or_invert_operator" + (match_code "eq,gt,ge,ne,unlt,unle")) + ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. (define_predicate "vecint_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 05eb141a2cd..403897926c5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -15177,6 +15177,10 @@ have_compare_and_set_mask (machine_mode mode) case DFmode: return TARGET_P9_MINMAX; + case KFmode: + case TFmode: + return FLOAT128_IEEE_MINMAX_P (mode); + default: break; } diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 006e60f09bc..147c635994c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -795,6 +795,15 @@ (define_mode_iterator FSCALAR [SF (KF "FLOAT128_IEEE_MINMAX_P (KFmode)") (TF "FLOAT128_IEEE_MINMAX_P (TFmode)")]) +;; Secondary iterator for scalar binary floating point operations. This is +;; used for the conditional moves when we have a compare and set mask +;; instruction. Using this instruction allows us to do a conditional move +;; where the comparison type might be different from the values being moved. +(define_mode_iterator FSCALAR2 [SF + DF + (KF "FLOAT128_IEEE_MINMAX_P (KFmode)") + (TF "FLOAT128_IEEE_MINMAX_P (TFmode)")]) + ;; Constraints to use for scalar FP operations (define_mode_attr Fm [(SF "wa") (DF "wa") @@ -5290,10 +5299,10 @@ (define_insn "*setnbcr_<un>signed_<GPR:mode>" ;; Floating point conditional move (define_expand "mov<mode>cc" - [(set (match_operand:SFDF 0 "gpc_reg_operand") - (if_then_else:SFDF (match_operand 1 "comparison_operator") - (match_operand:SFDF 2 "gpc_reg_operand") - (match_operand:SFDF 3 "gpc_reg_operand")))] + [(set (match_operand:FSCALAR 0 "gpc_reg_operand") + (if_then_else:FSCALAR (match_operand 1 "comparison_operator") + (match_operand:FSCALAR 2 "gpc_reg_operand") + (match_operand:FSCALAR 3 "gpc_reg_operand")))] "TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT" { if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) @@ -5313,92 +5322,95 @@ (define_insn "*fsel<SFDF:mode><SFDF2:mode>4" "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) -(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=&<SFDF:Fv>,<SFDF:Fv>") - (if_then_else:SFDF - (match_operator:CCFP 1 "fpmask_comparison_operator" - [(match_operand:SFDF2 2 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>") - (match_operand:SFDF2 3 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>")]) - (match_operand:SFDF 4 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>") - (match_operand:SFDF 5 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>"))) - (clobber (match_scratch:V2DI 6 "=0,&wa"))] +;; Conditional moves using scalar floating point types if we have a compare and +;; set mask instruction (like xscmpeqdp). +;; +;; If we have a 64-bit comparison and a 128-bit mode, such as: +;; +;; double x, y; +;; __float128 a, b, c; +;; a = (x == y) ? b : c; +;; +;; We need to extend the comparison (XSCMPEQDP puts 0's in the bottom 64-bits). +(define_insn_and_split "*mov<FSCALAR:mode><FSCALAR2:mode>cc" + [(set (match_operand:FSCALAR 0 "vsx_register_operand" "=<FSCALAR:Fm>") + (if_then_else:FSCALAR + (match_operator:CCFP 1 "fpmask_normal_or_invert_operator" + [(match_operand:FSCALAR2 2 "vsx_register_operand" "<FSCALAR2:Fm>") + (match_operand:FSCALAR2 3 "vsx_register_operand" "<FSCALAR2:Fm>")]) + (match_operand:FSCALAR 4 "vsx_register_operand" "<FSCALAR:Fm>") + (match_operand:FSCALAR 5 "vsx_register_operand" "<FSCALAR:Fm>"))) + (clobber (match_scratch:V2DI 6 "=<FSCALAR2:Fm>"))] "TARGET_P9_MINMAX" "#" - "" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 1) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:SFDF (ne (match_dup 6) - (match_dup 8)) - (match_dup 4) - (match_dup 5)))] + "&& 1" + [(pc)] { - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); + rtx dest = operands[0]; + rtx cmp = operands[1]; + rtx cmp_op1 = operands[2]; + rtx cmp_op2 = operands[3]; + rtx move_t = operands[4]; + rtx move_f = operands[5]; + rtx mask_reg = operands[6]; + rtx mask_m1 = CONSTM1_RTX (V2DImode); + rtx mask_0 = CONST0_RTX (V2DImode); - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) + if (GET_CODE (mask_reg) == SCRATCH) + mask_reg = gen_reg_rtx (V2DImode); -;; Handle inverting the fpmask comparisons. -(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=&<SFDF:Fv>,<SFDF:Fv>") - (if_then_else:SFDF - (match_operator:CCFP 1 "invert_fpmask_comparison_operator" - [(match_operand:SFDF2 2 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>") - (match_operand:SFDF2 3 "vsx_register_operand" "<SFDF2:Fv>,<SFDF2:Fv>")]) - (match_operand:SFDF 4 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>") - (match_operand:SFDF 5 "vsx_register_operand" "<SFDF:Fv>,<SFDF:Fv>"))) - (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX" - "#" - "&& 1" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 9) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:SFDF (ne (match_dup 6) - (match_dup 8)) - (match_dup 5) - (match_dup 4)))] -{ - rtx op1 = operands[1]; - enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + /* Reverse the operands and test if the comparison operator is inverted. */ + if (invert_fpmask_comparison_operator (cmp, CCFPmode)) + { + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (cmp)); + cmp = gen_rtx_fmt_ee (cond, CCFPmode, cmp_op1, cmp_op2); + std::swap (move_t, move_f); + } - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); + /* Emit the compare and set mask instruction. */ + emit_insn (gen_fpmask<FSCALAR2:mode> (mask_reg, cmp, cmp_op1, cmp_op2, + mask_m1, mask_0)); - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); + /* If we have a 64-bit comparison, but an 128-bit move, we need to extend the + mask. Because we are using the splat builtin to extend the V2DImode, we + need to use element 1 on little endian systems. */ + if (!FLOAT128_IEEE_P (<FSCALAR2:MODE>mode) + && FLOAT128_IEEE_P (<FSCALAR:MODE>mode)) + { + rtx element = WORDS_BIG_ENDIAN ? const0_rtx : const1_rtx; + emit_insn (gen_vsx_xxspltd_v2di (mask_reg, mask_reg, element)); + } - operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); + /* Now emit the XXSEL insn. */ + emit_insn (gen_xxsel<FSCALAR:mode> (dest, mask_reg, mask_0, move_t, move_f)); + DONE; } [(set_attr "length" "8") (set_attr "type" "vecperm")]) -(define_insn "*fpmask<mode>" - [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") +(define_insn "fpmask<mode>" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=<Fm>") (if_then_else:V2DI (match_operator:CCFP 1 "fpmask_comparison_operator" - [(match_operand:SFDF 2 "vsx_register_operand" "<Fv>") - (match_operand:SFDF 3 "vsx_register_operand" "<Fv>")]) + [(match_operand:FSCALAR 2 "vsx_register_operand" "<Fm>") + (match_operand:FSCALAR 3 "vsx_register_operand" "<Fm>")]) (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] "TARGET_P9_MINMAX" - "xscmp%V1dp %x0,%x2,%x3" +{ + return (FLOAT128_IEEE_P (<MODE>mode) + ? "xscmp%V1qp %0,%2,%3" + : "xscmp%V1dp %x0,%x2,%x3"); +} [(set_attr "type" "fpcompare")]) -(define_insn "*xxsel<mode>" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=<Fv>") - (if_then_else:SFDF (ne (match_operand:V2DI 1 "vsx_register_operand" "wa") - (match_operand:V2DI 2 "zero_constant" "")) - (match_operand:SFDF 3 "vsx_register_operand" "<Fv>") - (match_operand:SFDF 4 "vsx_register_operand" "<Fv>")))] +(define_insn "xxsel<mode>" + [(set (match_operand:FSCALAR 0 "vsx_register_operand" "=<Fm>") + (if_then_else:FSCALAR + (ne (match_operand:V2DI 1 "vsx_register_operand" "<Fm>") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:FSCALAR 3 "vsx_register_operand" "<Fm>") + (match_operand:FSCALAR 4 "vsx_register_operand" "<Fm>")))] "TARGET_P9_MINMAX" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c new file mode 100644 index 00000000000..639d5a77087 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c @@ -0,0 +1,93 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ +/* { dg-final { scan-assembler {\mxscmpeq[dq]p\M} } } */ +/* { dg-final { scan-assembler {\mxxpermdi\M} } } */ +/* { dg-final { scan-assembler {\mxxsel\M} } } */ +/* { dg-final { scan-assembler-not {\mxscmpu[dq]p\M} } } */ +/* { dg-final { scan-assembler-not {\mfcmp[uo]\M} } } */ +/* { dg-final { scan-assembler-not {\mfsel\M} } } */ + +/* This series of tests tests whether you can do a conditional move where the + test is one floating point type, and the result is another floating point + type. + + If the comparison type is SF/DFmode, and the move type is IEEE 128-bit + floating point, we have to duplicate the mask in the lower 64-bits with + XXPERMDI because XSCMPEQDP clears the bottom 64-bits of the mask register. + + Going the other way (IEEE 128-bit comparsion, 64-bit move) is fine as the + mask word will be 128-bits. */ + +float +eq_f_d (float a, float b, double x, double y) +{ + return (x == y) ? a : b; +} + +double +eq_d_f (double a, double b, float x, float y) +{ + return (x == y) ? a : b; +} + +float +eq_f_f128 (float a, float b, __float128 x, __float128 y) +{ + return (x == y) ? a : b; +} + +double +eq_d_f128 (double a, double b, __float128 x, __float128 y) +{ + return (x == y) ? a : b; +} + +__float128 +eq_f128_f (__float128 a, __float128 b, float x, float y) +{ + return (x == y) ? a : b; +} + +__float128 +eq_f128_d (__float128 a, __float128 b, double x, double y) +{ + return (x != y) ? a : b; +} + +float +ne_f_d (float a, float b, double x, double y) +{ + return (x != y) ? a : b; +} + +double +ne_d_f (double a, double b, float x, float y) +{ + return (x != y) ? a : b; +} + +float +ne_f_f128 (float a, float b, __float128 x, __float128 y) +{ + return (x != y) ? a : b; +} + +double +ne_d_f128 (double a, double b, __float128 x, __float128 y) +{ + return (x != y) ? a : b; +} + +__float128 +ne_f128_f (__float128 a, __float128 b, float x, float y) +{ + return (x != y) ? a : b; +} + +__float128 +ne_f128_d (__float128 a, __float128 b, double x, double y) +{ + return (x != y) ? a : b; +} diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c new file mode 100644 index 00000000000..6f7627c0f2a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c @@ -0,0 +1,15 @@ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#ifndef TYPE +#define TYPE _Float128 +#endif + +/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a + call. */ +TYPE f128_min (TYPE a, TYPE b) { return (a < b) ? a : b; } +TYPE f128_max (TYPE a, TYPE b) { return (b > a) ? b : a; } + +/* { dg-final { scan-assembler {\mxsmaxcqp\M} } } */ +/* { dg-final { scan-assembler {\mxsmincqp\M} } } */ -- 2.22.0 -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797