Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function:
extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 ====== =================== bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction will generate an interrupt if one of the arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. The following patch changes the PowerPC back end so that ifcvt.c will not change the if/then test and move into a conditional move if the comparison is one of the comparisons that do not raise an error with signalling NaNs and -Ofast is not used. If a normal comparison is used or -Ofast is used, GCC will continue to generate XSCMPGTDP and XXSEL. For the following code: double ordered_compare (double a, double b, double c, double d) { return __builtin_isgreater (a, b) ? c : d; } /* Verify normal > does generate xscmpgtdp. */ double normal_compare (double a, double b, double c, double d) { return a > b ? c : d; } with the following patch, GCC generates the following for power9, power10, and power11: ordered_compare: fcmpu 0,1,2 fmr 1,4 bnglr 0 fmr 1,3 blr normal_compare: xscmpgtdp 1,1,2 xxsel 1,4,3,1 blr I have built bootstrap compilers on big endian power9 systems and little endian power9/power10 systems and there were no regressions. Can I check this patch into the GCC trunk, and after a waiting period, can I check this into the active older branches? 2025-01-30 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/118541 * config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New macro. (REVERSE_COND_NO_ORDERED): Likewise. (rs6000_reverse_condition): Add argument. * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow ordered comparisons to be reversed for floating point cmoves. (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call. * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise. * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn. Adjust rs6000_reverse_condition call. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. --- gcc/config/rs6000/rs6000-protos.h | 6 ++- gcc/config/rs6000/rs6000.cc | 23 ++++++++--- gcc/config/rs6000/rs6000.h | 10 ++++- gcc/config/rs6000/rs6000.md | 24 +++++++----- gcc/testsuite/gcc.target/powerpc/pr118541.c | 43 +++++++++++++++++++++ 5 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr118541.c diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197..112332660d3 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -114,8 +114,12 @@ extern const char *rs6000_sibcall_template (rtx *, unsigned int); extern const char *rs6000_indirect_call_template (rtx *, unsigned int); extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int); extern const char *rs6000_pltseq_template (rtx *, int); + +#define REVERSE_COND_ORDERED_OK false +#define REVERSE_COND_NO_ORDERED true + extern enum rtx_code rs6000_reverse_condition (machine_mode, - enum rtx_code); + enum rtx_code, bool); extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx); extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx); extern void rs6000_emit_sCOND (machine_mode, rtx[]); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index f9f9a0b931d..eaf79435ec3 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -15360,15 +15360,25 @@ rs6000_print_patchable_function_entry (FILE *file, } enum rtx_code -rs6000_reverse_condition (machine_mode mode, enum rtx_code code) +rs6000_reverse_condition (machine_mode mode, + enum rtx_code code, + bool no_ordered) { /* Reversal of FP compares takes care -- an ordered compare - becomes an unordered compare and vice versa. */ + becomes an unordered compare and vice versa. + + However, this is not safe for ordered comparisons (i.e. for isgreater, + etc.) starting with the power9 because ifcvt.cc will want to create a fp + cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of + the arguments is a signalling NaN. */ + if (mode == CCFPmode && (!flag_finite_math_only || code == UNLT || code == UNLE || code == UNGT || code == UNGE || code == UNEQ || code == LTGT)) - return reverse_condition_maybe_unordered (code); + return (no_ordered + ? UNKNOWN + : reverse_condition_maybe_unordered (code)); else return reverse_condition (code); } @@ -15980,11 +15990,14 @@ rs6000_emit_sCOND (machine_mode mode, rtx operands[]) rtx not_result = gen_reg_rtx (CCEQmode); rtx not_op, rev_cond_rtx; machine_mode cc_mode; + enum rtx_code rev; cc_mode = GET_MODE (XEXP (condition_rtx, 0)); - rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code), - SImode, XEXP (condition_rtx, 0), const0_rtx); + rev = rs6000_reverse_condition (cc_mode, cond_code, + REVERSE_COND_ORDERED_OK); + rev_cond_rtx = gen_rtx_fmt_ee (rev, SImode, XEXP (condition_rtx, 0), + const0_rtx); not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); emit_insn (gen_rtx_SET (not_result, not_op)); condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index ec08c96d0f6..c595d7138bc 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1812,11 +1812,17 @@ extern scalar_int_mode rs6000_pmode; /* Can the condition code MODE be safely reversed? This is safe in all cases on this port, because at present it doesn't use the - trapping FP comparisons (fcmpo). */ + trapping FP comparisons (fcmpo). + + However, this is not safe for ordered comparisons (i.e. for isgreater, etc.) + starting with the power9 because ifcvt.cc will want to create a fp cmove, + and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of the + arguments is a signalling NaN. */ #define REVERSIBLE_CC_MODE(MODE) 1 /* Given a condition code and a mode, return the inverse condition. */ -#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) +#define REVERSE_CONDITION(CODE, MODE) \ + rs6000_reverse_condition (MODE, CODE, REVERSE_COND_NO_ORDERED) /* Target cpu costs. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 65da0c65330..5f7ff36617d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -13497,7 +13497,7 @@ (define_insn "@cceq_rev_compare_<mode>" ;; If we are comparing the result of two comparisons, this can be done ;; using creqv or crxor. -(define_insn_and_split "" +(define_insn_and_split "*reverse_branch_comparison" [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") (compare:CCEQ (match_operator 1 "branch_comparison_operator" [(match_operand 2 "cc_reg_operand" "y") @@ -13519,19 +13519,25 @@ (define_insn_and_split "" GET_MODE (operands[3])); if (! positive_1) - operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[2]), - GET_CODE (operands[1])), - SImode, - operands[2], const0_rtx); + { + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[2]), + GET_CODE (operands[1]), + REVERSE_COND_ORDERED_OK); + gcc_assert (rev != UNKNOWN); + operands[1] = gen_rtx_fmt_ee (rev, SImode, operands[2], const0_rtx); + } else if (GET_MODE (operands[1]) != SImode) operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, operands[2], const0_rtx); if (! positive_2) - operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[4]), - GET_CODE (operands[3])), - SImode, - operands[4], const0_rtx); + { + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[4]), + GET_CODE (operands[3]), + REVERSE_COND_ORDERED_OK); + gcc_assert (rev != UNKNOWN); + operands[3] = gen_rtx_fmt_ee (rev, SImode, operands[4], const0_rtx); + } else if (GET_MODE (operands[3]) != SImode) operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, operands[4], const0_rtx); diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c b/gcc/testsuite/gcc.target/powerpc/pr118541.c new file mode 100644 index 00000000000..6ef67a2cf76 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* PR target/118541 says that the ordered comparison functions like isgreater + should not optimize floating point conditional moves to use + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps + if one of the arguments is a signaling NaN. */ + +/* Verify isgreater does not generate xscmpgtdp. */ + +double +ordered_compare (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * bnglr 0 + * fmr 1,3 + * blr + */ + + return __builtin_isgreater (a, b) ? c : d; +} + +/* Verify normal > does generate xscmpgtdp. */ + +double +normal_compare (double a, double b, double c, double d) +{ + /* + * xscmpgtdp 1,1,2 + * xxsel 1,4,3,1 + * blr + */ + + return a > b ? c : d; +} + +/* { dg-final { scan-assembler-times {\mxscmpg[te]dp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 1 } } */ + -- 2.48.1 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: meiss...@linux.ibm.com