We have unordered FP comparisons implemented as RTL insns that produce multiple machine instructions. Such RTL insns are hard to match with a processor pipeline description and additionally there is a redundant SNEZ instruction produced on the result of these comparisons even though the FLT.fmt and FLE.fmt machine instructions already produce either 0 or 1, e.g.:
long flt (double x, double y) { return __builtin_isless (x, y); } with `-O2 -fno-finite-math-only -fno-signaling-nans' gets compiled to: .globl flt .type flt, @function flt: frflags a5 flt.d a0,fa0,fa1 fsflags a5 snez a0,a0 ret .size flt, .-flt because the middle end can't see through the UNSPEC operation unordered FP comparisons have been defined in terms of. These instructions are only produced via an expander already, so change the expander to emit individual RTL insns for each machine instruction in the ultimate sequence produced rather than deferring to a single RTL insn producing the whole sequence at once. gcc/ * config/riscv/riscv.md (UNSPECV_FSNVSNAN): New constant. (QUIET_PATTERN): New int attribute. (f<quiet_pattern>_quiet<ANYF:mode><X:mode>4): Emit the intended RTL insns entirely within the preparation statements. (*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_default) (*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_snan): Remove insns. (*riscv_fsnvsnan<mode>2): New insn. gcc/testsuite/ * gcc.target/riscv/fle-ieee.c: New test. * gcc.target/riscv/fle-snan.c: New test. * gcc.target/riscv/fle.c: New test. * gcc.target/riscv/flef-ieee.c: New test. * gcc.target/riscv/flef-snan.c: New test. * gcc.target/riscv/flef.c: New test. * gcc.target/riscv/flt-ieee.c: New test. * gcc.target/riscv/flt-snan.c: New test. * gcc.target/riscv/flt.c: New test. * gcc.target/riscv/fltf-ieee.c: New test. * gcc.target/riscv/fltf-snan.c: New test. * gcc.target/riscv/fltf.c: New test. --- Hi, I think it is a step in the right direction, however ultimately I think we ought to actually tell GCC about the IEEE exception flags, so that the compiler can track data dependencies and we do not have to resort to UNSPECs which the compiler cannot see through. E.g. 
for a piece of code like: long fltlt (double x, double y, double z) { return __builtin_isless (x, y) + __builtin_isless (x, z); } (using an addition here for clarity because for a logical operation even more horror is produced) we get: .globl fltlt .type fltlt, @function fltlt: frflags a5 # 8 [c=4 l=4] riscv_frflags flt.d a0,fa0,fa1 # 9 [c=4 l=4] *cstoredfdi4 fsflags a5 # 10 [c=0 l=4] riscv_fsflags frflags a4 # 16 [c=4 l=4] riscv_frflags flt.d a5,fa0,fa2 # 17 [c=4 l=4] *cstoredfdi4 fsflags a4 # 18 [c=0 l=4] riscv_fsflags addw a0,a0,a5 # 30 [c=8 l=4] *addsi3_extended/0 ret # 40 [c=0 l=4] simple_return .size fltlt, .-fltlt where the middle FSFLAGS/FRFLAGS pair makes no sense of course and is a waste of both space and cycles. I'm still running some benchmarks to see if the use of UNSPEC_VOLATILEs makes any noticeable performance difference, but I suspect it does not as the compiler could not do much about the original multiple-instruction single RTL insns anyway. No regressions with the GCC (with and w/o `-fsignaling-nans') and glibc testsuites (as per commit 1fcbfb00fc67 ("RISC-V: Fix -fsignaling-nans for glibc testsuite.")). OK to apply? 
Maciej --- gcc/config/riscv/riscv.md | 67 +++++++++++++++-------------- gcc/testsuite/gcc.target/riscv/fle-ieee.c | 12 +++++ gcc/testsuite/gcc.target/riscv/fle-snan.c | 12 +++++ gcc/testsuite/gcc.target/riscv/fle.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flef-ieee.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flef-snan.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flef.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flt-ieee.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flt-snan.c | 12 +++++ gcc/testsuite/gcc.target/riscv/flt.c | 12 +++++ gcc/testsuite/gcc.target/riscv/fltf-ieee.c | 12 +++++ gcc/testsuite/gcc.target/riscv/fltf-snan.c | 12 +++++ gcc/testsuite/gcc.target/riscv/fltf.c | 12 +++++ 13 files changed, 179 insertions(+), 32 deletions(-) gcc-riscv-fcmp-split.diff Index: gcc/gcc/config/riscv/riscv.md =================================================================== --- gcc.orig/gcc/config/riscv/riscv.md +++ gcc/gcc/config/riscv/riscv.md @@ -57,6 +57,7 @@ ;; Floating-point unspecs. UNSPECV_FRFLAGS UNSPECV_FSFLAGS + UNSPECV_FSNVSNAN ;; Interrupt handler instructions. UNSPECV_MRET @@ -360,6 +361,7 @@ ;; Iterator and attributes for quiet comparisons. (define_int_iterator QUIET_COMPARISON [UNSPEC_FLT_QUIET UNSPEC_FLE_QUIET]) (define_int_attr quiet_pattern [(UNSPEC_FLT_QUIET "lt") (UNSPEC_FLE_QUIET "le")]) +(define_int_attr QUIET_PATTERN [(UNSPEC_FLT_QUIET "LT") (UNSPEC_FLE_QUIET "LE")]) ;; This code iterator allows signed and unsigned widening multiplications ;; to use the same template. 
@@ -2326,39 +2328,31 @@ (set_attr "mode" "<UNITMODE>")]) (define_expand "f<quiet_pattern>_quiet<ANYF:mode><X:mode>4" - [(parallel [(set (match_operand:X 0 "register_operand") - (unspec:X - [(match_operand:ANYF 1 "register_operand") - (match_operand:ANYF 2 "register_operand")] - QUIET_COMPARISON)) - (clobber (match_scratch:X 3))])] - "TARGET_HARD_FLOAT") - -(define_insn "*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_default" - [(set (match_operand:X 0 "register_operand" "=r") - (unspec:X - [(match_operand:ANYF 1 "register_operand" " f") - (match_operand:ANYF 2 "register_operand" " f")] - QUIET_COMPARISON)) - (clobber (match_scratch:X 3 "=&r"))] - "TARGET_HARD_FLOAT && ! HONOR_SNANS (<ANYF:MODE>mode)" - "frflags\t%3\n\tf<quiet_pattern>.<fmt>\t%0,%1,%2\n\tfsflags\t%3" - [(set_attr "type" "fcmp") - (set_attr "mode" "<UNITMODE>") - (set (attr "length") (const_int 12))]) + [(set (match_operand:X 0 "register_operand") + (unspec:X [(match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand")] + QUIET_COMPARISON))] + "TARGET_HARD_FLOAT" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx tmp = gen_reg_rtx (SImode); + rtx cmp = gen_rtx_<QUIET_PATTERN> (<X:MODE>mode, op1, op2); + rtx frflags = gen_rtx_UNSPEC_VOLATILE (SImode, gen_rtvec (1, const0_rtx), + UNSPECV_FRFLAGS); + rtx fsflags = gen_rtx_UNSPEC_VOLATILE (SImode, gen_rtvec (1, tmp), + UNSPECV_FSFLAGS); -(define_insn "*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_snan" - [(set (match_operand:X 0 "register_operand" "=r") - (unspec:X - [(match_operand:ANYF 1 "register_operand" " f") - (match_operand:ANYF 2 "register_operand" " f")] - QUIET_COMPARISON)) - (clobber (match_scratch:X 3 "=&r"))] - "TARGET_HARD_FLOAT && HONOR_SNANS (<ANYF:MODE>mode)" - "frflags\t%3\n\tf<quiet_pattern>.<fmt>\t%0,%1,%2\n\tfsflags\t%3\n\tfeq.<fmt>\tzero,%1,%2" - [(set_attr "type" "fcmp") - (set_attr "mode" "<UNITMODE>") - (set (attr "length") (const_int 16))]) + emit_insn (gen_rtx_SET (tmp, 
frflags)); + emit_insn (gen_rtx_SET (op0, cmp)); + emit_insn (fsflags); + if (HONOR_SNANS (<ANYF:MODE>mode)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (<ANYF:MODE>mode, + gen_rtvec (2, op1, op2), + UNSPECV_FSNVSNAN)); + DONE; +}) (define_insn "*seq_zero_<X:mode><GPR:mode>" [(set (match_operand:GPR 0 "register_operand" "=r") @@ -2766,6 +2760,15 @@ "TARGET_HARD_FLOAT" "fsflags\t%0") +(define_insn "*riscv_fsnvsnan<mode>2" + [(unspec_volatile [(match_operand:ANYF 0 "register_operand") + (match_operand:ANYF 1 "register_operand")] + UNSPECV_FSNVSNAN)] + "TARGET_HARD_FLOAT" + "feq.<fmt>\tzero,%0,%1" + [(set_attr "type" "fcmp") + (set_attr "mode" "<UNITMODE>")]) + (define_insn "riscv_mret" [(return) (unspec_volatile [(const_int 0)] UNSPECV_MRET)] Index: gcc/gcc/testsuite/gcc.target/riscv/fle-ieee.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fle-ieee.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */ + +long +fle (double x, double y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tfle\\.d\t\[^\n\]*\n\tfsflags\t\\1\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/fle-snan.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fle-snan.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */ + +long +fle (double x, double y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tfle\\.d\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.d\tzero,\\2,\\3\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/fle.c 
=================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fle.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-ffinite-math-only" } */ + +long +fle (double x, double y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tf(?:gt|le)\\.d\t\[^\n\]*\n" } } */ +/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flef-ieee.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flef-ieee.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */ + +long +flef (float x, float y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tfle\\.s\t\[^\n\]*\n\tfsflags\t\\1\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flef-snan.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flef-snan.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */ + +long +flef (float x, float y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tfle\\.s\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.s\tzero,\\2,\\3\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flef.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flef.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-ffinite-math-only" } */ + +long +flef (float 
x, float y) +{ + return __builtin_islessequal (x, y); +} + +/* { dg-final { scan-assembler "\tf(?:gt|le)\\.s\t\[^\n\]*\n" } } */ +/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flt-ieee.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flt-ieee.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */ + +long +flt (double x, double y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tflt\\.d\t\[^\n\]*\n\tfsflags\t\\1\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flt-snan.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flt-snan.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */ + +long +flt (double x, double y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tflt\\.d\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.d\tzero,\\2,\\3\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/flt.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/flt.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-ffinite-math-only" } */ + +long +flt (double x, double y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tf(?:ge|lt)\\.d\t\[^\n\]*\n" } } */ +/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/fltf-ieee.c 
=================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fltf-ieee.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */ + +long +fltf (float x, float y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tflt\\.s\t\[^\n\]*\n\tfsflags\t\\1\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/fltf-snan.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fltf-snan.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */ + +long +fltf (float x, float y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tfrflags\t(\[^\n\]*)\n\tflt\\.s\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.s\tzero,\\2,\\3\n" } } */ +/* { dg-final { scan-assembler-not "snez" } } */ Index: gcc/gcc/testsuite/gcc.target/riscv/fltf.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/riscv/fltf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-options "-ffinite-math-only" } */ + +long +fltf (float x, float y) +{ + return __builtin_isless (x, y); +} + +/* { dg-final { scan-assembler "\tf(?:ge|lt)\\.s\t\[^\n\]*\n" } } */ +/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */