We have unordered FP comparisons implemented as RTL insns that produce
multiple machine instructions. Such RTL insns are hard to match with a
processor pipeline description and additionally there is a redundant
SNEZ instruction produced on the result of these comparisons even though
the FLT.fmt and FLE.fmt machine instructions already produce either 0 or
1, e.g.:
long
flt (double x, double y)
{
return __builtin_isless (x, y);
}
with `-O2 -fno-finite-math-only -fno-signaling-nans' gets compiled to:
.globl flt
.type flt, @function
flt:
frflags a5
flt.d a0,fa0,fa1
fsflags a5
snez a0,a0
ret
.size flt, .-flt
because the middle end can't see through the UNSPEC operation unordered
FP comparisons have been defined in terms of.
These instructions are only produced via an expander already, so change
the expander to emit individual RTL insns for each machine instruction
in the ultimate ultimate sequence produced rather than deferring to a
single RTL insn producing the whole sequence at once.
gcc/
* config/riscv/riscv.md (UNSPECV_FSNVSNAN): New constant.
(QUIET_PATTERN): New int attribute.
(f<quiet_pattern>_quiet<ANYF:mode><X:mode>4): Emit the intended
RTL insns entirely within the preparation statements.
(*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_default)
(*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_snan): Remove
insns.
(*riscv_fsnvsnan<mode>2): New insn.
gcc/testsuite/
* gcc.target/riscv/fle-ieee.c: New test.
* gcc.target/riscv/fle-snan.c: New test.
* gcc.target/riscv/fle.c: New test.
* gcc.target/riscv/flef-ieee.c: New test.
* gcc.target/riscv/flef-snan.c: New test.
* gcc.target/riscv/flef.c: New test.
* gcc.target/riscv/flt-ieee.c: New test.
* gcc.target/riscv/flt-snan.c: New test.
* gcc.target/riscv/flt.c: New test.
* gcc.target/riscv/fltf-ieee.c: New test.
* gcc.target/riscv/fltf-snan.c: New test.
* gcc.target/riscv/fltf.c: New test.
---
Hi,
I think it is a step in the right direction, however ultimately I think
we ought to actually tell GCC about the IEEE exception flags, so that the
compiler can track data dependencies and we do not have to resort to
UNSPECs which the compiler cannot see through. E.g. for a piece of code
like:
long
fltlt (double x, double y, double z)
{
return __builtin_isless (x, y) + __builtin_isless (x, z);
}
(using an addition here for clarity because for a logical operation even
more horror is produced) we get:
.globl fltlt
.type fltlt, @function
fltlt:
frflags a5 # 8 [c=4 l=4] riscv_frflags
flt.d a0,fa0,fa1 # 9 [c=4 l=4] *cstoredfdi4
fsflags a5 # 10 [c=0 l=4] riscv_fsflags
frflags a4 # 16 [c=4 l=4] riscv_frflags
flt.d a5,fa0,fa2 # 17 [c=4 l=4] *cstoredfdi4
fsflags a4 # 18 [c=0 l=4] riscv_fsflags
addw a0,a0,a5 # 30 [c=8 l=4] *addsi3_extended/0
ret # 40 [c=0 l=4] simple_return
.size fltlt, .-fltlt
where the middle FSFLAGS/FRFLAGS pair makes no sense of course and is a
waste of both space and cycles.
I'm yet running some benchmarking to see if the use of UNSPEC_VOLATILEs
makes any noticeable performance difference, but I suspect it does not as
the compiler could not do much about the original multiple-instruction
single RTL insns anyway.
No regressions with the GCC (with and w/o `-fsignaling-nans') and glibc
testsuites (as per commit 1fcbfb00fc67 ("RISC-V: Fix -fsignaling-nans for
glibc testsuite.")). OK to apply?
Maciej
---
gcc/config/riscv/riscv.md | 67 +++++++++++++++--------------
gcc/testsuite/gcc.target/riscv/fle-ieee.c | 12 +++++
gcc/testsuite/gcc.target/riscv/fle-snan.c | 12 +++++
gcc/testsuite/gcc.target/riscv/fle.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flef-ieee.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flef-snan.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flef.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flt-ieee.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flt-snan.c | 12 +++++
gcc/testsuite/gcc.target/riscv/flt.c | 12 +++++
gcc/testsuite/gcc.target/riscv/fltf-ieee.c | 12 +++++
gcc/testsuite/gcc.target/riscv/fltf-snan.c | 12 +++++
gcc/testsuite/gcc.target/riscv/fltf.c | 12 +++++
13 files changed, 179 insertions(+), 32 deletions(-)
gcc-riscv-fcmp-split.diff
Index: gcc/gcc/config/riscv/riscv.md
===================================================================
--- gcc.orig/gcc/config/riscv/riscv.md
+++ gcc/gcc/config/riscv/riscv.md
@@ -57,6 +57,7 @@
;; Floating-point unspecs.
UNSPECV_FRFLAGS
UNSPECV_FSFLAGS
+ UNSPECV_FSNVSNAN
;; Interrupt handler instructions.
UNSPECV_MRET
@@ -360,6 +361,7 @@
;; Iterator and attributes for quiet comparisons.
(define_int_iterator QUIET_COMPARISON [UNSPEC_FLT_QUIET UNSPEC_FLE_QUIET])
(define_int_attr quiet_pattern [(UNSPEC_FLT_QUIET "lt") (UNSPEC_FLE_QUIET
"le")])
+(define_int_attr QUIET_PATTERN [(UNSPEC_FLT_QUIET "LT") (UNSPEC_FLE_QUIET
"LE")])
;; This code iterator allows signed and unsigned widening multiplications
;; to use the same template.
@@ -2326,39 +2328,31 @@
(set_attr "mode" "<UNITMODE>")])
(define_expand "f<quiet_pattern>_quiet<ANYF:mode><X:mode>4"
- [(parallel [(set (match_operand:X 0 "register_operand")
- (unspec:X
- [(match_operand:ANYF 1 "register_operand")
- (match_operand:ANYF 2 "register_operand")]
- QUIET_COMPARISON))
- (clobber (match_scratch:X 3))])]
- "TARGET_HARD_FLOAT")
-
-(define_insn "*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_default"
- [(set (match_operand:X 0 "register_operand" "=r")
- (unspec:X
- [(match_operand:ANYF 1 "register_operand" " f")
- (match_operand:ANYF 2 "register_operand" " f")]
- QUIET_COMPARISON))
- (clobber (match_scratch:X 3 "=&r"))]
- "TARGET_HARD_FLOAT && ! HONOR_SNANS (<ANYF:MODE>mode)"
- "frflags\t%3\n\tf<quiet_pattern>.<fmt>\t%0,%1,%2\n\tfsflags\t%3"
- [(set_attr "type" "fcmp")
- (set_attr "mode" "<UNITMODE>")
- (set (attr "length") (const_int 12))])
+ [(set (match_operand:X 0 "register_operand")
+ (unspec:X [(match_operand:ANYF 1 "register_operand")
+ (match_operand:ANYF 2 "register_operand")]
+ QUIET_COMPARISON))]
+ "TARGET_HARD_FLOAT"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx cmp = gen_rtx_<QUIET_PATTERN> (<X:MODE>mode, op1, op2);
+ rtx frflags = gen_rtx_UNSPEC_VOLATILE (SImode, gen_rtvec (1, const0_rtx),
+ UNSPECV_FRFLAGS);
+ rtx fsflags = gen_rtx_UNSPEC_VOLATILE (SImode, gen_rtvec (1, tmp),
+ UNSPECV_FSFLAGS);
-(define_insn "*f<quiet_pattern>_quiet<ANYF:mode><X:mode>4_snan"
- [(set (match_operand:X 0 "register_operand" "=r")
- (unspec:X
- [(match_operand:ANYF 1 "register_operand" " f")
- (match_operand:ANYF 2 "register_operand" " f")]
- QUIET_COMPARISON))
- (clobber (match_scratch:X 3 "=&r"))]
- "TARGET_HARD_FLOAT && HONOR_SNANS (<ANYF:MODE>mode)"
-
"frflags\t%3\n\tf<quiet_pattern>.<fmt>\t%0,%1,%2\n\tfsflags\t%3\n\tfeq.<fmt>\tzero,%1,%2"
- [(set_attr "type" "fcmp")
- (set_attr "mode" "<UNITMODE>")
- (set (attr "length") (const_int 16))])
+ emit_insn (gen_rtx_SET (tmp, frflags));
+ emit_insn (gen_rtx_SET (op0, cmp));
+ emit_insn (fsflags);
+ if (HONOR_SNANS (<ANYF:MODE>mode))
+ emit_insn (gen_rtx_UNSPEC_VOLATILE (<ANYF:MODE>mode,
+ gen_rtvec (2, op1, op2),
+ UNSPECV_FSNVSNAN));
+ DONE;
+})
(define_insn "*seq_zero_<X:mode><GPR:mode>"
[(set (match_operand:GPR 0 "register_operand" "=r")
@@ -2766,6 +2760,15 @@
"TARGET_HARD_FLOAT"
"fsflags\t%0")
+(define_insn "*riscv_fsnvsnan<mode>2"
+ [(unspec_volatile [(match_operand:ANYF 0 "register_operand")
+ (match_operand:ANYF 1 "register_operand")]
+ UNSPECV_FSNVSNAN)]
+ "TARGET_HARD_FLOAT"
+ "feq.<fmt>\tzero,%0,%1"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "<UNITMODE>")])
+
(define_insn "riscv_mret"
[(return)
(unspec_volatile [(const_int 0)] UNSPECV_MRET)]
Index: gcc/gcc/testsuite/gcc.target/riscv/fle-ieee.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fle-ieee.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */
+
+long
+fle (double x, double y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tfle\\.d\t\[^\n\]*\n\tfsflags\t\\1\n" } } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/fle-snan.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fle-snan.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */
+
+long
+fle (double x, double y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tfle\\.d\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.d\tzero,\\2,\\3\n"
} } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/fle.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fle.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-ffinite-math-only" } */
+
+long
+fle (double x, double y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler "\tf(?:gt|le)\\.d\t\[^\n\]*\n" } } */
+/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flef-ieee.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flef-ieee.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */
+
+long
+flef (float x, float y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tfle\\.s\t\[^\n\]*\n\tfsflags\t\\1\n" } } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flef-snan.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flef-snan.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */
+
+long
+flef (float x, float y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tfle\\.s\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.s\tzero,\\2,\\3\n"
} } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flef.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flef.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-ffinite-math-only" } */
+
+long
+flef (float x, float y)
+{
+ return __builtin_islessequal (x, y);
+}
+
+/* { dg-final { scan-assembler "\tf(?:gt|le)\\.s\t\[^\n\]*\n" } } */
+/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flt-ieee.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flt-ieee.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */
+
+long
+flt (double x, double y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tflt\\.d\t\[^\n\]*\n\tfsflags\t\\1\n" } } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flt-snan.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flt-snan.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */
+
+long
+flt (double x, double y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tflt\\.d\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.d\tzero,\\2,\\3\n"
} } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/flt.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/flt.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-ffinite-math-only" } */
+
+long
+flt (double x, double y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler "\tf(?:ge|lt)\\.d\t\[^\n\]*\n" } } */
+/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/fltf-ieee.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fltf-ieee.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fno-signaling-nans" } */
+
+long
+fltf (float x, float y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tflt\\.s\t\[^\n\]*\n\tfsflags\t\\1\n" } } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/fltf-snan.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fltf-snan.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-fno-finite-math-only -fsignaling-nans" } */
+
+long
+fltf (float x, float y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler
"\tfrflags\t(\[^\n\]*)\n\tflt\\.s\t\[^,\]*,(\[^,\]*),(\[^,\]*)\n\tfsflags\t\\1\n\tfeq\\.s\tzero,\\2,\\3\n"
} } */
+/* { dg-final { scan-assembler-not "snez" } } */
Index: gcc/gcc/testsuite/gcc.target/riscv/fltf.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/riscv/fltf.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-ffinite-math-only" } */
+
+long
+fltf (float x, float y)
+{
+ return __builtin_isless (x, y);
+}
+
+/* { dg-final { scan-assembler "\tf(?:ge|lt)\\.s\t\[^\n\]*\n" } } */
+/* { dg-final { scan-assembler-not "f\[rs\]flags" } } */