On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote: > On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote: > > Will leave the "correctness check" for other folks > > but the above is
BTW, as I wanted to be sure about the correctness, I wrote a simple program (below). It actually seems that we could optimize the outer_plus == inner_plus cases even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetround (FE_DOWNWARD) mode the testcase prints the first two messages (in all other rounding modes it prints all 4). So here is also an updated version of the patch: 2019-05-07 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/90356 * match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible. * gcc.dg/tree-ssa/pr90356-1.c: New test. * gcc.dg/tree-ssa/pr90356-2.c: New test. * gcc.dg/tree-ssa/pr90356-3.c: New test. * gcc.dg/tree-ssa/pr90356-4.c: New test. * gcc.dg/tree-ssa/pr90356-5.c: New test. * gcc.dg/tree-ssa/pr90356-6.c: New test. --- gcc/match.pd.jj 2019-05-06 23:47:52.642628123 +0200 +++ gcc/match.pd 2019-05-07 10:40:25.475136027 +0200 @@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY (if (fold_real_zero_addition_p (type, @1, 1)) (non_lvalue @0))) +/* Even if the fold_real_zero_addition_p can't simplify X + 0.0 + into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0 + or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0 + if not -frounding-math (for (X + 0.0) + 0.0 and (X - 0.0) - 0.0 + even if -frounding-math). For sNaNs the first operation would raise + exceptions but turn the result into qNan, so the second operation + would not raise it. */ +(for inner_op (plus minus) + (for outer_op (plus minus) + (simplify + (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2) + (if (real_zerop (@1) && real_zerop (@2)) + (with { bool inner_plus = ((inner_op == PLUS_EXPR) + ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1))); + bool outer_plus + = ((outer_op == PLUS_EXPR) + ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); } + (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) || outer_plus == inner_plus) + (if (outer_plus && !inner_plus) + (outer_op @0 @2) + (inner_op @0 @1)))))))) + /* Simplify x - x. This is unsafe for certain floats even in non-IEEE formats. 
In IEEE, it is unsafe because it does wrong for NaNs. --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj 2019-05-07 10:34:07.270208201 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c 2019-05-07 10:34:07.270208201 +0200 @@ -0,0 +1,23 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */ + +double f1 (double x) { return (x + 0.0) + 0.0; } +double f2 (double y) { return (y + (-0.0)) + (-0.0); } +double f3 (double y) { return (y - 0.0) - 0.0; } +double f4 (double x) { return (x - (-0.0)) - (-0.0); } +double f5 (double x) { return (x + 0.0) - 0.0; } +double f6 (double x) { return (x + (-0.0)) - (-0.0); } +double f7 (double x) { return (x - 0.0) + 0.0; } +double f8 (double x) { return (x - (-0.0)) + (-0.0); } +double f9 (double x) { double t = x + 0.0; return t + 0.0; } +double f10 (double y) { double t = y + (-0.0); return t + (-0.0); } +double f11 (double y) { double t = y - 0.0; return t - 0.0; } +double f12 (double x) { double t = x - (-0.0); return t - (-0.0); } +double f13 (double x) { double t = x + 0.0; return t - 0.0; } +double f14 (double x) { double t = x + (-0.0); return t - (-0.0); } +double f15 (double x) { double t = x - 0.0; return t + 0.0; } +double f16 (double x) { double t = x - (-0.0); return t + (-0.0); } --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj 2019-05-07 10:34:07.270208201 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c 2019-05-07 10:34:07.270208201 +0200 @@ -0,0 +1,8 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. 
\\+ 0.0;" 12 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */ + +#include "pr90356-1.c" --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj 2019-05-07 10:34:07.271208185 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c 2019-05-07 11:00:50.345488636 +0200 @@ -0,0 +1,15 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */ + +double f1 (double x) { return (x + 0.0) + 0.0; } +double f2 (double y) { return (y + (-0.0)) + (-0.0); } +double f3 (double y) { return (y - 0.0) - 0.0; } +double f4 (double x) { return (x - (-0.0)) - (-0.0); } +double f9 (double x) { double t = x + 0.0; return t + 0.0; } +double f10 (double y) { double t = y + (-0.0); return t + (-0.0); } +double f11 (double y) { double t = y - 0.0; return t - 0.0; } +double f12 (double x) { double t = x - (-0.0); return t - (-0.0); } --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj 2019-05-07 10:34:07.271208185 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c 2019-05-07 11:01:11.567148473 +0200 @@ -0,0 +1,8 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. 
- 0.0;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */ + +#include "pr90356-3.c" --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c.jj 2019-05-07 11:01:21.704985970 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c 2019-05-07 11:01:45.981596834 +0200 @@ -0,0 +1,13 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */ + +double f5 (double x) { return (x + 0.0) - 0.0; } +double f6 (double x) { return (x + (-0.0)) - (-0.0); } +double f7 (double x) { return (x - 0.0) + 0.0; } +double f8 (double x) { return (x - (-0.0)) + (-0.0); } +double f13 (double x) { double t = x + 0.0; return t - 0.0; } +double f14 (double x) { double t = x + (-0.0); return t - (-0.0); } +double f15 (double x) { double t = x - 0.0; return t + 0.0; } +double f16 (double x) { double t = x - (-0.0); return t + (-0.0); } --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c.jj 2019-05-07 11:01:58.456396880 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c 2019-05-07 11:02:09.451220639 +0200 @@ -0,0 +1,6 @@ +/* PR tree-optimization/90356 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */ + +#include "pr90356-5.c" __attribute__((noipa)) double f1 (double x) { return x + 0.0; } __attribute__((noipa)) double f2 (double x) { return x + (-0.0); } __attribute__((noipa)) double f3 (double x) { return x - 0.0; } __attribute__((noipa)) double f4 (double x) { return x - (-0.0); } int main () { double d[] = { -2.0, -0.0, 0.0, 2.0 }; int i; for (i = 0; i < 4; i++) { double r1, r2; r1 = f1 (d[i]); r2 = f1 (f1 (d[i])); if (__builtin_memcmp (&r1, &r2, sizeof (double))) break; } if (i == 4) __builtin_printf ("f1 (f1) == f1\n"); for (i = 0; i < 4; i++) { 
double r1, r2; r1 = f2 (d[i]); r2 = f2 (f2 (d[i])); if (__builtin_memcmp (&r1, &r2, sizeof (double))) break; } if (i == 4) __builtin_printf ("f2 (f2) == f2\n"); for (i = 0; i < 4; i++) { double r1, r2; r1 = f1 (d[i]); r2 = f2 (f1 (d[i])); if (__builtin_memcmp (&r1, &r2, sizeof (double))) break; } if (i == 4) __builtin_printf ("f2 (f1) == f1\n"); for (i = 0; i < 4; i++) { double r1, r2; r1 = f1 (d[i]); r2 = f1 (f2 (d[i])); if (__builtin_memcmp (&r1, &r2, sizeof (double))) break; } if (i == 4) __builtin_printf ("f1 (f2) == f1\n"); return 0; } Jakub