On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote:
> On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
> > Will leave the "correctness check" for other folks
> > but the above is

BTW, as I wanted to be sure about the correctness, I wrote a simple program
(below).
And actually it seems that we could optimize the plus1 == plus2 cases
even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetround
(FE_DOWNWARD) mode the testcase prints the first two lines (in all other
rounding modes it prints all 4).

So here is also an updated version of the patch:

2019-05-07  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/90356
        * match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

        * gcc.dg/tree-ssa/pr90356-1.c: New test.
        * gcc.dg/tree-ssa/pr90356-2.c: New test.
        * gcc.dg/tree-ssa/pr90356-3.c: New test.
        * gcc.dg/tree-ssa/pr90356-4.c: New test.
        * gcc.dg/tree-ssa/pr90356-5.c: New test.
        * gcc.dg/tree-ssa/pr90356-6.c: New test.

--- gcc/match.pd.jj     2019-05-06 23:47:52.642628123 +0200
+++ gcc/match.pd        2019-05-07 10:40:25.475136027 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math (for (X + 0.0) + 0.0 and (X - 0.0) - 0.0
+   even if -frounding-math).  For sNaNs the first operation would raise
+   exceptions but turn the result into qNaN, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
+    (if (real_zerop (@1) && real_zerop (@2))
+     (with { bool inner_plus = ((inner_op == PLUS_EXPR)
+                               ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+            bool outer_plus
+              = ((outer_op == PLUS_EXPR)
+                 ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+      (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) || outer_plus == inner_plus)
+       (if (outer_plus && !inner_plus)
+       (outer_op @0 @2)
+       (inner_op @0 @1))))))))
+
 /* Simplify x - x.
    This is unsafe for certain floats even in non-IEEE formats.
    In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj        2019-05-07 10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c   2019-05-07 10:34:07.270208201 +0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj        2019-05-07 10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c   2019-05-07 10:34:07.270208201 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj        2019-05-07 10:34:07.271208185 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c   2019-05-07 11:00:50.345488636 +0200
@@ -0,0 +1,15 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c.jj        2019-05-07 10:34:07.271208185 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c   2019-05-07 11:01:11.567148473 +0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 8 "optimized" } } */
+
+#include "pr90356-3.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c.jj        2019-05-07 11:01:21.704985970 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-5.c   2019-05-07 11:01:45.981596834 +0200
@@ -0,0 +1,13 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c.jj        2019-05-07 11:01:58.456396880 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-6.c   2019-05-07 11:02:09.451220639 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+#include "pr90356-5.c"


/* The four ways of adding or subtracting a signed zero.  Each helper is
   marked with GCC's noipa attribute so that calls to it are not inlined
   or folded away and the arithmetic is really performed at run time.  */

/* Return X + 0.0 (positive zero added).  */
__attribute__((noipa)) double
f1 (double x)
{
  return x + 0.0;
}

/* Return X + (-0.0) (negative zero added).  */
__attribute__((noipa)) double
f2 (double x)
{
  return x + (-0.0);
}

/* Return X - 0.0 (positive zero subtracted).  */
__attribute__((noipa)) double
f3 (double x)
{
  return x - 0.0;
}

/* Return X - (-0.0) (negative zero subtracted).  */
__attribute__((noipa)) double
f4 (double x)
{
  return x - (-0.0);
}

/* Return nonzero when A and B have exactly the same bytes.  A byte-wise
   comparison is used instead of ==, so 0.0 and -0.0 count as different.  */
static int
same_bits (double a, double b)
{
  return __builtin_memcmp (&a, &b, sizeof (double)) == 0;
}

/* For each combination of zero additions below, print one line when
   applying the operation twice gives, for every sample input, the same
   bit pattern as the single operation named on the right-hand side of
   the printed equation.  Nothing is printed for a combination that
   differs on at least one sample.  Always returns 0.  */
int
main ()
{
  /* Non-zero values of both signs plus both signed zeros.  */
  const double samples[] = { -2.0, -0.0, 0.0, 2.0 };
  const int count = (int) (sizeof samples / sizeof samples[0]);
  int k;

  /* (x + 0.0) + 0.0 against x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f1 (f1 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f1 (f1) == f1\n");

  /* (x + -0.0) + -0.0 against x + -0.0.  */
  k = 0;
  while (k < count && same_bits (f2 (samples[k]), f2 (f2 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f2 (f2) == f2\n");

  /* (x + 0.0) + -0.0 against x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f2 (f1 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f2 (f1) == f1\n");

  /* (x + -0.0) + 0.0 against x + 0.0.  */
  k = 0;
  while (k < count && same_bits (f1 (samples[k]), f1 (f2 (samples[k]))))
    k++;
  if (k == count)
    __builtin_printf ("f1 (f2) == f1\n");

  return 0;
}


        Jakub

Reply via email to