Fixed all issues pointed out in the previous iteration. There is now a significant change in how the sin(atan(x)) constant is calculated: it is now the largest value of x for which computing 1 + x*x does not overflow. There are two reasons for this change: (1) to avoid an intermediate infinity value when optimizing cos(atan(x)), and (2) to avoid requiring separate constants for sin(atan(x)) and cos(atan(x)), which makes the code easier to maintain.
gcc/ChangeLog

2018-10-09  Giuliano Belinassi  <giuliano.belina...@usp.br>

	PR tree-optimization/86829
	* match.pd: Add sin(atan(x)) and cos(atan(x)) simplification rules.
	* real.c (build_sinatan_real): New function to build a constant equal
	to the largest value c such that 1 + c*c will not overflow.
	* real.h (build_sinatan_real): Declare, so that the function can be
	called externally.

gcc/testsuite/ChangeLog

2018-10-09  Giuliano Belinassi  <giuliano.belina...@usp.br>

	PR tree-optimization/86829
	* gcc.dg/sinatan-1.c: New test.
	* gcc.dg/sinatan-2.c: New test.
	* gcc.dg/sinatan-3.c: New test.

No tests are broken in trunk that seem related to this PR.
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 264941)
+++ gcc/match.pd	(working copy)
@@ -4223,6 +4223,45 @@
    (tans (atans @0))
    @0)))
 
+ /* Simplify sin(atan(x)) -> x / sqrt(x*x + 1). */
+ (for sins (SIN)
+      atans (ATAN)
+      sqrts (SQRT)
+      copysigns (COPYSIGN)
+  (simplify
+   (sins (atans:s @0))
+   (with
+     {
+      REAL_VALUE_TYPE r_cst;
+      build_sinatan_real (&r_cst, type);
+      tree t_cst = build_real (type, r_cst);
+      tree t_one = build_one_cst (type);
+     }
+    (if (SCALAR_FLOAT_TYPE_P (type))
+     (cond (le (abs @0) { t_cst; })
+      (rdiv @0 (sqrts (plus (mult @0 @0) { t_one; })))
+      (copysigns { t_one; } @0))))))
+
+ /* Simplify cos(atan(x)) -> 1 / sqrt(x*x + 1). */
+ (for coss (COS)
+      atans (ATAN)
+      sqrts (SQRT)
+      copysigns (COPYSIGN)
+  (simplify
+   (coss (atans:s @0))
+   (with
+     {
+      REAL_VALUE_TYPE r_cst;
+      build_sinatan_real (&r_cst, type);
+      tree t_cst = build_real (type, r_cst);
+      tree t_one = build_one_cst (type);
+      tree t_zero = build_zero_cst (type);
+     }
+    (if (SCALAR_FLOAT_TYPE_P (type))
+     (cond (le (abs @0) { t_cst; })
+      (rdiv { t_one; } (sqrts (plus (mult @0 @0) { t_one; })))
+      (copysigns { t_zero; } @0))))))
+
 /* cabs(x+0i) or cabs(0+xi) -> abs(x).  */
 (simplify
  (CABS (complex:C @0 real_zerop@1))
Index: gcc/real.c
===================================================================
--- gcc/real.c	(revision 264941)
+++ gcc/real.c	(working copy)
@@ -5279,3 +5279,34 @@
 {
   return HONOR_SIGN_DEPENDENT_ROUNDING (GET_MODE (x));
 }
+
+/* Fill R with the largest value of TYPE's format such that 1 + R*R does
+   not overflow.  Used by the sin (atan (x)) and cos (atan (x))
+   simplifications in match.pd.  */
+
+void
+build_sinatan_real (REAL_VALUE_TYPE * r, tree type)
+{
+  REAL_VALUE_TYPE maxval;
+  mpfr_t mpfr_const1, mpfr_c, mpfr_maxval;
+  machine_mode mode = TYPE_MODE (type);
+  const struct real_format * fmt = REAL_MODE_FORMAT (mode);
+
+  real_maxval (&maxval, 0, mode);
+
+  /* MPFR's default precision can be narrower than TYPE's significand,
+     which would round MAXVAL up; size the temporaries explicitly.  */
+  mpfr_inits2 (SIGNIFICAND_BITS, mpfr_const1, mpfr_c, mpfr_maxval, NULL);
+
+  mpfr_from_real (mpfr_const1, &dconst1, GMP_RNDN);
+  mpfr_from_real (mpfr_maxval, &maxval, GMP_RNDN);
+
+  /* Round the square root toward zero so the result never exceeds
+     sqrt (MAXVAL - 1).  */
+  mpfr_sub (mpfr_c, mpfr_maxval, mpfr_const1, GMP_RNDN);
+  mpfr_sqrt (mpfr_c, mpfr_c, GMP_RNDZ);
+
+  real_from_mpfr (r, mpfr_c, fmt, GMP_RNDZ);
+
+  mpfr_clears (mpfr_const1, mpfr_c, mpfr_maxval, NULL);
+}
Index: gcc/real.h
===================================================================
--- gcc/real.h	(revision 264941)
+++ gcc/real.h	(working copy)
@@ -523,4 +523,8 @@
 			       const wide_int_ref &, signop);
 #endif
 
+/* Fills r with the largest value such that 1 + r*r won't overflow.
+   This is used in both sin (atan (x)) and cos (atan(x)) optimizations. */
+extern void build_sinatan_real (REAL_VALUE_TYPE *, tree);
+
 #endif /* ! GCC_REAL_H */
Index: gcc/testsuite/gcc.dg/sinatan-1.c
===================================================================
--- gcc/testsuite/gcc.dg/sinatan-1.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/sinatan-1.c	(working copy)
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-options "-Ofast" } */
+
+extern float sinf (float);
+extern float cosf (float);
+extern float atanf (float);
+extern float sqrtf (float);
+extern float nextafterf (float, float);
+extern double sin (double);
+extern double cos (double);
+extern double atan (double);
+extern double sqrt (double);
+extern double nextafter (double, double);
+extern long double sinl (long double);
+extern long double cosl (long double);
+extern long double atanl (long double);
+extern long double sqrtl (long double);
+extern long double nextafterl (long double, long double);
+
+extern void abort ();
+
+double __attribute__ ((noinline, optimize("Ofast")))
+sinatan (double x)
+{
+  return sin (atan (x));
+}
+
+double __attribute__ ((noinline, optimize("Ofast")))
+cosatan (double x)
+{
+  return cos (atan (x));
+}
+
+float __attribute__ ((noinline, optimize("Ofast")))
+sinatanf(float x)
+{
+  return sinf (atanf (x));
+}
+
+float __attribute__ ((noinline, optimize("Ofast")))
+cosatanf(float x)
+{
+  return cosf (atanf (x));
+}
+
+long double __attribute__ ((noinline, optimize("Ofast")))
+sinatanl (long double x)
+{
+  return sinl (atanl (x));
+}
+
+long double __attribute__ ((noinline, optimize("Ofast")))
+cosatanl (long double x)
+{
+  return cosl (atanl (x));
+}
+
+int
+main()
+{
+  /* Get first x such that 1 + x*x will overflow */
+  float fc = nextafterf (sqrtf (__FLT_MAX__ - 1), __FLT_MAX__);
+  double c = nextafter (sqrt (__DBL_MAX__ - 1), __DBL_MAX__);
+  long double lc = nextafterl (sqrtl (__LDBL_MAX__ - 1), __LDBL_MAX__);
+
+  /* Force move from FPU to memory, otherwise comparison may
+     fail due to possible more accurate registers (see 387) */
+  volatile float fy;
+  volatile double y;
+  volatile long double ly;
+
+  fy = sinatanf (fc);
+  y = sinatan (c);
+  ly = sinatanl (lc);
+
+  if (fy != 1.f || y != 1 || ly != 1.L)
+    abort ();
+
+  fy = cosatanf (fc);
+  y = cosatan (c);
+  ly = cosatanl (lc);
+
+  if (fy != 0.f || y != 0. || ly != 0.L)
+    abort ();
+
+  fy = sinatanf (-fc);
+  y = sinatan (-c);
+  ly = sinatanl (-lc);
+
+  if (fy != -1.f || y != -1. || ly != -1.L)
+    abort ();
+
+  fy = cosatanf (-fc);
+  y = cosatan (-c);
+  ly = cosatanl (-lc);
+
+  if (fy != 0.f || y != 0. || ly != 0.L)
+    abort ();
+
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/sinatan-2.c
===================================================================
--- gcc/testsuite/gcc.dg/sinatan-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/sinatan-2.c	(working copy)
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+extern float sinf (float);
+extern float cosf (float);
+extern float atanf (float);
+extern double sin (double);
+extern double cos (double);
+extern double atan (double);
+extern long double sinl (long double);
+extern long double cosl (long double);
+extern long double atanl (long double);
+
+double __attribute__ ((noinline))
+sinatan_ (double x)
+{
+  return sin (atan (x));
+}
+
+double __attribute__ ((noinline))
+cosatan_ (double x)
+{
+  return cos (atan (x));
+}
+
+float __attribute__ ((noinline))
+sinatanf_(float x)
+{
+  return sinf (atanf (x));
+}
+
+float __attribute__ ((noinline))
+cosatanf_(float x)
+{
+  return cosf (atanf (x));
+}
+
+long double __attribute__ ((noinline))
+sinatanl_ (long double x)
+{
+  return sinl (atanl (x));
+}
+
+long double __attribute__ ((noinline))
+cosatanl_ (long double x)
+{
+  return cosl (atanl (x));
+}
+
+/* There must be no calls to sin, cos, or atan */
+/* { dg-final { scan-tree-dump-not "sin " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "cos " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "atan " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "sinf " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "cosf " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "atanf " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "sinl " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "cosl " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "atanl " "optimized" } } */
Index: gcc/testsuite/gcc.dg/sinatan-3.c
===================================================================
--- gcc/testsuite/gcc.dg/sinatan-3.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/sinatan-3.c	(working copy)
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+extern float sinf (float);
+extern float cosf (float);
+extern float atanf (float);
+extern double sin (double);
+extern double cos (double);
+extern double atan (double);
+extern long double sinl (long double);
+extern long double cosl (long double);
+extern long double atanl (long double);
+
+float __attribute__ ((noinline))
+cosatanf_(float x)
+{
+  float atg = atanf(x);
+  return cosf(atg) + atg;
+}
+
+double __attribute__ ((noinline))
+cosatan_(double x)
+{
+  double atg = atan(x);
+  return cos(atg) + atg;
+}
+
+long double __attribute__ ((noinline))
+cosatanl_(long double x)
+{
+  long double atg = atanl(x);
+  return cosl(atg) + atg;
+}
+
+float __attribute__ ((noinline))
+sinatanf_(float x)
+{
+  float atg = atanf(x);
+  return sinf(atg) + atg;
+}
+
+double __attribute__ ((noinline))
+sinatan_(double x)
+{
+  double atg = atan(x);
+  return sin(atg) + atg;
+}
+
+long double __attribute__ ((noinline))
+sinatanl_(long double x)
+{
+  long double atg = atanl(x);
+  return sinl(atg) + atg;
+}
+
+/* There should be calls to sin, cos, and atan */
+/* { dg-final { scan-tree-dump "cos " "optimized" } } */
+/* { dg-final { scan-tree-dump "sin " "optimized" } } */
+/* { dg-final { scan-tree-dump "atan " "optimized" } } */
+/* { dg-final { scan-tree-dump "cosf " "optimized" } } */
+/* { dg-final { scan-tree-dump "sinf " "optimized" } } */
+/* { dg-final { scan-tree-dump "atanf " "optimized" } } */
+/* { dg-final { scan-tree-dump "cosl " "optimized" } } */
+/* { dg-final { scan-tree-dump "sinl " "optimized" } } */
+/* { dg-final { scan-tree-dump "atanl " "optimized" } } */