Author: das
Date: Tue Oct 11 05:17:45 2011
New Revision: 226245
URL: http://svn.freebsd.org/changeset/base/226245

Log:
  Refactor this code by introducing separate functions to handle the
  extra-precision add and multiply operations. This simplifies future
  work but shouldn't result in any functional change.

Modified:
  head/lib/msun/src/s_fma.c
  head/lib/msun/src/s_fmal.c

Modified: head/lib/msun/src/s_fma.c
==============================================================================
--- head/lib/msun/src/s_fma.c   Tue Oct 11 05:17:26 2011        (r226244)
+++ head/lib/msun/src/s_fma.c   Tue Oct 11 05:17:45 2011        (r226245)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 David Schultz <d...@freebsd.org>
+ * Copyright (c) 2005-2011 David Schultz <d...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,63 @@ __FBSDID("$FreeBSD$");
 #include <math.h>
 
 /*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a double.  We maintain the invariant that "hi" stores the 53 high-order
+ * bits of the result.
+ */
+struct dd {
+       double hi;
+       double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(double a, double b)
+{
+       struct dd ret;
+       double s;
+
+       ret.hi = a + b;
+       s = ret.hi - a;
+       ret.lo = (a - (ret.hi - s)) + (b - s);
+       return (ret);
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(double a, double b)
+{
+       static const double split = 0x1p27 + 1.0;
+       struct dd ret;
+       double ha, hb, la, lb, p, q;
+
+       p = a * split;
+       ha = a - p;
+       ha += p;
+       la = a - ha;
+
+       p = b * split;
+       hb = b - p;
+       hb += p;
+       lb = b - hb;
+
+       p = ha * hb;
+       q = ha * lb + la * hb;
+
+       ret.hi = p + q;
+       ret.lo = p - ret.hi + q + la * lb;
+       return (ret);
+}
+
+/*
  * Fused multiply-add: Compute x * y + z with a single rounding error.
  *
  * We use scaling to avoid overflow/underflow, along with the
@@ -52,10 +109,10 @@ __FBSDID("$FreeBSD$");
 double
 fma(double x, double y, double z)
 {
-       static const double split = 0x1p27 + 1.0;
        double xs, ys, zs;
-       double c, cc, hx, hy, p, q, tx, ty;
-       double r, rr, s;
+       struct dd xy, r, r2;
+       double p;
+       double s;
        int oround;
        int ex, ey, ez;
        int spread;
@@ -95,29 +152,29 @@ fma(double x, double y, double z)
                        if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafter(r, 0);
+                               s = nextafter(s, 0);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                case FE_DOWNWARD:
                        if (z > 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafter(r, -INFINITY);
+                               s = nextafter(s, -INFINITY);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                default:        /* FE_UPWARD */
                        if (z < 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafter(r, INFINITY);
+                               s = nextafter(s, INFINITY);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                }
        }
        if (spread < -DBL_MANT_DIG) {
@@ -145,50 +202,29 @@ fma(double x, double y, double z)
                }
        }
 
-       /*
-        * Use Dekker's algorithm to perform the multiplication and
-        * subsequent addition in twice the machine precision.
-        * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-        */
        fesetround(FE_TONEAREST);
 
-       p = xs * split;
-       hx = xs - p;
-       hx += p;
-       tx = xs - hx;
-
-       p = ys * split;
-       hy = ys - p;
-       hy += p;
-       ty = ys - hy;
-
-       p = hx * hy;
-       q = hx * ty + tx * hy;
-       c = p + q;
-       cc = p - c + q + tx * ty;
-
+       xy = dd_mul(xs, ys);
        zs = ldexp(zs, -spread);
-       r = c + zs;
-       s = r - c;
-       rr = (c - (r - s)) + (zs - s) + cc;
+       r = dd_add(xy.hi, zs);
+       r.lo += xy.lo;
 
        spread = ex + ey;
-       if (spread + ilogb(r) > -1023) {
+       if (spread + ilogb(r.hi) > -1023) {
                fesetround(oround);
-               r = r + rr;
+               r.hi = r.hi + r.lo;
        } else {
                /*
                 * The result is subnormal, so we round before scaling to
                 * avoid double rounding.
                 */
-               p = ldexp(copysign(0x1p-1022, r), -spread);
-               c = r + p;
-               s = c - r;
-               cc = (r - (c - s)) + (p - s) + rr;
+               p = ldexp(copysign(0x1p-1022, r.hi), -spread);
+               r2 = dd_add(r.hi, p);
+               r2.lo += r.lo;
                fesetround(oround);
-               r = (c + cc) - p;
+               r.hi = (r2.hi + r2.lo) - p;
        }
-       return (ldexp(r, spread));
+       return (ldexp(r.hi, spread));
 }
 #else  /* LDBL_MANT_DIG == 113 */
 /*

Modified: head/lib/msun/src/s_fmal.c
==============================================================================
--- head/lib/msun/src/s_fmal.c  Tue Oct 11 05:17:26 2011        (r226244)
+++ head/lib/msun/src/s_fmal.c  Tue Oct 11 05:17:45 2011        (r226245)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 David Schultz <d...@freebsd.org>
+ * Copyright (c) 2005-2011 David Schultz <d...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,67 @@ __FBSDID("$FreeBSD$");
 #include <math.h>
 
 /*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a long double.  We maintain the invariant that "hi" stores the high-order
+ * bits of the result.
+ */
+struct dd {
+       long double hi;
+       long double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(long double a, long double b)
+{
+       struct dd ret;
+       long double s;
+
+       ret.hi = a + b;
+       s = ret.hi - a;
+       ret.lo = (a - (ret.hi - s)) + (b - s);
+       return (ret);
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(long double a, long double b)
+{
+#if LDBL_MANT_DIG == 64
+       static const long double split = 0x1p32L + 1.0;
+#elif LDBL_MANT_DIG == 113
+       static const long double split = 0x1p57L + 1.0;
+#endif
+       struct dd ret;
+       long double ha, hb, la, lb, p, q;
+
+       p = a * split;
+       ha = a - p;
+       ha += p;
+       la = a - ha;
+
+       p = b * split;
+       hb = b - p;
+       hb += p;
+       lb = b - hb;
+
+       p = ha * hb;
+       q = ha * lb + la * hb;
+
+       ret.hi = p + q;
+       ret.lo = p - ret.hi + q + la * lb;
+       return (ret);
+}
+
+/*
  * Fused multiply-add: Compute x * y + z with a single rounding error.
  *
  * We use scaling to avoid overflow/underflow, along with the
@@ -43,14 +104,10 @@ __FBSDID("$FreeBSD$");
 long double
 fmal(long double x, long double y, long double z)
 {
-#if LDBL_MANT_DIG == 64
-       static const long double split = 0x1p32L + 1.0;
-#elif LDBL_MANT_DIG == 113
-       static const long double split = 0x1p57L + 1.0;
-#endif
        long double xs, ys, zs;
-       long double c, cc, hx, hy, p, q, tx, ty;
-       long double r, rr, s;
+       struct dd xy, r, r2;
+       long double p;
+       long double s;
        int oround;
        int ex, ey, ez;
        int spread;
@@ -90,29 +147,29 @@ fmal(long double x, long double y, long 
                        if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafterl(r, 0);
+                               s = nextafterl(s, 0);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                case FE_DOWNWARD:
                        if (z > 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafterl(r, -INFINITY);
+                               s = nextafterl(s, -INFINITY);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                default:        /* FE_UPWARD */
                        if (z < 0.0)
                                return (x * y);
                        feholdexcept(&env);
-                       r = x * y;
+                       s = x * y;
                        if (!fetestexcept(FE_INEXACT))
-                               r = nextafterl(r, INFINITY);
+                               s = nextafterl(s, INFINITY);
                        feupdateenv(&env);
-                       return (r);
+                       return (s);
                }
        }
        if (spread < -LDBL_MANT_DIG) {
@@ -140,48 +197,27 @@ fmal(long double x, long double y, long 
                }
        }
 
-       /*
-        * Use Dekker's algorithm to perform the multiplication and
-        * subsequent addition in twice the machine precision.
-        * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-        */
        fesetround(FE_TONEAREST);
 
-       p = xs * split;
-       hx = xs - p;
-       hx += p;
-       tx = xs - hx;
-
-       p = ys * split;
-       hy = ys - p;
-       hy += p;
-       ty = ys - hy;
-
-       p = hx * hy;
-       q = hx * ty + tx * hy;
-       c = p + q;
-       cc = p - c + q + tx * ty;
-
+       xy = dd_mul(xs, ys);
        zs = ldexpl(zs, -spread);
-       r = c + zs;
-       s = r - c;
-       rr = (c - (r - s)) + (zs - s) + cc;
+       r = dd_add(xy.hi, zs);
+       r.lo += xy.lo;
 
        spread = ex + ey;
-       if (spread + ilogbl(r) > -16383) {
+       if (spread + ilogbl(r.hi) > -16383) {
                fesetround(oround);
-               r = r + rr;
+               r.hi = r.hi + r.lo;
        } else {
                /*
                 * The result is subnormal, so we round before scaling to
                 * avoid double rounding.
                 */
-               p = ldexpl(copysignl(0x1p-16382L, r), -spread);
-               c = r + p;
-               s = c - r;
-               cc = (r - (c - s)) + (p - s) + rr;
+               p = ldexpl(copysignl(0x1p-16382L, r.hi), -spread);
+               r2 = dd_add(r.hi, p);
+               r2.lo += r.lo;
                fesetround(oround);
-               r = (c + cc) - p;
+               r.hi = (r2.hi + r2.lo) - p;
        }
-       return (ldexpl(r, spread));
+       return (ldexpl(r.hi, spread));
 }
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to