On systems that don't have a native version, we use an implementation of 
sqrtl() (square-root of long double) that -- to do its job -- pokes at the 
floating-point exception state and rounding mode.  In particular, at a key 
point it clears any previous "inexact" exception and sets the rounding 
mode to "toward zero" and then does a division.  It then tests whether 
that raised an "inexact" exception and fixes the result up based on that, 
and finally restores the rounding mode before returning.

The current version does that using the old, non-standard fp* routines: 
fp{set,get}sticky() and fpsetround().  This diff switches it to the new, 
standardized fe* routines: fe{clear,test}except() and fe{get,set}round().

(Why bother?  The fp* routines are defined in libc, while the fe* routines 
are defined inside libm itself...which means that with some symbol 
redirection they can be made to call directly, without going through the 
PLT.  This diff is thus a prelude to the larger diff I have sitting in my 
tree to do exactly that, reducing 136 PLT entries to just 22 on amd64, for 
example.  Even on a HW-FP-poor arch like mips64 it gets reduced from 204 
to only 56 PLT entries.)

ok?


Philip Guenther


Index: src/e_sqrtl.c
===================================================================
RCS file: /data/src/openbsd/src/lib/libm/src/e_sqrtl.c,v
retrieving revision 1.1
diff -u -p -r1.1 e_sqrtl.c
--- src/e_sqrtl.c       9 Dec 2008 20:00:35 -0000       1.1
+++ src/e_sqrtl.c       11 Sep 2016 03:47:45 -0000
@@ -26,9 +26,9 @@
  */
 
 #include <sys/types.h>
-#include <machine/ieee.h>
+#include <machine/ieee.h>      /* for struct ieee_ext */
+#include <fenv.h>
 #include <float.h>
-#include <ieeefp.h>
 #include <math.h>
 
 #ifdef EXT_IMPLICIT_NBIT
@@ -204,27 +204,28 @@ sqrtl(long double x)
        u.e = xn + lo;                  /* Combine everything. */
        u.bits.ext_exp += (k >> 1) - 1;
 
-       fpsetsticky(fpgetsticky() & ~FP_X_IMP);
-       r = fpsetround(FP_RZ);          /* Set to round-toward-zero. */
+       feclearexcept(FE_INEXACT);
+       r = fegetround();
+       fesetround(FE_TOWARDZERO);      /* Set to round-toward-zero. */
        xn = x / u.e;                   /* Chopped quotient (inexact?). */
 
-       if (!(fpgetsticky() & FP_X_IMP)) { /* Quotient is exact. */
+       if (!fetestexcept(FE_INEXACT)) { /* Quotient is exact. */
                if (xn == u.e) {
-                       fpsetround(r);
+                       fesetround(r);
                        return (u.e);
                }
                /* Round correctly for inputs like x = y**2 - ulp. */
                xn = dec(xn);           /* xn = xn - ulp. */
        }
 
-       if (r == FP_RN) {
+       if (r == FE_TONEAREST) {
                xn = inc(xn);           /* xn = xn + ulp. */
-       } else if (r == FP_RP) {
+       } else if (r == FE_UPWARD) {
                u.e = inc(u.e);         /* u.e = u.e + ulp. */
                xn = inc(xn);           /* xn  = xn + ulp. */
        }
        u.e = u.e + xn;                 /* Chopped sum. */
-       fpsetround(r);                  /* Restore env and raise inexact */
+       fesetround(r);                  /* Restore env and raise inexact */
        u.bits.ext_exp--;
        return (u.e);
 }

Reply via email to