Module Name: src Committed By: christos Date: Wed Aug 24 13:51:19 UTC 2022
Modified Files: src/lib/libm/src: e_rem_pio2.c k_rem_pio2.c math_private.h Log Message: sync with FreeBSD; bring some more inline functions. To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/lib/libm/src/e_rem_pio2.c cvs rdiff -u -r1.13 -r1.14 src/lib/libm/src/k_rem_pio2.c cvs rdiff -u -r1.24 -r1.25 src/lib/libm/src/math_private.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libm/src/e_rem_pio2.c diff -u src/lib/libm/src/e_rem_pio2.c:1.11 src/lib/libm/src/e_rem_pio2.c:1.12 --- src/lib/libm/src/e_rem_pio2.c:1.11 Sun May 26 18:01:52 2002 +++ src/lib/libm/src/e_rem_pio2.c Wed Aug 24 09:51:19 2022 @@ -12,7 +12,7 @@ #include <sys/cdefs.h> #if defined(LIBM_SCCS) && !defined(lint) -__RCSID("$NetBSD: e_rem_pio2.c,v 1.11 2002/05/26 22:01:52 wiz Exp $"); +__RCSID("$NetBSD: e_rem_pio2.c,v 1.12 2022/08/24 13:51:19 christos Exp $"); #endif /* __ieee754_rem_pio2(x,y) @@ -24,23 +24,6 @@ __RCSID("$NetBSD: e_rem_pio2.c,v 1.11 20 #include "math.h" #include "math_private.h" -/* - * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi - */ -static const int32_t two_over_pi[] = { -0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, -0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, -0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, -0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, -0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, -0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, -0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, -0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, -0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, -0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, -0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, -}; - static const int32_t npio2_hw[] = { 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, @@ -163,7 +146,7 @@ __ieee754_rem_pio2(double x, double *y) tx[2] = z; nx = 3; while(tx[nx-1]==zero) nx--; /* skip zero term */ - n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); + n = __kernel_rem_pio2(tx,y,e0,nx,2); if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} return n; } Index: src/lib/libm/src/k_rem_pio2.c diff -u src/lib/libm/src/k_rem_pio2.c:1.13 src/lib/libm/src/k_rem_pio2.c:1.14 --- src/lib/libm/src/k_rem_pio2.c:1.13 Thu Jun 22 08:43:43 2017 +++ src/lib/libm/src/k_rem_pio2.c Wed Aug 24 09:51:19 2022 @@ -1,9 +1,10 @@ -/* @(#)k_rem_pio2.c 5.1 93/09/24 */ + +/* @(#)k_rem_pio2.c 1.3 95/01/18 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * - * Developed at SunPro, a Sun Microsystems, Inc. business. + * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. @@ -11,13 +12,16 @@ */ #include <sys/cdefs.h> +#if 0 +__FBSDID("$FreeBSD: head/lib/msun/src/k_rem_pio2.c 342651 2018-12-31 15:43:06Z pfg $"); +#endif #if defined(LIBM_SCCS) && !defined(lint) -__RCSID("$NetBSD: k_rem_pio2.c,v 1.13 2017/06/22 12:43:43 maya Exp $"); +__RCSID("$NetBSD: k_rem_pio2.c,v 1.14 2022/08/24 13:51:19 christos Exp $"); #endif /* - * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) - * double x[],y[]; int e0,nx,prec; int ipio2[]; + * __kernel_rem_pio2(x,y,e0,nx,prec) + * double x[],y[]; int e0,nx,prec; * * __kernel_rem_pio2 return the last three digits of N with * y = x - N*pi/2 @@ -53,7 +57,7 @@ __RCSID("$NetBSD: k_rem_pio2.c,v 1.13 20 * 64-bit precision 2 * 113-bit precision 3 * The actual value is the sum of them. Thus for 113-bit - * precison, one may have to do something like: + * precision, one may have to do something like: * * long double t,w,r_head, r_tail; * t = (long double)y[2] + (long double)y[1]; @@ -61,7 +65,8 @@ __RCSID("$NetBSD: k_rem_pio2.c,v 1.13 20 * r_head = t+w; * r_tail = w - (r_head - t); * - * e0 The exponent of x[0] + * e0 The exponent of x[0]. Must be <= 16360 or you need to + * expand the ipio2 table. * * nx dimension of x[] * @@ -71,13 +76,6 @@ __RCSID("$NetBSD: k_rem_pio2.c,v 1.13 20 * 2 64 bits (extended) * 3 113 bits (quad) * - * ipio2[] - * integer array, contains the (24*i)-th to (24*i+23)-th - * bit of 2/pi after binary point. The corresponding - * floating value is - * - * ipio2[i] * 2^(-24(i+1)). - * * External function: * double scalbn(), floor(); * @@ -85,8 +83,13 @@ __RCSID("$NetBSD: k_rem_pio2.c,v 1.13 20 * Here is the description of some local variables: * * jk jk+1 is the initial number of terms of ipio2[] needed - * in the computation. The recommended value is 2,3,4, - * 6 for single, double, extended,and quad. + * in the computation. The minimum and recommended value + * for jk is 3,4,4,6 for single, double, extended, and quad. + * jk+1 must be 2 larger than you might expect so that our + * recomputation test works. (Up to 24 bits in the integer + * part (the 24 bits of it that we compute) and 23 bits in + * the fraction part may be lost to cancellation before we + * recompute.) * * jz local integer variable indicating the number of * terms of ipio2[] used. @@ -132,10 +135,149 @@ __RCSID("$NetBSD: k_rem_pio2.c,v 1.13 20 */ #include "namespace.h" +#include <float.h> + #include "math.h" #include "math_private.h" -static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ +static const int init_jk[] = {3,4,4,6}; /* initial value for jk */ + +/* + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi + * + * integer array, contains the (24*i)-th to (24*i+23)-th + * bit of 2/pi after binary point. The corresponding + * floating value is + * + * ipio2[i] * 2^(-24(i+1)). + * + * NB: This table must have at least (e0-3)/24 + jk terms. + * For quad precision (e0 <= 16360, jk = 6), this is 686. + */ +static const int32_t ipio2[] = { +0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, +0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, +0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, +0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, +0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, +0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, +0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, +0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, +0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, +0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, +0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + +#if LDBL_MAX_EXP > 1024 +#if LDBL_MAX_EXP > 16384 +#error "ipio2 table needs to be expanded" +#endif +0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, +0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, +0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, +0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, +0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, +0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, +0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, +0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, +0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, +0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, +0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, +0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, +0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, +0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, +0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, +0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, +0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, +0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, +0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, +0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, +0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, +0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, +0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, +0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, +0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, +0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, +0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, +0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, +0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, +0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, +0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, +0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, +0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, +0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, +0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, +0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, +0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, +0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, +0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, +0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, +0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, +0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, +0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, +0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, +0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, +0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, +0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, +0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, +0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, +0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, +0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, +0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, +0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, +0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, +0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, +0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, +0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, +0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, +0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, +0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, +0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, +0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, +0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, +0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, +0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, +0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, +0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, +0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, +0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, +0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, +0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, +0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, +0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, +0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, +0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, +0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, +0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, +0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, +0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, +0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, +0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, +0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, +0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, +0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, +0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, +0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, +0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, +0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, +0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, +0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, +0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, +0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, +0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, +0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, +0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, +0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, +0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, +0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, +0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, +0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, +0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, +0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, +0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, +0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +#endif + +}; static const double PIo2[] = { 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ @@ -155,7 +297,7 @@ two24 = 1.67772160000000000000e+07, / twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ int -__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2) +__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec) { int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; double z,fw,f[20],fq[20],q[20]; @@ -281,6 +423,7 @@ recompute: case 2: fw = 0.0; for (i=jz;i>=0;i--) fw += fq[i]; + STRICT_ASSIGN(double,fw,fw); y[0] = (ih==0)? fw: -fw; fw = fq[0]-fw; for (i=1;i<=jz;i++) fw += fq[i]; Index: src/lib/libm/src/math_private.h diff -u src/lib/libm/src/math_private.h:1.24 src/lib/libm/src/math_private.h:1.25 --- src/lib/libm/src/math_private.h:1.24 Tue Apr 19 16:32:16 2022 +++ src/lib/libm/src/math_private.h Wed Aug 24 09:51:19 2022 @@ -11,7 +11,7 @@ /* * from: @(#)fdlibm.h 5.1 93/09/24 - * $NetBSD: math_private.h,v 1.24 2022/04/19 20:32:16 rillig Exp $ + * $NetBSD: math_private.h,v 1.25 2022/08/24 13:51:19 christos Exp $ */ #ifndef _MATH_PRIVATE_H_ @@ -311,7 +311,7 @@ extern double __kernel_standard __P((dou extern double __kernel_sin __P((double,double,int)); extern double __kernel_cos __P((double,double)); extern double __kernel_tan __P((double,double,int)); -extern int __kernel_rem_pio2 __P((double*,double*,int,int,int,const int32_t*)); +extern int __kernel_rem_pio2 __P((double*,double*,int,int,int)); /* ieee style elementary float functions */ @@ -383,4 +383,114 @@ struct Double { double __exp__D(double, double); struct Double __log__D(double); +/* + * The rnint() family rounds to the nearest integer for a restricted range + * range of args (up to about 2**MANT_DIG). We assume that the current + * rounding mode is FE_TONEAREST so that this can be done efficiently. + * Extra precision causes more problems in practice, and we only centralize + * this here to reduce those problems, and have not solved the efficiency + * problems. The exp2() family uses a more delicate version of this that + * requires extracting bits from the intermediate value, so it is not + * centralized here and should copy any solution of the efficiency problems. + */ + +static inline double +rnint(double x) +{ + /* + * This casts to double to kill any extra precision. This depends + * on the cast being applied to a double_t to avoid compiler bugs + * (this is a cleaner version of STRICT_ASSIGN()). This is + * inefficient if there actually is extra precision, but is hard + * to improve on. We use double_t in the API to minimise conversions + * for just calling here. Note that we cannot easily change the + * magic number to the one that works directly with double_t, since + * the rounding precision is variable at runtime on x86 so the + * magic number would need to be variable. Assuming that the + * rounding precision is always the default is too fragile. This + * and many other complications will move when the default is + * changed to FP_PE. + */ + return ((double)(x + 0x1.8p52) - 0x1.8p52); +} + +static inline float +rnintf(float x) +{ + /* + * As for rnint(), except we could just call that to handle the + * extra precision case, usually without losing efficiency. + */ + return ((float)(x + 0x1.8p23F) - 0x1.8p23F); +} + +#ifdef LDBL_MANT_DIG +/* + * The complications for extra precision are smaller for rnintl() since it + * can safely assume that the rounding precision has been increased from + * its default to FP_PE on x86. We don't exploit that here to get small + * optimizations from limiting the rangle to double. We just need it for + * the magic number to work with long doubles. ld128 callers should use + * rnint() instead of this if possible. ld80 callers should prefer + * rnintl() since for amd64 this avoids swapping the register set, while + * for i386 it makes no difference (assuming FP_PE), and for other arches + * it makes little difference. + */ +static inline long double +rnintl(long double x) +{ + return (x + ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2 - + ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2); +} +#endif /* LDBL_MANT_DIG */ + +/* + * irint() and i64rint() give the same result as casting to their integer + * return type provided their arg is a floating point integer. They can + * sometimes be more efficient because no rounding is required. + */ +#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM) +#define irint(x) \ + (sizeof(x) == sizeof(float) && \ + sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ + sizeof(x) == sizeof(double) && \ + sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ + sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) +#else +#define irint(x) ((int)(x)) +#endif + +#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ + +#if defined(__i386__) && defined(__GNUCLIKE_ASM) +static __inline int +irintf(float x) +{ + int n; + + __asm("fistl %0" : "=m" (n) : "t" (x)); + return (n); +} + +static __inline int +irintd(double x) +{ + int n; + + __asm("fistl %0" : "=m" (n) : "t" (x)); + return (n); +} +#endif + +#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM) +static __inline int +irintl(long double x) +{ + int n; + + __asm("fistl %0" : "=m" (n) : "t" (x)); + return (n); +} +#endif + #endif /* _MATH_PRIVATE_H_ */