Hi, I'm adapting this patch from work started by Matthew Wahab.
Conversions from double precision floats to the ARM __fp16 are required to round only once. A conversion function for double to __fp16 is required to support this on soft-fp targets. This and the following patch add this conversion function by reusing the existing float to __fp16 function config/arm/fp16.c:__gnu_f2h_internal. This patch generalizes __gnu_f2h_internal by adding a specification of the source format and reworking the code to make use of it. Initially, only the binary32 format is supported. A previous version of this patch had a bug handling rounding; the update in this patch should be sufficient to fix the bug, replacing: > else > mask = 0x00001fff; With: mask = (point - 1) >> 10; I've tested that fix by throwing semi-random bit-patterns at the conversion function to confirm that the software implementation now matches the hardware behaviour for this routine. Additionally, bootstrapped again, and cross-tested with no issues. OK? Thanks, James ---- libgcc/ 2016-11-09 James Greenhalgh <james.greenha...@arm.com> Matthew Wahab <matthew.wa...@arm.com> * config/arm/fp16.c (struct format): New. (binary32): New. (__gnu_float2h_internal): New. Body moved from __gnu_f2h_internal and generalized. (__gnu_f2h_internal): Move body to function __gnu_float2h_internal. Call it with binary32.
diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c index 39c863c..ba89796 100644 --- a/libgcc/config/arm/fp16.c +++ b/libgcc/config/arm/fp16.c @@ -22,40 +22,74 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ +struct format +{ + /* Number of bits. */ + unsigned long long size; + /* Exponent bias. */ + unsigned long long bias; + /* Exponent width in bits. */ + unsigned long long exponent; + /* Significand precision in explicitly stored bits. */ + unsigned long long significand; +}; + +static const struct format +binary32 = +{ + 32, /* size. */ + 127, /* bias. */ + 8, /* exponent. */ + 23 /* significand. */ +}; + static inline unsigned short -__gnu_f2h_internal(unsigned int a, int ieee) +__gnu_float2h_internal (const struct format* fmt, + unsigned long long a, int ieee) { - unsigned short sign = (a >> 16) & 0x8000; - int aexp = (a >> 23) & 0xff; - unsigned int mantissa = a & 0x007fffff; - unsigned int mask; - unsigned int increment; + unsigned long long point = 1ULL << fmt->significand;; + unsigned short sign = (a >> (fmt->size - 16)) & 0x8000; + int aexp; + unsigned long long mantissa; + unsigned long long mask; + unsigned long long increment; + + /* Get the exponent and mantissa encodings. */ + mantissa = a & (point - 1); + + mask = (1 << fmt->exponent) - 1; + aexp = (a >> fmt->significand) & mask; - if (aexp == 0xff) + /* Infinity, NaN and alternative format special case. */ + if (((unsigned int) aexp) == mask) { if (!ieee) return sign; if (mantissa == 0) return sign | 0x7c00; /* Infinity. */ /* Remaining cases are NaNs. Convert SNaN to QNaN. */ - return sign | 0x7e00 | (mantissa >> 13); + return sign | 0x7e00 | (mantissa >> (fmt->significand - 10)); } + /* Zero. */ if (aexp == 0 && mantissa == 0) return sign; - aexp -= 127; + /* Construct the exponent and mantissa. */ + aexp -= fmt->bias; + + /* Decimal point is immediately after the significand. 
*/ + mantissa |= point; - /* Decimal point between bits 22 and 23. */ - mantissa |= 0x00800000; if (aexp < -14) { - mask = 0x00ffffff; + mask = point | (point - 1); + /* Minimum exponent for half-precision is 2^-24. */ if (aexp >= -25) mask >>= 25 + aexp; } else - mask = 0x00001fff; + mask = (point - 1) >> 10; /* Round. */ if (mantissa & mask) @@ -64,8 +98,8 @@ __gnu_f2h_internal(unsigned int a, int ieee) if ((mantissa & mask) == increment) increment = mantissa & (increment << 1); mantissa += increment; - if (mantissa >= 0x01000000) - { + if (mantissa >= (point << 1)) + { mantissa >>= 1; aexp++; } @@ -93,7 +127,13 @@ __gnu_f2h_internal(unsigned int a, int ieee) /* We leave the leading 1 in the mantissa, and subtract one from the exponent bias to compensate. */ - return sign | (((aexp + 14) << 10) + (mantissa >> 13)); + return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10))); +} + +static inline unsigned short +__gnu_f2h_internal (unsigned int a, int ieee) +{ + return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee); } unsigned int