[Patch 14/17] [libgcc, ARM] Generalise float-to-half conversion function.

James Greenhalgh Fri, 11 Nov 2016 07:43:40 -0800

Hi,

I'm adapting this patch from work started by Matthew Wahab.


Conversions from double precision floats to the ARM __fp16 are required
to round only once. A conversion function from double to __fp16 is needed
to support this on soft-fp targets. This and the following patch add this
conversion function by reusing the existing float to __fp16 function
config/arm/fp16.c:__gnu_f2h_internal.

This patch generalizes __gnu_f2h_internal by adding a specification of
the source format and reworking the code to make use of it. Initially,
only the binary32 format is supported.

A previous version of this patch had a bug in its handling of rounding; the
update in this patch should be sufficient to fix the bug,

replacing:

>   else
>     mask = 0x00001fff;

With:

     mask = (point - 1) >> 10;

I've tested that fix throwing semi-random bit-patterns at the conversion
function to confirm that the software implementation now matches the
hardware behaviour for this routine.

Additionally, bootstrapped again, and cross-tested with no issues.

OK?

Thanks,
James

----

libgcc/

2016-11-09  James Greenhalgh  <[email protected]>
            Matthew Wahab  <[email protected]>

        * config/arm/fp16.c (struct format): New.
        (binary32): New.
        (__gnu_float2h_internal): New.  Body moved from
        __gnu_f2h_internal and generalized.
        (__gnu_f2h_internal): Move body to function __gnu_float2h_internal.
        Call it with binary32.

diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c
index 39c863c..ba89796 100644
--- a/libgcc/config/arm/fp16.c
+++ b/libgcc/config/arm/fp16.c
@@ -22,40 +22,74 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+struct format
+{
+  /* Number of bits.  */
+  unsigned long long size;
+  /* Exponent bias.  */
+  unsigned long long bias;
+  /* Exponent width in bits.  */
+  unsigned long long exponent;
+  /* Significand precision in explicitly stored bits.  */
+  unsigned long long significand;
+};
+
+static const struct format
+binary32 =
+{
+  32,   /* size.  */
+  127,  /* bias.  */
+  8,    /* exponent.  */
+  23    /* significand.  */
+};
+
 static inline unsigned short
-__gnu_f2h_internal(unsigned int a, int ieee)
+__gnu_float2h_internal (const struct format* fmt,
+			unsigned long long a, int ieee)
 {
-  unsigned short sign = (a >> 16) & 0x8000;
-  int aexp = (a >> 23) & 0xff;
-  unsigned int mantissa = a & 0x007fffff;
-  unsigned int mask;
-  unsigned int increment;
+  unsigned long long point = 1ULL << fmt->significand;;
+  unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
+  int aexp;
+  unsigned long long mantissa;
+  unsigned long long mask;
+  unsigned long long increment;
+
+  /* Get the exponent and mantissa encodings.  */
+  mantissa = a & (point - 1);
+
+  mask = (1 << fmt->exponent) - 1;
+  aexp = (a >> fmt->significand) & mask;
 
-  if (aexp == 0xff)
+  /* Infinity, NaN and alternative format special case.  */
+  if (((unsigned int) aexp) == mask)
     {
       if (!ieee)
 	return sign;
       if (mantissa == 0)
 	return sign | 0x7c00;	/* Infinity.  */
       /* Remaining cases are NaNs.  Convert SNaN to QNaN.  */
-      return sign | 0x7e00 | (mantissa >> 13);
+      return sign | 0x7e00 | (mantissa >> (fmt->significand - 10));
     }
 
+  /* Zero.  */
   if (aexp == 0 && mantissa == 0)
     return sign;
 
-  aexp -= 127;
+  /* Construct the exponent and mantissa.  */
+  aexp -= fmt->bias;
+
+  /* Decimal point is immediately after the significand.  */
+  mantissa |= point;
 
-  /* Decimal point between bits 22 and 23.  */
-  mantissa |= 0x00800000;
   if (aexp < -14)
     {
-      mask = 0x00ffffff;
+      mask = point | (point - 1);
+      /* Minimum exponent for half-precision is 2^-24.  */
       if (aexp >= -25)
 	mask >>= 25 + aexp;
     }
   else
-    mask = 0x00001fff;
+    mask = (point - 1) >> 10;
 
   /* Round.  */
   if (mantissa & mask)
@@ -64,8 +98,8 @@ __gnu_f2h_internal(unsigned int a, int ieee)
       if ((mantissa & mask) == increment)
 	increment = mantissa & (increment << 1);
       mantissa += increment;
-      if (mantissa >= 0x01000000)
-       	{
+      if (mantissa >= (point << 1))
+	{
 	  mantissa >>= 1;
 	  aexp++;
 	}
@@ -93,7 +127,13 @@ __gnu_f2h_internal(unsigned int a, int ieee)
 
   /* We leave the leading 1 in the mantissa, and subtract one
      from the exponent bias to compensate.  */
-  return sign | (((aexp + 14) << 10) + (mantissa >> 13));
+  return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
+}
+
+static inline unsigned short
+__gnu_f2h_internal (unsigned int a, int ieee)
+{
+  return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee);
 }
 
 unsigned int

[Patch 14/17] [libgcc, ARM] Generalise float-to-half conversion function.

Reply via email to