On 01/24/2018 10:13 AM, Alex Bennée wrote: > We can now add float16_add/sub and use the common decompose and > canonicalize functions to have a single implementation for > float16/32/64 add and sub functions. > > Signed-off-by: Alex Bennée <alex.ben...@linaro.org> > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > > --- > > v3 > - renames for FloatParts > - shorten internal names > - fix comment style > - use FloatFmt values for for extract/deposit in raw_pack/unpack > - move #include bitops from previous patch > - add is_nan helper > - avoid temp variables where we can > --- > fpu/softfloat.c | 884 > +++++++++++++++++++++++++----------------------- > include/fpu/softfloat.h | 4 + > 2 files changed, 461 insertions(+), 427 deletions(-) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 568d555595..b179790889 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -83,6 +83,7 @@ this code that are retained. > * target-dependent and needs the TARGET_* macros. > */ > #include "qemu/osdep.h" > +#include "qemu/bitops.h" > #include "fpu/softfloat.h" > > /* We only need stdlib for abort() */ > @@ -270,6 +271,462 @@ static const FloatFmt float64_params = { > FLOAT_PARAMS(11, 52) > }; > > +/* Unpack a float to parts, but do not canonicalize. */ > +static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw) > +{ > + const int sign_pos = fmt.frac_size + fmt.exp_size; > + > + return (FloatParts) { > + .cls = float_class_unclassified, > + .sign = extract64(raw, sign_pos, 1), > + .exp = extract64(raw, fmt.frac_size, fmt.exp_size), > + .frac = extract64(raw, 0, fmt.frac_size), > + }; > +} > + > +static inline FloatParts float16_unpack_raw(float16 f) > +{ > + return unpack_raw(float16_params, f); > +} > + > +static inline FloatParts float32_unpack_raw(float32 f) > +{ > + return unpack_raw(float32_params, f); > +} > + > +static inline FloatParts float64_unpack_raw(float64 f) > +{ > + return unpack_raw(float64_params, f); > +} > + > +/* Pack a float from parts, but do not canonicalize. */ > +static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p) > +{ > + const int sign_pos = fmt.frac_size + fmt.exp_size; > + uint64_t ret = p.frac;
saving 1 line: uint64_t ret = deposit64(p.frac, sign_pos, 1, p.sign); and then eventually: return deposit64(ret, fmt.frac_size, fmt.exp_size, p.exp); > + > + ret = deposit64(ret, fmt.frac_size, fmt.exp_size, p.exp); > + ret = deposit64(ret, sign_pos, 1, p.sign); > + return ret; > +} > + > +static inline float16 float16_pack_raw(FloatParts p) > +{ > + return make_float16(pack_raw(float16_params, p)); > +} > + > +static inline float32 float32_pack_raw(FloatParts p) > +{ > + return make_float32(pack_raw(float32_params, p)); > +} > + > +static inline float64 float64_pack_raw(FloatParts p) > +{ > + return make_float64(pack_raw(float64_params, p)); > +} > + > +/* Canonicalize EXP and FRAC, setting CLS. */ > +static FloatParts canonicalize(FloatParts part, const FloatFmt *parm, > + float_status *status) > +{ > + if (part.exp == parm->exp_max) { > + if (part.frac == 0) { > + part.cls = float_class_inf; > + } else { > +#ifdef NO_SIGNALING_NANS > + part.cls = float_class_qnan; > +#else > + int64_t msb = part.frac << (parm->frac_shift + 2); > + if ((msb < 0) == status->snan_bit_is_one) { > + part.cls = float_class_snan; > + } else { > + part.cls = float_class_qnan; > + } > +#endif > + } > + } else if (part.exp == 0) { > + if (likely(part.frac == 0)) { > + part.cls = float_class_zero; > + } else if (status->flush_inputs_to_zero) { > + float_raise(float_flag_input_denormal, status); > + part.cls = float_class_zero; > + part.frac = 0; > + } else { > + int shift = clz64(part.frac) - 1; > + part.cls = float_class_normal; > + part.exp = parm->frac_shift - parm->exp_bias - shift + 1; > + part.frac <<= shift; > + } > + } else { > + part.cls = float_class_normal; > + part.exp -= parm->exp_bias; > + part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << > parm->frac_shift); > + } > + return part; > +} > + > +/* Round and uncanonicalize a floating-point number by parts. There > + * are FRAC_SHIFT bits that may require rounding at the bottom of the > + * fraction; these bits will be removed. The exponent will be biased > + * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. > + */ > + > +static FloatParts round_canonical(FloatParts p, float_status *s, > + const FloatFmt *parm) > +{ > + const uint64_t frac_lsbm1 = parm->frac_lsbm1; > + const uint64_t round_mask = parm->round_mask; > + const uint64_t roundeven_mask = parm->roundeven_mask; > + const int exp_max = parm->exp_max; > + const int frac_shift = parm->frac_shift; > + uint64_t frac, inc; > + int exp, flags = 0; > + bool overflow_norm; > + > + frac = p.frac; > + exp = p.exp; > + > + switch (p.cls) { > + case float_class_normal: > + switch (s->float_rounding_mode) { > + case float_round_nearest_even: > + overflow_norm = false; > + inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); > + break; > + case float_round_ties_away: > + overflow_norm = false; > + inc = frac_lsbm1; > + break; > + case float_round_to_zero: > + overflow_norm = true; > + inc = 0; > + break; > + case float_round_up: > + inc = p.sign ? 0 : round_mask; > + overflow_norm = p.sign; > + break; > + case float_round_down: > + inc = p.sign ? round_mask : 0; > + overflow_norm = !p.sign; > + break; > + default: > + g_assert_not_reached(); > + } > + > + exp += parm->exp_bias; > + if (likely(exp > 0)) { > + if (frac & round_mask) { > + flags |= float_flag_inexact; > + frac += inc; > + if (frac & DECOMPOSED_OVERFLOW_BIT) { > + frac >>= 1; > + exp++; > + } > + } > + frac >>= frac_shift; > + > + if (unlikely(exp >= exp_max)) { > + flags |= float_flag_overflow | float_flag_inexact; > + if (overflow_norm) { > + exp = exp_max - 1; > + frac = -1; > + } else { > + p.cls = float_class_inf; > + goto do_inf; > + } > + } > + } else if (s->flush_to_zero) { > + flags |= float_flag_output_denormal; > + p.cls = float_class_zero; > + goto do_zero; > + } else { > + bool is_tiny = (s->float_detect_tininess > + == float_tininess_before_rounding) > + || (exp < 0) > + || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); > + > + shift64RightJamming(frac, 1 - exp, &frac); > + if (frac & round_mask) { > + /* Need to recompute round-to-even. */ > + if (s->float_rounding_mode == float_round_nearest_even) { > + inc = ((frac & roundeven_mask) != frac_lsbm1 > + ? frac_lsbm1 : 0); > + } > + flags |= float_flag_inexact; > + frac += inc; > + } > + > + exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0); > + frac >>= frac_shift; > + > + if (is_tiny && (flags & float_flag_inexact)) { > + flags |= float_flag_underflow; > + } > + if (exp == 0 && frac == 0) { > + p.cls = float_class_zero; > + } > + } > + break; > + > + case float_class_zero: > + do_zero: > + exp = 0; > + frac = 0; > + break; > + > + case float_class_inf: > + do_inf: > + exp = exp_max; > + frac = 0; > + break; > + > + case float_class_qnan: > + case float_class_snan: > + exp = exp_max; > + break; > + > + default: > + g_assert_not_reached(); > + } > + > + float_raise(flags, s); > + p.exp = exp; > + p.frac = frac; > + return p; > +} > + > +static FloatParts float16_unpack_canonical(float16 f, float_status *s) > +{ > + return canonicalize(float16_unpack_raw(f), &float16_params, s); > +} > + > +static float16 float16_round_pack_canonical(FloatParts p, float_status *s) > +{ > + switch (p.cls) { > + case float_class_dnan: > + return float16_default_nan(s); > + case float_class_msnan: > + return float16_maybe_silence_nan(float16_pack_raw(p), s); > + default: > + p = round_canonical(p, s, &float16_params); > + return float16_pack_raw(p); > + } > +} > + > +static FloatParts float32_unpack_canonical(float32 f, float_status *s) > +{ > + return canonicalize(float32_unpack_raw(f), &float32_params, s); > +} > + > +static float32 float32_round_pack_canonical(FloatParts p, float_status *s) > +{ > + switch (p.cls) { > + case float_class_dnan: > + return float32_default_nan(s); > + case float_class_msnan: > + return float32_maybe_silence_nan(float32_pack_raw(p), s); > + default: > + p = round_canonical(p, s, &float32_params); > + return float32_pack_raw(p); > + } > +} > + > +static FloatParts float64_unpack_canonical(float64 f, float_status *s) > +{ > + return canonicalize(float64_unpack_raw(f), &float64_params, s); > +} > + > +static float64 float64_round_pack_canonical(FloatParts p, float_status *s) > +{ > + switch (p.cls) { > + case float_class_dnan: > + return float64_default_nan(s); > + case float_class_msnan: > + return float64_maybe_silence_nan(float64_pack_raw(p), s); > + default: > + p = round_canonical(p, s, &float64_params); > + return float64_pack_raw(p); > + } > +} > + > +static FloatParts pick_nan_parts(FloatParts a, FloatParts b, float_status *s) > +{ > + if (a.cls == float_class_snan || b.cls == float_class_snan) { > + s->float_exception_flags |= float_flag_invalid; > + } > + > + if (s->default_nan_mode) { > + a.cls = float_class_dnan; > + } else { > + if (pickNaN(a.cls == float_class_qnan, > + a.cls == float_class_snan, > + b.cls == float_class_qnan, > + b.cls == float_class_snan, > + a.frac > b.frac > + || (a.frac == b.frac && a.sign < b.sign))) { > + a = b; > + } > + a.cls = float_class_msnan; > + } > + return a; > +} > + > +/* Simple helper for checking if FloatClass is any NaN */ > + > +static bool is_nan(FloatClass c) > +{ > + return c >= float_class_qnan; > +} > + > +/* > + * Returns the result of adding or subtracting the values of the > + * floating-point values `a' and `b'. The operation is performed > + * according to the IEC/IEEE Standard for Binary Floating-Point > + * Arithmetic. > + */ > + > +static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract, > + float_status *s) > +{ > + bool a_sign = a.sign; > + bool b_sign = b.sign ^ subtract; > + > + if (a_sign != b_sign) { > + /* Subtraction */ > + > + if (a.cls == float_class_normal && b.cls == float_class_normal) { > + if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) { > + shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); > + a.frac = a.frac - b.frac; > + } else { > + shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); > + a.frac = b.frac - a.frac; > + a.exp = b.exp; > + a_sign ^= 1; > + } > + > + if (a.frac == 0) { > + a.cls = float_class_zero; > + a.sign = s->float_rounding_mode == float_round_down; > + } else { > + int shift = clz64(a.frac) - 1; > + a.frac = a.frac << shift; > + a.exp = a.exp - shift; > + a.sign = a_sign; > + } > + return a; > + } > + if (is_nan(a.cls) || is_nan(b.cls)) { > + return pick_nan_parts(a, b, s); > + } > + if (a.cls == float_class_inf) { > + if (b.cls == float_class_inf) { > + float_raise(float_flag_invalid, s); > + a.cls = float_class_dnan; > + } > + return a; > + } > + if (a.cls == float_class_zero && b.cls == float_class_zero) { > + a.sign = s->float_rounding_mode == float_round_down; > + return a; > + } > + if (a.cls == float_class_zero || b.cls == float_class_inf) { > + b.sign = a_sign ^ 1; > + return b; > + } > + if (b.cls == float_class_zero) { > + return a; > + } > + } else { > + /* Addition */ > + if (a.cls == float_class_normal && b.cls == float_class_normal) { > + if (a.exp > b.exp) { > + shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); > + } else if (a.exp < b.exp) { > + shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); > + a.exp = b.exp; > + } > + a.frac += b.frac; > + if (a.frac & DECOMPOSED_OVERFLOW_BIT) { > + a.frac >>= 1; > + a.exp += 1; > + } > + return a; > + } > + if (is_nan(a.cls) || is_nan(b.cls)) { > + return pick_nan_parts(a, b, s); > + } > + if (a.cls == float_class_inf || b.cls == float_class_zero) { > + return a; > + } > + if (b.cls == float_class_inf || a.cls == float_class_zero) { > + b.sign = b_sign; > + return b; > + } > + } > + g_assert_not_reached(); > +} > + > +/* > + * Returns the result of adding or subtracting the floating-point > + * values `a' and `b'. The operation is performed according to the > + * IEC/IEEE Standard for Binary Floating-Point Arithmetic. > + */ > + > +float16 float16_add(float16 a, float16 b, float_status *status) > +{ > + FloatParts pa = float16_unpack_canonical(a, status); > + FloatParts pb = float16_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, false, status); > + > + return float16_round_pack_canonical(pr, status); > +} > + > +float32 float32_add(float32 a, float32 b, float_status *status) > +{ > + FloatParts pa = float32_unpack_canonical(a, status); > + FloatParts pb = float32_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, false, status); > + > + return float32_round_pack_canonical(pr, status); > +} > + > +float64 float64_add(float64 a, float64 b, float_status *status) > +{ > + FloatParts pa = float64_unpack_canonical(a, status); > + FloatParts pb = float64_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, false, status); > + > + return float64_round_pack_canonical(pr, status); > +} > + > +float16 float16_sub(float16 a, float16 b, float_status *status) > +{ > + FloatParts pa = float16_unpack_canonical(a, status); > + FloatParts pb = float16_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, true, status); > + > + return float16_round_pack_canonical(pr, status); > +} > + > +float32 float32_sub(float32 a, float32 b, float_status *status) > +{ > + FloatParts pa = float32_unpack_canonical(a, status); > + FloatParts pb = float32_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, true, status); > + > + return float32_round_pack_canonical(pr, status); > +} > + > +float64 float64_sub(float64 a, float64 b, float_status *status) > +{ > + FloatParts pa = float64_unpack_canonical(a, status); > + FloatParts pb = float64_unpack_canonical(b, status); > + FloatParts pr = addsub_floats(pa, pb, true, status); > + > + return float64_round_pack_canonical(pr, status); > +} > + > > /*---------------------------------------------------------------------------- > | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 > | and 7, and returns the properly rounded 32-bit integer corresponding to the > @@ -2081,220 +2538,6 @@ float32 float32_round_to_int(float32 a, float_status > *status) > > } > > -/*---------------------------------------------------------------------------- > -| Returns the result of adding the absolute values of the single-precision > -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated > -| before being returned. `zSign' is ignored if the result is a NaN. > -| The addition is performed according to the IEC/IEEE Standard for Binary > -| Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -static float32 addFloat32Sigs(float32 a, float32 b, flag zSign, > - float_status *status) > -{ > - int aExp, bExp, zExp; > - uint32_t aSig, bSig, zSig; > - int expDiff; > - > - aSig = extractFloat32Frac( a ); > - aExp = extractFloat32Exp( a ); > - bSig = extractFloat32Frac( b ); > - bExp = extractFloat32Exp( b ); > - expDiff = aExp - bExp; > - aSig <<= 6; > - bSig <<= 6; > - if ( 0 < expDiff ) { > - if ( aExp == 0xFF ) { > - if (aSig) { > - return propagateFloat32NaN(a, b, status); > - } > - return a; > - } > - if ( bExp == 0 ) { > - --expDiff; > - } > - else { > - bSig |= 0x20000000; > - } > - shift32RightJamming( bSig, expDiff, &bSig ); > - zExp = aExp; > - } > - else if ( expDiff < 0 ) { > - if ( bExp == 0xFF ) { > - if (bSig) { > - return propagateFloat32NaN(a, b, status); > - } > - return packFloat32( zSign, 0xFF, 0 ); > - } > - if ( aExp == 0 ) { > - ++expDiff; > - } > - else { > - aSig |= 0x20000000; > - } > - shift32RightJamming( aSig, - expDiff, &aSig ); > - zExp = bExp; > - } > - else { > - if ( aExp == 0xFF ) { > - if (aSig | bSig) { > - return propagateFloat32NaN(a, b, status); > - } > - return a; > - } > - if ( aExp == 0 ) { > - if (status->flush_to_zero) { > - if (aSig | bSig) { > - float_raise(float_flag_output_denormal, status); > - } > - return packFloat32(zSign, 0, 0); > - } > - return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); > - } > - zSig = 0x40000000 + aSig + bSig; > - zExp = aExp; > - goto roundAndPack; > - } > - aSig |= 0x20000000; > - zSig = ( aSig + bSig )<<1; > - --zExp; > - if ( (int32_t) zSig < 0 ) { > - zSig = aSig + bSig; > - ++zExp; > - } > - roundAndPack: > - return roundAndPackFloat32(zSign, zExp, zSig, status); > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of subtracting the absolute values of the single- > -| precision floating-point values `a' and `b'. If `zSign' is 1, the > -| difference is negated before being returned. `zSign' is ignored if the > -| result is a NaN. The subtraction is performed according to the IEC/IEEE > -| Standard for Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -static float32 subFloat32Sigs(float32 a, float32 b, flag zSign, > - float_status *status) > -{ > - int aExp, bExp, zExp; > - uint32_t aSig, bSig, zSig; > - int expDiff; > - > - aSig = extractFloat32Frac( a ); > - aExp = extractFloat32Exp( a ); > - bSig = extractFloat32Frac( b ); > - bExp = extractFloat32Exp( b ); > - expDiff = aExp - bExp; > - aSig <<= 7; > - bSig <<= 7; > - if ( 0 < expDiff ) goto aExpBigger; > - if ( expDiff < 0 ) goto bExpBigger; > - if ( aExp == 0xFF ) { > - if (aSig | bSig) { > - return propagateFloat32NaN(a, b, status); > - } > - float_raise(float_flag_invalid, status); > - return float32_default_nan(status); > - } > - if ( aExp == 0 ) { > - aExp = 1; > - bExp = 1; > - } > - if ( bSig < aSig ) goto aBigger; > - if ( aSig < bSig ) goto bBigger; > - return packFloat32(status->float_rounding_mode == float_round_down, 0, > 0); > - bExpBigger: > - if ( bExp == 0xFF ) { > - if (bSig) { > - return propagateFloat32NaN(a, b, status); > - } > - return packFloat32( zSign ^ 1, 0xFF, 0 ); > - } > - if ( aExp == 0 ) { > - ++expDiff; > - } > - else { > - aSig |= 0x40000000; > - } > - shift32RightJamming( aSig, - expDiff, &aSig ); > - bSig |= 0x40000000; > - bBigger: > - zSig = bSig - aSig; > - zExp = bExp; > - zSign ^= 1; > - goto normalizeRoundAndPack; > - aExpBigger: > - if ( aExp == 0xFF ) { > - if (aSig) { > - return propagateFloat32NaN(a, b, status); > - } > - return a; > - } > - if ( bExp == 0 ) { > - --expDiff; > - } > - else { > - bSig |= 0x40000000; > - } > - shift32RightJamming( bSig, expDiff, &bSig ); > - aSig |= 0x40000000; > - aBigger: > - zSig = aSig - bSig; > - zExp = aExp; > - normalizeRoundAndPack: > - --zExp; > - return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status); > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of adding the single-precision floating-point values `a' > -| and `b'. The operation is performed according to the IEC/IEEE Standard for > -| Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -float32 float32_add(float32 a, float32 b, float_status *status) > -{ > - flag aSign, bSign; > - a = float32_squash_input_denormal(a, status); > - b = float32_squash_input_denormal(b, status); > - > - aSign = extractFloat32Sign( a ); > - bSign = extractFloat32Sign( b ); > - if ( aSign == bSign ) { > - return addFloat32Sigs(a, b, aSign, status); > - } > - else { > - return subFloat32Sigs(a, b, aSign, status); > - } > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of subtracting the single-precision floating-point > values > -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard > -| for Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -float32 float32_sub(float32 a, float32 b, float_status *status) > -{ > - flag aSign, bSign; > - a = float32_squash_input_denormal(a, status); > - b = float32_squash_input_denormal(b, status); > - > - aSign = extractFloat32Sign( a ); > - bSign = extractFloat32Sign( b ); > - if ( aSign == bSign ) { > - return subFloat32Sigs(a, b, aSign, status); > - } > - else { > - return addFloat32Sigs(a, b, aSign, status); > - } > - > -} > - > > /*---------------------------------------------------------------------------- > | Returns the result of multiplying the single-precision floating-point > values > | `a' and `b'. The operation is performed according to the IEC/IEEE Standard > @@ -3891,219 +4134,6 @@ float64 float64_trunc_to_int(float64 a, float_status > *status) > return res; > } > > -/*---------------------------------------------------------------------------- > -| Returns the result of adding the absolute values of the double-precision > -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated > -| before being returned. `zSign' is ignored if the result is a NaN. > -| The addition is performed according to the IEC/IEEE Standard for Binary > -| Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -static float64 addFloat64Sigs(float64 a, float64 b, flag zSign, > - float_status *status) > -{ > - int aExp, bExp, zExp; > - uint64_t aSig, bSig, zSig; > - int expDiff; > - > - aSig = extractFloat64Frac( a ); > - aExp = extractFloat64Exp( a ); > - bSig = extractFloat64Frac( b ); > - bExp = extractFloat64Exp( b ); > - expDiff = aExp - bExp; > - aSig <<= 9; > - bSig <<= 9; > - if ( 0 < expDiff ) { > - if ( aExp == 0x7FF ) { > - if (aSig) { > - return propagateFloat64NaN(a, b, status); > - } > - return a; > - } > - if ( bExp == 0 ) { > - --expDiff; > - } > - else { > - bSig |= LIT64( 0x2000000000000000 ); > - } > - shift64RightJamming( bSig, expDiff, &bSig ); > - zExp = aExp; > - } > - else if ( expDiff < 0 ) { > - if ( bExp == 0x7FF ) { > - if (bSig) { > - return propagateFloat64NaN(a, b, status); > - } > - return packFloat64( zSign, 0x7FF, 0 ); > - } > - if ( aExp == 0 ) { > - ++expDiff; > - } > - else { > - aSig |= LIT64( 0x2000000000000000 ); > - } > - shift64RightJamming( aSig, - expDiff, &aSig ); > - zExp = bExp; > - } > - else { > - if ( aExp == 0x7FF ) { > - if (aSig | bSig) { > - return propagateFloat64NaN(a, b, status); > - } > - return a; > - } > - if ( aExp == 0 ) { > - if (status->flush_to_zero) { > - if (aSig | bSig) { > - float_raise(float_flag_output_denormal, status); > - } > - return packFloat64(zSign, 0, 0); > - } > - return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); > - } > - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; > - zExp = aExp; > - goto roundAndPack; > - } > - aSig |= LIT64( 0x2000000000000000 ); > - zSig = ( aSig + bSig )<<1; > - --zExp; > - if ( (int64_t) zSig < 0 ) { > - zSig = aSig + bSig; > - ++zExp; > - } > - roundAndPack: > - return roundAndPackFloat64(zSign, zExp, zSig, status); > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of subtracting the absolute values of the double- > -| precision floating-point values `a' and `b'. If `zSign' is 1, the > -| difference is negated before being returned. `zSign' is ignored if the > -| result is a NaN. The subtraction is performed according to the IEC/IEEE > -| Standard for Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -static float64 subFloat64Sigs(float64 a, float64 b, flag zSign, > - float_status *status) > -{ > - int aExp, bExp, zExp; > - uint64_t aSig, bSig, zSig; > - int expDiff; > - > - aSig = extractFloat64Frac( a ); > - aExp = extractFloat64Exp( a ); > - bSig = extractFloat64Frac( b ); > - bExp = extractFloat64Exp( b ); > - expDiff = aExp - bExp; > - aSig <<= 10; > - bSig <<= 10; > - if ( 0 < expDiff ) goto aExpBigger; > - if ( expDiff < 0 ) goto bExpBigger; > - if ( aExp == 0x7FF ) { > - if (aSig | bSig) { > - return propagateFloat64NaN(a, b, status); > - } > - float_raise(float_flag_invalid, status); > - return float64_default_nan(status); > - } > - if ( aExp == 0 ) { > - aExp = 1; > - bExp = 1; > - } > - if ( bSig < aSig ) goto aBigger; > - if ( aSig < bSig ) goto bBigger; > - return packFloat64(status->float_rounding_mode == float_round_down, 0, > 0); > - bExpBigger: > - if ( bExp == 0x7FF ) { > - if (bSig) { > - return propagateFloat64NaN(a, b, status); > - } > - return packFloat64( zSign ^ 1, 0x7FF, 0 ); > - } > - if ( aExp == 0 ) { > - ++expDiff; > - } > - else { > - aSig |= LIT64( 0x4000000000000000 ); > - } > - shift64RightJamming( aSig, - expDiff, &aSig ); > - bSig |= LIT64( 0x4000000000000000 ); > - bBigger: > - zSig = bSig - aSig; > - zExp = bExp; > - zSign ^= 1; > - goto normalizeRoundAndPack; > - aExpBigger: > - if ( aExp == 0x7FF ) { > - if (aSig) { > - return propagateFloat64NaN(a, b, status); > - } > - return a; > - } > - if ( bExp == 0 ) { > - --expDiff; > - } > - else { > - bSig |= LIT64( 0x4000000000000000 ); > - } > - shift64RightJamming( bSig, expDiff, &bSig ); > - aSig |= LIT64( 0x4000000000000000 ); > - aBigger: > - zSig = aSig - bSig; > - zExp = aExp; > - normalizeRoundAndPack: > - --zExp; > - return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status); > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of adding the double-precision floating-point values `a' > -| and `b'. The operation is performed according to the IEC/IEEE Standard for > -| Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -float64 float64_add(float64 a, float64 b, float_status *status) > -{ > - flag aSign, bSign; > - a = float64_squash_input_denormal(a, status); > - b = float64_squash_input_denormal(b, status); > - > - aSign = extractFloat64Sign( a ); > - bSign = extractFloat64Sign( b ); > - if ( aSign == bSign ) { > - return addFloat64Sigs(a, b, aSign, status); > - } > - else { > - return subFloat64Sigs(a, b, aSign, status); > - } > - > -} > - > -/*---------------------------------------------------------------------------- > -| Returns the result of subtracting the double-precision floating-point > values > -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard > -| for Binary Floating-Point Arithmetic. > -*----------------------------------------------------------------------------*/ > - > -float64 float64_sub(float64 a, float64 b, float_status *status) > -{ > - flag aSign, bSign; > - a = float64_squash_input_denormal(a, status); > - b = float64_squash_input_denormal(b, status); > - > - aSign = extractFloat64Sign( a ); > - bSign = extractFloat64Sign( b ); > - if ( aSign == bSign ) { > - return subFloat64Sigs(a, b, aSign, status); > - } > - else { > - return addFloat64Sigs(a, b, aSign, status); > - } > - > -} > > > /*---------------------------------------------------------------------------- > | Returns the result of multiplying the double-precision floating-point > values > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 23824a3000..693ece0974 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -236,6 +236,10 @@ float64 float16_to_float64(float16 a, flag ieee, > float_status *status); > > /*---------------------------------------------------------------------------- > | Software half-precision operations. > > *----------------------------------------------------------------------------*/ > + > +float16 float16_add(float16, float16, float_status *status); > +float16 float16_sub(float16, float16, float_status *status); > + > int float16_is_quiet_nan(float16, float_status *status); > int float16_is_signaling_nan(float16, float_status *status); > float16 float16_maybe_silence_nan(float16, float_status *status); > (note for myself, since I need more time for those 3) all bug round_canonical() pick_nan_parts() addsub_floats(): Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org>