Changing the sign before rounding affects the correctness of the asymmetric rounding modes: float_round_up and float_round_down.
Reported-by: WANG Rui <wang...@loongson.cn> Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- fpu/softfloat.c | 54 +++++++++++++++++++++++------ tests/tcg/multiarch/fnmsub.c | 36 +++++++++++++++++++ fpu/softfloat-parts.c.inc | 4 --- tests/tcg/multiarch/Makefile.target | 1 + 4 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 tests/tcg/multiarch/fnmsub.c diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 34c962d6bd..8094358c2e 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1731,11 +1731,8 @@ static float64 float64_round_pack_canonical(FloatParts64 *p, return float64_pack_raw(p); } -static float64 float64r32_round_pack_canonical(FloatParts64 *p, - float_status *s) +static float64 float64r32_pack_raw(FloatParts64 *p) { - parts_uncanon(p, s, &float32_params); - /* * In parts_uncanon, we placed the fraction for float32 at the lsb. * We need to adjust the fraction higher so that the least N bits are @@ -1776,6 +1773,13 @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p, return float64_pack_raw(p); } +static float64 float64r32_round_pack_canonical(FloatParts64 *p, + float_status *s) +{ + parts_uncanon(p, s, &float32_params); + return float64r32_pack_raw(p); +} + static void float128_unpack_canonical(FloatParts128 *p, float128 f, float_status *s) { @@ -2240,7 +2244,12 @@ float16_muladd_scalbn(float16 a, float16 b, float16 c, float16_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); - return float16_round_pack_canonical(pr, status); + /* Round before applying negate result. 
*/ + parts_uncanon(pr, status, &float16_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return float16_pack_raw(pr); } float16 float16_muladd(float16 a, float16 b, float16 c, @@ -2260,7 +2269,12 @@ float32_muladd_scalbn(float32 a, float32 b, float32 c, float32_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); - return float32_round_pack_canonical(pr, status); + /* Round before applying negate result. */ + parts_uncanon(pr, status, &float32_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return float32_pack_raw(pr); } float64 QEMU_SOFTFLOAT_ATTR @@ -2274,7 +2288,12 @@ float64_muladd_scalbn(float64 a, float64 b, float64 c, float64_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status); - return float64_round_pack_canonical(pr, status); + /* Round before applying negate result. */ + parts_uncanon(pr, status, &float64_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return float64_pack_raw(pr); } static bool force_soft_fma; @@ -2428,7 +2447,12 @@ float64 float64r32_muladd(float64 a, float64 b, float64 c, float64_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); - return float64r32_round_pack_canonical(pr, status); + /* Round before applying negate result. */ + parts_uncanon(pr, status, &float32_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return float64r32_pack_raw(pr); } bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, @@ -2441,7 +2465,12 @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, bfloat16_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); - return bfloat16_round_pack_canonical(pr, status); + /* Round before applying negate result. 
*/ + parts_uncanon(pr, status, &bfloat16_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return bfloat16_pack_raw(pr); } float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c, @@ -2454,7 +2483,12 @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c, float128_unpack_canonical(&pc, c, status); pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status); - return float128_round_pack_canonical(pr, status); + /* Round before applying negate result. */ + parts_uncanon(pr, status, &float128_params); + if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) { + pr->sign ^= 1; + } + return float128_pack_raw(pr); } /* diff --git a/tests/tcg/multiarch/fnmsub.c b/tests/tcg/multiarch/fnmsub.c new file mode 100644 index 0000000000..52dc516baf --- /dev/null +++ b/tests/tcg/multiarch/fnmsub.c @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <stdio.h> +#include <math.h> +#include <fenv.h> + +union U { + double d; + unsigned long long l; +}; + +union U x = { .l = 0x4ff0000000000000ULL }; +union U y = { .l = 0x2ff0000000000000ULL }; +union U r; + +int main() +{ + fesetround(FE_DOWNWARD); + +#if defined(__loongarch__) + asm("fnmsub.d %0, %1, %1, %2" : "=f"(r.d) : "f"(x.d), "f"(y.d)); +#elif defined(__powerpc64__) + asm("fnmsub %0,%1,%1,%2" : "=f"(r.d) : "f"(x.d), "f"(y.d)); +#elif defined(__s390x__) && 0 /* need -march=z14 */ + asm("vfnms %0,%1,%1,%2,0,3" : "=f"(r.d) : "f"(x.d), "f"(y.d)); +#else + r.d = -fma(x.d, x.d, -y.d); +#endif + + if (r.l == 0xdfefffffffffffffULL) { + return 0; + } + + printf("r = %.18a (%016llx)\n", r.d, r.l); + return 1; +} diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc index 171bfd06e3..5e0438fc0b 100644 --- a/fpu/softfloat-parts.c.inc +++ b/fpu/softfloat-parts.c.inc @@ -708,10 +708,6 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b, return_normal: a->exp += scale; finish_sign: - if (flags 
& float_muladd_negate_result) { - a->sign ^= 1; - } - /* * All result types except for "return the default NaN * because this is an Invalid Operation" go through here; diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 45c9cfe18c..bfdf7197a7 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -29,6 +29,7 @@ run-float_%: float_% $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $<) $(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref) +fnmsub: LDFLAGS+=-lm testthread: LDFLAGS+=-lpthread -- 2.43.0