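Honour the FPCR.AH "don't negate the sign of a NaN" semantics in FMLSL, for both the vector and the indexed (by-element) forms. We pass the value of FPCR.AH to the helpers in the SIMD data field of the descriptor (bit 2 for the vector form, bit 5 for the indexed form, above the 3-bit index), and use it to determine whether we should suppress the negation for NaN inputs.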
Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/tcg/translate-a64.c | 4 ++-- target/arm/tcg/vec_helper.c | 28 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 0827dff16b2..e22c2a148ab 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -5968,7 +5968,7 @@ TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) { if (fp_access_check(s)) { - int data = (is_2 << 1) | is_s; + int data = (s->fpcr_ah << 2) | (is_2 << 1) | is_s; tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), tcg_env, @@ -6738,7 +6738,7 @@ TRANS(FMLS_vi, do_fmla_vector_idx, a, true) static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) { if (fp_access_check(s)) { - int data = (a->idx << 2) | (is_2 << 1) | is_s; + int data = (s->fpcr_ah << 5) | (a->idx << 2) | (is_2 << 1) | is_s; tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), tcg_env, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 382b5da4a9c..aa42c50f9fe 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -2083,6 +2083,26 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5); } +static uint64_t neg4_f16(uint64_t v, bool fpcr_ah) +{ + /* + * Negate all inputs for FMLSL at once. 
This is slightly complicated + * by the need to avoid flipping the sign of a NaN when FPCR.AH == 1 + */ + uint64_t mask = 0x8000800080008000ull; + if (fpcr_ah) { + uint64_t tmp = v, signbit = 0x8000; + for (int i = 0; i < 4; i++) { + if (float16_is_any_nan(extract64(tmp, 0, 16))) { + mask ^= signbit; + } + tmp >>= 16; + signbit <<= 16; + } + } + return v ^ mask; +} + /* * Note that FMLAL requires oprsz == 8 or oprsz == 16, * as there is not yet SVE versions that might use blocking. @@ -2094,6 +2114,7 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, intptr_t i, oprsz = simd_oprsz(desc); int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); int is_q = oprsz == 16; uint64_t n_4, m_4; @@ -2101,9 +2122,8 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, n_4 = load4_f16(vn, is_q, is_2); m_4 = load4_f16(vm, is_q, is_2); - /* Negate all inputs for FMLSL at once. */ if (is_s) { - n_4 ^= 0x8000800080008000ull; + n_4 = neg4_f16(n_4, fpcr_ah); } for (i = 0; i < oprsz / 4; i++) { @@ -2155,6 +2175,7 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1); int is_q = oprsz == 16; uint64_t n_4; float32 m_1; @@ -2162,9 +2183,8 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, /* Pre-load all of the f16 data, avoiding overlap issues. */ n_4 = load4_f16(vn, is_q, is_2); - /* Negate all inputs for FMLSL at once. */ if (is_s) { - n_4 ^= 0x8000800080008000ull; + n_4 = neg4_f16(n_4, fpcr_ah); } m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); -- 2.34.1