Handle the FPCR.AH "don't negate the sign of a NaN" semantics fro the SVE FMLS (vector) insns, by providing new helpers for the AH=1 case which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions that do the work.
Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/tcg/helper-sve.h | 21 ++++++ target/arm/tcg/sve_helper.c | 114 +++++++++++++++++++++++++++------ target/arm/tcg/translate-sve.c | 18 ++++-- 3 files changed, 126 insertions(+), 27 deletions(-) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index a2e96a498dd..0b1b5887834 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1475,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index dc5a35b46ef..90bcf680fa4 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4802,7 +4802,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint16_t neg1, uint16_t neg3) + uint16_t neg1, uint16_t neg3, bool fpcr_ah) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4814,9 +4814,15 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, if (likely((pg >> (i & 63)) & 1)) { float16 e1, e2, e3, r; - e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e1 = *(uint16_t *)(vn + H1_2(i)); e2 = *(uint16_t *)(vm + H1_2(i)); - e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + e3 = *(uint16_t *)(va + H1_2(i)); + if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) { + e1 ^= neg1; + } + if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) { + e3 ^= neg3; + } r = float16_muladd(e1, e2, e3, 0, status); *(uint16_t *)(vd + H1_2(i)) = r; } @@ -4827,30 +4833,48 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false); } void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false); } void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false); } void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false); +} + +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true); +} + +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true); +} + +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true); } static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint32_t neg1, uint32_t neg3) + uint32_t neg1, uint32_t neg3, bool fpcr_ah) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4862,9 +4886,15 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, if (likely((pg >> (i & 63)) & 1)) { float32 e1, e2, e3, r; - e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; + e1 = *(uint32_t *)(vn + H1_4(i)); e2 = *(uint32_t *)(vm + H1_4(i)); - e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; + e3 = *(uint32_t *)(va + H1_4(i)); + if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) { + e1 ^= neg1; + } + if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) { + e3 ^= neg3; + } r = float32_muladd(e1, e2, e3, 0, status); *(uint32_t *)(vd + H1_4(i)) = r; } @@ -4875,30 +4905,48 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false); } void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false); } void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false); } void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false); +} + +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true); +} + +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true); +} + +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true); } static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint64_t neg1, uint64_t neg3) + uint64_t neg1, uint64_t neg3, bool fpcr_ah) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4910,9 +4958,15 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, if (likely((pg >> (i & 63)) & 1)) { float64 e1, e2, e3, r; - e1 = *(uint64_t *)(vn + i) ^ neg1; + e1 = *(uint64_t *)(vn + i); e2 = *(uint64_t *)(vm + i); - e3 = *(uint64_t *)(va + i) ^ neg3; + e3 = *(uint64_t *)(va + i); + if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) { + e1 ^= neg1; + } + if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) { + e3 ^= neg3; + } r = float64_muladd(e1, e2, e3, 0, status); *(uint64_t *)(vd + i) = r; } @@ -4923,25 +4977,43 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false); } void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false); } void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false); } void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false); +} + +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true); +} + +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true); +} + +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true); } /* Two operand floating-point comparison controlled by a predicate. diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index a7033fe93ab..663634e3a39 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -3924,19 +3924,25 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) -#define DO_FMLA(NAME, name) \ +#define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ + NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64) -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) #undef DO_FMLA -- 2.34.1