Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS (vector), by implementing a new set of helpers for the AH=1 case.
The float_muladd_negate_product flag produces the same result as negating either of the multiplication operands, assuming neither of the operands are NaNs. But since FEAT_AFP does not negate NaNs, this behaviour is exactly what we need. Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> --- v2: squashed in RTH's patch to use the muladd flag --- target/arm/helper.h | 4 ++++ target/arm/tcg/translate-a64.c | 7 ++++++- target/arm/tcg/vec_helper.c | 22 ++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index be47edff896..f0a783b7088 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -782,6 +782,10 @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 2509a29528e..c209ac84228 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -5871,7 +5871,12 @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_d, }; -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { + gen_helper_gvec_ah_vfms_h, + gen_helper_gvec_ah_vfms_s, + gen_helper_gvec_ah_vfms_d, +}; +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { gen_helper_gvec_fceq_h, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index ae3cb50fa24..fc3e6587b81 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -1558,6 +1558,24 @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, return float64_muladd(float64_chs(op1), op2, dest, 0, stat); } +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, + float_status *stat) +{ + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, + float_status *stat) +{ + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + #define DO_MULADD(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, \ float_status *stat, uint32_t desc) \ @@ -1584,6 +1602,10 @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) + /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. */ -- 2.34.1