Currently we implement BFCVT scalar via do_fp1_scalar(). This works even though BFCVT is a narrowing operation from 32 to 16 bits, because we can use write_fp_sreg() for float16. However, FPCR.NEP support requires that we use write_fp_hreg_merging() for float16 outputs, so we can't continue to borrow the non-narrowing do_fp1_scalar() function for this. Split out trans_BFCVT_s() into its own implementation that honours FPCR.NEP.
Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/tcg/translate-a64.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 66c214ed278..944bdf8cafe 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -8582,10 +8582,27 @@ static const FPScalar1 f_scalar_frintx = { }; TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) -static const FPScalar1 f_scalar_bfcvt = { - .gen_s = gen_helper_bfcvt, -}; -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar_ah, a, &f_scalar_bfcvt, -1) +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) +{ + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64; + TCGv_i32 t32; + int check; + + if (!dc_isar_feature(aa64_bf16, s)) { + return false; + } + + check = fp_access_check_scalar_hsd(s, a->esz); + + if (check <= 0) { + return check == 0; + } + + t32 = read_fp_sreg(s, a->rn); + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); + write_fp_hreg_merging(s, a->rd, a->rd, t32); + return true; +} static const FPScalar1 f_scalar_frint32 = { NULL, -- 2.34.1