We should be using the F16-specific float_status for conversions from half-precision, because halfprec inputs never set Input Denormal.
Without FEAT_AHP, using the wrong fpst here had no effect, because the only difference between the F16_A64 and A64 fpst is its handling of flush-to-zero on input and output, and the helper functions vfp_fcvt_f16_to_* and vfp_fcvt_*_to_f16 all explicitly squash the relevant flushing flags, and flush_inputs_to_zero was the only way that IDC could be set. With FEAT_AHP, the FPCR.AH=1 behaviour sets IDC for input_denormal_used, which we will only ignore in vfp_get_fpsr_from_host() for the F16_A64 fpst; so it matters that we use that one for f16 inputs (and the normal one for single/double to f16 conversions). Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/tcg/translate-a64.c | 9 ++++++--- target/arm/tcg/translate-sve.c | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 74766a0bc47..a47fdcd2e48 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -8568,7 +8568,7 @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) if (fp_access_check(s)) { TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); TCGv_i32 tcg_rd = tcg_temp_new_i32(); - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_A64); + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); TCGv_i32 tcg_ahp = get_ahp_flag(); gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); @@ -8582,7 +8582,7 @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) if (fp_access_check(s)) { TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); TCGv_i64 tcg_rd = tcg_temp_new_i64(); - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_A64); + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64); TCGv_i32 tcg_ahp = get_ahp_flag(); gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); @@ -9511,13 +9511,14 @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR_A64); if (a->esz == MO_64) { /* 32 -> 64 bit fp conversion */ TCGv_i64 tcg_res[2]; TCGv_i32 tcg_op = tcg_temp_new_i32(); int srcelt = a->q ? 2 : 0; + fpst = fpstatus_ptr(FPST_FPCR_A64); + for (pass = 0; pass < 2; pass++) { tcg_res[pass] = tcg_temp_new_i64(); read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); @@ -9532,6 +9533,8 @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) TCGv_i32 tcg_res[4]; TCGv_i32 ahp = get_ahp_flag(); + fpst = fpstatus_ptr(FPST_FPCR_F16_A64); + for (pass = 0; pass < 4; pass++) { tcg_res[pass] = tcg_temp_new_i32(); read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 37de816964a..fc7f0d077a5 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -3887,7 +3887,7 @@ TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR_A64) TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_A64) + gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64) TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, gen_helper_sve_bfcvt, a, 0, FPST_FPCR_A64) @@ -3895,7 +3895,7 @@ TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR_A64) TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_A64) + gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64) TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR_A64) TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, -- 2.34.1