On Sun, 1 Dec 2024 at 15:16, Richard Henderson <richard.hender...@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/tcg/translate-a64.c | 104 ++++++++++++++++++++++----------- > target/arm/tcg/a64.decode | 7 +++ > 2 files changed, 78 insertions(+), 33 deletions(-) > > diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c > index 2d3566e45c..87731e0be4 100644 > --- a/target/arm/tcg/translate-a64.c > +++ b/target/arm/tcg/translate-a64.c > @@ -8287,6 +8287,67 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) > return true; > } > > +typedef struct FPScalar1Int { > + void (*gen_h)(TCGv_i32, TCGv_i32); > + void (*gen_s)(TCGv_i32, TCGv_i32); > + void (*gen_d)(TCGv_i64, TCGv_i64); > +} FPScalar1Int; > + > +static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, > + const FPScalar1Int *f) > +{ > + switch (a->esz) { > + case MO_64: > + if (fp_access_check(s)) { > + TCGv_i64 t = read_fp_dreg(s, a->rn); > + f->gen_d(t, t); > + write_fp_dreg(s, a->rd, t); > + } > + break; > + case MO_32: > + if (fp_access_check(s)) { > + TCGv_i32 t = read_fp_sreg(s, a->rn); > + f->gen_s(t, t); > + write_fp_sreg(s, a->rd, t); > + } > + break; > + case MO_16: > + if (!dc_isar_feature(aa64_fp16, s)) { > + return false; > + } > + if (fp_access_check(s)) { > + TCGv_i32 t = read_fp_hreg(s, a->rn); > + f->gen_h(t, t); > + write_fp_sreg(s, a->rd, t); > + } > + break; > + default: > + return false; > + } > + return true; > +} > + > +static const FPScalar1Int f_scalar_fmov = { > + tcg_gen_mov_i32, > + tcg_gen_mov_i32, > + tcg_gen_mov_i64, > +}; > +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) > + > +static const FPScalar1Int f_scalar_fabs = { > + gen_vfp_absh, > + gen_vfp_abss, > + gen_vfp_absd, > +}; > +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) > + > +static const FPScalar1Int f_scalar_fneg = { > + gen_vfp_negh, > + gen_vfp_negs, > + gen_vfp_negd, > +}; > +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) > + > /* Floating-point data-processing (1 source) - half precision */ > static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) > { > @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int > opcode, int rd, int rn) > TCGv_i32 tcg_res = tcg_temp_new_i32(); > > switch (opcode) { > - case 0x0: /* FMOV */ > - tcg_gen_mov_i32(tcg_res, tcg_op); > - break; > - case 0x1: /* FABS */ > - gen_vfp_absh(tcg_res, tcg_op); > - break; > - case 0x2: /* FNEG */ > - gen_vfp_negh(tcg_res, tcg_op); > - break; > case 0x3: /* FSQRT */ > fpst = fpstatus_ptr(FPST_FPCR_F16); > gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); > @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int > opcode, int rd, int rn) > gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } >
In these changes to the "handle this op" functions we make the function assert if it's passed an op we've converted. But shouldn't there also be a change which makes the calling function disas_fp_1src() call unallocated_encoding() for the ops ? > @@ -8349,15 +8404,6 @@ static void handle_fp_1src_single(DisasContext *s, int > opcode, int rd, int rn) > tcg_res = tcg_temp_new_i32(); > > switch (opcode) { > - case 0x0: /* FMOV */ > - tcg_gen_mov_i32(tcg_res, tcg_op); > - goto done; > - case 0x1: /* FABS */ > - gen_vfp_abss(tcg_res, tcg_op); > - goto done; > - case 0x2: /* FNEG */ > - gen_vfp_negs(tcg_res, tcg_op); > - goto done; > case 0x3: /* FSQRT */ > gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); > goto done; > @@ -8393,6 +8439,9 @@ static void handle_fp_1src_single(DisasContext *s, int > opcode, int rd, int rn) > gen_fpst = gen_helper_frint64_s; > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } > > @@ -8417,22 +8466,10 @@ static void handle_fp_1src_double(DisasContext *s, > int opcode, int rd, int rn) > TCGv_ptr fpst; > int rmode = -1; > > - switch (opcode) { > - case 0x0: /* FMOV */ > - gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); > - return; > - } > - > tcg_op = read_fp_dreg(s, rn); > tcg_res = tcg_temp_new_i64(); > > switch (opcode) { > - case 0x1: /* FABS */ > - gen_vfp_absd(tcg_res, tcg_op); > - goto done; > - case 0x2: /* FNEG */ > - gen_vfp_negd(tcg_res, tcg_op); > - goto done; > case 0x3: /* FSQRT */ > gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); > goto done; > @@ -8465,6 +8502,9 @@ static void handle_fp_1src_double(DisasContext *s, int > opcode, int rd, int rn) > gen_fpst = gen_helper_frint64_d; > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } > > @@ -10881,13 +10921,11 @@ static void > disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) > case 0x7b: /* FCVTZU */ > gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); > break; > - case 0x6f: /* FNEG */ > - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); > - break; > case 0x7d: /* FRSQRTE */ > gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); > break; > default: > + case 0x6f: /* FNEG */ > g_assert_not_reached(); > } What's this change about? This bit of decode is not in the area that corresponds to the new patterns you add to a64.decode. Is this a bug in our existing decode where we decode something that should be undef? I think that 0x6f here corresponds to the decode table in section C4.1.96.6 ("Advanced SIMD scalar two-register miscellaneous FP16"), where it is U:a:opcode == 1 1 01111. But in that table that encoding is marked unallocated. A similar thing is true for case 0x2f FABS and case 07f FSQRT. All three of these should have set only_in_vector to true and not had the code handling in the is_scalar branch of the function. We should fix that bug in a separate patch before we do the decodetree conversion, I think. > diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode > index 928e69da69..fca64c63c3 100644 > --- a/target/arm/tcg/a64.decode > +++ b/target/arm/tcg/a64.decode > @@ -47,6 +47,7 @@ > @rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 > @rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 > @rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd > +@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd > > @rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0 > @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 > @@ -1321,6 +1322,12 @@ FMAXV_s 0110 1110 00 11000 01111 10 ..... > ..... @rr_q1e2 > FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h > FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2 > > +# Floating-point data processing (1 source) > + > +FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd > +FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd > +FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd > + > # Floating-point Immediate > > FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd > -- > 2.43.0 thanks -- PMM