Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/tcg/helper-sme.h | 2 ++ target/arm/tcg/sme_helper.c | 38 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sme.c | 5 +++++ target/arm/tcg/sme.decode | 5 +++++ 4 files changed, 50 insertions(+)
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h index cb81f89fb3..04db920299 100644 --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -218,3 +218,5 @@ DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 6d2b83d26a..8289a02bfa 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -1562,3 +1562,41 @@ void HELPER(sme2_fcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc) d[H2(i * 2 + 1)] = d1; } } + +/* Expand and convert */ +void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + ARMVectorReg scratch __attribute__((uninitialized)); + size_t oprsz = simd_oprsz(desc); + size_t i, n = oprsz / 4; + float16 *s = vs; + float32 *d0 = vd; + float32 *d1 = vd + sizeof(ARMVectorReg); + + if (vd == vs) { + s = memcpy(&scratch, s, oprsz); + } + + for (i = 0; i < n; ++i) { + d0[H4(i)] = float16_to_float32(s[H2(i)], true, fpst); + } + for (i = 0; i < n; ++i) { + d1[H4(i)] = float16_to_float32(s[H2(n + i)], true, fpst); + } +} + +/* Deinterleave and convert. */ +void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + float16 *s = vs; + float32 *d0 = vd; + float32 *d1 = vd + sizeof(ARMVectorReg); + + for (i = 0; i < n; ++i) { + float32 v0 = float16_to_float32(s[H2(i * 2 + 0)], true, fpst); + float32 v1 = float16_to_float32(s[H2(i * 2 + 1)], true, fpst); + d0[H4(i)] = v0; + d1[H4(i)] = v1; + } +} diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index 777ea15a80..2b45244e23 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -1288,3 +1288,8 @@ TRANS_FEAT(FCVT_n, aa64_sme2, do_zz_fpst, a, 0, FPST_A64, gen_helper_sme2_fcvt_n) TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0, FPST_A64, gen_helper_sme2_fcvtn) + +TRANS_FEAT(FCVT_w, aa64_sme2_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvt_w) +TRANS_FEAT(FCVTL, aa64_sme2_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvtl) diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode index 8cca7d0d46..644794bdc1 100644 --- a/target/arm/tcg/sme.decode +++ b/target/arm/tcg/sme.decode @@ -725,9 +725,14 @@ FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3 &zz_n zd zn n @zz_1x2 ........ ... ..... ...... ..... zd:5 \ &zz_n n=1 zn=%zn_ax2 +@zz_2x1 ........ ... ..... ...... zn:5 ..... \ + &zz_n n=1 zd=%zd_ax2 BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2 BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2 FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2 FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2 + +FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1 +FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1 -- 2.43.0