On Sun, 22 Jun 2025 at 00:54, Richard Henderson
<richard.hender...@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  target/arm/tcg/helper-sme.h    |  5 ++++
>  target/arm/tcg/sme_helper.c    | 44 ++++++++++++++++++++++++++++++++++
>  target/arm/tcg/translate-sme.c | 18 ++++++++++++++
>  target/arm/tcg/translate-sve.c |  5 ++++
>  target/arm/tcg/sme.decode      | 14 +++++++++++
>  target/arm/tcg/sve.decode      |  8 +++++--
>  6 files changed, 92 insertions(+), 2 deletions(-)
>
> diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
> index cdd7058aed..ec93ff57ff 100644
> --- a/target/arm/tcg/helper-sme.h
> +++ b/target/arm/tcg/helper-sme.h
> @@ -173,3 +173,8 @@ DEF_HELPER_FLAGS_5(gvec_fmaxnum_b16, TCG_CALL_NO_RWG,
>                     void, ptr, ptr, ptr, fpst, i32)
>  DEF_HELPER_FLAGS_5(gvec_fminnum_b16, TCG_CALL_NO_RWG,
>                     void, ptr, ptr, ptr, fpst, i32)
> +
> +DEF_HELPER_FLAGS_6(sme2_fdot_h, TCG_CALL_NO_RWG,
> +                   void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_FLAGS_6(sme2_fdot_idx_h, TCG_CALL_NO_RWG,
> +                   void, ptr, ptr, ptr, ptr, env, i32)
> diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
> index 194560eafa..bd9b81d5aa 100644
> --- a/target/arm/tcg/sme_helper.c
> +++ b/target/arm/tcg/sme_helper.c
> @@ -1122,6 +1122,50 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
>      }
>  }
>
> +void HELPER(sme2_fdot_h)(void *vd, void *vn, void *vm, void *va,
> +                         CPUARMState *env, uint32_t desc)
> +{
> +    intptr_t i, oprsz = simd_maxsz(desc);
> +    bool za = extract32(desc, SIMD_DATA_SHIFT, 1);
> +    float_status *fpst_std = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64];
> +    float_status *fpst_f16 = &env->vfp.fp_status[za ? FPST_ZA_F16 : FPST_A64_F16];
> +    float_status fpst_odd = *fpst_std;
> +    float32 *d = vd, *a = va;
> +    uint32_t *n = vn, *m = vm;
> +
> +    set_float_rounding_mode(float_round_to_odd, &fpst_odd);
> +
> +    for (i = 0; i < oprsz / sizeof(float32); ++i) {
> +        d[H4(i)] = f16_dotadd(a[H4(i)], n[H4(i)], m[H4(i)],
> +                              fpst_f16, fpst_std, &fpst_odd);

I can never figure out with these helpers when we need the H
macros and when we don't...

> +    }
> +}

Reviewed-by: Peter Maydell <peter.mayd...@linaro.org>

thanks
-- PMM

Reply via email to