On Sun, 22 Jun 2025 at 01:01, Richard Henderson
<richard.hender...@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  target/arm/tcg/helper-sme.h    |  22 ++++++
>  target/arm/tcg/sme_helper.c    |  60 +++++++++++++++
>  target/arm/tcg/translate-sme.c |  78 +++++++++++++++++++
>  target/arm/tcg/sme.decode      | 135 +++++++++++++++++++++++++++++++++
>  4 files changed, 295 insertions(+)



> +#define DO_MLALL_IDX(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
> +{                                                               \
> +    intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW);       \
> +    intptr_t eltspersegment = 16 / sizeof(TYPEW);               \
> +    intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2);         \
> +    intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 4);     \
> +    TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm;       \
> +    for (intptr_t i = 0; i < elements; i += eltspersegment) {   \
> +        TYPEW mm = m[HN(i * 4 + idx)];                          \
> +        for (intptr_t j = 0; j < eltspersegment; ++j) {         \
> +            TYPEN nn = n[HN((i + j) * 4 + sel)];                \
> +            d[HW(i + j)] = a[HW(i + j)] OP (nn * mm);           \
> +        }                                                       \
> +    }                                                           \
> +}

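Here the indexed helper is not symmetric in its two source operands: the
indexed element (idx) is always read from vm, while vn supplies the
per-element values selected by sel, so...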

> +static void gen_helper_sme2_sumlall_idx_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
> +                                          TCGv_ptr a, TCGv_i32 desc)
> +{
> +    gen_helper_sme2_usmlall_idx_s(d, m, n, a, desc);
> +}
> +
> +static void gen_helper_sme2_sumlall_idx_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
> +                                          TCGv_ptr a, TCGv_i32 desc)
> +{
> +    gen_helper_sme2_usmlall_idx_d(d, m, n, a, desc);
> +}

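...you can't implement sumlall_idx by calling the usmlall_idx helper with
n and m swapped: the helper would then take the indexed element from the
wrong register.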

This should fix it:

--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -219,6 +219,8 @@ DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s,
TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr,
 DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme2_usmlall_idx_d, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_sumlall_idx_s, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_sumlall_idx_d, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
 DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 16bdf61f51d..ba80aec6d45 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1515,6 +1515,9 @@ DO_MLALL_IDX(sme2_umlsll_idx_d, uint64_t,
uint16_t, uint16_t, H8, H2, -)
 DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +)
 DO_MLALL_IDX(sme2_usmlall_idx_d, uint64_t, uint16_t, int16_t, H8, H2, +)

+DO_MLALL_IDX(sme2_sumlall_idx_s, uint32_t, int8_t, uint8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_sumlall_idx_d, uint64_t, int16_t, uint16_t, H8, H2, +)
+
 #undef DO_MLALL_IDX

 /* Convert and compress */
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index c507c7cd277..006d4e2ac09 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1282,18 +1282,6 @@ static bool do_smlall_nx(DisasContext *s, arg_azx_n *a,
                       a->idx << 2, 0, false, fn);
 }

-static void gen_helper_sme2_sumlall_idx_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
-                                          TCGv_ptr a, TCGv_i32 desc)
-{
-    gen_helper_sme2_usmlall_idx_s(d, m, n, a, desc);
-}
-
-static void gen_helper_sme2_sumlall_idx_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
-                                          TCGv_ptr a, TCGv_i32 desc)
-{
-    gen_helper_sme2_usmlall_idx_d(d, m, n, a, desc);
-}
-
 TRANS_FEAT(SMLALL_nx_s, aa64_sme, do_smlall_nx, a,
gen_helper_sme2_smlall_idx_s)
 TRANS_FEAT(SMLSLL_nx_s, aa64_sme, do_smlall_nx, a,
gen_helper_sme2_smlsll_idx_s)
 TRANS_FEAT(UMLALL_nx_s, aa64_sme, do_smlall_nx, a,
gen_helper_sme2_umlall_idx_s)

-- PMM

Reply via email to