On Sun, 22 Jun 2025 at 01:01, Richard Henderson <richard.hender...@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/tcg/helper-sme.h | 22 ++++++ > target/arm/tcg/sme_helper.c | 60 +++++++++++++++ > target/arm/tcg/translate-sme.c | 78 +++++++++++++++++++ > target/arm/tcg/sme.decode | 135 +++++++++++++++++++++++++++++++++ > 4 files changed, 295 insertions(+)
> +#define DO_MLALL_IDX(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \ > +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ > +{ \ > + intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \ > + intptr_t eltspersegment = 16 / sizeof(TYPEW); \ > + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \ > + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 4); \ > + TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \ > + for (intptr_t i = 0; i < elements; i += eltspersegment) { \ > + TYPEW mm = m[HN(i * 4 + idx)]; \ > + for (intptr_t j = 0; j < eltspersegment; ++j) { \ > + TYPEN nn = n[HN((i + j) * 4 + sel)]; \ > + d[HW(i + j)] = a[HW(i + j)] OP (nn * mm); \ > + } \ > + } \ > +} Here the indexed helper always takes the indexed element from vm (m[] is read once per segment using 'idx', while n[] is read per element using 'sel'), so the n and m operands are not interchangeable, so... > +static void gen_helper_sme2_sumlall_idx_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, > + TCGv_ptr a, TCGv_i32 desc) > +{ > + gen_helper_sme2_usmlall_idx_s(d, m, n, a, desc); > +} > + > +static void gen_helper_sme2_sumlall_idx_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, > + TCGv_ptr a, TCGv_i32 desc) > +{ > + gen_helper_sme2_usmlall_idx_d(d, m, n, a, desc); > +} ...you can't implement sumlall_idx by calling the usmlall_idx helper with n and m swapped. 
This should fix it: --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -219,6 +219,8 @@ DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme2_usmlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_sumlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_sumlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 16bdf61f51d..ba80aec6d45 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -1515,6 +1515,9 @@ DO_MLALL_IDX(sme2_umlsll_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, -) DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +) DO_MLALL_IDX(sme2_usmlall_idx_d, uint64_t, uint16_t, int16_t, H8, H2, +) +DO_MLALL_IDX(sme2_sumlall_idx_s, uint32_t, int8_t, uint8_t, H4, H1, +) +DO_MLALL_IDX(sme2_sumlall_idx_d, uint64_t, int16_t, uint16_t, H8, H2, +) + #undef DO_MLALL_IDX /* Convert and compress */ diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index c507c7cd277..006d4e2ac09 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -1282,18 +1282,6 @@ static bool do_smlall_nx(DisasContext *s, arg_azx_n *a, a->idx << 2, 0, false, fn); } -static void gen_helper_sme2_sumlall_idx_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, - TCGv_ptr a, TCGv_i32 desc) -{ - gen_helper_sme2_usmlall_idx_s(d, m, n, a, desc); -} - -static void gen_helper_sme2_sumlall_idx_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, - TCGv_ptr a, TCGv_i32 desc) -{ - gen_helper_sme2_usmlall_idx_d(d, 
m, n, a, desc); -} - TRANS_FEAT(SMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_smlall_idx_s) TRANS_FEAT(SMLSLL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_s) TRANS_FEAT(UMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_umlall_idx_s) -- PMM