On Wed, 6 Jul 2022 at 10:12, Richard Henderson <richard.hender...@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> +
> +void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
> +                         void *vpm, void *vst, uint32_t desc)
> +{
> +    intptr_t row, col, oprsz = simd_maxsz(desc);
> +    uint32_t neg = simd_data(desc) << 31;
> +    uint16_t *pn = vpn, *pm = vpm;
> +    float_status fpst = *(float_status *)vst;
This could use a comment mentioning that we take a copy of the float_status
because this operation does not update the cumulative fp exception status.

> +
> +    set_default_nan_mode(true, &fpst);
> +
> +    for (row = 0; row < oprsz; ) {
> +        uint16_t pa = pn[H2(row >> 4)];
> +        do {
> +            if (pa & 1) {
> +                void *vza_row = vza + tile_vslice_offset(row);
> +                uint32_t n = *(uint32_t *)(vzn + row) ^ neg;
> +
> +                for (col = 0; col < oprsz; ) {
> +                    uint16_t pb = pm[H2(col >> 4)];
> +                    do {
> +                        if (pb & 1) {
> +                            uint32_t *a = vza_row + col;
> +                            uint32_t *m = vzm + col;

All these accesses to uint32_t elements are missing some kind of
H macro somewhere, I think?

> +                            *a = float32_muladd(n, *m, *a, 0, vst);
> +                        }
> +                        col += 4;
> +                        pb >>= 4;
> +                    } while (col & 15);
> +                }
> +            }
> +            row += 4;
> +            pa >>= 4;
> +        } while (row & 15);
> +    }
> +}

thanks
-- PMM