在 2022/3/7 下午8:21, ~eopxd 写道:
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
TD s1 = *((TD *)vs1 + HD(0)); \
\
@@ -4562,6 +4565,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,
\
} \
*((TD *)vd + HD(0)) = s1; \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz, \
+ vlenb); \
}
/* vd[0] = sum(vs1[0], vs2[*]) */
@@ -4667,6 +4673,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t esz = sizeof(uint32_t);
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
vlenb can also be obtained from the maxsz field of desc, that is:
uint32_t vlenb = simd_maxsz(desc);
Regards,
Weiwei Li