On 7/22/20 2:16 AM, frank.ch...@sifive.com wrote: > -#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ > +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ > void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ > CPURISCVState *env, uint32_t desc) \ > { \ > - uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ > + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ > uint32_t vm = vext_vm(desc); \ > - uint32_t vta = vext_vta(desc); \ > uint32_t vl = env->vl; \ > target_ulong offset = s1, i; \ > \ > for (i = 0; i < vl; ++i) { \ > + /* offset may be a large value, which may cause j to overflow */ \ > target_ulong j = i + offset; \ > + bool is_valid = (offset >= vlmax || j >= vlmax) ? false : true; \
This is... silly verbose. But also, the test is partially loop invariant and entirely predictable, allowing loop fission. > if (!vm && !vext_elem_mask(v0, i)) { \ > continue; \ > } \ > - *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \ > + *((ETYPE *)vd + H(i)) = is_valid ? *((ETYPE *)vs2 + H(j)) : 0; \ > } \ > - CLEAR_FN(vd, vta, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ > } E.g. i_max = s1 < vlmax ? vlmax - s1 : 0; for (i = 0; i < i_max; ++i) { if (vext_elem_mask(v0, i)) { *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); } } for (i = i_max; i < vl; ++i) { if (vext_elem_mask(v0, i)) { *((ETYPE *)vd + H(i)) = 0; } } r~