On Fri, Mar 15, 2024 at 3:57 AM Daniel Henrique Barboza <dbarb...@ventanamicro.com> wrote: > > Commit 8ff8ac6329 added a conditional to guard the vext_ldst_whole() > helper if vstart >= evl. But by skipping the helper we're also not > setting vstart = 0 at the end of the insns, which is incorrect. > > We'll move the conditional to vext_ldst_whole(), following in line with > the removal of all brconds vstart >= vl that the next patch will do. The > idea is to make the helpers responsible for their own vstart management. > > Fix ldst_whole insns by: > > - remove the brcond that skips the helper if vstart is >= evl; > > - vext_ldst_whole() now does an early exit with the same check, where > evl = (vlenb * nf) >> log2_esz, but the early exit will also clear > vstart. > > The 'width' param is now unneeded in ldst_whole_trans() and is also > removed. It was used for the evl calculation for the brcond and has no > other use now. The 'width' is reflected in vext_ldst_whole() via > log2_esz, which is encoded by GEN_VEXT_LD_WHOLE() as > "ctzl(sizeof(ETYPE))". > > Suggested-by: Max Chou <max.c...@sifive.com> > Fixes: 8ff8ac6329 ("target/riscv: rvv: Add missing early exit condition for > whole register load/store") > Signed-off-by: Daniel Henrique Barboza <dbarb...@ventanamicro.com>
Reviewed-by: Alistair Francis <alistair.fran...@wdc.com> Alistair > --- > target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++-------------- > target/riscv/vector_helper.c | 5 +++ > 2 files changed, 28 insertions(+), 29 deletions(-) > > diff --git a/target/riscv/insn_trans/trans_rvv.c.inc > b/target/riscv/insn_trans/trans_rvv.c.inc > index 52c26a7834..1366445e1f 100644 > --- a/target/riscv/insn_trans/trans_rvv.c.inc > +++ b/target/riscv/insn_trans/trans_rvv.c.inc > @@ -1097,13 +1097,9 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, > ld_us_check) > typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32); > > static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, > - uint32_t width, gen_helper_ldst_whole *fn, > + gen_helper_ldst_whole *fn, > DisasContext *s) > { > - uint32_t evl = s->cfg_ptr->vlenb * nf / width; > - TCGLabel *over = gen_new_label(); > - tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, evl, over); > - > TCGv_ptr dest; > TCGv base; > TCGv_i32 desc; > @@ -1120,8 +1116,6 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, > uint32_t nf, > > fn(dest, base, tcg_env, desc); > > - gen_set_label(over); > - > return true; > } > > @@ -1129,42 +1123,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t > rs1, uint32_t nf, > * load and store whole register instructions ignore vtype and vl setting. > * Thus, we don't need to check vill bit. 
(Section 7.9) > */ > -#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, WIDTH) \ > +#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \ > static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \ > { \ > if (require_rvv(s) && \ > QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \ > - return ldst_whole_trans(a->rd, a->rs1, ARG_NF, WIDTH, \ > + return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \ > gen_helper_##NAME, s); \ > } \ > return false; \ > } > > -GEN_LDST_WHOLE_TRANS(vl1re8_v, 1, 1) > -GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, 2) > -GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, 4) > -GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, 8) > -GEN_LDST_WHOLE_TRANS(vl2re8_v, 2, 1) > -GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, 2) > -GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, 4) > -GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, 8) > -GEN_LDST_WHOLE_TRANS(vl4re8_v, 4, 1) > -GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, 2) > -GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, 4) > -GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, 8) > -GEN_LDST_WHOLE_TRANS(vl8re8_v, 8, 1) > -GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, 2) > -GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, 4) > -GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, 8) > +GEN_LDST_WHOLE_TRANS(vl1re8_v, 1) > +GEN_LDST_WHOLE_TRANS(vl1re16_v, 1) > +GEN_LDST_WHOLE_TRANS(vl1re32_v, 1) > +GEN_LDST_WHOLE_TRANS(vl1re64_v, 1) > +GEN_LDST_WHOLE_TRANS(vl2re8_v, 2) > +GEN_LDST_WHOLE_TRANS(vl2re16_v, 2) > +GEN_LDST_WHOLE_TRANS(vl2re32_v, 2) > +GEN_LDST_WHOLE_TRANS(vl2re64_v, 2) > +GEN_LDST_WHOLE_TRANS(vl4re8_v, 4) > +GEN_LDST_WHOLE_TRANS(vl4re16_v, 4) > +GEN_LDST_WHOLE_TRANS(vl4re32_v, 4) > +GEN_LDST_WHOLE_TRANS(vl4re64_v, 4) > +GEN_LDST_WHOLE_TRANS(vl8re8_v, 8) > +GEN_LDST_WHOLE_TRANS(vl8re16_v, 8) > +GEN_LDST_WHOLE_TRANS(vl8re32_v, 8) > +GEN_LDST_WHOLE_TRANS(vl8re64_v, 8) > > /* > * The vector whole register store instructions are encoded similar to > * unmasked unit-stride store of elements with EEW=8. 
> */ > -GEN_LDST_WHOLE_TRANS(vs1r_v, 1, 1) > -GEN_LDST_WHOLE_TRANS(vs2r_v, 2, 1) > -GEN_LDST_WHOLE_TRANS(vs4r_v, 4, 1) > -GEN_LDST_WHOLE_TRANS(vs8r_v, 8, 1) > +GEN_LDST_WHOLE_TRANS(vs1r_v, 1) > +GEN_LDST_WHOLE_TRANS(vs2r_v, 2) > +GEN_LDST_WHOLE_TRANS(vs4r_v, 4) > +GEN_LDST_WHOLE_TRANS(vs8r_v, 8) > > /* > *** Vector Integer Arithmetic Instructions > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index bcc553c0e2..1f4c276b21 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -572,6 +572,11 @@ vext_ldst_whole(void *vd, target_ulong base, > CPURISCVState *env, uint32_t desc, > uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; > uint32_t max_elems = vlenb >> log2_esz; > > + if (env->vstart >= ((vlenb * nf) >> log2_esz)) { > + env->vstart = 0; > + return; > + } > + > k = env->vstart / max_elems; > off = env->vstart % max_elems; > > -- > 2.44.0 > >