On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +    if (a->vm && s->vl_eq_vlmax) {                                 \
> +        tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd),      \
> +                             vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), \
> +                             MAXSZ(s), MAXSZ(s));                  \
The first argument here should be just s->sew.  You should have seen
the assert fire:

    tcg_debug_assert(vece <= MO_64);

It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a
function, and pass in tcg_gen_gvec_* as a function pointer, and fns
as a pointer.

In general, I prefer the functions that are generated by macros like
this to have exactly one executable statement -- the call to the
helper that does all of the work using the arguments provided.  That
way a maximum number of lines are available for stepping with the
debugger.
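Untested, but roughly what I have in mind is below.  opivv_gvec_trans
is a name I'm making up here; I'm assuming an opivv_check/opivv_trans
and a gen_helper_opivv typedef analogous to the gen_helper_opivx bits
quoted further down.  GVecGen3Fn is the function type from
tcg-op-gvec.h.

static bool opivv_gvec_trans(DisasContext *s, arg_rmrr *a,
                             GVecGen3Fn *gvec_fn, gen_helper_opivv fn)
{
    uint32_t data = 0;

    if (!opivv_check(s, a)) {
        return false;
    }
    if (a->vm && s->vl_eq_vlmax) {
        /* The first argument is the log2 element size, i.e. s->sew.  */
        gvec_fn(s->sew, vreg_ofs(s, a->rd),
                vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
                MAXSZ(s), MAXSZ(s));
        return true;
    }
    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    return opivv_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

#define GEN_OPIVV_GVEC_TRANS(NAME, GVSUF)                              \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                 \
{                                                                      \
    static gen_helper_opivv const fns[4] = {                           \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,                  \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,                  \
    };                                                                 \
    return opivv_gvec_trans(s, a, tcg_gen_gvec_##GVSUF, fns[s->sew]);  \
}

That leaves each trans_* function with just the fns[] table and the
one call, which is nice for the debugger.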
> +    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                 \
> +    data = FIELD_DP32(data, VDATA, VM, a->vm);                     \
> +    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                 \

Why are these replicated in each trans_* function, and not done in
opiv?_trans, where the rest of the descriptor is created?

> +/* OPIVX without GVEC IR */
> +#define GEN_OPIVX_TRANS(NAME, CHECK)                               \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
> +{                                                                  \
> +    if (CHECK(s, a)) {                                             \
> +        uint32_t data = 0;                                         \
> +        static gen_helper_opivx const fns[4] = {                   \
> +            gen_helper_##NAME##_b, gen_helper_##NAME##_h,          \
> +            gen_helper_##NAME##_w, gen_helper_##NAME##_d,          \
> +        };                                                         \
> +                                                                   \
> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);             \
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
> +        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \
> +    }                                                              \
> +    return false;                                                  \
> +}
> +
> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check)

Note that you *can* generate vector code for this, you just have to
write your own helpers.  E.g.

static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}
// etc, reversing the arguments and passing on to sub.

static const GVecGen2s rsub_op[4] = {
    { .fni8 = gen_vec_rsub8_i64,
      .fniv = gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs8,
      .opt_opc = vecop_list_sub,
      .vece = MO_8 },
    { .fni8 = gen_vec_rsub16_i64,
      .fniv = gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs16,
      .opt_opc = vecop_list_sub,
      .vece = MO_16 },
    { .fni4 = gen_rsub_i32,
      .fniv = gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs32,
      .opt_opc = vecop_list_sub,
      .vece = MO_32 },
    { .fni8 = gen_rsub_i64,
      .fniv = gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs64,
      .opt_opc = vecop_list_sub,
      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
      .vece = MO_64 },
};

static void gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
                           TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}

static void gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_const_i64(c);

    tcg_debug_assert(vece <= MO_64);
    gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
    tcg_temp_free_i64(tmp);
}

> +/* generate the helpers for OPIVV */
> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)                      \
> +void HELPER(NAME)(void *vd, void *v0, void *vs1,                   \
> +                  void *vs2, CPURISCVState *env, uint32_t desc)    \
> +{                                                                  \
> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;                       \
> +    uint32_t mlen = vext_mlen(desc);                               \
> +    uint32_t vm = vext_vm(desc);                                   \
> +    uint32_t vl = env->vl;                                         \
> +    uint32_t i;                                                    \
> +    for (i = 0; i < vl; i++) {                                     \
> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                 \
> +            continue;                                              \
> +        }                                                          \
> +        do_##NAME(vd, vs1, vs2, i);                                \
> +    }                                                              \
> +    if (i != 0) {                                                  \
> +        CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ);                   \
> +    }                                                              \
> +}
> +
> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)

The body of GEN_VEXT_VV can be an inline function, calling the helper
functions that you generated above; see the sketch at the end of this
message.

> +/*
> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits.
> + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type.
> + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations
> + * or narrow operations
> + */
> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)            \
> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i) \
> +{                                                                  \
> +    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
> +    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1);        \
> +}

Why not just make the type of s1 be target_long in the parameter?

> +/* generate the helpers for instructions with one vector and one sclar */
> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)                      \
> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1,             \
> +                  void *vs2, CPURISCVState *env, uint32_t desc)    \
> +{                                                                  \
> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;                       \
> +    uint32_t mlen = vext_mlen(desc);                               \
> +    uint32_t vm = vext_vm(desc);                                   \
> +    uint32_t vl = env->vl;                                         \
> +    uint32_t i;                                                    \
> +                                                                   \
> +    for (i = 0; i < vl; i++) {                                     \
> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {                 \
> +            continue;                                              \
> +        }                                                          \
> +        do_##NAME(vd, s1, vs2, i);                                 \
> +    }                                                              \
> +    if (i != 0) {                                                  \
> +        CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ);                   \
> +    }                                                              \
> +}

Likewise an inline function.

r~
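Here's an untested sketch of what I mean for GEN_VEXT_VV; the
opivv2_fn and clear_fn typedefs are ones I'm inventing here to match
do_##NAME and the clear* functions:

typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn, clear_fn *clearfn)
{
    uint32_t vlmax = vext_maxsz(desc) / esz;
    uint32_t mlen = vext_mlen(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    if (i != 0) {
        clearfn(vd, vl, vl * dsz, vlmax * dsz);
    }
}

#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)                          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                       \
                  void *vs2, CPURISCVState *env, uint32_t desc)        \
{                                                                      \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,                  \
               do_##NAME, CLEAR_FN);                                   \
}

GEN_VEXT_VX is the same shape, with target_ulong s1 in place of vs1.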