On 3/27/23 20:06, Song Gao wrote:
+static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+    TCGv_vec t1;
+    int64_t max = (1l << imm) - 1;
This needed 1ull (a plain `long` may be only 32 bits wide on some hosts), but it is better to just use max = MAKE_64BIT_MASK(0, imm - 1);
+ int64_t min = ~max;
Extra space.
+    t1 = tcg_temp_new_vec_matching(t);
+    tcg_gen_dupi_vec(vece, t, min);
+    tcg_gen_smax_vec(vece, t, a, t);
Use tcg_constant_vec_matching(t, vece, min) instead of dupi. Three instances.
+    tcg_gen_dupi_vec(vece, t1, max);
+    tcg_gen_smin_vec(vece, t, t, t1);
+}
+
+static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+    };
+    static const GVecGen2i op[4] = {
+        {
+            .fniv = gen_vsat_s,
+            .fnoi = gen_helper_vsat_b,
+            .opt_opc = vecop_list,
+            .vece = MO_8
+        },
+        {
+            .fniv = gen_vsat_s,
+            .fnoi = gen_helper_vsat_h,
+            .opt_opc = vecop_list,
+            .vece = MO_16
+        },
+        {
+            .fniv = gen_vsat_s,
+            .fnoi = gen_helper_vsat_w,
+            .opt_opc = vecop_list,
+            .vece = MO_32
+        },
+        {
+            .fniv = gen_vsat_s,
+            .fnoi = gen_helper_vsat_d,
+            .opt_opc = vecop_list,
+            .vece = MO_64
+        },
+    };
+
+    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
It would be better to expand imm to max once, here, rather than doing so both inside gen_vsat_s and in the runtime do_vsats_* code.
Likewise for the unsigned versions. r~