On 9/7/23 01:31, Song Gao wrote:
+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+ uint32_t oprsz, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_ptr t1 = tcg_temp_new_ptr();
TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
tcg_gen_shli_i64(t0, t0, vece);
if (HOST_BIG_ENDIAN) {
+ tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) -1));
}
tcg_gen_trunc_i64_ptr(t1, t0);
tcg_gen_add_ptr(t1, t1, cpu_env);
func(t2, t1, vec_full_offset(a->vj));
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, 16, t2);
+ if (oprsz == 32) {
+ func(t2, t1, offsetof(CPULoongArchState, fpr[a->vj].vreg.Q(1)));
+ tcg_gen_gvec_dup_i64(vece,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.Q(1)),
+ 16, 16, t2);
+ }
This would be clearer as a loop:
for (i = 0; i < oprsz; i += 16) {
func(t2, t1, vec_full_offset(a->vj) + i);
tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
}
+static bool trans_xvrepl128vei_b(DisasContext *ctx, arg_vv_i * a)
{
+ if (!avail_LASX(ctx)) {
return false;
}
+ if (!check_vec(ctx, 32)) {
return true;
}
+ tcg_gen_gvec_dup_mem(MO_8,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.B(0)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.B((a->imm))),
+ 16, 16);
+ tcg_gen_gvec_dup_mem(MO_8,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.B(16)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.B((a->imm + 16))),
+ 16, 16);
+ return true;
+}
Again, this would be clearer as a loop over the 128-bit lanes. Also, I think
you can easily merge all 4 of these functions into a single helper
parameterized by VECE.
+#define XVREPLVE0(NAME, MOP) \
+static bool trans_## NAME(DisasContext *ctx, arg_vv * a) \
+{ \
+ if (!avail_LASX(ctx)) { \
+ return false; \
+ } \
+ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ tcg_gen_gvec_dup_mem(MOP, vec_full_offset(a->vd), vec_full_offset(a->vj), \
+ 32, 32); \
+ return true; \
+}
+XVREPLVE0(xvreplve0_b, MO_8)
+XVREPLVE0(xvreplve0_h, MO_16)
+XVREPLVE0(xvreplve0_w, MO_32)
+XVREPLVE0(xvreplve0_d, MO_64)
+XVREPLVE0(xvreplve0_q, MO_128)
This should use a helper function and TRANS() instead of generating each
translator with the XVREPLVE0 macro.
+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ int ofs, i, max;
+ TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ max = (oprsz == 16) ? 1 : 2;
+
+ for (i = 0; i < max; i++) {
+ desthigh[i] = tcg_temp_new_i64();
+ destlow[i] = tcg_temp_new_i64();
+ high[i] = tcg_temp_new_i64();
+ low[i] = tcg_temp_new_i64();
+ get_vreg64(high[i], a->vj, 2 * i + 1);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(low[i], a->vj, 2 * i);
+ tcg_gen_extract2_i64(destlow[i], low[i], high[i], ofs);
+ tcg_gen_shri_i64(desthigh[i], high[i], ofs);
+ } else {
+ tcg_gen_shri_i64(destlow[i], high[i], ofs - 64);
+ desthigh[i] = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh[i], a->vd, 2 * i + 1);
+ set_vreg64(destlow[i], a->vd, 2 * i);
+ }
return true;
}
Why are you using arrays? They don't seem required.
This would seem clearer as
for (i = 0; i < oprsz / 16; i++) {
TCGv_i64 desthi = tcg_temp_new_i64();
...
}
r~