loongarch: Implement xvreplve xvinsve0 xvpickve

Richard Henderson Mon, 11 Sep 2023 16:22:46 -0700

On 9/7/23 01:31, Song Gao wrote:

+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+                           uint32_t oprsz, int vece, int bit,
+                           void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
  {
      TCGv_i64 t0 = tcg_temp_new_i64();
      TCGv_ptr t1 = tcg_temp_new_ptr();
      TCGv_i64 t2 = tcg_temp_new_i64();

+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);

      tcg_gen_shli_i64(t0, t0, vece);
      if (HOST_BIG_ENDIAN) {
+        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) -1));
      }

tcg_gen_trunc_i64_ptr(t1, t0);

      tcg_gen_add_ptr(t1, t1, cpu_env);
      func(t2, t1, vec_full_offset(a->vj));
+    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, 16, t2);
+    if (oprsz == 32) {
+        func(t2, t1,  offsetof(CPULoongArchState, fpr[a->vj].vreg.Q(1)));
+        tcg_gen_gvec_dup_i64(vece,
+                             offsetof(CPULoongArchState, fpr[a->vd].vreg.Q(1)),
+                             16, 16, t2);
+    }


This would be clearer as a loop over each 16-byte lane:

    for (i = 0; i < oprsz; i += 16) {
        func(t2, t1, vec_full_offset(a->vj) + i);
        tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
    }

+static bool trans_xvrepl128vei_b(DisasContext *ctx, arg_vv_i * a)
 {
+    if (!avail_LASX(ctx)) {
         return false;
     }

+ if (!check_vec(ctx, 32)) {

         return true;
     }

+ tcg_gen_gvec_dup_mem(MO_8,

+                         offsetof(CPULoongArchState, fpr[a->vd].vreg.B(0)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.B((a->imm))),
+                         16, 16);
+    tcg_gen_gvec_dup_mem(MO_8,
+                         offsetof(CPULoongArchState, fpr[a->vd].vreg.B(16)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.B((a->imm + 16))),
+                         16, 16);
+    return true;
+}


Again, this would be clearer as a loop.  Also, I think you can easily merge
all 4 of these functions by parameterizing on VECE.

+#define XVREPLVE0(NAME, MOP)                                                  \
+static bool trans_## NAME(DisasContext *ctx, arg_vv * a)                      \
+{                                                                             \
+    if (!avail_LASX(ctx)) {                                                   \
+        return false;                                                         \
+    }                                                                         \
+                                                                              \
+    if (!check_vec(ctx, 32)) {                                                \
+        return true;                                                          \
+    }                                                                         \
+                                                                              \
+    tcg_gen_gvec_dup_mem(MOP, vec_full_offset(a->vd), vec_full_offset(a->vj), \
+                         32, 32);                                             \
+    return true;                                                              \
+}

+XVREPLVE0(xvreplve0_b, MO_8)

+XVREPLVE0(xvreplve0_h, MO_16)
+XVREPLVE0(xvreplve0_w, MO_32)
+XVREPLVE0(xvreplve0_d, MO_64)
+XVREPLVE0(xvreplve0_q, MO_128)


This should use a helper function and TRANS(), rather than a macro that
expands to a whole function per element size.

+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+    int ofs, i, max;
+    TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
+
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    max = (oprsz == 16) ? 1 : 2;
+
+    for (i = 0; i < max; i++) {
+        desthigh[i] = tcg_temp_new_i64();
+        destlow[i] = tcg_temp_new_i64();
+        high[i] = tcg_temp_new_i64();
+        low[i] = tcg_temp_new_i64();
+        get_vreg64(high[i], a->vj, 2 * i + 1);
+
+        ofs = ((a->imm) & 0xf) * 8;
+        if (ofs < 64) {
+            get_vreg64(low[i], a->vj, 2 * i);
+            tcg_gen_extract2_i64(destlow[i], low[i], high[i], ofs);
+            tcg_gen_shri_i64(desthigh[i], high[i], ofs);
+        } else {
+            tcg_gen_shri_i64(destlow[i], high[i], ofs - 64);
+            desthigh[i] = tcg_constant_i64(0);
+        }
+        set_vreg64(desthigh[i], a->vd, 2 * i + 1);
+        set_vreg64(destlow[i], a->vd, 2 * i);
+    }

return true;


Why are you using arrays?  They don't seem required.
This would seem clearer as:

    for (i = 0; i < oprsz / 16; i++) {
        TCGv_i64 desthi = tcg_temp_new_i64();
        ...
    }


r~

Re: [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve

Reply via email to