On 8/30/23 01:48, Song Gao wrote:
This patch includes:
- XVREPLGR2VR.{B/H/W/D}.
Signed-off-by: Song Gao <gaos...@loongson.cn>
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
---
target/loongarch/insns.decode | 5 +++++
target/loongarch/disas.c | 10 ++++++++++
target/loongarch/insn_trans/trans_lasx.c.inc | 5 +++++
target/loongarch/insn_trans/trans_lsx.c.inc | 12 ++++++------
4 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index bcc18fb6c5..04bd238995 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1310,3 +1310,8 @@ xvsub_h 0111 01000000 11001 ..... ..... .....
@vvv
xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
+
+xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
+xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
+xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
+xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d8b62ba532..c47f455ed0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1708,6 +1708,11 @@ static void output_vvv_x(DisasContext *ctx, arg_vvv * a,
const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
}
+static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -1718,3 +1723,8 @@ INSN_LASX(xvsub_h, vvv)
INSN_LASX(xvsub_w, vvv)
INSN_LASX(xvsub_d, vvv)
INSN_LASX(xvsub_q, vvv)
+
+INSN_LASX(xvreplgr2vr_b, vr)
+INSN_LASX(xvreplgr2vr_h, vr)
+INSN_LASX(xvreplgr2vr_w, vr)
+INSN_LASX(xvreplgr2vr_d, vr)
diff --git a/target/loongarch/insn_trans/trans_lasx.c.inc
b/target/loongarch/insn_trans/trans_lasx.c.inc
index 218b8dc648..66b5abc790 100644
--- a/target/loongarch/insn_trans/trans_lasx.c.inc
+++ b/target/loongarch/insn_trans/trans_lasx.c.inc
@@ -50,3 +50,8 @@ TRANS(xvsub_b, LASX, gvec_vvv, 32, MO_8, tcg_gen_gvec_sub)
TRANS(xvsub_h, LASX, gvec_vvv, 32, MO_16, tcg_gen_gvec_sub)
TRANS(xvsub_w, LASX, gvec_vvv, 32, MO_32, tcg_gen_gvec_sub)
TRANS(xvsub_d, LASX, gvec_vvv, 32, MO_64, tcg_gen_gvec_sub)
+
+TRANS(xvreplgr2vr_b, LASX, gvec_dup, 32, MO_8)
+TRANS(xvreplgr2vr_h, LASX, gvec_dup, 32, MO_16)
+TRANS(xvreplgr2vr_w, LASX, gvec_dup, 32, MO_32)
+TRANS(xvreplgr2vr_d, LASX, gvec_dup, 32, MO_64)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 0e12213e8b..c0e7a9a372 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -4161,7 +4161,7 @@ static bool trans_vpickve2gr_du(DisasContext *ctx,
arg_rv_i *a)
return true;
}
-static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+static bool gvec_dup(DisasContext *ctx, arg_vr *a, uint32_t oprsz, MemOp mop)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
@@ -4172,14 +4172,14 @@ static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
CHECK_VEC;
tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
- 16, ctx->vl/8, src);
+ oprsz, ctx->vl / 8, src);
return true;
}
-TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
-TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
-TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
-TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
+TRANS(vreplgr2vr_b, LSX, gvec_dup, 16, MO_8)
+TRANS(vreplgr2vr_h, LSX, gvec_dup, 16, MO_16)
+TRANS(vreplgr2vr_w, LSX, gvec_dup, 16, MO_32)
+TRANS(vreplgr2vr_d, LSX, gvec_dup, 16, MO_64)
Hmm.
Ok, so revising the advice I gave versus the previous patch, I can see how having a common
CHECK_VEC is helpful. But it still needs to use oprsz not vl for the size check.
It would be better to replace with a function, like
if (!check_vec(ctx, oprsz)) {
return true;
}
rather than a macro with a hidden return. The replacement should be done in a patch by
itself, probably using check_vec(ctx, 16) for all of the existing LSX code until, step by
step, oprsz is plumbed into all of the places required.
I still think having separate minimal gen_vvv and gen_xxx helpers will help reduce the
possibility of typos, when there are a lot of instructions within an instruction format.
But when there are just 8, like here, just adding oprsz certainly looks simpler.
I wonder if it is really clearer having the LASX instructions in a separate file? Perhaps
it be better to keep all of the similar patterns together, e.g.
static bool gvec_dup(...)
{
...
}
TRANS(vreplgr2vr_b, LSX, gvec_dup, 16, MO_8)
TRANS(vreplgr2vr_h, LSX, gvec_dup, 16, MO_16)
TRANS(vreplgr2vr_w, LSX, gvec_dup, 16, MO_32)
TRANS(vreplgr2vr_d, LSX, gvec_dup, 16, MO_64)
TRANS(xvreplgr2vr_b, LASX, gvec_dup, 32, MO_8)
TRANS(xvreplgr2vr_h, LASX, gvec_dup, 32, MO_16)
TRANS(xvreplgr2vr_w, LASX, gvec_dup, 32, MO_32)
TRANS(xvreplgr2vr_d, LASX, gvec_dup, 32, MO_64)
r~