[RFC PATCH v4 02/44] target/loongarch: meson.build support build LSX

2023-04-25 Thread Song Gao
Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/insn_trans/trans_lsx.c.inc | 5 +
 target/loongarch/lsx_helper.c   | 6 ++
 target/loongarch/meson.build| 1 +
 target/loongarch/translate.c| 1 +
 4 files changed, 13 insertions(+)
 create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc
 create mode 100644 target/loongarch/lsx_helper.c

diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
new file mode 100644
index 0000000000..1cf3ab34a9
--- /dev/null
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LSX translate functions
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
+ */
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
new file mode 100644
index 0000000000..9332163aff
--- /dev/null
+++ b/target/loongarch/lsx_helper.c
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch LSX helper functions.
+ *
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
+ */
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index 9293a8ab78..1117a51c52 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -11,6 +11,7 @@ loongarch_tcg_ss.add(files(
   'op_helper.c',
   'translate.c',
   'gdbstub.c',
+  'lsx_helper.c',
 ))
 loongarch_tcg_ss.add(zlib)
 
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 21d86077f4..97e019aeb4 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -171,6 +171,7 @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
 #include "insn_trans/trans_fmemory.c.inc"
 #include "insn_trans/trans_branch.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
+#include "insn_trans/trans_lsx.c.inc"
 
 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
-- 
2.31.1




[RFC PATCH v4 09/44] target/loongarch: Implement vaddw/vsubw

2023-04-25 Thread Song Gao
This patch includes:
- VADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  43 ++
 target/loongarch/helper.h   |  45 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 795 
 target/loongarch/insns.decode   |  43 ++
 target/loongarch/lsx_helper.c   | 190 +
 5 files changed, 1116 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index adfd693938..8ee14916f3 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -865,3 +865,46 @@ INSN_LSX(vhsubw_hu_bu, vvv)
 INSN_LSX(vhsubw_wu_hu, vvv)
 INSN_LSX(vhsubw_du_wu, vvv)
 INSN_LSX(vhsubw_qu_du, vvv)
+
+INSN_LSX(vaddwev_h_b,  vvv)
+INSN_LSX(vaddwev_w_h,  vvv)
+INSN_LSX(vaddwev_d_w,  vvv)
+INSN_LSX(vaddwev_q_d,  vvv)
+INSN_LSX(vaddwod_h_b,  vvv)
+INSN_LSX(vaddwod_w_h,  vvv)
+INSN_LSX(vaddwod_d_w,  vvv)
+INSN_LSX(vaddwod_q_d,  vvv)
+INSN_LSX(vsubwev_h_b,  vvv)
+INSN_LSX(vsubwev_w_h,  vvv)
+INSN_LSX(vsubwev_d_w,  vvv)
+INSN_LSX(vsubwev_q_d,  vvv)
+INSN_LSX(vsubwod_h_b,  vvv)
+INSN_LSX(vsubwod_w_h,  vvv)
+INSN_LSX(vsubwod_d_w,  vvv)
+INSN_LSX(vsubwod_q_d,  vvv)
+
+INSN_LSX(vaddwev_h_bu, vvv)
+INSN_LSX(vaddwev_w_hu, vvv)
+INSN_LSX(vaddwev_d_wu, vvv)
+INSN_LSX(vaddwev_q_du, vvv)
+INSN_LSX(vaddwod_h_bu, vvv)
+INSN_LSX(vaddwod_w_hu, vvv)
+INSN_LSX(vaddwod_d_wu, vvv)
+INSN_LSX(vaddwod_q_du, vvv)
+INSN_LSX(vsubwev_h_bu, vvv)
+INSN_LSX(vsubwev_w_hu, vvv)
+INSN_LSX(vsubwev_d_wu, vvv)
+INSN_LSX(vsubwev_q_du, vvv)
+INSN_LSX(vsubwod_h_bu, vvv)
+INSN_LSX(vsubwod_w_hu, vvv)
+INSN_LSX(vsubwod_d_wu, vvv)
+INSN_LSX(vsubwod_q_du, vvv)
+
+INSN_LSX(vaddwev_h_bu_b,   vvv)
+INSN_LSX(vaddwev_w_hu_h,   vvv)
+INSN_LSX(vaddwev_d_wu_w,   vvv)
+INSN_LSX(vaddwev_q_du_d,   vvv)
+INSN_LSX(vaddwod_h_bu_b,   vvv)
+INSN_LSX(vaddwod_w_hu_h,   vvv)
+INSN_LSX(vaddwod_d_wu_w,   vvv)
+INSN_LSX(vaddwod_q_du_d,   vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 6d58dabaed..505c336ff3 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -148,3 +148,48 @@ DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+
+DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vsubwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vaddwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vsubwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsubwod_q_du, TCG_CALL_N

[RFC PATCH v4 21/44] target/loongarch: Implement LSX logic instructions

2023-04-25 Thread Song Gao
This patch includes:
- V{AND/OR/XOR/NOR/ANDN/ORN}.V;
- V{AND/OR/XOR/NOR}I.B.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 12 +
 target/loongarch/helper.h   |  2 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 56 +
 target/loongarch/insns.decode   | 13 +
 target/loongarch/lsx_helper.c   | 11 
 5 files changed, 94 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 2725b827ee..eca0a4bb7b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1091,3 +1091,15 @@ INSN_LSX(vmskltz_w,vv)
 INSN_LSX(vmskltz_d,vv)
 INSN_LSX(vmskgez_b,vv)
 INSN_LSX(vmsknz_b, vv)
+
+INSN_LSX(vand_v,   vvv)
+INSN_LSX(vor_v,vvv)
+INSN_LSX(vxor_v,   vvv)
+INSN_LSX(vnor_v,   vvv)
+INSN_LSX(vandn_v,  vvv)
+INSN_LSX(vorn_v,   vvv)
+
+INSN_LSX(vandi_b,  vv_i)
+INSN_LSX(vori_b,   vv_i)
+INSN_LSX(vxori_b,  vv_i)
+INSN_LSX(vnori_b,  vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 34b7b2f576..617c579592 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -350,3 +350,5 @@ DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
 DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
 DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
 DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
+
+DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 64387f2666..e5e194106b 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2874,3 +2874,59 @@ TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w)
 TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
 TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
 TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
+
+TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and)
+TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or)
+TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor)
+TRANS(vnor_v, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+
+static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
+{
+uint32_t vd_ofs, vj_ofs, vk_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+vk_ofs = vec_full_offset(a->vk);
+
+tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
+return true;
+}
+TRANS(vorn_v, gvec_vvv, MO_64, tcg_gen_gvec_orc)
+TRANS(vandi_b, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
+TRANS(vori_b, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
+TRANS(vxori_b, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
+
+static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+TCGv_vec t1;
+
+t1 = tcg_constant_vec_matching(t, vece, imm);
+tcg_gen_nor_vec(vece, t, a, t1);
+}
+
+static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
+{
+tcg_gen_movi_i64(t, dup_const(MO_8, imm));
+tcg_gen_nor_i64(t, a, t);
+}
+
+static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+   int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_nor_vec, 0
+};
+static const GVecGen2i op = {
+   .fni8 = gen_vnori_b,
+   .fniv = gen_vnori,
+   .fnoi = gen_helper_vnori_b,
+   .opt_opc = vecop_list,
+   .vece = MO_8
+};
+
+tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
+}
+
+TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 47c1ef78a7..6309683be9 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
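@@ -503,6 +503,7 @@ dbcl             0000 00000010 10101 ...............      @i15
 @vv_ui4         .... ........ ..... . imm:4 vj:5 vd:5    &vv_i
 @vv_ui5           .... ........ ..... imm:5 vj:5 vd:5    &vv_i
 @vv_ui6            .... ........ .... imm:6 vj:5 vd:5    &vv_i
+@vv_ui8              .... ........ .. imm:8 vj:5 vd:5    &vv_i
 @vv_i5           .... ........ ..... imm:s5 vj:5 vd:5    &vv_i
 
 vadd_b           0111 00000000 10100 ..... ..... .....    @vvv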
@@ -790,3 +791,15 @@ vmskltz_w        0111 00101001 11000 10010 ..... .....    @vv
 vmskltz_d        0111 00101001 11000 10011 ..... .....    @vv
 vmskgez_b        0111 00101001 11000 10100 ..... .....    @vv
 vmsknz_b         0111 00101001 11000 11000 ..... .....    @vv
+
+vand_v           0111 00010010 01100 ..... ..... .....    @vvv
+vor_v            0111 00010010 01101 ..... ..... .....    @vvv
+vxor_v           0111 00010010 01110 ..... ..... .....    @vvv
+vnor_v           0111 00010010 01111 ..... ..... .....    @vvv
+vandn_v          0111 00010010 10000 ..... ..... .....    @vvv
+vorn_v           0111 00010010 10001 ..... ..... .....    @vvv
+
+vandi_b          0111 00111101 00 ........ ..... .....    @vv_ui8

[RFC PATCH v4 42/44] target/loongarch: Implement vldi

2023-04-25 Thread Song Gao
This patch includes:
- VLDI.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|   7 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 137 
 target/loongarch/insns.decode   |   4 +
 3 files changed, 148 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 8627908fc9..5c402d944d 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -858,6 +858,11 @@ static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
     output(ctx, mnemonic, "v%d, r%d, r%d", a->vd, a->rj, a->rk);
 }
 
+static void output_v_i(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, 0x%x", a->vd, a->imm);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -1143,6 +1148,8 @@ INSN_LSX(vmskltz_d,vv)
 INSN_LSX(vmskgez_b,vv)
 INSN_LSX(vmsknz_b, vv)
 
+INSN_LSX(vldi, v_i)
+
 INSN_LSX(vand_v,   vvv)
 INSN_LSX(vor_v,vvv)
 INSN_LSX(vxor_v,   vvv)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 6bf9656c2c..bf1cc9e956 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2912,6 +2912,143 @@ TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
 TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
 TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
 
+#define EXPAND_BYTE(bit)  ((uint64_t)(bit ? 0xff : 0))
+
+static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
+{
+int mode;
+uint64_t data, t;
+
+/*
+ * imm bit [11:8] is mode, mode value is 0-12.
+ * other values are invalid.
+ */
+mode = (imm >> 8) & 0xf;
+t =  imm & 0xff;
+switch (mode) {
+case 0:
+/* data: {2{24'0, imm[7:0]}} */
+data =  (t << 32) | t ;
+break;
+case 1:
+/* data: {2{16'0, imm[7:0], 8'0}} */
+data = (t << 24) | (t << 8);
+break;
+case 2:
+/* data: {2{8'0, imm[7:0], 16'0}} */
+data = (t << 48) | (t << 16);
+break;
+case 3:
+/* data: {2{imm[7:0], 24'0}} */
+data = (t << 56) | (t << 24);
+break;
+case 4:
+/* data: {4{8'0, imm[7:0]}} */
+data = (t << 48) | (t << 32) | (t << 16) | t;
+break;
+case 5:
+/* data: {4{imm[7:0], 8'0}} */
+data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
+break;
+case 6:
+/* data: {2{16'0, imm[7:0], 8'1}} */
+data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
+break;
+case 7:
+/* data: {2{8'0, imm[7:0], 16'1}} */
+data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
+break;
+case 8:
+/* data: {8{imm[7:0]}} */
+data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
+  (t << 24) | (t << 16) | (t << 8) | t;
+break;
+case 9:
+/* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
+{
+uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
+b0 = t& 0x1;
+b1 = (t & 0x2) >> 1;
+b2 = (t & 0x4) >> 2;
+b3 = (t & 0x8) >> 3;
+b4 = (t & 0x10) >> 4;
+b5 = (t & 0x20) >> 5;
+b6 = (t & 0x40) >> 6;
+b7 = (t & 0x80) >> 7;
+data = (EXPAND_BYTE(b7) << 56) |
+   (EXPAND_BYTE(b6) << 48) |
+   (EXPAND_BYTE(b5) << 40) |
+   (EXPAND_BYTE(b4) << 32) |
+   (EXPAND_BYTE(b3) << 24) |
+   (EXPAND_BYTE(b2) << 16) |
+   (EXPAND_BYTE(b1) <<  8) |
+   EXPAND_BYTE(b0);
+}
+break;
+case 10:
+/* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
+{
+uint64_t b6, b7;
+uint64_t t0, t1;
+b6 = (imm & 0x40) >> 6;
+b7 = (imm & 0x80) >> 7;
+t0 = (imm & 0x3f);
+t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
+data  = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
+}
+break;
+case 11:
+/* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
+{
+uint64_t b6,b7;
+uint64_t t0, t1;
+b6 = (imm & 0x40) >> 6;
+b7 = (imm & 0x80) >> 7;
+t0 = (imm & 0x3f);
+t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
+data = (t1 << 25) | (t0 << 19);
+}
+break;
+case 12:
+/* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
+{
+uint64_t b6,b7;
+uint64_t t0, t1;
+b6 = (imm & 0x40) >> 6;
+b7 = (imm & 0x80) >> 7;
+t0 = (imm & 0x3f);
+t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
+data = (t1

[RFC PATCH v4 30/44] target/loongarch: Implement vpcnt

2023-04-25 Thread Song Gao
This patch includes:
- VPCNT.{B/H/W/D}.

Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  5 +
 target/loongarch/helper.h   |  5 +
 target/loongarch/insn_trans/trans_lsx.c.inc |  5 +
 target/loongarch/insns.decode   |  5 +
 target/loongarch/lsx_helper.c   | 18 ++
 5 files changed, 38 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0c82a1d9d1..0ca51de9d8 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1267,3 +1267,8 @@ INSN_LSX(vclz_b,   vv)
 INSN_LSX(vclz_h,   vv)
 INSN_LSX(vclz_w,   vv)
 INSN_LSX(vclz_d,   vv)
+
+INSN_LSX(vpcnt_b,  vv)
+INSN_LSX(vpcnt_h,  vv)
+INSN_LSX(vpcnt_w,  vv)
+INSN_LSX(vpcnt_d,  vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index e21e9b9704..96b9b16923 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -480,3 +480,8 @@ DEF_HELPER_3(vclz_b, void, env, i32, i32)
 DEF_HELPER_3(vclz_h, void, env, i32, i32)
 DEF_HELPER_3(vclz_w, void, env, i32, i32)
 DEF_HELPER_3(vclz_d, void, env, i32, i32)
+
+DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
+DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index c7649fb777..f4ebdca63c 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3106,3 +3106,8 @@ TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
 TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
 TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
 TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
+
+TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
+TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
+TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
+TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7591ec1bab..f865e83da5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -968,3 +968,8 @@ vclz_b           0111 00101001 11000 00100 ..... .....    @vv
 vclz_h           0111 00101001 11000 00101 ..... .....    @vv
 vclz_w           0111 00101001 11000 00110 ..... .....    @vv
 vclz_d           0111 00101001 11000 00111 ..... .....    @vv
+
+vpcnt_b          0111 00101001 11000 01000 ..... .....    @vv
+vpcnt_h          0111 00101001 11000 01001 ..... .....    @vv
+vpcnt_w          0111 00101001 11000 01010 ..... .....    @vv
+vpcnt_d          0111 00101001 11000 01011 ..... .....    @vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 044032f180..f18c4a2978 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -1946,3 +1946,21 @@ DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
 DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
 DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
 DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
+
+#define VPCNT(NAME, BIT, E, FN) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{   \
+int i;  \
+VReg *Vd = &(env->fpr[vd].vreg);\
+VReg *Vj = &(env->fpr[vj].vreg);\
+\
+for (i = 0; i < LSX_LEN/BIT; i++)   \
+{   \
+Vd->E(i) = FN(Vj->E(i));\
+}   \
+}
+
+VPCNT(vpcnt_b, 8, UB, ctpop8)
+VPCNT(vpcnt_h, 16, UH, ctpop16)
+VPCNT(vpcnt_w, 32, UW, ctpop32)
+VPCNT(vpcnt_d, 64, UD, ctpop64)
-- 
2.31.1




[RFC PATCH v4 36/44] target/loongarch: Implement vfcmp

2023-04-25 Thread Song Gao
This patch includes:
- VFCMP.cond.{S/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 94 +
 target/loongarch/helper.h   |  5 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 32 +++
 target/loongarch/insns.decode   |  5 ++
 target/loongarch/lsx_helper.c   | 54 
 5 files changed, 190 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e589b23f4c..64db01d2f9 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1447,3 +1447,97 @@ INSN_LSX(vslti_bu, vv_i)
 INSN_LSX(vslti_hu, vv_i)
 INSN_LSX(vslti_wu, vv_i)
 INSN_LSX(vslti_du, vv_i)
+
+#define output_vfcmp(C, PREFIX, SUFFIX) \
+{   \
+(C)->info->fprintf_func((C)->info->stream, "%08x   %s%s\t%d, f%d, f%d", \
+(C)->insn, PREFIX, SUFFIX, a->vd,   \
+a->vj, a->vk);  \
+}
+
+static bool output_vvv_fcond(DisasContext *ctx, arg_vvv_fcond * a,
+ const char *suffix)
+{
+bool ret = true;
+switch (a->fcond) {
+case 0x0:
+output_vfcmp(ctx, "vfcmp_caf_", suffix);
+break;
+case 0x1:
+output_vfcmp(ctx, "vfcmp_saf_", suffix);
+break;
+case 0x2:
+output_vfcmp(ctx, "vfcmp_clt_", suffix);
+break;
+case 0x3:
+output_vfcmp(ctx, "vfcmp_slt_", suffix);
+break;
+case 0x4:
+output_vfcmp(ctx, "vfcmp_ceq_", suffix);
+break;
+case 0x5:
+output_vfcmp(ctx, "vfcmp_seq_", suffix);
+break;
+case 0x6:
+output_vfcmp(ctx, "vfcmp_cle_", suffix);
+break;
+case 0x7:
+output_vfcmp(ctx, "vfcmp_sle_", suffix);
+break;
+case 0x8:
+output_vfcmp(ctx, "vfcmp_cun_", suffix);
+break;
+case 0x9:
+output_vfcmp(ctx, "vfcmp_sun_", suffix);
+break;
+case 0xA:
+output_vfcmp(ctx, "vfcmp_cult_", suffix);
+break;
+case 0xB:
+output_vfcmp(ctx, "vfcmp_sult_", suffix);
+break;
+case 0xC:
+output_vfcmp(ctx, "vfcmp_cueq_", suffix);
+break;
+case 0xD:
+output_vfcmp(ctx, "vfcmp_sueq_", suffix);
+break;
+case 0xE:
+output_vfcmp(ctx, "vfcmp_cule_", suffix);
+break;
+case 0xF:
+output_vfcmp(ctx, "vfcmp_sule_", suffix);
+break;
+case 0x10:
+output_vfcmp(ctx, "vfcmp_cne_", suffix);
+break;
+case 0x11:
+output_vfcmp(ctx, "vfcmp_sne_", suffix);
+break;
+case 0x14:
+output_vfcmp(ctx, "vfcmp_cor_", suffix);
+break;
+case 0x15:
+output_vfcmp(ctx, "vfcmp_sor_", suffix);
+break;
+case 0x18:
+output_vfcmp(ctx, "vfcmp_cune_", suffix);
+break;
+case 0x19:
+output_vfcmp(ctx, "vfcmp_sune_", suffix);
+break;
+default:
+ret = false;
+}
+return ret;
+}
+
+#define LSX_FCMP_INSN(suffix)\
+static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, \
+ arg_vvv_fcond * a)  \
+{\
+return output_vvv_fcond(ctx, a, #suffix);\
+}
+
+LSX_FCMP_INSN(s)
+LSX_FCMP_INSN(d)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index e9e9fa7f87..867756fdb5 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -637,3 +637,8 @@ DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, 
ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 4d9f88bf4f..abb6efc09d 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3717,3 +3717,35 @@ TRANS(vslti_bu, do_vslti_u, MO_8)
 TRANS(vslti_hu, do_vslti_u, MO_16)
 TRANS(vslti_wu, do_vslti_u, MO_32)
 TRANS(vslti_du, do_vslti_u, MO_64)
+
+static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
+{
+uint32_t flags;
+void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+TCGv_i32 vd = tcg_constant_i32(a->vd);
+TCGv_i32 vj = tcg_constant_i32(a->vj);
+TCGv_i32 vk = tcg_constant_i32(a->vk);
+
+CHECK_SXE;
+
+fn = (a->fcond & 1 ? gen_helper_vfcmp_

[RFC PATCH v4 15/44] target/loongarch: Implement vmadd/vmsub/vmaddw{ev/od}

2023-04-25 Thread Song Gao
This patch includes:
- VMADD.{B/H/W/D};
- VMSUB.{B/H/W/D};
- VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  34 ++
 target/loongarch/helper.h   |  30 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 612 
 target/loongarch/insns.decode   |  34 ++
 target/loongarch/lsx_helper.c   | 107 
 5 files changed, 817 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 48e6ef5309..980e6e6375 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1010,3 +1010,37 @@ INSN_LSX(vmulwod_h_bu_b,   vvv)
 INSN_LSX(vmulwod_w_hu_h,   vvv)
 INSN_LSX(vmulwod_d_wu_w,   vvv)
 INSN_LSX(vmulwod_q_du_d,   vvv)
+
+INSN_LSX(vmadd_b,  vvv)
+INSN_LSX(vmadd_h,  vvv)
+INSN_LSX(vmadd_w,  vvv)
+INSN_LSX(vmadd_d,  vvv)
+INSN_LSX(vmsub_b,  vvv)
+INSN_LSX(vmsub_h,  vvv)
+INSN_LSX(vmsub_w,  vvv)
+INSN_LSX(vmsub_d,  vvv)
+
+INSN_LSX(vmaddwev_h_b, vvv)
+INSN_LSX(vmaddwev_w_h, vvv)
+INSN_LSX(vmaddwev_d_w, vvv)
+INSN_LSX(vmaddwev_q_d, vvv)
+INSN_LSX(vmaddwod_h_b, vvv)
+INSN_LSX(vmaddwod_w_h, vvv)
+INSN_LSX(vmaddwod_d_w, vvv)
+INSN_LSX(vmaddwod_q_d, vvv)
+INSN_LSX(vmaddwev_h_bu,vvv)
+INSN_LSX(vmaddwev_w_hu,vvv)
+INSN_LSX(vmaddwev_d_wu,vvv)
+INSN_LSX(vmaddwev_q_du,vvv)
+INSN_LSX(vmaddwod_h_bu,vvv)
+INSN_LSX(vmaddwod_w_hu,vvv)
+INSN_LSX(vmaddwod_d_wu,vvv)
+INSN_LSX(vmaddwod_q_du,vvv)
+INSN_LSX(vmaddwev_h_bu_b,  vvv)
+INSN_LSX(vmaddwev_w_hu_h,  vvv)
+INSN_LSX(vmaddwev_d_wu_w,  vvv)
+INSN_LSX(vmaddwev_q_du_d,  vvv)
+INSN_LSX(vmaddwod_h_bu_b,  vvv)
+INSN_LSX(vmaddwod_w_hu_h,  vvv)
+INSN_LSX(vmaddwod_d_wu_w,  vvv)
+INSN_LSX(vmaddwod_q_du_d,  vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 830d8cbe62..8cf9620702 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -273,3 +273,33 @@ DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmadd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmsub_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmsub_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 9a36f942e2..400c3a0339 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2064,3 +2064,615 @@ static void do_vmulwod_u_s(unsigned vece, uint32_t 
vd_ofs, uint32_t vj_ofs,
 TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s)
 TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s)
 TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s)
+
+static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+

[RFC PATCH v4 07/44] target/loongarch: Implement vsadd/vssub

2023-04-25 Thread Song Gao
This patch includes:
- VSADD.{B/H/W/D}[U];
- VSSUB.{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 17 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 17 +
 target/loongarch/insns.decode   | 17 +
 3 files changed, 51 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 5eabb8c47a..b7f9320ba0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -831,3 +831,20 @@ INSN_LSX(vneg_b,   vv)
 INSN_LSX(vneg_h,   vv)
 INSN_LSX(vneg_w,   vv)
 INSN_LSX(vneg_d,   vv)
+
+INSN_LSX(vsadd_b,  vvv)
+INSN_LSX(vsadd_h,  vvv)
+INSN_LSX(vsadd_w,  vvv)
+INSN_LSX(vsadd_d,  vvv)
+INSN_LSX(vsadd_bu, vvv)
+INSN_LSX(vsadd_hu, vvv)
+INSN_LSX(vsadd_wu, vvv)
+INSN_LSX(vsadd_du, vvv)
+INSN_LSX(vssub_b,  vvv)
+INSN_LSX(vssub_h,  vvv)
+INSN_LSX(vssub_w,  vvv)
+INSN_LSX(vssub_d,  vvv)
+INSN_LSX(vssub_bu, vvv)
+INSN_LSX(vssub_hu, vvv)
+INSN_LSX(vssub_wu, vvv)
+INSN_LSX(vssub_du, vvv)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index d02db6285f..082bd738ce 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -140,3 +140,20 @@ TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg)
 TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg)
 TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg)
 TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg)
+
+TRANS(vsadd_b, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
+TRANS(vsadd_h, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
+TRANS(vsadd_w, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
+TRANS(vsadd_d, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
+TRANS(vsadd_bu, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
+TRANS(vsadd_hu, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
+TRANS(vsadd_wu, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
+TRANS(vsadd_du, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
+TRANS(vssub_b, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
+TRANS(vssub_h, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
+TRANS(vssub_w, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
+TRANS(vssub_d, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
+TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
+TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
+TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
+TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d90798be11..3a29f0a9ab 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -525,3 +525,20 @@ vneg_b           0111 00101001 11000 01100 ..... .....    @vv
 vneg_h           0111 00101001 11000 01101 ..... .....    @vv
 vneg_w           0111 00101001 11000 01110 ..... .....    @vv
 vneg_d           0111 00101001 11000 01111 ..... .....    @vv
+
+vsadd_b          0111 00000100 01100 ..... ..... .....    @vvv
+vsadd_h          0111 00000100 01101 ..... ..... .....    @vvv
+vsadd_w          0111 00000100 01110 ..... ..... .....    @vvv
+vsadd_d          0111 00000100 01111 ..... ..... .....    @vvv
+vsadd_bu         0111 00000100 10100 ..... ..... .....    @vvv
+vsadd_hu         0111 00000100 10101 ..... ..... .....    @vvv
+vsadd_wu         0111 00000100 10110 ..... ..... .....    @vvv
+vsadd_du         0111 00000100 10111 ..... ..... .....    @vvv
+vssub_b          0111 00000100 10000 ..... ..... .....    @vvv
+vssub_h          0111 00000100 10001 ..... ..... .....    @vvv
+vssub_w          0111 00000100 10010 ..... ..... .....    @vvv
+vssub_d          0111 00000100 10011 ..... ..... .....    @vvv
+vssub_bu         0111 00000100 11000 ..... ..... .....    @vvv
+vssub_hu         0111 00000100 11001 ..... ..... .....    @vvv
+vssub_wu         0111 00000100 11010 ..... ..... .....    @vvv
+vssub_du         0111 00000100 11011 ..... ..... .....    @vvv
-- 
2.31.1




[RFC PATCH v4 01/44] target/loongarch: Add LSX data type VReg

2023-04-25 Thread Song Gao
Signed-off-by: Song Gao 
---
 linux-user/loongarch64/signal.c |  4 +-
 target/loongarch/cpu.c  |  2 +-
 target/loongarch/cpu.h  | 21 -
 target/loongarch/gdbstub.c  |  4 +-
 target/loongarch/internals.h| 22 +
 target/loongarch/machine.c  | 79 ++---
 6 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
index 7c7afb652e..bb8efb1172 100644
--- a/linux-user/loongarch64/signal.c
+++ b/linux-user/loongarch64/signal.c
@@ -128,7 +128,7 @@ static void setup_sigframe(CPULoongArchState *env,
 
 fpu_ctx = (struct target_fpu_context *)(info + 1);
 for (i = 0; i < 32; ++i) {
-__put_user(env->fpr[i], &fpu_ctx->regs[i]);
+__put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
 }
 __put_user(read_fcc(env), &fpu_ctx->fcc);
 __put_user(env->fcsr0, &fpu_ctx->fcsr);
@@ -193,7 +193,7 @@ static void restore_sigframe(CPULoongArchState *env,
 uint64_t fcc;
 
 for (i = 0; i < 32; ++i) {
-__get_user(env->fpr[i], &fpu_ctx->regs[i]);
+__get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
 }
 __get_user(fcc, &fpu_ctx->fcc);
 write_fcc(env, fcc);
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 97e6579f6a..18b41221a6 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -656,7 +656,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 /* fpr */
 if (flags & CPU_DUMP_FPU) {
 for (i = 0; i < 32; i++) {
-qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i]);
+qemu_fprintf(f, " %s %016" PRIx64, fregnames[i], env->fpr[i].vreg.D(0));
 if ((i & 3) == 3) {
 qemu_fprintf(f, "\n");
 }
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index e11c875188..fd0f61936d 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -8,6 +8,7 @@
 #ifndef LOONGARCH_CPU_H
 #define LOONGARCH_CPU_H
 
+#include "qemu/int128.h"
 #include "exec/cpu-defs.h"
 #include "fpu/softfloat-types.h"
 #include "hw/registerfields.h"
@@ -241,6 +242,24 @@ FIELD(TLB_MISC, ASID, 1, 10)
 FIELD(TLB_MISC, VPPN, 13, 35)
 FIELD(TLB_MISC, PS, 48, 6)
 
+#define LSX_LEN   (128)
+typedef union VReg {
+    int8_t   B[LSX_LEN / 8];
+    int16_t  H[LSX_LEN / 16];
+    int32_t  W[LSX_LEN / 32];
+    int64_t  D[LSX_LEN / 64];
+    uint8_t  UB[LSX_LEN / 8];
+    uint16_t UH[LSX_LEN / 16];
+    uint32_t UW[LSX_LEN / 32];
+    uint64_t UD[LSX_LEN / 64];
+    Int128   Q[LSX_LEN / 128];
+}VReg;
+
+typedef union fpr_t fpr_t;
+union fpr_t {
+    VReg  vreg;
+};
+
 struct LoongArchTLB {
 uint64_t tlb_misc;
 /* Fields corresponding to CSR_TLBELO0/1 */
@@ -253,7 +272,7 @@ typedef struct CPUArchState {
 uint64_t gpr[32];
 uint64_t pc;
 
-uint64_t fpr[32];
+fpr_t fpr[32];
 float_status fp_status;
 bool cf[8];
 
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
index fa3e034d15..0752fff924 100644
--- a/target/loongarch/gdbstub.c
+++ b/target/loongarch/gdbstub.c
@@ -69,7 +69,7 @@ static int loongarch_gdb_get_fpu(CPULoongArchState *env,
  GByteArray *mem_buf, int n)
 {
 if (0 <= n && n < 32) {
-return gdb_get_reg64(mem_buf, env->fpr[n]);
+return gdb_get_reg64(mem_buf, env->fpr[n].vreg.D(0));
 } else if (n == 32) {
 uint64_t val = read_fcc(env);
 return gdb_get_reg64(mem_buf, val);
@@ -85,7 +85,7 @@ static int loongarch_gdb_set_fpu(CPULoongArchState *env,
 int length = 0;
 
 if (0 <= n && n < 32) {
-env->fpr[n] = ldq_p(mem_buf);
+env->fpr[n].vreg.D(0) = ldq_p(mem_buf);
 length = 8;
 } else if (n == 32) {
 uint64_t val = ldq_p(mem_buf);
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index f01635aed6..4c5752fc01 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -21,6 +21,28 @@
 /* Global bit for huge page */
 #define LOONGARCH_HGLOBAL_SHIFT 12
 
+#if  HOST_BIG_ENDIAN
+#define B(x)  B[15 - (x)]
+#define H(x)  H[7 - (x)]
+#define W(x)  W[3 - (x)]
+#define D(x)  D[1 - (x)]
+#define UB(x) UB[15 - (x)]
+#define UH(x) UH[7 - (x)]
+#define UW(x) UW[3 - (x)]
+#define UD(x) UD[1 -(x)]
+#define Q(x)  Q[x]
+#else
+#define B(x)  B[x]
+#define H(x)  H[x]
+#define W(x)  W[x]
+#define D(x)  D[x]
+#define UB(x) UB[x]
+#define UH(x) UH[x]
+#define UW(x) UW[x]
+#define UD(x) UD[x]
+#define Q(x)  Q[x]
+#endif
+
 void loongarch_translate_init(void);
 
 void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
index b1e523ea72..7adc1bdff9 100644
--- a/target/loongarch/machine.c
+++ b/target/loongarch/machine.c
@@ -10,6 +10,72 @@
 #include "migration/cpu.h"
 #include "internals.h"
 
+static const VMStateDescription vmstate_fpu_reg = {
+ 

[RFC PATCH v4 24/44] target/loongarch: Implement vsrlr vsrar

2023-04-25 Thread Song Gao
This patch includes:
- VSRLR[I].{B/H/W/D};
- VSRAR[I].{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  18 
 target/loongarch/helper.h   |  18 
 target/loongarch/insn_trans/trans_lsx.c.inc |  18 
 target/loongarch/insns.decode   |  18 
 target/loongarch/lsx_helper.c   | 104 
 5 files changed, 176 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 087cac10ad..c62b6720ec 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1148,3 +1148,21 @@ INSN_LSX(vsllwil_hu_bu,vv_i)
 INSN_LSX(vsllwil_wu_hu,vv_i)
 INSN_LSX(vsllwil_du_wu,vv_i)
 INSN_LSX(vextl_qu_du,  vv)
+
+INSN_LSX(vsrlr_b,  vvv)
+INSN_LSX(vsrlr_h,  vvv)
+INSN_LSX(vsrlr_w,  vvv)
+INSN_LSX(vsrlr_d,  vvv)
+INSN_LSX(vsrlri_b, vv_i)
+INSN_LSX(vsrlri_h, vv_i)
+INSN_LSX(vsrlri_w, vv_i)
+INSN_LSX(vsrlri_d, vv_i)
+
+INSN_LSX(vsrar_b,  vvv)
+INSN_LSX(vsrar_h,  vvv)
+INSN_LSX(vsrar_w,  vvv)
+INSN_LSX(vsrar_d,  vvv)
+INSN_LSX(vsrari_b, vv_i)
+INSN_LSX(vsrari_h, vv_i)
+INSN_LSX(vsrari_w, vv_i)
+INSN_LSX(vsrari_d, vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index e98f7c3e6f..20a5e7c8e6 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -361,3 +361,21 @@ DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
 DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
+
+DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 037c742aa4..fb43e1b3ce 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2987,3 +2987,21 @@ TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu)
 TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu)
 TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu)
 TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du)
+
+TRANS(vsrlr_b, gen_vvv, gen_helper_vsrlr_b)
+TRANS(vsrlr_h, gen_vvv, gen_helper_vsrlr_h)
+TRANS(vsrlr_w, gen_vvv, gen_helper_vsrlr_w)
+TRANS(vsrlr_d, gen_vvv, gen_helper_vsrlr_d)
+TRANS(vsrlri_b, gen_vv_i, gen_helper_vsrlri_b)
+TRANS(vsrlri_h, gen_vv_i, gen_helper_vsrlri_h)
+TRANS(vsrlri_w, gen_vv_i, gen_helper_vsrlri_w)
+TRANS(vsrlri_d, gen_vv_i, gen_helper_vsrlri_d)
+
+TRANS(vsrar_b, gen_vvv, gen_helper_vsrar_b)
+TRANS(vsrar_h, gen_vvv, gen_helper_vsrar_h)
+TRANS(vsrar_w, gen_vvv, gen_helper_vsrar_w)
+TRANS(vsrar_d, gen_vvv, gen_helper_vsrar_d)
+TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b)
+TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h)
+TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w)
+TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 23dd338026..a21743 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -848,3 +848,21 @@ vsllwil_hu_bu    0111 00110000 11000 01 ... ..... .....   @vv_ui3
 vsllwil_wu_hu    0111 00110000 11000 1 .... ..... .....   @vv_ui4
 vsllwil_du_wu    0111 00110000 11001 ..... ..... .....    @vv_ui5
 vextl_qu_du      0111 00110000 11010 00000 ..... .....    @vv
+
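+vsrlr_b          0111 00001111 00000 ..... ..... .....    @vvv
+vsrlr_h          0111 00001111 00001 ..... ..... .....    @vvv
+vsrlr_w          0111 00001111 00010 ..... ..... .....    @vvv
+vsrlr_d          0111 00001111 00011 ..... ..... .....    @vvv
+vsrlri_b         0111 00101010 01000 01 ... ..... .....   @vv_ui3
+vsrlri_h         0111 00101010 01000 1 .... ..... .....   @vv_ui4
+vsrlri_w         0111 00101010 01001 ..... ..... .....    @vv_ui5
+vsrlri_d         0111 00101010 0101 ...... ..... .....    @vv_ui6
+
+vsrar_b          0111 00001111 00100 ..... ..... .....    @vvv
+vsrar_h          0111 00001111 00101 ..... ..... .....    @vvv
+vsrar_w          0111 00001111 00110 ..... ..... .....    @vvv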
+vsrar_d  0111  00111 . . .@

[RFC PATCH v4 28/44] target/loongarch: Implement vssrlrn vssrarn

2023-04-25 Thread Song Gao
This patch includes:
- VSSRLRN.{B.H/H.W/W.D};
- VSSRARN.{B.H/H.W/W.D};
- VSSRLRN.{BU.H/HU.W/WU.D};
- VSSRARN.{BU.H/HU.W/WU.D};
- VSSRLRNI.{B.H/H.W/W.D/D.Q};
- VSSRARNI.{B.H/H.W/W.D/D.Q};
- VSSRLRNI.{BU.H/HU.W/WU.D/DU.Q};
- VSSRARNI.{BU.H/HU.W/WU.D/DU.Q}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  30 ++
 target/loongarch/helper.h   |  30 ++
 target/loongarch/insn_trans/trans_lsx.c.inc |  30 ++
 target/loongarch/insns.decode   |  30 ++
 target/loongarch/lsx_helper.c   | 358 
 5 files changed, 478 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 426d30dc01..405e8885cd 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1228,3 +1228,33 @@ INSN_LSX(vssrani_bu_h, vv_i)
 INSN_LSX(vssrani_hu_w, vv_i)
 INSN_LSX(vssrani_wu_d, vv_i)
 INSN_LSX(vssrani_du_q, vv_i)
+
+INSN_LSX(vssrlrn_b_h,  vvv)
+INSN_LSX(vssrlrn_h_w,  vvv)
+INSN_LSX(vssrlrn_w_d,  vvv)
+INSN_LSX(vssrarn_b_h,  vvv)
+INSN_LSX(vssrarn_h_w,  vvv)
+INSN_LSX(vssrarn_w_d,  vvv)
+INSN_LSX(vssrlrn_bu_h, vvv)
+INSN_LSX(vssrlrn_hu_w, vvv)
+INSN_LSX(vssrlrn_wu_d, vvv)
+INSN_LSX(vssrarn_bu_h, vvv)
+INSN_LSX(vssrarn_hu_w, vvv)
+INSN_LSX(vssrarn_wu_d, vvv)
+
+INSN_LSX(vssrlrni_b_h, vv_i)
+INSN_LSX(vssrlrni_h_w, vv_i)
+INSN_LSX(vssrlrni_w_d, vv_i)
+INSN_LSX(vssrlrni_d_q, vv_i)
+INSN_LSX(vssrlrni_bu_h,vv_i)
+INSN_LSX(vssrlrni_hu_w,vv_i)
+INSN_LSX(vssrlrni_wu_d,vv_i)
+INSN_LSX(vssrlrni_du_q,vv_i)
+INSN_LSX(vssrarni_b_h, vv_i)
+INSN_LSX(vssrarni_h_w, vv_i)
+INSN_LSX(vssrarni_w_d, vv_i)
+INSN_LSX(vssrarni_d_q, vv_i)
+INSN_LSX(vssrarni_bu_h,vv_i)
+INSN_LSX(vssrarni_hu_w,vv_i)
+INSN_LSX(vssrarni_wu_d,vv_i)
+INSN_LSX(vssrarni_du_q,vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 28f159768c..724112da81 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -441,3 +441,33 @@ DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 5d7e45a793..9c24cbc297 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3067,3 +3067,33 @@ TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h)
 TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w)
 TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d)
 TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q)
+
+TRANS(vssrlrn_b_h, gen_vvv, gen_helper_vssrlrn_b_h)
+TRANS(vssrlrn_h_w, gen_vvv, gen_helper_vssrlrn_h_w)
+TRANS(vssrlrn_w_d, gen_vvv, gen_helper_vssrlrn_w_d)
+TRANS(vssrarn_b_h, gen_vvv, gen_helper_vssrarn_b_h)
+TRANS(vssrarn_h_w, gen_vvv, gen_helper_vssrarn_h_w)
+TRANS(vssrarn_w_d, gen_vvv, gen_helper_vssrarn_w_d)
+TRANS(vssrlrn_bu_h, gen_vvv, gen_helper_vssrlrn_bu_h)
+TRANS(vssrlrn_hu_w, gen_vvv, gen_helper_vssrlrn_hu_w)
+TRANS(vssrlrn_wu_d, gen_vvv, gen_helper_vssrlrn_wu_d)
+TRANS(vssrarn_bu_h, gen_vvv, gen_helper_vssrarn_bu_h)
+TRANS(vssrarn_hu_w, gen_vvv, gen_helper_vssrarn_hu_w)
+TRANS(vssrarn_wu_d, gen_vvv, gen_helper_vssrarn_wu_d)
+
+TRANS(vssrlrni_b_h, gen_vv_i, ge

[RFC PATCH v4 27/44] target/loongarch: Implement vssrln vssran

2023-04-25 Thread Song Gao
This patch includes:
- VSSRLN.{B.H/H.W/W.D};
- VSSRAN.{B.H/H.W/W.D};
- VSSRLN.{BU.H/HU.W/WU.D};
- VSSRAN.{BU.H/HU.W/WU.D};
- VSSRLNI.{B.H/H.W/W.D/D.Q};
- VSSRANI.{B.H/H.W/W.D/D.Q};
- VSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
- VSSRANI.{BU.H/HU.W/WU.D/DU.Q}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  30 ++
 target/loongarch/helper.h   |  30 ++
 target/loongarch/insn_trans/trans_lsx.c.inc |  30 ++
 target/loongarch/insns.decode   |  30 ++
 target/loongarch/lsx_helper.c   | 379 
 5 files changed, 499 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 185cd36381..426d30dc01 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1198,3 +1198,33 @@ INSN_LSX(vsrarni_b_h,  vv_i)
 INSN_LSX(vsrarni_h_w,  vv_i)
 INSN_LSX(vsrarni_w_d,  vv_i)
 INSN_LSX(vsrarni_d_q,  vv_i)
+
+INSN_LSX(vssrln_b_h,   vvv)
+INSN_LSX(vssrln_h_w,   vvv)
+INSN_LSX(vssrln_w_d,   vvv)
+INSN_LSX(vssran_b_h,   vvv)
+INSN_LSX(vssran_h_w,   vvv)
+INSN_LSX(vssran_w_d,   vvv)
+INSN_LSX(vssrln_bu_h,  vvv)
+INSN_LSX(vssrln_hu_w,  vvv)
+INSN_LSX(vssrln_wu_d,  vvv)
+INSN_LSX(vssran_bu_h,  vvv)
+INSN_LSX(vssran_hu_w,  vvv)
+INSN_LSX(vssran_wu_d,  vvv)
+
+INSN_LSX(vssrlni_b_h,  vv_i)
+INSN_LSX(vssrlni_h_w,  vv_i)
+INSN_LSX(vssrlni_w_d,  vv_i)
+INSN_LSX(vssrlni_d_q,  vv_i)
+INSN_LSX(vssrani_b_h,  vv_i)
+INSN_LSX(vssrani_h_w,  vv_i)
+INSN_LSX(vssrani_w_d,  vv_i)
+INSN_LSX(vssrani_d_q,  vv_i)
+INSN_LSX(vssrlni_bu_h, vv_i)
+INSN_LSX(vssrlni_hu_w, vv_i)
+INSN_LSX(vssrlni_wu_d, vv_i)
+INSN_LSX(vssrlni_du_q, vv_i)
+INSN_LSX(vssrani_bu_h, vv_i)
+INSN_LSX(vssrani_hu_w, vv_i)
+INSN_LSX(vssrani_wu_d, vv_i)
+INSN_LSX(vssrani_du_q, vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 0a8cfe3625..28f159768c 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -411,3 +411,33 @@ DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 6034a74bfb..5d7e45a793 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3037,3 +3037,33 @@ TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h)
 TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w)
 TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d)
 TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q)
+
+TRANS(vssrln_b_h, gen_vvv, gen_helper_vssrln_b_h)
+TRANS(vssrln_h_w, gen_vvv, gen_helper_vssrln_h_w)
+TRANS(vssrln_w_d, gen_vvv, gen_helper_vssrln_w_d)
+TRANS(vssran_b_h, gen_vvv, gen_helper_vssran_b_h)
+TRANS(vssran_h_w, gen_vvv, gen_helper_vssran_h_w)
+TRANS(vssran_w_d, gen_vvv, gen_helper_vssran_w_d)
+TRANS(vssrln_bu_h, gen_vvv, gen_helper_vssrln_bu_h)
+TRANS(vssrln_hu_w, gen_vvv, gen_helper_vssrln_hu_w)
+TRANS(vssrln_wu_d, gen_vvv, gen_helper_vssrln_wu_d)
+TRANS(vssran_bu_h, gen_vvv, gen_helper_vssran_bu_h)
+TRANS(vssran_hu_w, gen_vvv, gen_helper_vssran_hu_w)
+TRANS(vssran_wu_d, gen_vvv, gen_helper_vssran_wu_d)
+
+TRANS(vssrlni_b_h, gen_vv_i, gen_helper_vssrlni_b_h)
+TRANS(vssrlni_h_w, gen_vv_i, gen_helper_vssrlni_h_

[RFC PATCH v4 14/44] target/loongarch: Implement vmul/vmuh/vmulw{ev/od}

2023-04-25 Thread Song Gao
This patch includes:
- VMUL.{B/H/W/D};
- VMUH.{B/H/W/D}[U];
- VMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- VMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.

Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  38 ++
 target/loongarch/helper.h   |  30 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 550 
 target/loongarch/insns.decode   |  38 ++
 target/loongarch/lsx_helper.c   |  76 +++
 5 files changed, 732 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 6b0e518bfa..48e6ef5309 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -972,3 +972,41 @@ INSN_LSX(vmini_bu, vv_i)
 INSN_LSX(vmini_hu, vv_i)
 INSN_LSX(vmini_wu, vv_i)
 INSN_LSX(vmini_du, vv_i)
+
+INSN_LSX(vmul_b,   vvv)
+INSN_LSX(vmul_h,   vvv)
+INSN_LSX(vmul_w,   vvv)
+INSN_LSX(vmul_d,   vvv)
+INSN_LSX(vmuh_b,   vvv)
+INSN_LSX(vmuh_h,   vvv)
+INSN_LSX(vmuh_w,   vvv)
+INSN_LSX(vmuh_d,   vvv)
+INSN_LSX(vmuh_bu,  vvv)
+INSN_LSX(vmuh_hu,  vvv)
+INSN_LSX(vmuh_wu,  vvv)
+INSN_LSX(vmuh_du,  vvv)
+
+INSN_LSX(vmulwev_h_b,  vvv)
+INSN_LSX(vmulwev_w_h,  vvv)
+INSN_LSX(vmulwev_d_w,  vvv)
+INSN_LSX(vmulwev_q_d,  vvv)
+INSN_LSX(vmulwod_h_b,  vvv)
+INSN_LSX(vmulwod_w_h,  vvv)
+INSN_LSX(vmulwod_d_w,  vvv)
+INSN_LSX(vmulwod_q_d,  vvv)
+INSN_LSX(vmulwev_h_bu, vvv)
+INSN_LSX(vmulwev_w_hu, vvv)
+INSN_LSX(vmulwev_d_wu, vvv)
+INSN_LSX(vmulwev_q_du, vvv)
+INSN_LSX(vmulwod_h_bu, vvv)
+INSN_LSX(vmulwod_w_hu, vvv)
+INSN_LSX(vmulwod_d_wu, vvv)
+INSN_LSX(vmulwod_q_du, vvv)
+INSN_LSX(vmulwev_h_bu_b,   vvv)
+INSN_LSX(vmulwev_w_hu_h,   vvv)
+INSN_LSX(vmulwev_d_wu_w,   vvv)
+INSN_LSX(vmulwev_q_du_d,   vvv)
+INSN_LSX(vmulwod_h_bu_b,   vvv)
+INSN_LSX(vmulwod_w_hu_h,   vvv)
+INSN_LSX(vmulwod_d_wu_w,   vvv)
+INSN_LSX(vmulwod_q_du_d,   vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 379f961d1e..830d8cbe62 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -243,3 +243,33 @@ DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vmuh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmuh_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmulwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmulwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmulwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 15bb7888d3..9a36f942e2 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -1514,3 +1514,553 @@ TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u)
 TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u)
 TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u)
 TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u)
+
+TRANS(vmul_b, gvec_vvv, MO_8, tcg_gen_gvec_mul)
+TRANS(vmul_h, gvec_vvv, MO_16, tcg_

[RFC PATCH v4 32/44] target/loongarch: Implement vfrstp

2023-04-25 Thread Song Gao
This patch includes:
- VFRSTP[I].{B/H}.

Acked-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  5 +++
 target/loongarch/helper.h   |  5 +++
 target/loongarch/insn_trans/trans_lsx.c.inc |  5 +++
 target/loongarch/insns.decode   |  5 +++
 target/loongarch/lsx_helper.c   | 41 +
 5 files changed, 61 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 48c7ea47a4..be2bb9cc42 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1297,3 +1297,8 @@ INSN_LSX(vbitrevi_b,   vv_i)
 INSN_LSX(vbitrevi_h,   vv_i)
 INSN_LSX(vbitrevi_w,   vv_i)
 INSN_LSX(vbitrevi_d,   vv_i)
+
+INSN_LSX(vfrstp_b, vvv)
+INSN_LSX(vfrstp_h, vvv)
+INSN_LSX(vfrstpi_b,vv_i)
+INSN_LSX(vfrstpi_h,vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 75120ca55e..2cc235d019 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -512,3 +512,8 @@ DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 86243b54ba..ee84a5a4ee 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3416,3 +3416,8 @@ TRANS(vbitrevi_b, gvec_vv_i, MO_8, do_vbitrevi)
 TRANS(vbitrevi_h, gvec_vv_i, MO_16, do_vbitrevi)
 TRANS(vbitrevi_w, gvec_vv_i, MO_32, do_vbitrevi)
 TRANS(vbitrevi_d, gvec_vv_i, MO_64, do_vbitrevi)
+
+TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b)
+TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h)
+TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b)
+TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 801c97714e..4cb286ffe5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -998,3 +998,8 @@ vbitrevi_b       0111 00110001 10000 01 ... ..... .....   @vv_ui3
 vbitrevi_h       0111 00110001 10000 1 .... ..... .....   @vv_ui4
 vbitrevi_w       0111 00110001 10001 ..... ..... .....    @vv_ui5
 vbitrevi_d       0111 00110001 1001 ...... ..... .....    @vv_ui6
+
+vfrstp_b         0111 00010010 10110 ..... ..... .....    @vvv
+vfrstp_h         0111 00010010 10111 ..... ..... .....    @vvv
+vfrstpi_b        0111 00101001 10100 ..... ..... .....    @vv_ui5
+vfrstpi_h        0111 00101001 10101 ..... ..... .....    @vv_ui5
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index f160abfd8e..ad0d2dbe87 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -2019,3 +2019,44 @@ DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
 DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
 DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
 DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
+
+#define VFRSTP(NAME, BIT, MASK, E)                       \
+void HELPER(NAME)(CPULoongArchState *env,                \
+                  uint32_t vd, uint32_t vj, uint32_t vk) \
+{                                                        \
+    int i, m;                                            \
+    VReg *Vd = &(env->fpr[vd].vreg);                     \
+    VReg *Vj = &(env->fpr[vj].vreg);                     \
+    VReg *Vk = &(env->fpr[vk].vreg);                     \
+                                                         \
+    for (i = 0; i < LSX_LEN/BIT; i++) {                  \
+        if (Vj->E(i) < 0) {                              \
+            break;                                       \
+        }                                                \
+    }                                                    \
+    m = Vk->E(0) & MASK;                                 \
+    Vd->E(m) = i;                                        \
+}
+
+VFRSTP(vfrstp_b, 8, 0xf, B)
+VFRSTP(vfrstp_h, 16, 0x7, H)
+
+#define VFRSTPI(NAME, BIT, E) \
+void HELPER(NAME)(CPULoongArchState *env, \
+  uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+int i, m; \
+VReg *Vd = &(env->fpr[vd].vreg);  \
+VReg *Vj = &(env->fpr[vj].vreg);  \
+  \
+for (i = 0; i < LSX_LEN/BIT; i++) {   \
+if (Vj->E(i) < 0) {   \
+break; 

[RFC PATCH v4 12/44] target/loongarch: Implement vadda

2023-04-25 Thread Song Gao
This patch includes:
- VADDA.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  5 ++
 target/loongarch/helper.h   |  5 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 53 +
 target/loongarch/insns.decode   |  5 ++
 target/loongarch/lsx_helper.c   | 19 
 5 files changed, 87 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e98ea37793..1f61e67d1f 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -934,3 +934,8 @@ INSN_LSX(vabsd_bu, vvv)
 INSN_LSX(vabsd_hu, vvv)
 INSN_LSX(vabsd_wu, vvv)
 INSN_LSX(vabsd_du, vvv)
+
+INSN_LSX(vadda_b,  vvv)
+INSN_LSX(vadda_h,  vvv)
+INSN_LSX(vadda_w,  vvv)
+INSN_LSX(vadda_d,  vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index c3a5d2566e..85fb8f60d2 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -220,3 +220,8 @@ DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 0e9301bf93..8ad81c8517 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -1261,3 +1261,56 @@ TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u)
 TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u)
 TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u)
 TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u)
+
+static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t1, t2;
+
+    t1 = tcg_temp_new_vec_matching(a);
+    t2 = tcg_temp_new_vec_matching(b);
+
+    tcg_gen_abs_vec(vece, t1, a);
+    tcg_gen_abs_vec(vece, t2, b);
+    tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+                     uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_abs_vec, INDEX_op_add_vec, 0
+    };
+    static const GVecGen3 op[4] = {
+        {
+            .fniv = gen_vadda,
+            .fno = gen_helper_vadda_b,
+            .opt_opc = vecop_list,
+            .vece = MO_8
+        },
+        {
+            .fniv = gen_vadda,
+            .fno = gen_helper_vadda_h,
+            .opt_opc = vecop_list,
+            .vece = MO_16
+        },
+        {
+            .fniv = gen_vadda,
+            .fno = gen_helper_vadda_w,
+            .opt_opc = vecop_list,
+            .vece = MO_32
+        },
+        {
+            .fniv = gen_vadda,
+            .fno = gen_helper_vadda_d,
+            .opt_opc = vecop_list,
+            .vece = MO_64
+        },
+    };
+
+    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vadda_b, gvec_vvv, MO_8, do_vadda)
+TRANS(vadda_h, gvec_vvv, MO_16, do_vadda)
+TRANS(vadda_w, gvec_vvv, MO_32, do_vadda)
+TRANS(vadda_d, gvec_vvv, MO_64, do_vadda)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 825ddedf4d..6cb22f9297 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -628,3 +628,8 @@ vabsd_bu         0111 00000110 00100 ..... ..... .....    @vvv
 vabsd_hu         0111 00000110 00101 ..... ..... .....    @vvv
 vabsd_wu         0111 00000110 00110 ..... ..... .....    @vvv
 vabsd_du         0111 00000110 00111 ..... ..... .....    @vvv
+
+vadda_b          0111 00000101 11000 ..... ..... .....    @vvv
+vadda_h          0111 00000101 11001 ..... ..... .....    @vvv
+vadda_w          0111 00000101 11010 ..... ..... .....    @vvv
+vadda_d          0111 00000101 11011 ..... ..... .....    @vvv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index f0baffa9e3..8230fe2ed5 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -318,3 +318,22 @@ DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
 DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
 DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
 DO_3OP(vabsd_du, 64, UD, DO_VABSD)
+
+#define DO_VABS(a)  ((a < 0) ? (-a) : (a))
+
+#define DO_VADDA(NAME, BIT, E, DO_OP)   \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+{   \
+int i;  \
+VReg *Vd = (VReg *)vd;  \
+VReg *

[RFC PATCH v4 34/44] target/loongarch: Implement LSX fpu fcvt instructions

2023-04-25 Thread Song Gao
This patch includes:
- VFCVT{L/H}.{S.H/D.S};
- VFCVT.{H.S/S.D};
- VFRINT[{RNE/RZ/RP/RM}].{S/D};
- VFTINT[{RNE/RZ/RP/RM}].{W.S/L.D};
- VFTINT[RZ].{WU.S/LU.D};
- VFTINT[{RNE/RZ/RP/RM}].W.D;
- VFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S;
- VFFINT.{S.W/D.L}[U];
- VFFINT.S.L, VFFINT{L/H}.D.W.

Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  56 +++
 target/loongarch/helper.h   |  56 +++
 target/loongarch/insn_trans/trans_lsx.c.inc |  56 +++
 target/loongarch/insns.decode   |  56 +++
 target/loongarch/lsx_helper.c   | 376 
 5 files changed, 600 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index b57b284e49..c04271081f 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1348,3 +1348,59 @@ INSN_LSX(vfrecip_s,vv)
 INSN_LSX(vfrecip_d,vv)
 INSN_LSX(vfrsqrt_s,vv)
 INSN_LSX(vfrsqrt_d,vv)
+
+INSN_LSX(vfcvtl_s_h,   vv)
+INSN_LSX(vfcvth_s_h,   vv)
+INSN_LSX(vfcvtl_d_s,   vv)
+INSN_LSX(vfcvth_d_s,   vv)
+INSN_LSX(vfcvt_h_s,vvv)
+INSN_LSX(vfcvt_s_d,vvv)
+
+INSN_LSX(vfrint_s, vv)
+INSN_LSX(vfrint_d, vv)
+INSN_LSX(vfrintrm_s,   vv)
+INSN_LSX(vfrintrm_d,   vv)
+INSN_LSX(vfrintrp_s,   vv)
+INSN_LSX(vfrintrp_d,   vv)
+INSN_LSX(vfrintrz_s,   vv)
+INSN_LSX(vfrintrz_d,   vv)
+INSN_LSX(vfrintrne_s,  vv)
+INSN_LSX(vfrintrne_d,  vv)
+
+INSN_LSX(vftint_w_s,   vv)
+INSN_LSX(vftint_l_d,   vv)
+INSN_LSX(vftintrm_w_s, vv)
+INSN_LSX(vftintrm_l_d, vv)
+INSN_LSX(vftintrp_w_s, vv)
+INSN_LSX(vftintrp_l_d, vv)
+INSN_LSX(vftintrz_w_s, vv)
+INSN_LSX(vftintrz_l_d, vv)
+INSN_LSX(vftintrne_w_s,vv)
+INSN_LSX(vftintrne_l_d,vv)
+INSN_LSX(vftint_wu_s,  vv)
+INSN_LSX(vftint_lu_d,  vv)
+INSN_LSX(vftintrz_wu_s,vv)
+INSN_LSX(vftintrz_lu_d,vv)
+INSN_LSX(vftint_w_d,   vvv)
+INSN_LSX(vftintrm_w_d, vvv)
+INSN_LSX(vftintrp_w_d, vvv)
+INSN_LSX(vftintrz_w_d, vvv)
+INSN_LSX(vftintrne_w_d,vvv)
+INSN_LSX(vftintl_l_s,  vv)
+INSN_LSX(vftinth_l_s,  vv)
+INSN_LSX(vftintrml_l_s,vv)
+INSN_LSX(vftintrmh_l_s,vv)
+INSN_LSX(vftintrpl_l_s,vv)
+INSN_LSX(vftintrph_l_s,vv)
+INSN_LSX(vftintrzl_l_s,vv)
+INSN_LSX(vftintrzh_l_s,vv)
+INSN_LSX(vftintrnel_l_s,   vv)
+INSN_LSX(vftintrneh_l_s,   vv)
+
+INSN_LSX(vffint_s_w,   vv)
+INSN_LSX(vffint_s_wu,  vv)
+INSN_LSX(vffint_d_l,   vv)
+INSN_LSX(vffint_d_lu,  vv)
+INSN_LSX(vffintl_d_w,  vv)
+INSN_LSX(vffinth_d_w,  vv)
+INSN_LSX(vffint_s_l,   vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index a0c9de271d..f32235aa97 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -558,3 +558,59 @@ DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
 DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
 DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
 DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
+
+DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
+DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
+DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
+DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
+DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
+
+DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
+DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
+DEF_HELPER_3(vfrintrz_s, void, env, i32, i32)
+DEF_HELPER_3(vfrintrz_d, void, env, i32, i32)
+DEF_HELPER_3(vfrintrp_s, void, env, i32, i32)
+DEF_HELPER_3(vfrintrp_d, void, env, i32, i32)
+DEF_HELPER_3(vfrintrm_s, void, env, i32, i32)
+DEF_HELPER_3(vfrintrm_d, void, env, i32, i32)
+DEF_HELPER_3(vfrint_s, void, env, i32, i32)
+DEF_HELPER_3(vfrint_d, void, env, i32, i32)
+
+DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32)
+DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32)
+DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32)
+DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32)
+DEF_HELPER_3(vftint_w_s, void, env, i32, i32)
+DEF_HELPER_3(vftint_l_d, void, env, i32, i32)
+DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
+DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
+DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
+DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
+DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32)
+DEF_HELPER_3(vftintrph_l_s, v

[RFC PATCH v4 41/44] target/loongarch: Implement vld vst

2023-04-25 Thread Song Gao
This patch includes:
- VLD[X], VST[X];
- VLDREPL.{B/H/W/D};
- VSTELM.{B/H/W/D}.

Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  34 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 159 
 target/loongarch/insns.decode   |  36 +
 target/loongarch/translate.c|  10 ++
 4 files changed, 239 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0b62bbb8be..8627908fc9 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -21,11 +21,21 @@ static inline int plus_1(DisasContext *ctx, int x)
 return x + 1;
 }
 
+/*
+ * Field transform: scale the decoded value by 2 (shift left one bit).
+ * ctx is unused; presumably it is only there to match the signature the
+ * decoder expects of such helpers - confirm against insns.decode.
+ */
+static inline int shl_1(DisasContext *ctx, int x)
+{
+    return x << 1;
+}
+
 static inline int shl_2(DisasContext *ctx, int x)
 {
 return x << 2;
 }
 
+/*
+ * Field transform: scale the decoded value by 8 (shift left three bits).
+ * ctx is unused; presumably it is only there to match the signature the
+ * decoder expects of such helpers - confirm against insns.decode.
+ */
+static inline int shl_3(DisasContext *ctx, int x)
+{
+    return x << 3;
+}
+
 #define CSR_NAME(REG) \
 [LOONGARCH_CSR_##REG] = (#REG)
 
@@ -823,6 +833,11 @@ static void output_vr_i(DisasContext *ctx, arg_vr_i *a, 
const char *mnemonic)
 output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm);
 }
 
+/*
+ * Print the operands of an LSX instruction that takes a vector register,
+ * a general register and two immediates: "vN, rN, 0xIMM, 0xIMM2".
+ */
+static void output_vr_ii(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d, 0x%x, 0x%x",
+           a->vd, a->rj, a->imm, a->imm2);
+}
+
 static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
 {
 output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj,  a->imm);
@@ -838,6 +853,11 @@ static void output_vvr(DisasContext *ctx, arg_vvr *a, 
const char *mnemonic)
 output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk);
 }
 
+/*
+ * Print the operands of an LSX instruction that takes one vector register
+ * and two general registers: "vN, rN, rN".
+ */
+static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d, r%d",
+           a->vd, a->rj, a->rk);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -1654,3 +1674,17 @@ INSN_LSX(vextrins_d,   vv_i)
 INSN_LSX(vextrins_w,   vv_i)
 INSN_LSX(vextrins_h,   vv_i)
 INSN_LSX(vextrins_b,   vv_i)
+
+INSN_LSX(vld,  vr_i)
+INSN_LSX(vst,  vr_i)
+INSN_LSX(vldx, vrr)
+INSN_LSX(vstx, vrr)
+
+INSN_LSX(vldrepl_d,vr_i)
+INSN_LSX(vldrepl_w,vr_i)
+INSN_LSX(vldrepl_h,vr_i)
+INSN_LSX(vldrepl_b,vr_i)
+INSN_LSX(vstelm_d, vr_ii)
+INSN_LSX(vstelm_w, vr_ii)
+INSN_LSX(vstelm_h, vr_ii)
+INSN_LSX(vstelm_b, vr_ii)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index e1eee6bc4c..6bf9656c2c 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -4102,3 +4102,162 @@ TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b)
 TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h)
 TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w)
 TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d)
+
+/*
+ * VLD: load 128 bits from the address (GPR rj + imm) into vector
+ * register vd.  The value is loaded as one i128, split into two 64-bit
+ * halves and written to elements 1 (high) and 0 (low) of vd.
+ */
+static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv ea;
+    TCGv_i128 data;
+    TCGv_i64 lo, hi;
+
+    CHECK_SXE;
+
+    ea = gpr_src(ctx, a->rj, EXT_NONE);
+    data = tcg_temp_new_i128();
+    lo = tcg_temp_new_i64();
+    hi = tcg_temp_new_i64();
+
+    if (a->imm != 0) {
+        /* Form base + imm in a scratch temp; the base value is left alone. */
+        TCGv sum = tcg_temp_new();
+
+        tcg_gen_addi_tl(sum, ea, a->imm);
+        ea = sum;
+    }
+
+    tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, MO_128);
+    tcg_gen_extr_i128_i64(lo, hi, data);
+    set_vreg64(hi, a->vd, 1);
+    set_vreg64(lo, a->vd, 0);
+
+    return true;
+}
+
+/*
+ * VST: store the 128-bit contents of vector register vd to the address
+ * (GPR rj + imm).  Elements 1 (high) and 0 (low) of vd are read as two
+ * 64-bit halves, concatenated into one i128 and stored in a single access.
+ */
+static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv ea;
+    TCGv_i128 data;
+    TCGv_i64 hi, lo;
+
+    CHECK_SXE;
+
+    ea = gpr_src(ctx, a->rj, EXT_NONE);
+    data = tcg_temp_new_i128();
+    hi = tcg_temp_new_i64();
+    lo = tcg_temp_new_i64();
+
+    if (a->imm != 0) {
+        /* Form base + imm in a scratch temp; the base value is left alone. */
+        TCGv sum = tcg_temp_new();
+
+        tcg_gen_addi_tl(sum, ea, a->imm);
+        ea = sum;
+    }
+
+    get_vreg64(hi, a->vd, 1);
+    get_vreg64(lo, a->vd, 0);
+    tcg_gen_concat_i64_i128(data, lo, hi);
+    tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, MO_128);
+
+    return true;
+}
+
+/*
+ * VLDX: load 128 bits from the address (GPR rj + GPR rk) into vector
+ * register vd.  The value is loaded as one i128, split into two 64-bit
+ * halves and written to elements 1 (high) and 0 (low) of vd.
+ */
+static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
+{
+    TCGv ea, base, index;
+    TCGv_i128 data;
+    TCGv_i64 lo, hi;
+
+    CHECK_SXE;
+
+    ea = tcg_temp_new();
+    base = gpr_src(ctx, a->rj, EXT_NONE);
+    index = gpr_src(ctx, a->rk, EXT_NONE);
+    data = tcg_temp_new_i128();
+    lo = tcg_temp_new_i64();
+    hi = tcg_temp_new_i64();
+
+    tcg_gen_add_tl(ea, base, index);
+    tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, MO_128);
+    tcg_gen_extr_i128_i64(lo, hi, data);
+    set_vreg64(hi, a->vd, 1);
+    set_vreg64(lo, a->vd, 0);
+
+    return true;
+}
+
+static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
+{
+TCGv addr, src1, src2;
+TCGv_i64 ah, al;
+TCGv_i128 val;
+
+CHECK_SXE;
+
+addr = tcg_temp_new();
+src1 = gpr_src(ctx, a->rj, EXT_NONE);
+src2 = gpr_src(ctx, a->rk, EXT_NONE);
+val = tcg_temp_new_i128();
+ah = tcg_temp_new_i

[RFC PATCH v4 38/44] target/loongarch: Implement vinsgr2vr vpickve2gr vreplgr2vr

2023-04-25 Thread Song Gao
This patch includes:
- VINSGR2VR.{B/H/W/D};
- VPICKVE2GR.{B/H/W/D}[U];
- VREPLGR2VR.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  33 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 110 
 target/loongarch/insns.decode   |  30 ++
 3 files changed, 173 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index ecf0c7b577..7255a2aa4f 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -818,6 +818,21 @@ static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
 output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va);
 }
 
+/*
+ * Print the operands of an LSX instruction that takes a vector register,
+ * a general register and an immediate: "vN, rN, 0xIMM".
+ */
+static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d, 0x%x",
+           a->vd, a->rj, a->imm);
+}
+
+/*
+ * Print the operands of an LSX instruction that takes a general register,
+ * a vector register and an immediate: "rN, vN, 0xIMM".
+ */
+static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "r%d, v%d, 0x%x",
+           a->rd, a->vj, a->imm);
+}
+
+/*
+ * Print the operands of an LSX instruction that takes a vector register
+ * and a general register: "vN, rN".
+ */
+static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d",
+           a->vd, a->rj);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -1561,3 +1576,21 @@ INSN_LSX(vsetallnez_b, cv)
 INSN_LSX(vsetallnez_h, cv)
 INSN_LSX(vsetallnez_w, cv)
 INSN_LSX(vsetallnez_d, cv)
+
+INSN_LSX(vinsgr2vr_b,  vr_i)
+INSN_LSX(vinsgr2vr_h,  vr_i)
+INSN_LSX(vinsgr2vr_w,  vr_i)
+INSN_LSX(vinsgr2vr_d,  vr_i)
+INSN_LSX(vpickve2gr_b, rv_i)
+INSN_LSX(vpickve2gr_h, rv_i)
+INSN_LSX(vpickve2gr_w, rv_i)
+INSN_LSX(vpickve2gr_d, rv_i)
+INSN_LSX(vpickve2gr_bu,rv_i)
+INSN_LSX(vpickve2gr_hu,rv_i)
+INSN_LSX(vpickve2gr_wu,rv_i)
+INSN_LSX(vpickve2gr_du,rv_i)
+
+INSN_LSX(vreplgr2vr_b, vr)
+INSN_LSX(vreplgr2vr_h, vr)
+INSN_LSX(vreplgr2vr_w, vr)
+INSN_LSX(vreplgr2vr_d, vr)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 964c3c47bf..e722b79bea 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3823,3 +3823,113 @@ TRANS(vsetallnez_b, gen_cv, gen_helper_vsetallnez_b)
 TRANS(vsetallnez_h, gen_cv, gen_helper_vsetallnez_h)
 TRANS(vsetallnez_w, gen_cv, gen_helper_vsetallnez_w)
 TRANS(vsetallnez_d, gen_cv, gen_helper_vsetallnez_d)
+
+/*
+ * VINSGR2VR.B: store the low 8 bits of GPR rj into byte element imm of
+ * vector register vd.
+ *
+ * The source is obtained through the common gpr_src() accessor instead of
+ * indexing cpu_gpr[] directly.
+ * NOTE(review): cpu_gpr[0] is believed not to be a valid TCG global for
+ * the hard-wired zero register, which gpr_src() handles - confirm in
+ * translate.c.
+ */
+static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_st8_i64(src, cpu_env,
+                    offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
+    return true;
+}
+
+/*
+ * VINSGR2VR.H: store the low 16 bits of GPR rj into halfword element imm
+ * of vector register vd.
+ *
+ * The source is obtained through the common gpr_src() accessor instead of
+ * indexing cpu_gpr[] directly.
+ * NOTE(review): cpu_gpr[0] is believed not to be a valid TCG global for
+ * the hard-wired zero register, which gpr_src() handles - confirm in
+ * translate.c.
+ */
+static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_st16_i64(src, cpu_env,
+                     offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
+    return true;
+}
+
+/*
+ * VINSGR2VR.W: store the low 32 bits of GPR rj into word element imm of
+ * vector register vd.
+ *
+ * The source is obtained through the common gpr_src() accessor instead of
+ * indexing cpu_gpr[] directly.
+ * NOTE(review): cpu_gpr[0] is believed not to be a valid TCG global for
+ * the hard-wired zero register, which gpr_src() handles - confirm in
+ * translate.c.
+ */
+static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_st32_i64(src, cpu_env,
+                     offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
+    return true;
+}
+
+/*
+ * VINSGR2VR.D: store the 64-bit value of GPR rj into doubleword element
+ * imm of vector register vd.
+ *
+ * The source is obtained through the common gpr_src() accessor instead of
+ * indexing cpu_gpr[] directly.
+ * NOTE(review): cpu_gpr[0] is believed not to be a valid TCG global for
+ * the hard-wired zero register, which gpr_src() handles - confirm in
+ * translate.c.
+ */
+static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_st_i64(src, cpu_env,
+                   offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
+    return true;
+}
+
+/*
+ * VPICKVE2GR.B: load byte element imm of vector register vj,
+ * sign-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld8s_i64(dst, cpu_env,
+                     offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.H: load halfword element imm of vector register vj,
+ * sign-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld16s_i64(dst, cpu_env,
+                      offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.W: load word element imm of vector register vj,
+ * sign-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld32s_i64(dst, cpu_env,
+                      offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.D: load doubleword element imm of vector register vj into
+ * GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld_i64(dst, cpu_env,
+                   offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.BU: load byte element imm of vector register vj,
+ * zero-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld8u_i64(dst, cpu_env,
+                     offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.HU: load halfword element imm of vector register vj,
+ * zero-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld16u_i64(dst, cpu_env,
+                      offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+/*
+ * VPICKVE2GR.WU: load word element imm of vector register vj,
+ * zero-extended, into GPR rd.
+ *
+ * The destination goes through gpr_dst()/gen_set_gpr() instead of indexing
+ * cpu_gpr[] directly.
+ * NOTE(review): gpr_dst() is assumed to return a scratch temp when rd is
+ * the hard-wired zero register, for which cpu_gpr[0] is believed not to be
+ * a valid TCG global - confirm in translate.c.
+ */
+static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
+{
+    TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+    CHECK_SXE;
+    tcg_gen_ld32u_i64(dst, cpu_env,
+                      offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
+    gen_set_gpr(a->rd, dst, EXT_NONE);
+    return true;
+}
+
+stat

[RFC PATCH v4 44/44] target/loongarch: CPUCFG support LSX

2023-04-25 Thread Song Gao
Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 55d7f9255e..c0afc21b2f 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -388,6 +388,7 @@ static void loongarch_la464_initfn(Object *obj)
 data = FIELD_DP32(data, CPUCFG2, FP_SP, 1);
 data = FIELD_DP32(data, CPUCFG2, FP_DP, 1);
 data = FIELD_DP32(data, CPUCFG2, FP_VER, 1);
+    data = FIELD_DP32(data, CPUCFG2, LSX, 1);
 data = FIELD_DP32(data, CPUCFG2, LLFTP, 1);
 data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1);
 data = FIELD_DP32(data, CPUCFG2, LAM, 1);
-- 
2.31.1




[RFC PATCH v4 37/44] target/loongarch: Implement vbitsel vset

2023-04-25 Thread Song Gao
This patch includes:
- VBITSEL.V;
- VBITSELI.B;
- VSET{EQZ/NEZ}.V;
- VSETANYEQZ.{B/H/W/D};
- VSETALLNEZ.{B/H/W/D}.

Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 20 ++
 target/loongarch/helper.h   | 11 +++
 target/loongarch/insn_trans/trans_lsx.c.inc | 74 +
 target/loongarch/insns.decode   | 17 +
 target/loongarch/lsx_helper.c   | 52 +++
 5 files changed, 174 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 64db01d2f9..ecf0c7b577 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -792,6 +792,12 @@ static bool trans_##insn(DisasContext *ctx, arg_##type * 
a) \
 return true;\
 }
 
+/*
+ * Print the operands of an LSX instruction that takes a condition-flag
+ * register and a vector register: "fccN, vN".
+ */
+static void output_cv(DisasContext *ctx, arg_cv *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "fcc%d, v%d",
+           a->cd, a->vj);
+}
+
 static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic)
 {
 output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
@@ -1541,3 +1547,17 @@ static bool trans_vfcmp_cond_##suffix(DisasContext *ctx, 
\
 
 LSX_FCMP_INSN(s)
 LSX_FCMP_INSN(d)
+
+INSN_LSX(vbitsel_v,        vvvv)
+INSN_LSX(vbitseli_b,   vv_i)
+
+INSN_LSX(vseteqz_v,cv)
+INSN_LSX(vsetnez_v,cv)
+INSN_LSX(vsetanyeqz_b, cv)
+INSN_LSX(vsetanyeqz_h, cv)
+INSN_LSX(vsetanyeqz_w, cv)
+INSN_LSX(vsetanyeqz_d, cv)
+INSN_LSX(vsetallnez_b, cv)
+INSN_LSX(vsetallnez_h, cv)
+INSN_LSX(vsetallnez_w, cv)
+INSN_LSX(vsetallnez_d, cv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 867756fdb5..8eb2738cd0 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -642,3 +642,14 @@ DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
+
+DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32)
+DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32)
+DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32)
+DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32)
+DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
+DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
+DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
+DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index abb6efc09d..964c3c47bf 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -65,6 +65,17 @@ static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
 return true;
 }
 
+/*
+ * Common translator for LSX instructions with a condition-flag destination
+ * (cd) and one vector source (vj): passes both register numbers as
+ * constants to the out-of-line helper func.
+ *
+ * CHECK_SXE is defined elsewhere; it presumably bails out when LSX is not
+ * enabled - confirm.  Always returns true when it falls through.
+ */
+static bool gen_cv(DisasContext *ctx, arg_cv *a,
+                   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 src_reg = tcg_constant_i32(a->vj);
+    TCGv_i32 flag_reg = tcg_constant_i32(a->cd);
+
+    CHECK_SXE;
+
+    func(cpu_env, flag_reg, src_reg);
+
+    return true;
+}
+
 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
  void (*func)(unsigned, uint32_t, uint32_t,
   uint32_t, uint32_t, uint32_t))
@@ -3749,3 +3760,66 @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, 
arg_vvv_fcond *a)
 
 return true;
 }
+
+/*
+ * VBITSEL.V: bitwise select across the whole vector register, emitted as
+ * a generic-vector bitsel with va as the selector: each result bit is
+ * taken from vk where the va bit is 1 and from vj where it is 0.
+ *
+ * The operation covers 16 bytes; ctx->vl / 8 is passed as the maximum
+ * size.
+ */
+static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
+{
+    CHECK_SXE;
+
+    tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
+                        vec_full_offset(a->vk), vec_full_offset(a->vj),
+                        16, ctx->vl / 8);
+    return true;
+}
+
+/*
+ * Inline vector expansion for VBITSELI: a acts as both selector and
+ * destination.  Each result bit is taken from the replicated immediate
+ * where the a bit is 1 and from b where it is 0.
+ */
+static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
+{
+    TCGv_vec imm_vec = tcg_constant_vec_matching(a, vece, imm);
+
+    tcg_gen_bitsel_vec(vece, a, a, imm_vec, b);
+}
+
+/*
+ * VBITSELI.B: bitwise select between vj and an immediate, using the old
+ * value of vd as the selector.  Expanded through the generic-vector
+ * two-operand-plus-immediate expander, with an inline vector form
+ * (gen_vbitseli) and an out-of-line helper fallback.
+ */
+static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
+{
+    static const GVecGen2i vbitseli_op = {
+        .vece = MO_8,
+        /* vd is an input as well as the output, so it must be loaded. */
+        .load_dest = true,
+        .fniv = gen_vbitseli,
+        .fnoi = gen_helper_vbitseli_b,
+    };
+
+    CHECK_SXE;
+
+    tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
+                    16, ctx->vl / 8, a->imm, &vbitseli_op);
+
+    return true;
+}
+
+#define VSET(NAME, COND)   
\
+static bool trans_## NAME (DisasContext *ctx, arg_cv *a)   
\
+{  
\
+TCGv_i64 t1, al, ah;   
\
+   
\
+al = tcg_temp_new_i64();   
\
+ah = tcg_temp_new_i64();   
\
+t1 = tcg_temp_new_i64();   
\
+

[RFC PATCH v4 23/44] target/loongarch: Implement vsllwil vextl

2023-04-25 Thread Song Gao
This patch includes:
- VSLLWIL.{H.B/W.H/D.W};
- VSLLWIL.{HU.BU/WU.HU/DU.WU};
- VEXTL.Q.D, VEXTL.QU.DU.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  9 +
 target/loongarch/helper.h   |  9 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 21 +++
 target/loongarch/insns.decode   |  9 +
 target/loongarch/lsx_helper.c   | 41 +
 5 files changed, 89 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index f7d0fb4441..087cac10ad 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1139,3 +1139,12 @@ INSN_LSX(vrotri_b, vv_i)
 INSN_LSX(vrotri_h, vv_i)
 INSN_LSX(vrotri_w, vv_i)
 INSN_LSX(vrotri_d, vv_i)
+
+INSN_LSX(vsllwil_h_b,  vv_i)
+INSN_LSX(vsllwil_w_h,  vv_i)
+INSN_LSX(vsllwil_d_w,  vv_i)
+INSN_LSX(vextl_q_d,vv)
+INSN_LSX(vsllwil_hu_bu,vv_i)
+INSN_LSX(vsllwil_wu_hu,vv_i)
+INSN_LSX(vsllwil_du_wu,vv_i)
+INSN_LSX(vextl_qu_du,  vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 617c579592..e98f7c3e6f 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -352,3 +352,12 @@ DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
 DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
 
 DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
+DEF_HELPER_3(vextl_q_d, void, env, i32, i32)
+DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
+DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index ad8f32ed18..037c742aa4 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -39,6 +39,18 @@ static bool gen_vv(DisasContext *ctx, arg_vv *a,
 return true;
 }
 
+/*
+ * Common translator for LSX instructions with a vector destination (vd),
+ * one vector source (vj) and an immediate: passes the two register
+ * numbers and the immediate as constants to the out-of-line helper func.
+ *
+ * CHECK_SXE is defined elsewhere; it presumably bails out when LSX is not
+ * enabled - confirm.  Always returns true when it falls through.
+ */
+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
+                     void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst_reg = tcg_constant_i32(a->vd);
+    TCGv_i32 src_reg = tcg_constant_i32(a->vj);
+    TCGv_i32 imm_val = tcg_constant_i32(a->imm);
+
+    CHECK_SXE;
+
+    func(cpu_env, dst_reg, src_reg, imm_val);
+
+    return true;
+}
+
 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
  void (*func)(unsigned, uint32_t, uint32_t,
   uint32_t, uint32_t, uint32_t))
@@ -2966,3 +2978,12 @@ TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
 TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
 TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
 TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
+
+TRANS(vsllwil_h_b, gen_vv_i, gen_helper_vsllwil_h_b)
+TRANS(vsllwil_w_h, gen_vv_i, gen_helper_vsllwil_w_h)
+TRANS(vsllwil_d_w, gen_vv_i, gen_helper_vsllwil_d_w)
+TRANS(vextl_q_d, gen_vv, gen_helper_vextl_q_d)
+TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu)
+TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu)
+TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu)
+TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7c0b0c4ac8..23dd338026 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -839,3 +839,12 @@ vrotri_b 0111 00101010 0 01 ... . .   
@vv_ui3
 vrotri_h 0111 00101010 0 1  . .   @vv_ui4
 vrotri_w 0111 00101010 1 . . .@vv_ui5
 vrotri_d 0111 00101010 0001 .. . .@vv_ui6
+
+vsllwil_h_b  0111 0011 1 01 ... . .   @vv_ui3
+vsllwil_w_h  0111 0011 1 1  . .   @vv_ui4
+vsllwil_d_w  0111 0011 10001 . . .@vv_ui5
+vextl_q_d0111 0011 10010 0 . .@vv
+vsllwil_hu_bu0111 0011 11000 01 ... . .   @vv_ui3
+vsllwil_wu_hu0111 0011 11000 1  . .   @vv_ui4
+vsllwil_du_wu0111 0011 11001 . . .@vv_ui5
+vextl_qu_du  0111 0011 11010 0 . .@vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index ff00d60ab8..de86f41cce 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -793,3 +793,44 @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, 
uint32_t v)
 Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
 }
 }
+
+#define VSLLWIL(NAME, BIT, E1, E2)\
+void HELPER(NAME)(CPULoongArchState *env, \
+  uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+int i

[RFC PATCH v4 08/44] target/loongarch: Implement vhaddw/vhsubw

2023-04-25 Thread Song Gao
This patch includes:
- VHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU};
- VHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 17 +
 target/loongarch/helper.h   | 18 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 17 +
 target/loongarch/insns.decode   | 17 +
 target/loongarch/lsx_helper.c   | 81 +
 5 files changed, 150 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index b7f9320ba0..adfd693938 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -848,3 +848,20 @@ INSN_LSX(vssub_bu, vvv)
 INSN_LSX(vssub_hu, vvv)
 INSN_LSX(vssub_wu, vvv)
 INSN_LSX(vssub_du, vvv)
+
+INSN_LSX(vhaddw_h_b,   vvv)
+INSN_LSX(vhaddw_w_h,   vvv)
+INSN_LSX(vhaddw_d_w,   vvv)
+INSN_LSX(vhaddw_q_d,   vvv)
+INSN_LSX(vhaddw_hu_bu, vvv)
+INSN_LSX(vhaddw_wu_hu, vvv)
+INSN_LSX(vhaddw_du_wu, vvv)
+INSN_LSX(vhaddw_qu_du, vvv)
+INSN_LSX(vhsubw_h_b,   vvv)
+INSN_LSX(vhsubw_w_h,   vvv)
+INSN_LSX(vhsubw_d_w,   vvv)
+INSN_LSX(vhsubw_q_d,   vvv)
+INSN_LSX(vhsubw_hu_bu, vvv)
+INSN_LSX(vhsubw_wu_hu, vvv)
+INSN_LSX(vhsubw_du_wu, vvv)
+INSN_LSX(vhsubw_qu_du, vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 9c01823a26..6d58dabaed 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -130,3 +130,21 @@ DEF_HELPER_4(ldpte, void, env, tl, tl, i32)
 DEF_HELPER_1(ertn, void, env)
 DEF_HELPER_1(idle, void, env)
 #endif
+
+/* LoongArch LSX  */
+DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 082bd738ce..562096c0d7 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -157,3 +157,20 @@ TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
 TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
 TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
 TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
+
+TRANS(vhaddw_h_b, gen_vvv, gen_helper_vhaddw_h_b)
+TRANS(vhaddw_w_h, gen_vvv, gen_helper_vhaddw_w_h)
+TRANS(vhaddw_d_w, gen_vvv, gen_helper_vhaddw_d_w)
+TRANS(vhaddw_q_d, gen_vvv, gen_helper_vhaddw_q_d)
+TRANS(vhaddw_hu_bu, gen_vvv, gen_helper_vhaddw_hu_bu)
+TRANS(vhaddw_wu_hu, gen_vvv, gen_helper_vhaddw_wu_hu)
+TRANS(vhaddw_du_wu, gen_vvv, gen_helper_vhaddw_du_wu)
+TRANS(vhaddw_qu_du, gen_vvv, gen_helper_vhaddw_qu_du)
+TRANS(vhsubw_h_b, gen_vvv, gen_helper_vhsubw_h_b)
+TRANS(vhsubw_w_h, gen_vvv, gen_helper_vhsubw_w_h)
+TRANS(vhsubw_d_w, gen_vvv, gen_helper_vhsubw_d_w)
+TRANS(vhsubw_q_d, gen_vvv, gen_helper_vhsubw_q_d)
+TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu)
+TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu)
+TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu)
+TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 3a29f0a9ab..10a20858e5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -542,3 +542,20 @@ vssub_bu 0111 0100 11000 . . .
@vvv
 vssub_hu 0111 0100 11001 . . .@vvv
 vssub_wu 0111 0100 11010 . . .@vvv
 vssub_du 0111 0100 11011 . . .@vvv
+
+vhaddw_h_b   0111 0101 01000 . . .@vvv
+vhaddw_w_h   0111 0101 01001 . . .@vvv
+vhaddw_d_w   0111 0101 01010 . . .@vvv
+vhaddw_q_d   0111 0101 01011 . . .@vvv
+vhaddw_hu_bu 0111 0101 1 . . .@vvv
+vhaddw_wu_hu 0111 0101 10001 . . .@vvv
+vhaddw_du_wu 0111 0101 10010 . . .@vvv
+vhaddw_qu_du 0111 0101 10011 . . .@vvv
+vhsubw_h_b   0111 0101 01100 . . .@vvv
+vhsubw_w_h   0111 0101 01101 . . .@vvv
+vhsubw_d_w   0111 0101 0

[RFC PATCH v4 40/44] target/loongarch: Implement vilvl vilvh vextrins vshuf

2023-04-25 Thread Song Gao
This patch includes:
- VILV{L/H}.{B/H/W/D};
- VSHUF.{B/H/W/D};
- VSHUF4I.{B/H/W/D};
- VPERMI.W;
- VEXTRINS.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  25 
 target/loongarch/helper.h   |  25 
 target/loongarch/insn_trans/trans_lsx.c.inc |  25 
 target/loongarch/insns.decode   |  25 
 target/loongarch/lsx_helper.c   | 148 
 5 files changed, 248 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c6cf782725..0b62bbb8be 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1629,3 +1629,28 @@ INSN_LSX(vpickod_b,vvv)
 INSN_LSX(vpickod_h,vvv)
 INSN_LSX(vpickod_w,vvv)
 INSN_LSX(vpickod_d,vvv)
+
+INSN_LSX(vilvl_b,  vvv)
+INSN_LSX(vilvl_h,  vvv)
+INSN_LSX(vilvl_w,  vvv)
+INSN_LSX(vilvl_d,  vvv)
+INSN_LSX(vilvh_b,  vvv)
+INSN_LSX(vilvh_h,  vvv)
+INSN_LSX(vilvh_w,  vvv)
+INSN_LSX(vilvh_d,  vvv)
+
+INSN_LSX(vshuf_b,          vvvv)
+INSN_LSX(vshuf_h,  vvv)
+INSN_LSX(vshuf_w,  vvv)
+INSN_LSX(vshuf_d,  vvv)
+INSN_LSX(vshuf4i_b,vv_i)
+INSN_LSX(vshuf4i_h,vv_i)
+INSN_LSX(vshuf4i_w,vv_i)
+INSN_LSX(vshuf4i_d,vv_i)
+
+INSN_LSX(vpermi_w, vv_i)
+
+INSN_LSX(vextrins_d,   vv_i)
+INSN_LSX(vextrins_w,   vv_i)
+INSN_LSX(vextrins_h,   vv_i)
+INSN_LSX(vextrins_b,   vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 51ad694be2..b9de77d926 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -671,3 +671,28 @@ DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
+
+DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 1146ace1b7..e1eee6bc4c 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -4077,3 +4077,28 @@ TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b)
 TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h)
 TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w)
 TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d)
+
+TRANS(vilvl_b, gen_vvv, gen_helper_vilvl_b)
+TRANS(vilvl_h, gen_vvv, gen_helper_vilvl_h)
+TRANS(vilvl_w, gen_vvv, gen_helper_vilvl_w)
+TRANS(vilvl_d, gen_vvv, gen_helper_vilvl_d)
+TRANS(vilvh_b, gen_vvv, gen_helper_vilvh_b)
+TRANS(vilvh_h, gen_vvv, gen_helper_vilvh_h)
+TRANS(vilvh_w, gen_vvv, gen_helper_vilvh_w)
+TRANS(vilvh_d, gen_vvv, gen_helper_vilvh_d)
+
+TRANS(vshuf_b, gen_vvvv, gen_helper_vshuf_b)
+TRANS(vshuf_h, gen_vvv, gen_helper_vshuf_h)
+TRANS(vshuf_w, gen_vvv, gen_helper_vshuf_w)
+TRANS(vshuf_d, gen_vvv, gen_helper_vshuf_d)
+TRANS(vshuf4i_b, gen_vv_i, gen_helper_vshuf4i_b)
+TRANS(vshuf4i_h, gen_vv_i, gen_helper_vshuf4i_h)
+TRANS(vshuf4i_w, gen_vv_i, gen_helper_vshuf4i_w)
+TRANS(vshuf4i_d, gen_vv_i, gen_helper_vshuf4i_d)
+
+TRANS(vpermi_w, gen_vv_i, gen_helper_vpermi_w)
+
+TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b)
+TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h)
+TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w)
+TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ab9e9e422f..0263bce28e 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1231,3 +1231,28 @@ vpickod_b0111 00010010 0 . . .   
 @vvv
 vpickod_h0111 00010010 1 . . .@vvv
 vpickod_w0111 00010010 00010 . . .@vvv
 vpickod_d0111 00010010 00011 . . .@vvv
+
+vilvl_b  0111 00010001 10100 . . .@vvv
+vilvl_h 

[RFC PATCH v4 39/44] target/loongarch: Implement vreplve vpack vpick

2023-04-25 Thread Song Gao
This patch includes:
- VREPLVE[I].{B/H/W/D};
- VBSLL.V, VBSRL.V;
- VPACK{EV/OD}.{B/H/W/D};
- VPICK{EV/OD}.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  35 +
 target/loongarch/helper.h   |  18 +++
 target/loongarch/insn_trans/trans_lsx.c.inc | 144 
 target/loongarch/insns.decode   |  34 +
 target/loongarch/lsx_helper.c   |  88 
 5 files changed, 319 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 7255a2aa4f..c6cf782725 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -833,6 +833,11 @@ static void output_vr(DisasContext *ctx, arg_vr *a, const 
char *mnemonic)
 output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj);
 }
 
+/*
+ * Print the operands of an LSX instruction that takes two vector registers
+ * and one general register: "vN, vN, rN".
+ */
+static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, v%d, r%d",
+           a->vd, a->vj, a->rk);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -1594,3 +1599,33 @@ INSN_LSX(vreplgr2vr_b, vr)
 INSN_LSX(vreplgr2vr_h, vr)
 INSN_LSX(vreplgr2vr_w, vr)
 INSN_LSX(vreplgr2vr_d, vr)
+
+INSN_LSX(vreplve_b,vvr)
+INSN_LSX(vreplve_h,vvr)
+INSN_LSX(vreplve_w,vvr)
+INSN_LSX(vreplve_d,vvr)
+INSN_LSX(vreplvei_b,   vv_i)
+INSN_LSX(vreplvei_h,   vv_i)
+INSN_LSX(vreplvei_w,   vv_i)
+INSN_LSX(vreplvei_d,   vv_i)
+
+INSN_LSX(vbsll_v,  vv_i)
+INSN_LSX(vbsrl_v,  vv_i)
+
+INSN_LSX(vpackev_b,vvv)
+INSN_LSX(vpackev_h,vvv)
+INSN_LSX(vpackev_w,vvv)
+INSN_LSX(vpackev_d,vvv)
+INSN_LSX(vpackod_b,vvv)
+INSN_LSX(vpackod_h,vvv)
+INSN_LSX(vpackod_w,vvv)
+INSN_LSX(vpackod_d,vvv)
+
+INSN_LSX(vpickev_b,vvv)
+INSN_LSX(vpickev_h,vvv)
+INSN_LSX(vpickev_w,vvv)
+INSN_LSX(vpickev_d,vvv)
+INSN_LSX(vpickod_b,vvv)
+INSN_LSX(vpickod_h,vvv)
+INSN_LSX(vpickod_w,vvv)
+INSN_LSX(vpickod_d,vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 8eb2738cd0..51ad694be2 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -653,3 +653,21 @@ DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
 DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
+
+DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index e722b79bea..1146ace1b7 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3933,3 +3933,147 @@ TRANS(vreplgr2vr_b, gvec_dup, MO_8)
 TRANS(vreplgr2vr_h, gvec_dup, MO_16)
 TRANS(vreplgr2vr_w, gvec_dup, MO_32)
 TRANS(vreplgr2vr_d, gvec_dup, MO_64)
+
+static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
+{
+CHECK_SXE;
+tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+  fpr[a->vj].vreg.B((a->imm))),
+ 16, ctx->vl/8);
+return true;
+}
+
+static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
+{
+CHECK_SXE;
+tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+  fpr[a->vj].vreg.H((a->imm))),
+ 16, ctx->vl/8);
+return true;
+}
+static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
+{
+CHECK_SXE;
+tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+  fpr[a->vj].vreg.W((a->imm))),
+16, ctx->vl/8);
+return true;
+}
+static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
+{
+CHECK_SXE;
+tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+  fpr[a->vj].vreg.D((a->imm))),
+ 16, ctx->vl/8);
+return

[RFC PATCH v4 10/44] target/loongarch: Implement vavg/vavgr

2023-04-25 Thread Song Gao
This patch includes:
- VAVG.{B/H/W/D}[U];
- VAVGR.{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  17 ++
 target/loongarch/helper.h   |  18 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 197 
 target/loongarch/insns.decode   |  17 ++
 target/loongarch/lsx_helper.c   |  32 
 5 files changed, 281 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 8ee14916f3..e7592e7a34 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -908,3 +908,20 @@ INSN_LSX(vaddwod_h_bu_b,   vvv)
 INSN_LSX(vaddwod_w_hu_h,   vvv)
 INSN_LSX(vaddwod_d_wu_w,   vvv)
 INSN_LSX(vaddwod_q_du_d,   vvv)
+
+INSN_LSX(vavg_b,   vvv)
+INSN_LSX(vavg_h,   vvv)
+INSN_LSX(vavg_w,   vvv)
+INSN_LSX(vavg_d,   vvv)
+INSN_LSX(vavg_bu,  vvv)
+INSN_LSX(vavg_hu,  vvv)
+INSN_LSX(vavg_wu,  vvv)
+INSN_LSX(vavg_du,  vvv)
+INSN_LSX(vavgr_b,  vvv)
+INSN_LSX(vavgr_h,  vvv)
+INSN_LSX(vavgr_w,  vvv)
+INSN_LSX(vavgr_d,  vvv)
+INSN_LSX(vavgr_bu, vvv)
+INSN_LSX(vavgr_hu, vvv)
+INSN_LSX(vavgr_wu, vvv)
+INSN_LSX(vavgr_du, vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 505c336ff3..af0f9f9b0d 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -193,3 +193,21 @@ DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 8edff83157..5fa4792305 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -969,3 +969,200 @@ TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s)
 TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s)
 TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s)
 TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s)
+
+static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+void (*gen_shr_vec)(unsigned, TCGv_vec,
+TCGv_vec, int64_t),
+void (*gen_round_vec)(unsigned, TCGv_vec,
+  TCGv_vec, TCGv_vec))
+{
+TCGv_vec tmp = tcg_temp_new_vec_matching(t);
+gen_round_vec(vece, tmp, a, b);
+tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
+gen_shr_vec(vece, a, a, 1);
+gen_shr_vec(vece, b, b, 1);
+tcg_gen_add_vec(vece, t, a, b);
+tcg_gen_add_vec(vece, t, t, tmp);
+}
+
+static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
+}
+
+static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
+}
+
+static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
+}
+
+static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
+}
+
+static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+  uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_sari_vec, INDEX_op_add_vec, 0
+};
+static const GVecGen3 op[4] = {
+{
+.fniv = gen_vavg_s,

[RFC PATCH v4 16/44] target/loongarch: Implement vdiv/vmod

2023-04-25 Thread Song Gao
This patch includes:
- VDIV.{B/H/W/D}[U];
- VMOD.{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 17 ++
 target/loongarch/helper.h   | 17 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 17 ++
 target/loongarch/insns.decode   | 17 ++
 target/loongarch/lsx_helper.c   | 37 +
 5 files changed, 105 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 980e6e6375..6e4f676a42 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1044,3 +1044,20 @@ INSN_LSX(vmaddwod_h_bu_b,  vvv)
 INSN_LSX(vmaddwod_w_hu_h,  vvv)
 INSN_LSX(vmaddwod_d_wu_w,  vvv)
 INSN_LSX(vmaddwod_q_du_d,  vvv)
+
+INSN_LSX(vdiv_b,   vvv)
+INSN_LSX(vdiv_h,   vvv)
+INSN_LSX(vdiv_w,   vvv)
+INSN_LSX(vdiv_d,   vvv)
+INSN_LSX(vdiv_bu,  vvv)
+INSN_LSX(vdiv_hu,  vvv)
+INSN_LSX(vdiv_wu,  vvv)
+INSN_LSX(vdiv_du,  vvv)
+INSN_LSX(vmod_b,   vvv)
+INSN_LSX(vmod_h,   vvv)
+INSN_LSX(vmod_w,   vvv)
+INSN_LSX(vmod_d,   vvv)
+INSN_LSX(vmod_bu,  vvv)
+INSN_LSX(vmod_hu,  vvv)
+INSN_LSX(vmod_wu,  vvv)
+INSN_LSX(vmod_du,  vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 8cf9620702..7b7c685ede 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -303,3 +303,20 @@ DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
+DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 400c3a0339..b295a9c4df 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2676,3 +2676,20 @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t 
vd_ofs, uint32_t vj_ofs,
 TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s)
 TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s)
 TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s)
+
+TRANS(vdiv_b, gen_vvv, gen_helper_vdiv_b)
+TRANS(vdiv_h, gen_vvv, gen_helper_vdiv_h)
+TRANS(vdiv_w, gen_vvv, gen_helper_vdiv_w)
+TRANS(vdiv_d, gen_vvv, gen_helper_vdiv_d)
+TRANS(vdiv_bu, gen_vvv, gen_helper_vdiv_bu)
+TRANS(vdiv_hu, gen_vvv, gen_helper_vdiv_hu)
+TRANS(vdiv_wu, gen_vvv, gen_helper_vdiv_wu)
+TRANS(vdiv_du, gen_vvv, gen_helper_vdiv_du)
+TRANS(vmod_b, gen_vvv, gen_helper_vmod_b)
+TRANS(vmod_h, gen_vvv, gen_helper_vmod_h)
+TRANS(vmod_w, gen_vvv, gen_helper_vmod_w)
+TRANS(vmod_d, gen_vvv, gen_helper_vmod_d)
+TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu)
+TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu)
+TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu)
+TRANS(vmod_du, gen_vvv, gen_helper_vmod_du)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index df23d4ee1e..67d016edb7 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -740,3 +740,20 @@ vmaddwod_h_bu_b  0111 1011 11100 . . .
@vvv
 vmaddwod_w_hu_h  0111 1011 11101 . . .@vvv
 vmaddwod_d_wu_w  0111 1011 0 . . .@vvv
 vmaddwod_q_du_d  0111 1011 1 . . .@vvv
+
+vdiv_b   0111 1110 0 . . .@vvv
+vdiv_h   0111 1110 1 . . .@vvv
+vdiv_w   0111 1110 00010 . . .@vvv
+vdiv_d   0111 1110 00011 . . .@vvv
+vdiv_bu  0111 1110 01000 . . .@vvv
+vdiv_hu  0111 1110 01001 . . .@vvv
+vdiv_wu  0111 1110 01010 . . .@vvv
+vdiv_du  0111 1110 01011 . . .@vvv
+vmod_b   0111 1110 00100 . . .@vvv
+vmod_h   0111 1110 00101 . . .@vvv
+vmod_w   0111 1110 00110 . . .@vvv
+v

[RFC PATCH v4 33/44] target/loongarch: Implement LSX fpu arith instructions

2023-04-25 Thread Song Gao
This patch includes:
- VF{ADD/SUB/MUL/DIV}.{S/D};
- VF{MADD/MSUB/NMADD/NMSUB}.{S/D};
- VF{MAX/MIN}.{S/D};
- VF{MAXA/MINA}.{S/D};
- VFLOGB.{S/D};
- VFCLASS.{S/D};
- VF{SQRT/RECIP/RSQRT}.{S/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.h  |   4 +
 target/loongarch/disas.c|  46 +
 target/loongarch/fpu_helper.c   |   2 +-
 target/loongarch/helper.h   |  41 +
 target/loongarch/insn_trans/trans_lsx.c.inc |  55 ++
 target/loongarch/insns.decode   |  43 +
 target/loongarch/internals.h|   1 +
 target/loongarch/lsx_helper.c   | 186 
 8 files changed, 377 insertions(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 6755b1f0c7..1f37e36b7c 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -55,6 +55,10 @@ FIELD(FCSR0, CAUSE, 24, 5)
 do { \
 (REG) = FIELD_DP32(REG, FCSR0, CAUSE, V); \
 } while (0)
+#define UPDATE_FP_CAUSE(REG, V) \
+do { \
+(REG) |= FIELD_DP32(0, FCSR0, CAUSE, V); \
+} while (0)
 
 #define GET_FP_ENABLES(REG)FIELD_EX32(REG, FCSR0, ENABLES)
 #define SET_FP_ENABLES(REG, V) \
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index be2bb9cc42..b57b284e49 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -807,6 +807,11 @@ static void output_vv(DisasContext *ctx, arg_vv *a, const 
char *mnemonic)
 output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj);
 }
 
+static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
+{
+output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -1302,3 +1307,44 @@ INSN_LSX(vfrstp_b, vvv)
 INSN_LSX(vfrstp_h, vvv)
 INSN_LSX(vfrstpi_b,vv_i)
 INSN_LSX(vfrstpi_h,vv_i)
+
+INSN_LSX(vfadd_s,  vvv)
+INSN_LSX(vfadd_d,  vvv)
+INSN_LSX(vfsub_s,  vvv)
+INSN_LSX(vfsub_d,  vvv)
+INSN_LSX(vfmul_s,  vvv)
+INSN_LSX(vfmul_d,  vvv)
+INSN_LSX(vfdiv_s,  vvv)
+INSN_LSX(vfdiv_d,  vvv)
+
+INSN_LSX(vfmadd_s, vvvv)
+INSN_LSX(vfmadd_d, vvvv)
+INSN_LSX(vfmsub_s, vvvv)
+INSN_LSX(vfmsub_d, vvvv)
+INSN_LSX(vfnmadd_s,vvvv)
+INSN_LSX(vfnmadd_d,vvvv)
+INSN_LSX(vfnmsub_s,vvvv)
+INSN_LSX(vfnmsub_d,vvvv)
+
+INSN_LSX(vfmax_s,  vvv)
+INSN_LSX(vfmax_d,  vvv)
+INSN_LSX(vfmin_s,  vvv)
+INSN_LSX(vfmin_d,  vvv)
+
+INSN_LSX(vfmaxa_s, vvv)
+INSN_LSX(vfmaxa_d, vvv)
+INSN_LSX(vfmina_s, vvv)
+INSN_LSX(vfmina_d, vvv)
+
+INSN_LSX(vflogb_s, vv)
+INSN_LSX(vflogb_d, vv)
+
+INSN_LSX(vfclass_s,vv)
+INSN_LSX(vfclass_d,vv)
+
+INSN_LSX(vfsqrt_s, vv)
+INSN_LSX(vfsqrt_d, vv)
+INSN_LSX(vfrecip_s,vv)
+INSN_LSX(vfrecip_d,vv)
+INSN_LSX(vfrsqrt_s,vv)
+INSN_LSX(vfrsqrt_d,vv)
diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
index 4b9637210a..f6753c5875 100644
--- a/target/loongarch/fpu_helper.c
+++ b/target/loongarch/fpu_helper.c
@@ -33,7 +33,7 @@ void restore_fp_status(CPULoongArchState *env)
 set_flush_to_zero(0, &env->fp_status);
 }
 
-static int ieee_ex_to_loongarch(int xcpt)
+int ieee_ex_to_loongarch(int xcpt)
 {
 int ret = 0;
 if (xcpt & float_flag_invalid) {
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 2cc235d019..a0c9de271d 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -517,3 +517,44 @@ DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
+
+DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
+
+DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
+DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)

[RFC PATCH v4 25/44] target/loongarch: Implement vsrln vsran

2023-04-25 Thread Song Gao
This patch includes:
- VSRLN.{B.H/H.W/W.D};
- VSRAN.{B.H/H.W/W.D};
- VSRLNI.{B.H/H.W/W.D/D.Q};
- VSRANI.{B.H/H.W/W.D/D.Q}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  16 +++
 target/loongarch/helper.h   |  16 +++
 target/loongarch/insn_trans/trans_lsx.c.inc |  16 +++
 target/loongarch/insns.decode   |  17 +++
 target/loongarch/lsx_helper.c   | 114 
 5 files changed, 179 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c62b6720ec..f0fc2ff84b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1166,3 +1166,19 @@ INSN_LSX(vsrari_b, vv_i)
 INSN_LSX(vsrari_h, vv_i)
 INSN_LSX(vsrari_w, vv_i)
 INSN_LSX(vsrari_d, vv_i)
+
+INSN_LSX(vsrln_b_h,   vvv)
+INSN_LSX(vsrln_h_w,   vvv)
+INSN_LSX(vsrln_w_d,   vvv)
+INSN_LSX(vsran_b_h,   vvv)
+INSN_LSX(vsran_h_w,   vvv)
+INSN_LSX(vsran_w_d,   vvv)
+
+INSN_LSX(vsrlni_b_h,   vv_i)
+INSN_LSX(vsrlni_h_w,   vv_i)
+INSN_LSX(vsrlni_w_d,   vv_i)
+INSN_LSX(vsrlni_d_q,   vv_i)
+INSN_LSX(vsrani_b_h,   vv_i)
+INSN_LSX(vsrani_h_w,   vv_i)
+INSN_LSX(vsrani_w_d,   vv_i)
+INSN_LSX(vsrani_d_q,   vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 20a5e7c8e6..afe7e3d2d0 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -379,3 +379,19 @@ DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index fb43e1b3ce..ad34378ada 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3005,3 +3005,19 @@ TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b)
 TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h)
 TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w)
 TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d)
+
+TRANS(vsrln_b_h, gen_vvv, gen_helper_vsrln_b_h)
+TRANS(vsrln_h_w, gen_vvv, gen_helper_vsrln_h_w)
+TRANS(vsrln_w_d, gen_vvv, gen_helper_vsrln_w_d)
+TRANS(vsran_b_h, gen_vvv, gen_helper_vsran_b_h)
+TRANS(vsran_h_w, gen_vvv, gen_helper_vsran_h_w)
+TRANS(vsran_w_d, gen_vvv, gen_helper_vsran_w_d)
+
+TRANS(vsrlni_b_h, gen_vv_i, gen_helper_vsrlni_b_h)
+TRANS(vsrlni_h_w, gen_vv_i, gen_helper_vsrlni_h_w)
+TRANS(vsrlni_w_d, gen_vv_i, gen_helper_vsrlni_w_d)
+TRANS(vsrlni_d_q, gen_vv_i, gen_helper_vsrlni_d_q)
+TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h)
+TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w)
+TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d)
+TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index a21743..ee54b632a7 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -503,6 +503,7 @@ dbcl  0010 10101 ...  
@i15
 @vv_ui4   . . imm:4 vj:5 vd:5&vv_i
 @vv_ui5     . imm:5 vj:5 vd:5&vv_i
 @vv_ui6   imm:6 vj:5 vd:5&vv_i
+@vv_ui7   ... imm:7 vj:5 vd:5&vv_i
 @vv_ui8    .. imm:8 vj:5 vd:5&vv_i
 @vv_i5     . imm:s5 vj:5 vd:5&vv_i
 
@@ -866,3 +867,19 @@ vsrari_b 0111 00101010 1 01 ... . .   
@vv_ui3
 vsrari_h 0111 00101010 1 1  . .   @vv_ui4
 vsrari_w 0111 00101010 10001 . . .@vv_ui5
 vsrari_d 0111 00101010 1001 .. . .@vv_ui6
+
+vsrln_b_h0111  01001 . . .@vvv
+vsrln_h_w0111  01010 . . .@vvv
+vsrln_w_d0111  01011 . . .@vvv
+vsran_b_h0111  01101 . . .@vvv
+vsran_h_w0111  01110 . . .@vvv
+vsran_w_d0111  0 . . .@vvv
+
+vsrlni_b_h   0111 00110100 0 1  . .   @vv_ui4
+vsrlni_h_w   

[RFC PATCH v4 05/44] target/loongarch: Implement vaddi/vsubi

2023-04-25 Thread Song Gao
This patch includes:
- VADDI.{B/H/W/D}U;
- VSUBI.{B/H/W/D}U.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 14 
 target/loongarch/insn_trans/trans_lsx.c.inc | 37 +
 target/loongarch/insns.decode   | 11 ++
 3 files changed, 62 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index a5948d7847..c1960610c2 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -797,6 +797,11 @@ static void output_vvv(DisasContext *ctx, arg_vvv *a, 
const char *mnemonic)
 output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
 }
 
+static void output_vv_i(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
+{
+output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -807,3 +812,12 @@ INSN_LSX(vsub_h,   vvv)
 INSN_LSX(vsub_w,   vvv)
 INSN_LSX(vsub_d,   vvv)
 INSN_LSX(vsub_q,   vvv)
+
+INSN_LSX(vaddi_bu, vv_i)
+INSN_LSX(vaddi_hu, vv_i)
+INSN_LSX(vaddi_wu, vv_i)
+INSN_LSX(vaddi_du, vv_i)
+INSN_LSX(vsubi_bu, vv_i)
+INSN_LSX(vsubi_hu, vv_i)
+INSN_LSX(vsubi_wu, vv_i)
+INSN_LSX(vsubi_du, vv_i)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index ddeb9fde28..e6c1d0d2cc 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -44,6 +44,34 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp 
mop,
 return true;
 }
 
+static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+  void (*func)(unsigned, uint32_t, uint32_t,
+   int64_t, uint32_t, uint32_t))
+{
+uint32_t vd_ofs, vj_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+
+func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
+return true;
+}
+
+static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+uint32_t vd_ofs, vj_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+
+tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
+return true;
+}
+
 TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add)
 TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add)
 TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add)
@@ -83,3 +111,12 @@ TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub)
 TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub)
 TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub)
 TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub)
+
+TRANS(vaddi_bu, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
+TRANS(vaddi_hu, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
+TRANS(vaddi_wu, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
+TRANS(vaddi_du, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
+TRANS(vsubi_bu, gvec_subi, MO_8)
+TRANS(vsubi_hu, gvec_subi, MO_16)
+TRANS(vsubi_wu, gvec_subi, MO_32)
+TRANS(vsubi_du, gvec_subi, MO_64)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d18db68d51..2a98c14518 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -491,11 +491,13 @@ dbcl  0010 10101 ...  
@i15
 #
 
 &vvv  vd vj vk
+&vv_i vd vj imm
 
 #
 # LSX Formats
 #
 @vvv     . vk:5 vj:5 vd:5&vvv
+@vv_ui5     . imm:5 vj:5 vd:5&vv_i
 
 vadd_b   0111  10100 . . .@vvv
 vadd_h   0111  10101 . . .@vvv
@@ -507,3 +509,12 @@ vsub_h   0111  11001 . . .
@vvv
 vsub_w   0111  11010 . . .@vvv
 vsub_d   0111  11011 . . .@vvv
 vsub_q   0111 00010010 11011 . . .@vvv
+
+vaddi_bu 0111 00101000 10100 ..... ..... ..... @vv_ui5
+vaddi_hu 0111 00101000 10101 ..... ..... ..... @vv_ui5
+vaddi_wu 0111 00101000 10110 ..... ..... ..... @vv_ui5
+vaddi_du 0111 00101000 10111 ..... ..... ..... @vv_ui5
+vsubi_bu 0111 00101000 11000 ..... ..... ..... @vv_ui5
+vsubi_hu 0111 00101000 11001 ..... ..... ..... @vv_ui5
+vsubi_wu 0111 00101000 11010 ..... ..... ..... @vv_ui5
+vsubi_du 0111 00101000 11011 ..... ..... ..... @vv_ui5
-- 
2.31.1




[RFC PATCH v4 06/44] target/loongarch: Implement vneg

2023-04-25 Thread Song Gao
This patch includes:
- VNEG.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 10 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 20 
 target/loongarch/insns.decode   |  7 +++
 3 files changed, 37 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c1960610c2..5eabb8c47a 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -802,6 +802,11 @@ static void output_vv_i(DisasContext *ctx, arg_vv_i *a, 
const char *mnemonic)
 output(ctx, mnemonic, "v%d, v%d, 0x%x", a->vd, a->vj, a->imm);
 }
 
+static void output_vv(DisasContext *ctx, arg_vv *a, const char *mnemonic)
+{
+output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj);
+}
+
 INSN_LSX(vadd_b,   vvv)
 INSN_LSX(vadd_h,   vvv)
 INSN_LSX(vadd_w,   vvv)
@@ -821,3 +826,8 @@ INSN_LSX(vsubi_bu, vv_i)
 INSN_LSX(vsubi_hu, vv_i)
 INSN_LSX(vsubi_wu, vv_i)
 INSN_LSX(vsubi_du, vv_i)
+
+INSN_LSX(vneg_b,   vv)
+INSN_LSX(vneg_h,   vv)
+INSN_LSX(vneg_w,   vv)
+INSN_LSX(vneg_d,   vv)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index e6c1d0d2cc..d02db6285f 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -44,6 +44,21 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp 
mop,
 return true;
 }
 
+static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
+void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+uint32_t vd_ofs, vj_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+
+func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
+return true;
+}
+
 static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
   void (*func)(unsigned, uint32_t, uint32_t,
int64_t, uint32_t, uint32_t))
@@ -120,3 +135,8 @@ TRANS(vsubi_bu, gvec_subi, MO_8)
 TRANS(vsubi_hu, gvec_subi, MO_16)
 TRANS(vsubi_wu, gvec_subi, MO_32)
 TRANS(vsubi_du, gvec_subi, MO_64)
+
+TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg)
+TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg)
+TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg)
+TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 2a98c14518..d90798be11 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -490,12 +490,14 @@ dbcl  0010 10101 ...  
@i15
 # LSX Argument sets
 #
 
+&vv   vd vj
 &vvv  vd vj vk
 &vv_i vd vj imm
 
 #
 # LSX Formats
 #
+@vv     . . vj:5 vd:5&vv
 @vvv     . vk:5 vj:5 vd:5&vvv
 @vv_ui5     . imm:5 vj:5 vd:5&vv_i
 
@@ -518,3 +520,8 @@ vsubi_bu 0111 00101000 11000 . . .
@vv_ui5
 vsubi_hu 0111 00101000 11001 . . .@vv_ui5
 vsubi_wu 0111 00101000 11010 . . .@vv_ui5
 vsubi_du 0111 00101000 11011 . . .@vv_ui5
+
+vneg_b   0111 00101001 11000 01100 ..... ..... @vv
+vneg_h   0111 00101001 11000 01101 ..... ..... @vv
+vneg_w   0111 00101001 11000 01110 ..... ..... @vv
+vneg_d   0111 00101001 11000 01111 ..... ..... @vv
-- 
2.31.1




[RFC PATCH v4 13/44] target/loongarch: Implement vmax/vmin

2023-04-25 Thread Song Gao
This patch includes:
- VMAX[I].{B/H/W/D}[U];
- VMIN[I].{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  33 
 target/loongarch/helper.h   |  18 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 200 
 target/loongarch/insns.decode   |  35 
 target/loongarch/lsx_helper.c   |  33 
 5 files changed, 319 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1f61e67d1f..6b0e518bfa 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -939,3 +939,36 @@ INSN_LSX(vadda_b,  vvv)
 INSN_LSX(vadda_h,  vvv)
 INSN_LSX(vadda_w,  vvv)
 INSN_LSX(vadda_d,  vvv)
+
+INSN_LSX(vmax_b,   vvv)
+INSN_LSX(vmax_h,   vvv)
+INSN_LSX(vmax_w,   vvv)
+INSN_LSX(vmax_d,   vvv)
+INSN_LSX(vmin_b,   vvv)
+INSN_LSX(vmin_h,   vvv)
+INSN_LSX(vmin_w,   vvv)
+INSN_LSX(vmin_d,   vvv)
+INSN_LSX(vmax_bu,  vvv)
+INSN_LSX(vmax_hu,  vvv)
+INSN_LSX(vmax_wu,  vvv)
+INSN_LSX(vmax_du,  vvv)
+INSN_LSX(vmin_bu,  vvv)
+INSN_LSX(vmin_hu,  vvv)
+INSN_LSX(vmin_wu,  vvv)
+INSN_LSX(vmin_du,  vvv)
+INSN_LSX(vmaxi_b,  vv_i)
+INSN_LSX(vmaxi_h,  vv_i)
+INSN_LSX(vmaxi_w,  vv_i)
+INSN_LSX(vmaxi_d,  vv_i)
+INSN_LSX(vmini_b,  vv_i)
+INSN_LSX(vmini_h,  vv_i)
+INSN_LSX(vmini_w,  vv_i)
+INSN_LSX(vmini_d,  vv_i)
+INSN_LSX(vmaxi_bu, vv_i)
+INSN_LSX(vmaxi_hu, vv_i)
+INSN_LSX(vmaxi_wu, vv_i)
+INSN_LSX(vmaxi_du, vv_i)
+INSN_LSX(vmini_bu, vv_i)
+INSN_LSX(vmini_hu, vv_i)
+INSN_LSX(vmini_wu, vv_i)
+INSN_LSX(vmini_du, vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 85fb8f60d2..379f961d1e 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -225,3 +225,21 @@ DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vmini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmini_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vmaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 8ad81c8517..15bb7888d3 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -1314,3 +1314,203 @@ TRANS(vadda_b, gvec_vvv, MO_8, do_vadda)
 TRANS(vadda_h, gvec_vvv, MO_16, do_vadda)
 TRANS(vadda_w, gvec_vvv, MO_32, do_vadda)
 TRANS(vadda_d, gvec_vvv, MO_64, do_vadda)
+
+TRANS(vmax_b, gvec_vvv, MO_8, tcg_gen_gvec_smax)
+TRANS(vmax_h, gvec_vvv, MO_16, tcg_gen_gvec_smax)
+TRANS(vmax_w, gvec_vvv, MO_32, tcg_gen_gvec_smax)
+TRANS(vmax_d, gvec_vvv, MO_64, tcg_gen_gvec_smax)
+TRANS(vmax_bu, gvec_vvv, MO_8, tcg_gen_gvec_umax)
+TRANS(vmax_hu, gvec_vvv, MO_16, tcg_gen_gvec_umax)
+TRANS(vmax_wu, gvec_vvv, MO_32, tcg_gen_gvec_umax)
+TRANS(vmax_du, gvec_vvv, MO_64, tcg_gen_gvec_umax)
+
+TRANS(vmin_b, gvec_vvv, MO_8, tcg_gen_gvec_smin)
+TRANS(vmin_h, gvec_vvv, MO_16, tcg_gen_gvec_smin)
+TRANS(vmin_w, gvec_vvv, MO_32, tcg_gen_gvec_smin)
+TRANS(vmin_d, gvec_vvv, MO_64, tcg_gen_gvec_smin)
+TRANS(vmin_bu, gvec_vvv, MO_8, tcg_gen_gvec_umin)
+TRANS(vmin_hu, gvec_vvv, MO_16, tcg_gen_gvec_umin)
+TRANS(vmin_wu, gvec_vvv, MO_32, tcg_gen_gvec_umin)
+TRANS(vmin_du, gvec_vvv, MO_64, tcg_gen_gvec_umin)
+
+static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+}
+
+static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+tc

[RFC PATCH v4 11/44] target/loongarch: Implement vabsd

2023-04-25 Thread Song Gao
This patch includes:
- VABSD.{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  9 ++
 target/loongarch/helper.h   |  9 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 95 +
 target/loongarch/insns.decode   |  9 ++
 target/loongarch/lsx_helper.c   | 11 +++
 5 files changed, 133 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e7592e7a34..e98ea37793 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -925,3 +925,12 @@ INSN_LSX(vavgr_bu, vvv)
 INSN_LSX(vavgr_hu, vvv)
 INSN_LSX(vavgr_wu, vvv)
 INSN_LSX(vavgr_du, vvv)
+
+INSN_LSX(vabsd_b,  vvv)
+INSN_LSX(vabsd_h,  vvv)
+INSN_LSX(vabsd_w,  vvv)
+INSN_LSX(vabsd_d,  vvv)
+INSN_LSX(vabsd_bu, vvv)
+INSN_LSX(vabsd_hu, vvv)
+INSN_LSX(vabsd_wu, vvv)
+INSN_LSX(vabsd_du, vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index af0f9f9b0d..c3a5d2566e 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -211,3 +211,12 @@ DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vabsd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 5fa4792305..0e9301bf93 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -1166,3 +1166,98 @@ TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u)
 TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u)
 TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u)
 TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u)
+
+static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+tcg_gen_smax_vec(vece, t, a, b);
+tcg_gen_smin_vec(vece, a, a, b);
+tcg_gen_sub_vec(vece, t, t, a);
+}
+
+static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+   uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
+};
+static const GVecGen3 op[4] = {
+{
+.fniv = gen_vabsd_s,
+.fno = gen_helper_vabsd_b,
+.opt_opc = vecop_list,
+.vece = MO_8
+},
+{
+.fniv = gen_vabsd_s,
+.fno = gen_helper_vabsd_h,
+.opt_opc = vecop_list,
+.vece = MO_16
+},
+{
+.fniv = gen_vabsd_s,
+.fno = gen_helper_vabsd_w,
+.opt_opc = vecop_list,
+.vece = MO_32
+},
+{
+.fniv = gen_vabsd_s,
+.fno = gen_helper_vabsd_d,
+.opt_opc = vecop_list,
+.vece = MO_64
+},
+};
+
+tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+tcg_gen_umax_vec(vece, t, a, b);
+tcg_gen_umin_vec(vece, a, a, b);
+tcg_gen_sub_vec(vece, t, t, a);
+}
+
+static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+   uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
+};
+static const GVecGen3 op[4] = {
+{
+.fniv = gen_vabsd_u,
+.fno = gen_helper_vabsd_bu,
+.opt_opc = vecop_list,
+.vece = MO_8
+},
+{
+.fniv = gen_vabsd_u,
+.fno = gen_helper_vabsd_hu,
+.opt_opc = vecop_list,
+.vece = MO_16
+},
+{
+.fniv = gen_vabsd_u,
+.fno = gen_helper_vabsd_wu,
+.opt_opc = vecop_list,
+.vece = MO_32
+},
+{
+.fniv = gen_vabsd_u,
+.fno = gen_helper_vabsd_du,
+.opt_opc = vecop_list,
+.vece = MO_64
+},
+};
+
+tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vabsd_b, 

[RFC PATCH v4 17/44] target/loongarch: Implement vsat

2023-04-25 Thread Song Gao
This patch includes:
- VSAT.{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|   9 ++
 target/loongarch/helper.h   |   9 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 101 
 target/loongarch/insns.decode   |  12 +++
 target/loongarch/lsx_helper.c   |  37 +++
 5 files changed, 168 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 6e4f676a42..b04aefe3ed 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1061,3 +1061,12 @@ INSN_LSX(vmod_bu,  vvv)
 INSN_LSX(vmod_hu,  vvv)
 INSN_LSX(vmod_wu,  vvv)
 INSN_LSX(vmod_du,  vvv)
+
+INSN_LSX(vsat_b,   vv_i)
+INSN_LSX(vsat_h,   vv_i)
+INSN_LSX(vsat_w,   vv_i)
+INSN_LSX(vsat_d,   vv_i)
+INSN_LSX(vsat_bu,  vv_i)
+INSN_LSX(vsat_hu,  vv_i)
+INSN_LSX(vsat_wu,  vv_i)
+INSN_LSX(vsat_du,  vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 7b7c685ede..d2b1c9f2a4 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -320,3 +320,12 @@ DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
 DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
 DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
+
+DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index b295a9c4df..b8f05c66a5 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2693,3 +2693,104 @@ TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu)
 TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu)
 TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu)
 TRANS(vmod_du, gen_vvv, gen_helper_vmod_du)
+
+static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
+{
+TCGv_vec min;
+
+min = tcg_temp_new_vec_matching(t);
+tcg_gen_not_vec(vece, min, max);
+tcg_gen_smax_vec(vece, t, a, min);
+tcg_gen_smin_vec(vece, t, t, max);
+}
+
+static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+  int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+};
+static const GVecGen2s op[4] = {
+{
+.fniv = gen_vsat_s,
+.fno = gen_helper_vsat_b,
+.opt_opc = vecop_list,
+.vece = MO_8
+},
+{
+.fniv = gen_vsat_s,
+.fno = gen_helper_vsat_h,
+.opt_opc = vecop_list,
+.vece = MO_16
+},
+{
+.fniv = gen_vsat_s,
+.fno = gen_helper_vsat_w,
+.opt_opc = vecop_list,
+.vece = MO_32
+},
+{
+.fniv = gen_vsat_s,
+.fno = gen_helper_vsat_d,
+.opt_opc = vecop_list,
+.vece = MO_64
+},
+};
+
+tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
+tcg_constant_i64((1ll<< imm) -1), &op[vece]);
+}
+
+TRANS(vsat_b, gvec_vv_i, MO_8, do_vsat_s)
+TRANS(vsat_h, gvec_vv_i, MO_16, do_vsat_s)
+TRANS(vsat_w, gvec_vv_i, MO_32, do_vsat_s)
+TRANS(vsat_d, gvec_vv_i, MO_64, do_vsat_s)
+
+static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
+{
+tcg_gen_umin_vec(vece, t, a, max);
+}
+
+static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+   int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+uint64_t max;
+static const TCGOpcode vecop_list[] = {
+INDEX_op_umin_vec, 0
+};
+static const GVecGen2s op[4] = {
+{
+.fniv = gen_vsat_u,
+.fno = gen_helper_vsat_bu,
+.opt_opc = vecop_list,
+.vece = MO_8
+},
+{
+.fniv = gen_vsat_u,
+.fno = gen_helper_vsat_hu,
+.opt_opc = vecop_list,
+.vece = MO_16
+},
+{
+.fniv = gen_vsat_u,
+.fno = gen_helper_vsat_wu,
+.opt_opc = vecop_list,
+.vece = MO_32
+},
+{
+.fniv = gen_vsat_u,
+.fno = gen_helper_vsat_du,
+.opt_opc = vecop_list,
+.vece = MO_64
+},
+};
+
+max = (imm == 0

[RFC PATCH v4 20/44] target/loongarch: Implement vmskltz/vmskgez/vmsknz

2023-04-25 Thread Song Gao
This patch includes:
- VMSKLTZ.{B/H/W/D};
- VMSKGEZ.B;
- VMSKNZ.B.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|   7 ++
 target/loongarch/helper.h   |   7 ++
 target/loongarch/insn_trans/trans_lsx.c.inc |   7 ++
 target/loongarch/insns.decode   |   7 ++
 target/loongarch/lsx_helper.c   | 113 
 5 files changed, 141 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 46e808c321..2725b827ee 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1084,3 +1084,10 @@ INSN_LSX(vsigncov_b,   vvv)
 INSN_LSX(vsigncov_h,   vvv)
 INSN_LSX(vsigncov_w,   vvv)
 INSN_LSX(vsigncov_d,   vvv)
+
+INSN_LSX(vmskltz_b,vv)
+INSN_LSX(vmskltz_h,vv)
+INSN_LSX(vmskltz_w,vv)
+INSN_LSX(vmskltz_d,vv)
+INSN_LSX(vmskgez_b,vv)
+INSN_LSX(vmsknz_b, vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index e1e5d58697..34b7b2f576 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -343,3 +343,10 @@ DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_3(vmskltz_b, void, env, i32, i32)
+DEF_HELPER_3(vmskltz_h, void, env, i32, i32)
+DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
+DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
+DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
+DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 644917a695..64387f2666 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2867,3 +2867,10 @@ TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov)
 TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov)
 TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov)
 TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov)
+
+TRANS(vmskltz_b, gen_vv, gen_helper_vmskltz_b)
+TRANS(vmskltz_h, gen_vv, gen_helper_vmskltz_h)
+TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w)
+TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
+TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
+TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 4233dd7404..47c1ef78a7 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -783,3 +783,10 @@ vsigncov_b       0111 00010010 11100 ..... ..... .....
@vvv
 vsigncov_h       0111 00010010 11101 ..... ..... .....    @vvv
 vsigncov_w       0111 00010010 11110 ..... ..... .....    @vvv
 vsigncov_d       0111 00010010 11111 ..... ..... .....    @vvv
+
+vmskltz_b        0111 00101001 11000 10000 ..... .....    @vv
+vmskltz_h        0111 00101001 11000 10001 ..... .....    @vv
+vmskltz_w        0111 00101001 11000 10010 ..... .....    @vv
+vmskltz_d        0111 00101001 11000 10011 ..... .....    @vv
+vmskgez_b        0111 00101001 11000 10100 ..... .....    @vv
+vmsknz_b         0111 00101001 11000 11000 ..... .....    @vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 408815ea45..2359c63fdf 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -669,3 +669,116 @@ DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
 DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
 DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
 DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
+
+static uint64_t do_vmskltz_b(int64_t val)
+{
+uint64_t m = 0x8080808080808080ULL;
+uint64_t c =  val & m;
+c |= c << 7;
+c |= c << 14;
+c |= c << 28;
+return c >> 56;
+}
+
+void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+{
+uint16_t temp = 0;
+VReg *Vd = &(env->fpr[vd].vreg);
+VReg *Vj = &(env->fpr[vj].vreg);
+
+temp = do_vmskltz_b(Vj->D(0));
+temp |= (do_vmskltz_b(Vj->D(1)) << 8);
+Vd->D(0) = temp;
+Vd->D(1) = 0;
+}
+
+static uint64_t do_vmskltz_h(int64_t val)
+{
+uint64_t m = 0x8000800080008000ULL;
+uint64_t c =  val & m;
+c |= c << 15;
+c |= c << 30;
+return c >> 60;
+}
+
+void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+{
+uint16_t temp = 0;
+VReg *Vd = &(env->fpr[vd].vreg);
+VReg *Vj = &(env->fpr[vj].vreg);
+
+temp = do_vmskltz_h(Vj->D(0));
+temp |= (do_vmskltz_h(Vj->D(1)) << 4);
+Vd->D(0) = temp;
+Vd->D(1) = 0;
+}
+
+static uint64_t do_vmskltz_w(int64_t val)
+{
+uint64_t m = 0x8000000080000000ULL;
+uint64_t c =  val & m;
+c |= c << 31;
+return c >> 62;
+}
+
+void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+{
+uint16_t temp = 0;
+VReg *Vd = &(env->fpr[vd].vreg);
+VReg *Vj = &(env->fpr[vj].vreg);
+

[RFC PATCH v4 29/44] target/loongarch: Implement vclo vclz

2023-04-25 Thread Song Gao
This patch includes:
- VCLO.{B/H/W/D};
- VCLZ.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  9 ++
 target/loongarch/helper.h   |  9 ++
 target/loongarch/insn_trans/trans_lsx.c.inc |  9 ++
 target/loongarch/insns.decode   |  9 ++
 target/loongarch/lsx_helper.c   | 31 +
 5 files changed, 67 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 405e8885cd..0c82a1d9d1 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1258,3 +1258,12 @@ INSN_LSX(vssrarni_bu_h,vv_i)
 INSN_LSX(vssrarni_hu_w,vv_i)
 INSN_LSX(vssrarni_wu_d,vv_i)
 INSN_LSX(vssrarni_du_q,vv_i)
+
+INSN_LSX(vclo_b,   vv)
+INSN_LSX(vclo_h,   vv)
+INSN_LSX(vclo_w,   vv)
+INSN_LSX(vclo_d,   vv)
+INSN_LSX(vclz_b,   vv)
+INSN_LSX(vclz_h,   vv)
+INSN_LSX(vclz_w,   vv)
+INSN_LSX(vclz_d,   vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 724112da81..e21e9b9704 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -471,3 +471,12 @@ DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
+
+DEF_HELPER_3(vclo_b, void, env, i32, i32)
+DEF_HELPER_3(vclo_h, void, env, i32, i32)
+DEF_HELPER_3(vclo_w, void, env, i32, i32)
+DEF_HELPER_3(vclo_d, void, env, i32, i32)
+DEF_HELPER_3(vclz_b, void, env, i32, i32)
+DEF_HELPER_3(vclz_h, void, env, i32, i32)
+DEF_HELPER_3(vclz_w, void, env, i32, i32)
+DEF_HELPER_3(vclz_d, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 9c24cbc297..c7649fb777 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3097,3 +3097,12 @@ TRANS(vssrarni_bu_h, gen_vv_i, gen_helper_vssrarni_bu_h)
 TRANS(vssrarni_hu_w, gen_vv_i, gen_helper_vssrarni_hu_w)
 TRANS(vssrarni_wu_d, gen_vv_i, gen_helper_vssrarni_wu_d)
 TRANS(vssrarni_du_q, gen_vv_i, gen_helper_vssrarni_du_q)
+
+TRANS(vclo_b, gen_vv, gen_helper_vclo_b)
+TRANS(vclo_h, gen_vv, gen_helper_vclo_h)
+TRANS(vclo_w, gen_vv, gen_helper_vclo_w)
+TRANS(vclo_d, gen_vv, gen_helper_vclo_d)
+TRANS(vclz_b, gen_vv, gen_helper_vclz_b)
+TRANS(vclz_h, gen_vv, gen_helper_vclz_h)
+TRANS(vclz_w, gen_vv, gen_helper_vclz_w)
+TRANS(vclz_d, gen_vv, gen_helper_vclz_d)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index bb4b2a8632..7591ec1bab 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -959,3 +959,12 @@ vssrarni_bu_h    0111 00110110 11000 1 .... ..... .....
@vv_ui4
 vssrarni_hu_w    0111 00110110 11001 ..... ..... .....    @vv_ui5
 vssrarni_wu_d    0111 00110110 1101 ...... ..... .....    @vv_ui6
 vssrarni_du_q    0111 00110110 111 ....... ..... .....    @vv_ui7
+
+vclo_b           0111 00101001 11000 00000 ..... .....    @vv
+vclo_h           0111 00101001 11000 00001 ..... .....    @vv
+vclo_w           0111 00101001 11000 00010 ..... .....    @vv
+vclo_d           0111 00101001 11000 00011 ..... .....    @vv
+vclz_b           0111 00101001 11000 00100 ..... .....    @vv
+vclz_h           0111 00101001 11000 00101 ..... .....    @vv
+vclz_w           0111 00101001 11000 00110 ..... .....    @vv
+vclz_d           0111 00101001 11000 00111 ..... .....    @vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index fb6f29c94d..044032f180 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -1915,3 +1915,34 @@ void HELPER(vssrarni_du_q)(CPULoongArchState *env,
 VSSRARNUI(vssrarni_bu_h, 16, B, H)
 VSSRARNUI(vssrarni_hu_w, 32, H, W)
 VSSRARNUI(vssrarni_wu_d, 64, W, D)
+
+#define DO_2OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{   \
+int i;  \
+VReg *Vd = &(env->fpr[vd].vreg);\
+VReg *Vj = &(env->fpr[vj].vreg);\
+\
+for (i = 0; i < LSX_LEN/BIT; i++)   \
+{   \
+Vd->E(i) = DO_OP(Vj->E(i)); \
+}   \
+}
+
+#define DO_CLO_B(N)  (clz32(~N & 0xff) - 24)
+#define DO_CLO_H(N)  (clz32(~N & 0xffff) - 16)
+#define DO_CLO_W(N)  (clz32(~N))
+#define DO_CLO_D(N)  (clz64(~N))
+#define DO_CLZ_B(N)  (clz32(N) - 24)
+#define DO_CLZ_H(N)  (clz32(N) - 16)
+#define DO

[RFC PATCH v4 31/44] target/loongarch: Implement vbitclr vbitset vbitrev

2023-04-25 Thread Song Gao
This patch includes:
- VBITCLR[I].{B/H/W/D};
- VBITSET[I].{B/H/W/D};
- VBITREV[I].{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  25 ++
 target/loongarch/helper.h   |  27 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 305 
 target/loongarch/insns.decode   |  25 ++
 target/loongarch/lsx_helper.c   |  55 
 5 files changed, 437 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0ca51de9d8..48c7ea47a4 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1272,3 +1272,28 @@ INSN_LSX(vpcnt_b,  vv)
 INSN_LSX(vpcnt_h,  vv)
 INSN_LSX(vpcnt_w,  vv)
 INSN_LSX(vpcnt_d,  vv)
+
+INSN_LSX(vbitclr_b,vvv)
+INSN_LSX(vbitclr_h,vvv)
+INSN_LSX(vbitclr_w,vvv)
+INSN_LSX(vbitclr_d,vvv)
+INSN_LSX(vbitclri_b,   vv_i)
+INSN_LSX(vbitclri_h,   vv_i)
+INSN_LSX(vbitclri_w,   vv_i)
+INSN_LSX(vbitclri_d,   vv_i)
+INSN_LSX(vbitset_b,vvv)
+INSN_LSX(vbitset_h,vvv)
+INSN_LSX(vbitset_w,vvv)
+INSN_LSX(vbitset_d,vvv)
+INSN_LSX(vbitseti_b,   vv_i)
+INSN_LSX(vbitseti_h,   vv_i)
+INSN_LSX(vbitseti_w,   vv_i)
+INSN_LSX(vbitseti_d,   vv_i)
+INSN_LSX(vbitrev_b,vvv)
+INSN_LSX(vbitrev_h,vvv)
+INSN_LSX(vbitrev_w,vvv)
+INSN_LSX(vbitrev_d,vvv)
+INSN_LSX(vbitrevi_b,   vv_i)
+INSN_LSX(vbitrevi_h,   vv_i)
+INSN_LSX(vbitrevi_w,   vv_i)
+INSN_LSX(vbitrevi_d,   vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 96b9b16923..75120ca55e 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -485,3 +485,30 @@ DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
 DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
 DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
 DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitclr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitclr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitclri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitclri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitclri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitclri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vbitset_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitset_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitset_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitset_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitseti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitseti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitseti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitseti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vbitrev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitrev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitrev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitrev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index f4ebdca63c..86243b54ba 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3111,3 +3111,308 @@ TRANS(vpcnt_b, gen_vv, gen_helper_vpcnt_b)
 TRANS(vpcnt_h, gen_vv, gen_helper_vpcnt_h)
 TRANS(vpcnt_w, gen_vv, gen_helper_vpcnt_w)
 TRANS(vpcnt_d, gen_vv, gen_helper_vpcnt_d)
+
+static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+TCGv_vec mask, lsh, t1, one;
+
+lsh = tcg_temp_new_vec_matching(t);
+t1 = tcg_temp_new_vec_matching(t);
+mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
+one = tcg_constant_vec_matching(t, vece, 1);
+
+tcg_gen_and_vec(vece, lsh, b, mask);
+tcg_gen_shlv_vec(vece, t1, one, lsh);
+func(vece, t, a, t1);
+}
+
+static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vbit(vece, t, a, b, tcg_gen_andc_vec);
+}
+
+static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+do_vbit(vece, t, a, b, tcg_gen_or_vec);
+}
+
+static void gen_vbitrev(unsigned vece,

[RFC PATCH v4 22/44] target/loongarch: Implement vsll vsrl vsra vrotr

2023-04-25 Thread Song Gao
This patch includes:
- VSLL[I].{B/H/W/D};
- VSRL[I].{B/H/W/D};
- VSRA[I].{B/H/W/D};
- VROTR[I].{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 36 +
 target/loongarch/insn_trans/trans_lsx.c.inc | 36 +
 target/loongarch/insns.decode   | 36 +
 3 files changed, 108 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index eca0a4bb7b..f7d0fb4441 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1103,3 +1103,39 @@ INSN_LSX(vandi_b,  vv_i)
 INSN_LSX(vori_b,   vv_i)
 INSN_LSX(vxori_b,  vv_i)
 INSN_LSX(vnori_b,  vv_i)
+
+INSN_LSX(vsll_b,   vvv)
+INSN_LSX(vsll_h,   vvv)
+INSN_LSX(vsll_w,   vvv)
+INSN_LSX(vsll_d,   vvv)
+INSN_LSX(vslli_b,  vv_i)
+INSN_LSX(vslli_h,  vv_i)
+INSN_LSX(vslli_w,  vv_i)
+INSN_LSX(vslli_d,  vv_i)
+
+INSN_LSX(vsrl_b,   vvv)
+INSN_LSX(vsrl_h,   vvv)
+INSN_LSX(vsrl_w,   vvv)
+INSN_LSX(vsrl_d,   vvv)
+INSN_LSX(vsrli_b,  vv_i)
+INSN_LSX(vsrli_h,  vv_i)
+INSN_LSX(vsrli_w,  vv_i)
+INSN_LSX(vsrli_d,  vv_i)
+
+INSN_LSX(vsra_b,   vvv)
+INSN_LSX(vsra_h,   vvv)
+INSN_LSX(vsra_w,   vvv)
+INSN_LSX(vsra_d,   vvv)
+INSN_LSX(vsrai_b,  vv_i)
+INSN_LSX(vsrai_h,  vv_i)
+INSN_LSX(vsrai_w,  vv_i)
+INSN_LSX(vsrai_d,  vv_i)
+
+INSN_LSX(vrotr_b,  vvv)
+INSN_LSX(vrotr_h,  vvv)
+INSN_LSX(vrotr_w,  vvv)
+INSN_LSX(vrotr_d,  vvv)
+INSN_LSX(vrotri_b, vv_i)
+INSN_LSX(vrotri_h, vv_i)
+INSN_LSX(vrotri_w, vv_i)
+INSN_LSX(vrotri_d, vv_i)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index e5e194106b..ad8f32ed18 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2930,3 +2930,39 @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, 
uint32_t vj_ofs,
 }
 
 TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b)
+
+TRANS(vsll_b, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
+TRANS(vsll_h, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
+TRANS(vsll_w, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
+TRANS(vsll_d, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
+TRANS(vslli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
+TRANS(vslli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
+TRANS(vslli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
+TRANS(vslli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
+
+TRANS(vsrl_b, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
+TRANS(vsrl_h, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
+TRANS(vsrl_w, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
+TRANS(vsrl_d, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
+TRANS(vsrli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
+TRANS(vsrli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
+TRANS(vsrli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
+TRANS(vsrli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
+
+TRANS(vsra_b, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
+TRANS(vsra_h, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
+TRANS(vsra_w, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
+TRANS(vsra_d, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
+TRANS(vsrai_b, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
+TRANS(vsrai_h, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
+TRANS(vsrai_w, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
+TRANS(vsrai_d, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
+
+TRANS(vrotr_b, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
+TRANS(vrotr_h, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
+TRANS(vrotr_w, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
+TRANS(vrotr_d, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
+TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 6309683be9..7c0b0c4ac8 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -803,3 +803,39 @@ vandi_b          0111 00111101 00 ........ ..... .....
@vv_ui8
 vori_b           0111 00111101 01 ........ ..... .....    @vv_ui8
 vxori_b          0111 00111101 10 ........ ..... .....    @vv_ui8
 vnori_b          0111 00111101 11 ........ ..... .....    @vv_ui8
+
+vsll_b           0111 00001110 10000 ..... ..... .....    @vvv
+vsll_h           0111 00001110 10001 ..... ..... .....    @vvv
+vsll_w           0111 00001110 10010 ..... ..... .....    @vvv
+vsll_d           0111 00001110 10011 ..... ..... .....    @vvv
+vslli_b          0111 00110010 11000 01 ... ..... .....   @vv_ui3
+vslli_h          0111 00110010 11000 1 .... ..... .....   @vv_ui4
+vslli_w          0111 00110010 11001 ..... ..... .....    @vv_ui5
+vslli_d          0111 00110010 1101 ...... ..... .....    @vv_ui6
+
+vsrl_b           0111 00001110 10100 ..... ..... .....    @vvv
+vsrl_h   0111 1110 10101 . ...

[RFC PATCH v4 43/44] target/loongarch: Use {set/get}_fpr to replace cpu_fpr

2023-04-25 Thread Song Gao
Introduce set_fpr() and get_fpr() and remove cpu_fpr.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 .../loongarch/insn_trans/trans_farith.c.inc   | 72 +++
 target/loongarch/insn_trans/trans_fcmp.c.inc  | 12 ++--
 .../loongarch/insn_trans/trans_fmemory.c.inc  | 37 ++
 target/loongarch/insn_trans/trans_fmov.c.inc  | 31 +---
 target/loongarch/translate.c  | 20 --
 5 files changed, 129 insertions(+), 43 deletions(-)

diff --git a/target/loongarch/insn_trans/trans_farith.c.inc 
b/target/loongarch/insn_trans/trans_farith.c.inc
index 7081fbb89b..21ea47308b 100644
--- a/target/loongarch/insn_trans/trans_farith.c.inc
+++ b/target/loongarch/insn_trans/trans_farith.c.inc
@@ -17,18 +17,29 @@
 static bool gen_fff(DisasContext *ctx, arg_fff *a,
 void (*func)(TCGv, TCGv_env, TCGv, TCGv))
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src1 = get_fpr(ctx, a->fj);
+TCGv src2 = get_fpr(ctx, a->fk);
+
 CHECK_FPE;
 
-func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj], cpu_fpr[a->fk]);
+func(dest, cpu_env, src1, src2);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool gen_ff(DisasContext *ctx, arg_ff *a,
void (*func)(TCGv, TCGv_env, TCGv))
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj]);
+func(dest, cpu_env, src);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
@@ -37,61 +48,98 @@ static bool gen_muladd(DisasContext *ctx, arg_ffff *a,
int flag)
 {
 TCGv_i32 tflag = tcg_constant_i32(flag);
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src1 = get_fpr(ctx, a->fj);
+TCGv src2 = get_fpr(ctx, a->fk);
+TCGv src3 = get_fpr(ctx, a->fa);
 
 CHECK_FPE;
 
-func(cpu_fpr[a->fd], cpu_env, cpu_fpr[a->fj],
- cpu_fpr[a->fk], cpu_fpr[a->fa], tflag);
+func(dest, cpu_env, src1, src2, src3, tflag);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src1 = get_fpr(ctx, a->fk);
+TCGv src2 = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 31);
+tcg_gen_deposit_i64(dest, src1, src2, 0, 31);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fcopysign_d(DisasContext *ctx, arg_fcopysign_d *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src1 = get_fpr(ctx, a->fk);
+TCGv src2 = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_deposit_i64(cpu_fpr[a->fd], cpu_fpr[a->fk], cpu_fpr[a->fj], 0, 63);
+tcg_gen_deposit_i64(dest, src1, src2, 0, 63);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 31));
-gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]);
+tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31));
+gen_nanbox_s(dest, dest);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_andi_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], MAKE_64BIT_MASK(0, 63));
+tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63));
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x80000000);
-gen_nanbox_s(cpu_fpr[a->fd], cpu_fpr[a->fd]);
+tcg_gen_xori_i64(dest, src, 0x80000000);
+gen_nanbox_s(dest, dest);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
 static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a)
 {
+TCGv dest = get_fpr(ctx, a->fd);
+TCGv src = get_fpr(ctx, a->fj);
+
 CHECK_FPE;
 
-tcg_gen_xori_i64(cpu_fpr[a->fd], cpu_fpr[a->fj], 0x8000000000000000LL);
+tcg_gen_xori_i64(dest, src, 0x8000000000000000LL);
+set_fpr(a->fd, dest);
+
 return true;
 }
 
diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc 
b/target/loongarch/insn_trans/trans_fcmp.c.inc
index 3b0da2b9f4..a78868dbc4 100644
--- a/target/loongarch/insn_trans/trans_fcmp.c.inc
+++ b/target/loongarch/insn_trans/trans_fcmp.c.inc
@@ -25,17 +25,19 @@ static uint32_t get_fcmp_flags(int cond)
 
 static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
 {
-TCGv var;
+TCGv var, src1, src2;
 uint32_t flags;
 void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
 
 CHECK_FPE;
 
 var = tcg_temp_new();
+src1 = get_fpr(ctx, a->fj);
+src2 = get_fpr(ctx, a->fk);
 fn = (a->fcond & 1 ? gen_helper_fcmp_s_s

[RFC PATCH v4 04/44] target/loongarch: Implement vadd/vsub

2023-04-25 Thread Song Gao
This patch includes:
- VADD.{B/H/W/D/Q};
- VSUB.{B/H/W/D/Q}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c| 23 +++
 target/loongarch/insn_trans/trans_lsx.c.inc | 69 +
 target/loongarch/insns.decode   | 22 +++
 target/loongarch/translate.c| 24 +++
 target/loongarch/translate.h|  1 +
 5 files changed, 139 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 2e93e77e0d..a5948d7847 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -784,3 +784,26 @@ PCADD_INSN(pcaddi)
 PCADD_INSN(pcalau12i)
 PCADD_INSN(pcaddu12i)
 PCADD_INSN(pcaddu18i)
+
+#define INSN_LSX(insn, type)\
+static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
+{   \
+output_##type(ctx, a, #insn);   \
+return true;\
+}
+
+static void output_vvv(DisasContext *ctx, arg_vvv *a, const char *mnemonic)
+{
+output(ctx, mnemonic, "v%d, v%d, v%d", a->vd, a->vj, a->vk);
+}
+
+INSN_LSX(vadd_b,   vvv)
+INSN_LSX(vadd_h,   vvv)
+INSN_LSX(vadd_w,   vvv)
+INSN_LSX(vadd_d,   vvv)
+INSN_LSX(vadd_q,   vvv)
+INSN_LSX(vsub_b,   vvv)
+INSN_LSX(vsub_h,   vvv)
+INSN_LSX(vsub_w,   vvv)
+INSN_LSX(vsub_d,   vvv)
+INSN_LSX(vsub_q,   vvv)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 5dedb044d7..ddeb9fde28 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -14,3 +14,72 @@
 #else
 #define CHECK_SXE
 #endif
+
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
+void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+TCGv_i32 vd = tcg_constant_i32(a->vd);
+TCGv_i32 vj = tcg_constant_i32(a->vj);
+TCGv_i32 vk = tcg_constant_i32(a->vk);
+
+CHECK_SXE;
+
+func(cpu_env, vd, vj, vk);
+return true;
+}
+
+static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+  uint32_t, uint32_t, uint32_t))
+{
+uint32_t vd_ofs, vj_ofs, vk_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+vk_ofs = vec_full_offset(a->vk);
+
+func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+return true;
+}
+
+TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add)
+TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add)
+TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add)
+TRANS(vadd_d, gvec_vvv, MO_64, tcg_gen_gvec_add)
+
+#define VADDSUB_Q(NAME)\
+static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
+{  \
+TCGv_i64 rh, rl, ah, al, bh, bl;   \
+   \
+CHECK_SXE; \
+   \
+rh = tcg_temp_new_i64();   \
+rl = tcg_temp_new_i64();   \
+ah = tcg_temp_new_i64();   \
+al = tcg_temp_new_i64();   \
+bh = tcg_temp_new_i64();   \
+bl = tcg_temp_new_i64();   \
+   \
+get_vreg64(ah, a->vj, 1);  \
+get_vreg64(al, a->vj, 0);  \
+get_vreg64(bh, a->vk, 1);  \
+get_vreg64(bl, a->vk, 0);  \
+   \
+tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh);   \
+   \
+set_vreg64(rh, a->vd, 1);  \
+set_vreg64(rl, a->vd, 0);  \
+   \
+return true;   \
+}
+
+VADDSUB_Q(add)
+VADDSUB_Q(sub)
+
+TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub)
+TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub)
+TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub)
+TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index de7b8f0f3c..d18db68d51 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -485,3 +485,25 @@ ldpte 01100100 01  . 0
@j_i
 ertn  0110

[RFC PATCH v4 19/44] target/loongarch: Implement vsigncov

2023-04-25 Thread Song Gao
This patch includes:
- VSIGNCOV.{B/H/W/D}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  5 ++
 target/loongarch/helper.h   |  5 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 53 +
 target/loongarch/insns.decode   |  5 ++
 target/loongarch/lsx_helper.c   |  7 +++
 5 files changed, 75 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 412c1cedcb..46e808c321 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1079,3 +1079,8 @@ INSN_LSX(vexth_hu_bu,  vv)
 INSN_LSX(vexth_wu_hu,  vv)
 INSN_LSX(vexth_du_wu,  vv)
 INSN_LSX(vexth_qu_du,  vv)
+
+INSN_LSX(vsigncov_b,   vvv)
+INSN_LSX(vsigncov_h,   vvv)
+INSN_LSX(vsigncov_w,   vvv)
+INSN_LSX(vsigncov_d,   vvv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 005988be25..e1e5d58697 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -338,3 +338,8 @@ DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
 DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
 DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
 DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 93ae76bc4c..644917a695 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -2814,3 +2814,56 @@ TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu)
 TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu)
 TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu)
 TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du)
+
+static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+TCGv_vec t1, zero;
+
+t1 = tcg_temp_new_vec_matching(t);
+zero = tcg_constant_vec_matching(t, vece, 0);
+
+tcg_gen_neg_vec(vece, t1, b);
+tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
+tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
+}
+
+static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop_list[] = {
+INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
+};
+static const GVecGen3 op[4] = {
+{
+.fniv = gen_vsigncov,
+.fno = gen_helper_vsigncov_b,
+.opt_opc = vecop_list,
+.vece = MO_8
+},
+{
+.fniv = gen_vsigncov,
+.fno = gen_helper_vsigncov_h,
+.opt_opc = vecop_list,
+.vece = MO_16
+},
+{
+.fniv = gen_vsigncov,
+.fno = gen_helper_vsigncov_w,
+.opt_opc = vecop_list,
+.vece = MO_32
+},
+{
+.fniv = gen_vsigncov,
+.fno = gen_helper_vsigncov_d,
+.opt_opc = vecop_list,
+.vece = MO_64
+},
+};
+
+tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov)
+TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov)
+TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov)
+TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 39c582d098..4233dd7404 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -778,3 +778,8 @@ vexth_hu_bu  0111 00101001 11101 11100 . .
@vv
 vexth_wu_hu  0111 00101001 11101 11101 . .@vv
 vexth_du_wu  0111 00101001 11101 0 . .@vv
 vexth_qu_du  0111 00101001 11101 1 . .@vv
+
+vsigncov_b   0111 00010010 11100 . . .@vvv
+vsigncov_h   0111 00010010 11101 . . .@vvv
+vsigncov_w   0111 00010010 0 . . .@vvv
+vsigncov_d   0111 00010010 1 . . .@vvv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index b4582a49d9..408815ea45 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -662,3 +662,10 @@ VEXTH(vexth_d_w, 64, D, W)
 VEXTH(vexth_hu_bu, 16, UH, UB)
 VEXTH(vexth_wu_hu, 32, UW, UH)
 VEXTH(vexth_du_wu, 64, UD, UW)
+
+#define DO_SIGNCOV(a, b)  (a == 0 ? 0 : a < 0 ? -b : b)
+
+DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
+DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
+DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
+DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
-- 
2.31.1




[RFC PATCH v4 03/44] target/loongarch: Add CHECK_SXE macro for check LSX enable

2023-04-25 Thread Song Gao
Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c  |  2 ++
 target/loongarch/cpu.h  |  2 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 11 +++
 3 files changed, 15 insertions(+)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 18b41221a6..55d7f9255e 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -52,6 +52,7 @@ static const char * const excp_names[] = {
 [EXCCODE_FPE] = "Floating Point Exception",
 [EXCCODE_DBP] = "Debug breakpoint",
 [EXCCODE_BCE] = "Bound Check Exception",
+[EXCCODE_SXD] = "128 bit vector instructions Disable exception",
 };
 
 const char *loongarch_exception_name(int32_t exception)
@@ -187,6 +188,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 case EXCCODE_FPD:
 case EXCCODE_FPE:
 case EXCCODE_BCE:
+case EXCCODE_SXD:
 env->CSR_BADV = env->pc;
 QEMU_FALLTHROUGH;
 case EXCCODE_ADEM:
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index fd0f61936d..6755b1f0c7 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -419,6 +419,7 @@ static inline int cpu_mmu_index(CPULoongArchState *env, 
bool ifetch)
 #define HW_FLAGS_PLV_MASK   R_CSR_CRMD_PLV_MASK  /* 0x03 */
 #define HW_FLAGS_CRMD_PGR_CSR_CRMD_PG_MASK   /* 0x10 */
 #define HW_FLAGS_EUEN_FPE   0x04
+#define HW_FLAGS_EUEN_SXE   0x08
 
 static inline void cpu_get_tb_cpu_state(CPULoongArchState *env,
 target_ulong *pc,
@@ -429,6 +430,7 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState 
*env,
 *cs_base = 0;
 *flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK);
 *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE;
+*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE;
 }
 
 void loongarch_cpu_list(void);
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index 1cf3ab34a9..5dedb044d7 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3,3 +3,14 @@
  * LSX translate functions
  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
  */
+
+#ifndef CONFIG_USER_ONLY
+#define CHECK_SXE do { \
+if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
+generate_exception(ctx, EXCCODE_SXD); \
+return true; \
+} \
+} while (0)
+#else
+#define CHECK_SXE
+#endif
-- 
2.31.1




[RFC PATCH v4 18/44] target/loongarch: Implement vexth

2023-04-25 Thread Song Gao
This patch includes:
- VEXTH.{H.B/W.H/D.W/Q.D};
- VEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  9 ++
 target/loongarch/helper.h   |  9 ++
 target/loongarch/insn_trans/trans_lsx.c.inc | 20 
 target/loongarch/insns.decode   |  9 ++
 target/loongarch/lsx_helper.c   | 35 +
 5 files changed, 82 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index b04aefe3ed..412c1cedcb 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1070,3 +1070,12 @@ INSN_LSX(vsat_bu,  vv_i)
 INSN_LSX(vsat_hu,  vv_i)
 INSN_LSX(vsat_wu,  vv_i)
 INSN_LSX(vsat_du,  vv_i)
+
+INSN_LSX(vexth_h_b,vv)
+INSN_LSX(vexth_w_h,vv)
+INSN_LSX(vexth_d_w,vv)
+INSN_LSX(vexth_q_d,vv)
+INSN_LSX(vexth_hu_bu,  vv)
+INSN_LSX(vexth_wu_hu,  vv)
+INSN_LSX(vexth_du_wu,  vv)
+INSN_LSX(vexth_qu_du,  vv)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index d2b1c9f2a4..005988be25 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -329,3 +329,12 @@ DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, 
ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_3(vexth_h_b, void, env, i32, i32)
+DEF_HELPER_3(vexth_w_h, void, env, i32, i32)
+DEF_HELPER_3(vexth_d_w, void, env, i32, i32)
+DEF_HELPER_3(vexth_q_d, void, env, i32, i32)
+DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
+DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
+DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
+DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index b8f05c66a5..93ae76bc4c 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -28,6 +28,17 @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
 return true;
 }
 
+static bool gen_vv(DisasContext *ctx, arg_vv *a,
+   void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+{
+TCGv_i32 vd = tcg_constant_i32(a->vd);
+TCGv_i32 vj = tcg_constant_i32(a->vj);
+
+CHECK_SXE;
+func(cpu_env, vd, vj);
+return true;
+}
+
 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
  void (*func)(unsigned, uint32_t, uint32_t,
   uint32_t, uint32_t, uint32_t))
@@ -2794,3 +2805,12 @@ TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u)
 TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u)
 TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u)
 TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u)
+
+TRANS(vexth_h_b, gen_vv, gen_helper_vexth_h_b)
+TRANS(vexth_w_h, gen_vv, gen_helper_vexth_w_h)
+TRANS(vexth_d_w, gen_vv, gen_helper_vexth_d_w)
+TRANS(vexth_q_d, gen_vv, gen_helper_vexth_q_d)
+TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu)
+TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu)
+TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu)
+TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 3ed61b3d68..39c582d098 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -769,3 +769,12 @@ vsat_bu  0111 00110010 1 01 ... . .   
@vv_ui3
 vsat_hu  0111 00110010 1 1  . .   @vv_ui4
 vsat_wu  0111 00110010 10001 . . .@vv_ui5
 vsat_du  0111 00110010 1001 .. . .@vv_ui6
+
+vexth_h_b0111 00101001 11101 11000 . .@vv
+vexth_w_h0111 00101001 11101 11001 . .@vv
+vexth_d_w0111 00101001 11101 11010 . .@vv
+vexth_q_d0111 00101001 11101 11011 . .@vv
+vexth_hu_bu  0111 00101001 11101 11100 . .@vv
+vexth_wu_hu  0111 00101001 11101 11101 . .@vv
+vexth_du_wu  0111 00101001 11101 0 . .@vv
+vexth_qu_du  0111 00101001 11101 1 . .@vv
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index 9ba16ac631..b4582a49d9 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -627,3 +627,38 @@ VSAT_U(vsat_bu, 8, UB)
 VSAT_U(vsat_hu, 16, UH)
 VSAT_U(vsat_wu, 32, UW)
 VSAT_U(vsat_du, 64, UD)
+
+#define VEXTH(NAME, BIT, E1, E2)\
+void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+{   \
+int i;  \
+VReg *Vd = &(env->fpr[vd].vreg);\
+VReg *Vj = &(env->fpr[vj].vreg);

[RFC PATCH v4 26/44] target/loongarch: Implement vsrlrn vsrarn

2023-04-25 Thread Song Gao
This patch includes:
- VSRLRN.{B.H/H.W/W.D};
- VSRARN.{B.H/H.W/W.D};
- VSRLRNI.{B.H/H.W/W.D/D.Q};
- VSRARNI.{B.H/H.W/W.D/D.Q}.

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  16 +++
 target/loongarch/helper.h   |  16 +++
 target/loongarch/insn_trans/trans_lsx.c.inc |  16 +++
 target/loongarch/insns.decode   |  16 +++
 target/loongarch/lsx_helper.c   | 126 
 5 files changed, 190 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index f0fc2ff84b..185cd36381 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1182,3 +1182,19 @@ INSN_LSX(vsrani_b_h,   vv_i)
 INSN_LSX(vsrani_h_w,   vv_i)
 INSN_LSX(vsrani_w_d,   vv_i)
 INSN_LSX(vsrani_d_q,   vv_i)
+
+INSN_LSX(vsrlrn_b_h,   vvv)
+INSN_LSX(vsrlrn_h_w,   vvv)
+INSN_LSX(vsrlrn_w_d,   vvv)
+INSN_LSX(vsrarn_b_h,   vvv)
+INSN_LSX(vsrarn_h_w,   vvv)
+INSN_LSX(vsrarn_w_d,   vvv)
+
+INSN_LSX(vsrlrni_b_h,  vv_i)
+INSN_LSX(vsrlrni_h_w,  vv_i)
+INSN_LSX(vsrlrni_w_d,  vv_i)
+INSN_LSX(vsrlrni_d_q,  vv_i)
+INSN_LSX(vsrarni_b_h,  vv_i)
+INSN_LSX(vsrarni_h_w,  vv_i)
+INSN_LSX(vsrarni_w_d,  vv_i)
+INSN_LSX(vsrarni_d_q,  vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index afe7e3d2d0..0a8cfe3625 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -395,3 +395,19 @@ DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
 DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index ad34378ada..6034a74bfb 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3021,3 +3021,19 @@ TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h)
 TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w)
 TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d)
 TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q)
+
+TRANS(vsrlrn_b_h, gen_vvv, gen_helper_vsrlrn_b_h)
+TRANS(vsrlrn_h_w, gen_vvv, gen_helper_vsrlrn_h_w)
+TRANS(vsrlrn_w_d, gen_vvv, gen_helper_vsrlrn_w_d)
+TRANS(vsrarn_b_h, gen_vvv, gen_helper_vsrarn_b_h)
+TRANS(vsrarn_h_w, gen_vvv, gen_helper_vsrarn_h_w)
+TRANS(vsrarn_w_d, gen_vvv, gen_helper_vsrarn_w_d)
+
+TRANS(vsrlrni_b_h, gen_vv_i, gen_helper_vsrlrni_b_h)
+TRANS(vsrlrni_h_w, gen_vv_i, gen_helper_vsrlrni_h_w)
+TRANS(vsrlrni_w_d, gen_vv_i, gen_helper_vsrlrni_w_d)
+TRANS(vsrlrni_d_q, gen_vv_i, gen_helper_vsrlrni_d_q)
+TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h)
+TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w)
+TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d)
+TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ee54b632a7..29bf4a8a6a 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -883,3 +883,19 @@ vsrani_b_h   0111 00110101 1 1  . .   
@vv_ui4
 vsrani_h_w   0111 00110101 10001 . . .@vv_ui5
 vsrani_w_d   0111 00110101 1001 .. . .@vv_ui6
 vsrani_d_q   0111 00110101 101 ... . .@vv_ui7
+
+vsrlrn_b_h   0111  10001 . . .@vvv
+vsrlrn_h_w   0111  10010 . . .@vvv
+vsrlrn_w_d   0111  10011 . . .@vvv
+vsrarn_b_h   0111  10101 . . .@vvv
+vsrarn_h_w   0111  10110 . . .@vvv
+vsrarn_w_d   0111  10111 . . .@vvv
+
+vsrlrni_b_h  0111 00110100 01000 1  . .   @vv_ui4
+vsrlrni_h_w  0111 00110100 01001 . . .@vv_ui5
+vsrlrni_w_d  0111 00110100 0101 .. . .@vv_ui6
+vsrlrni_d_q  0111 00110100 011 ... . .@vv_ui7
+vsrarni_b_h  0111 00110101 11000 1  . .   @vv_ui4
+vsrarni_h_w  0111 00110101 11001 . . .@vv_ui5
+vsrarni_w_d  0111 00110101 1101 .. . .@vv_ui6
+vsrarni_d

[RFC PATCH v4 00/44] Add LoongArch LSX instructions

2023-04-25 Thread Song Gao
Hi,

This series adds the LoongArch LSX instructions. Since LoongArch
Vol2 is not yet public, we use the 'RFC' title.

I'm not sure when the manual will be open.
After these patches are reviewed, how about merging them?

About test:
V2: we use RISU to test the LoongArch LSX instructions.

QEMU:
https://github.com/loongson/qemu/tree/tcg-old-abi-support-lsx
RISU:
https://github.com/loongson/risu/tree/loongarch-suport-lsx

Build test:
make docker-test-build@fedora-i386-cross

The following patches need to be reviewed:
  0001-target-loongarch-Add-LSX-data-type-VReg.patch
  0014-target-loongarch-Implement-vmul-vmuh-vmulw-ev-od.patch
  0030-target-loongarch-Implement-vpcnt.patch
  0034-target-loongarch-Implement-LSX-fpu-fcvt-instructions.patch
  0037-target-loongarch-Implement-vbitsel-vset.patch
  0041-target-loongarch-Implement-vld-vst.patch

V4:
  - R-b and rebase;
  - Migrate the upper half lsx regs;
  - Remove tcg_gen_mulus2_*;
  - Vsetallnez use !do_match2;
  - Use tcg_gen_concat_i64_i128/tcg_gen_extr_i128_i64 to replace 
TCGV128_LOW(val)/TCGV128_High(val);

V3:
  - R-b;
  - Add unsigned data type in vreg;
  - Add ctx->vl;
  - Use tcg_constant_vec_matching instead of dupi;
  - Use __typeof(Vd->E(0)) instead of the output type;
  - Tcg integer expansion;
  - Use tcg_gen_qemu_ld/st_i128 to implement vld/vst;
  - Fix some typos;
  - Optimize code based on Richard's comments.

V2:
  - Use gvec;
  - Fix instructions bugs;
  - Add set_fpr()/get_fpr() to replace cpu_fpr.


Song Gao (44):
  target/loongarch: Add LSX data type VReg
  target/loongarch: meson.build support build LSX
  target/loongarch: Add CHECK_SXE macro for check LSX enable
  target/loongarch: Implement vadd/vsub
  target/loongarch: Implement vaddi/vsubi
  target/loongarch: Implement vneg
  target/loongarch: Implement vsadd/vssub
  target/loongarch: Implement vhaddw/vhsubw
  target/loongarch: Implement vaddw/vsubw
  target/loongarch: Implement vavg/vavgr
  target/loongarch: Implement vabsd
  target/loongarch: Implement vadda
  target/loongarch: Implement vmax/vmin
  target/loongarch: Implement vmul/vmuh/vmulw{ev/od}
  target/loongarch: Implement vmadd/vmsub/vmaddw{ev/od}
  target/loongarch: Implement vdiv/vmod
  target/loongarch: Implement vsat
  target/loongarch: Implement vexth
  target/loongarch: Implement vsigncov
  target/loongarch: Implement vmskltz/vmskgez/vmsknz
  target/loongarch: Implement LSX logic instructions
  target/loongarch: Implement vsll vsrl vsra vrotr
  target/loongarch: Implement vsllwil vextl
  target/loongarch: Implement vsrlr vsrar
  target/loongarch: Implement vsrln vsran
  target/loongarch: Implement vsrlrn vsrarn
  target/loongarch: Implement vssrln vssran
  target/loongarch: Implement vssrlrn vssrarn
  target/loongarch: Implement vclo vclz
  target/loongarch: Implement vpcnt
  target/loongarch: Implement vbitclr vbitset vbitrev
  target/loongarch: Implement vfrstp
  target/loongarch: Implement LSX fpu arith instructions
  target/loongarch: Implement LSX fpu fcvt instructions
  target/loongarch: Implement vseq vsle vslt
  target/loongarch: Implement vfcmp
  target/loongarch: Implement vbitsel vset
  target/loongarch: Implement vinsgr2vr vpickve2gr vreplgr2vr
  target/loongarch: Implement vreplve vpack vpick
  target/loongarch: Implement vilvl vilvh vextrins vshuf
  target/loongarch: Implement vld vst
  target/loongarch: Implement vldi
  target/loongarch: Use {set/get}_gpr replace to cpu_fpr
  target/loongarch: CPUCFG support LSX

 linux-user/loongarch64/signal.c   |4 +-
 target/loongarch/cpu.c|5 +-
 target/loongarch/cpu.h|   27 +-
 target/loongarch/disas.c  |  911 
 target/loongarch/fpu_helper.c |2 +-
 target/loongarch/gdbstub.c|4 +-
 target/loongarch/helper.h |  566 +++
 .../loongarch/insn_trans/trans_farith.c.inc   |   72 +-
 target/loongarch/insn_trans/trans_fcmp.c.inc  |   12 +-
 .../loongarch/insn_trans/trans_fmemory.c.inc  |   37 +-
 target/loongarch/insn_trans/trans_fmov.c.inc  |   31 +-
 target/loongarch/insn_trans/trans_lsx.c.inc   | 4400 +
 target/loongarch/insns.decode |  811 +++
 target/loongarch/internals.h  |   23 +
 target/loongarch/lsx_helper.c | 3004 +++
 target/loongarch/machine.c|   79 +-
 target/loongarch/meson.build  |1 +
 target/loongarch/translate.c  |   55 +-
 target/loongarch/translate.h  |1 +
 19 files changed, 9988 insertions(+), 57 deletions(-)
 create mode 100644 target/loongarch/insn_trans/trans_lsx.c.inc
 create mode 100644 target/loongarch/lsx_helper.c

-- 
2.31.1




[RFC PATCH v4 35/44] target/loongarch: Implement vseq vsle vslt

2023-04-25 Thread Song Gao
This patch includes:
- VSEQ[I].{B/H/W/D};
- VSLE[I].{B/H/W/D}[U];
- VSLT[I].{B/H/W/D}[U].

Reviewed-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 target/loongarch/disas.c|  43 +
 target/loongarch/helper.h   |  23 +++
 target/loongarch/insn_trans/trans_lsx.c.inc | 185 
 target/loongarch/insns.decode   |  43 +
 target/loongarch/lsx_helper.c   |  38 
 5 files changed, 332 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c04271081f..e589b23f4c 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1404,3 +1404,46 @@ INSN_LSX(vffint_d_lu,  vv)
 INSN_LSX(vffintl_d_w,  vv)
 INSN_LSX(vffinth_d_w,  vv)
 INSN_LSX(vffint_s_l,   vvv)
+
+INSN_LSX(vseq_b,   vvv)
+INSN_LSX(vseq_h,   vvv)
+INSN_LSX(vseq_w,   vvv)
+INSN_LSX(vseq_d,   vvv)
+INSN_LSX(vseqi_b,  vv_i)
+INSN_LSX(vseqi_h,  vv_i)
+INSN_LSX(vseqi_w,  vv_i)
+INSN_LSX(vseqi_d,  vv_i)
+
+INSN_LSX(vsle_b,   vvv)
+INSN_LSX(vsle_h,   vvv)
+INSN_LSX(vsle_w,   vvv)
+INSN_LSX(vsle_d,   vvv)
+INSN_LSX(vslei_b,  vv_i)
+INSN_LSX(vslei_h,  vv_i)
+INSN_LSX(vslei_w,  vv_i)
+INSN_LSX(vslei_d,  vv_i)
+INSN_LSX(vsle_bu,  vvv)
+INSN_LSX(vsle_hu,  vvv)
+INSN_LSX(vsle_wu,  vvv)
+INSN_LSX(vsle_du,  vvv)
+INSN_LSX(vslei_bu, vv_i)
+INSN_LSX(vslei_hu, vv_i)
+INSN_LSX(vslei_wu, vv_i)
+INSN_LSX(vslei_du, vv_i)
+
+INSN_LSX(vslt_b,   vvv)
+INSN_LSX(vslt_h,   vvv)
+INSN_LSX(vslt_w,   vvv)
+INSN_LSX(vslt_d,   vvv)
+INSN_LSX(vslti_b,  vv_i)
+INSN_LSX(vslti_h,  vv_i)
+INSN_LSX(vslti_w,  vv_i)
+INSN_LSX(vslti_d,  vv_i)
+INSN_LSX(vslt_bu,  vvv)
+INSN_LSX(vslt_hu,  vvv)
+INSN_LSX(vslt_wu,  vvv)
+INSN_LSX(vslt_du,  vvv)
+INSN_LSX(vslti_bu, vv_i)
+INSN_LSX(vslti_hu, vv_i)
+INSN_LSX(vslti_wu, vv_i)
+INSN_LSX(vslti_du, vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index f32235aa97..e9e9fa7f87 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -614,3 +614,26 @@ DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
 DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
 DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
 DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
+
+DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vseqi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vseqi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vslei_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslei_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(vslti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc 
b/target/loongarch/insn_trans/trans_lsx.c.inc
index e0c72c6bff..4d9f88bf4f 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3532,3 +3532,188 @@ TRANS(vffint_d_lu, gen_vv, gen_helper_vffint_d_lu)
 TRANS(vffintl_d_w, gen_vv, gen_helper_vffintl_d_w)
 TRANS(vffinth_d_w, gen_vv, gen_helper_vffinth_d_w)
 TRANS(vffint_s_l, gen_vvv, gen_helper_vffint_s_l)
+
+static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
+{
+uint32_t vd_ofs, vj_ofs, vk_ofs;
+
+CHECK_SXE;
+
+vd_ofs = vec_full_offset(a->vd);
+vj_ofs = vec_full_offset(a->vj);
+vk_ofs = vec_full_offset(a->vk);
+
+tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+return true;
+}
+
+static void do_cmpi_vec(TCGCond cond,
+unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, i

Re: [PATCH v2 03/16] qapi: Fix misspelled references

2023-04-25 Thread Juan Quintela
Markus Armbruster  wrote:
> query-cpu-definitions returns a list of CpuDefinitionInfo, but
> documentation claims CpuDefInfo, which doesn't exist.
>
> query-migrate-capabilities returns a list of
> MigrationCapabilityStatus, but documentation claims
> MigrationCapabilitiesStatus, which doesn't exist.
>
> balloon and query-balloon can fail with KVMMissingCap, but
> documentation claims KvmMissingCap, which doesn't exist.
>
> Fix the documentation.
>
> Fixes: e4e31c6324af (qapi: add query-cpu-definitions command (v2))
> Fixes: bbf6da32b5bd (Add migration capabilities)
> Fixes: d72f326431e2 (qapi: Convert balloon)
> Fixes: 96637bcdf9e0 (qapi: Convert query-balloon)
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Marc-André Lureau 
> Reviewed-by: David Hildenbrand 

Reviewed-by: Juan Quintela 




Re: [PATCH v2 1/3] tests/tcg: Make the QEMU headers available to the tests

2023-04-25 Thread Thomas Huth

On 24/04/2023 15.10, Ilya Leoshkevich wrote:

On Mon, 2023-04-24 at 14:00 +0100, Alex Bennée wrote:


Ilya Leoshkevich  writes:


The QEMU headers contain macros and functions that are useful in
the
test context. Add them to tests' include path. Also provide a
header
similar to "qemu/osdep.h" for use in the freestanding environment.

Tests that include <sys/auxv.h> get QEMU's copy of <elf.h>, which does
not work without <stdint.h>. Make use of the new header in these tests
in order to fix this.

Signed-off-by: Ilya Leoshkevich 
---
  tests/include/qemu/testdep.h   | 14 ++
  tests/tcg/Makefile.target  |  4 ++--
  tests/tcg/aarch64/sve-ioctls.c |  1 +
  tests/tcg/aarch64/sysregs.c    |  1 +
  4 files changed, 18 insertions(+), 2 deletions(-)
  create mode 100644 tests/include/qemu/testdep.h

diff --git a/tests/include/qemu/testdep.h
b/tests/include/qemu/testdep.h
new file mode 100644
index 000..ddf7c543bf4
--- /dev/null
+++ b/tests/include/qemu/testdep.h
@@ -0,0 +1,14 @@
+/*
+ * Common dependencies for QEMU tests.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef QEMU_TESTDEP_H
+#define QEMU_TESTDEP_H
+
+#include <stdint.h>
+#include "qemu/compiler.h"
+
+#define g_assert_not_reached __builtin_trap
+
+#endif
diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
index 8318caf9247..5474395e693 100644
--- a/tests/tcg/Makefile.target
+++ b/tests/tcg/Makefile.target
@@ -85,8 +85,8 @@ TESTS=
  # additional tests which may re-use existing binaries
  EXTRA_TESTS=
  
-# Start with a blank slate, the build targets get to add stuff

first
-CFLAGS=
+# Start with the minimal build flags, the build targets will
extend them
+CFLAGS=-I$(SRC_PATH)/include -I$(SRC_PATH)/tests/include
  LDFLAGS=


Hmm I'm not so sure about this. The tests are deliberately minimal in
terms of their dependencies because its hard enough getting a plain
cross-compiler to work. Is there really much benefit to allowing
this?
What happens when a user includes another header which relies on
functionality from one of the many libraries QEMU itself links to?


I don't think this will work at all, because the idea here is to allow
using the code in the freestanding tests. However, at least bswap.h
seems to work just fine. Of course, there is now additional maintenance
overhead to keep it this way, but I would argue it's better than
making a copy.


If this is just about one single header, I guess a

#include "../../../include/qemu/bswap.h"

would be acceptable, too, instead?

 Thomas





Re: [PATCH v2 07/16] qapi: Tidy up examples

2023-04-25 Thread Juan Quintela
Markus Armbruster  wrote:
> A few examples neglect to prefix QMP input with '->'.  Fix that.
>
> Two examples have extra space after '<-'.  Delete it.
>
> A few examples neglect to show output.  Provide some.  The example
> output for query-vcpu-dirty-limit could use further improvement.  Add
> a TODO comment.
>
> Use "Examples:" instead of "Example:" where multiple examples are
> given.
>
> One example section numbers its two examples.  Not done elsewhere;
> drop.
>
> Another example section separates them with "or".  Likewise.
>
> Signed-off-by: Markus Armbruster 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Marc-André Lureau 

Reviewed-by: Juan Quintela 




Re: [PATCH v2 1/1] migration: Disable postcopy + multifd migration

2023-04-25 Thread Juan Quintela
Leonardo Bras  wrote:
> Since the introduction of multifd, it's possible to perform a multifd
> migration and finish it using postcopy.
>
> A bug introduced by yank (fixed on cfc3bcf373) was previously preventing
> a successful use of this migration scenario, and now things should be
> working on most scenarios.
>
> But since there is not enough testing/support nor any reported users for
> this scenario, we should disable this combination before it may cause any
> problems for users.
>
> Suggested-by: Dr. David Alan Gilbert 
> Signed-off-by: Leonardo Bras 
> Acked-by: Peter Xu 
> Reviewed-by: Dr. David Alan Gilbert 

Reviewed-by: Juan Quintela 

queued.




Re: [PATCH] pci: make ROM memory resizable

2023-04-25 Thread Michael S. Tsirkin
On Mon, Apr 24, 2023 at 11:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On migration, on target we load local ROM file. But actual ROM content
> migrates through migration channel. Original ROM content from local
> file doesn't matter. But when size mismatch - we have an error like
> 
>  Size mismatch: :00:03.0/virtio-net-pci.rom: 0x4 != 0x8: Invalid 
> argument


Oh, this is this old bug then:
https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1713490

People seem to be "fixing" this by downgrading ROMs.

Actually, I think the fix is different: we need to build
versions of ROMs for old machine types that can fit
in the old BAR size.

Gerd, Laszlo what's your take on all this?



> Let's just allow resizing of ROM memory. This way migration does not
> rely on the local ROM file on the target node, which is loaded by default
> but is not actually needed.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hw/pci/pci.c  |  7 +--
>  include/exec/memory.h | 26 ++
>  softmmu/memory.c  | 39 +++
>  3 files changed, 70 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index def5000e7b..72ee8f6aea 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -59,6 +59,8 @@
>  # define PCI_DPRINTF(format, ...)   do { } while (0)
>  #endif
>  
> +#define MAX_ROM_SIZE (2 * GiB)
> +
>  bool pci_available = true;
>  
>  static char *pcibus_get_dev_path(DeviceState *dev);
> @@ -2341,7 +2343,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
> is_default_rom,
>  error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
>  g_free(path);
>  return;
> -} else if (size > 2 * GiB) {
> +} else if (size > MAX_ROM_SIZE) {
>  error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 
> GiB)",
> pdev->romfile);
>  g_free(path);
> @@ -2366,7 +2368,8 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
> is_default_rom,
>  snprintf(name, sizeof(name), "%s.rom", 
> object_get_typename(OBJECT(pdev)));
>  }
>  pdev->has_rom = true;
> -memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, 
> &error_fatal);
> +memory_region_init_rom_resizable(&pdev->rom, OBJECT(pdev), name,
> + pdev->romsize, MAX_ROM_SIZE, 
> &error_fatal);
>  ptr = memory_region_get_ram_ptr(&pdev->rom);
>  if (load_image_size(path, ptr, size) < 0) {
>  error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 15ade918ba..ed1e5d9126 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1453,6 +1453,19 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr,
>uint64_t size,
>Error **errp);
>  
> +/*
> + * memory_region_init_rom_nomigrate_resizable: same as
> + * memory_region_init_rom_nomigrate(), but initialize resizable memory 
> region.
> + *
> + * @max_size maximum allowed size.
> + */
> +void memory_region_init_rom_nomigrate_resizable(MemoryRegion *mr,
> +struct Object *owner,
> +const char *name,
> +uint64_t size,
> +uint64_t max_size,
> +Error **errp);
> +
>  /**
>   * memory_region_init_rom_device_nomigrate:  Initialize a ROM memory region.
>   * Writes are handled via callbacks.
> @@ -1562,6 +1575,19 @@ void memory_region_init_rom(MemoryRegion *mr,
>  uint64_t size,
>  Error **errp);
>  
> +/*
> + * memory_region_init_rom_resizable: same as memory_region_init_rom(),
> + * but initialize resizable memory region.
> + *
> + * @max_size maximum allowed size.
> + */
> +void memory_region_init_rom_resizable(MemoryRegion *mr,
> +  struct Object *owner,
> +  const char *name,
> +  uint64_t size,
> +  uint64_t max_size,
> +  Error **errp);
> +
>  /**
>   * memory_region_init_rom_device:  Initialize a ROM memory region.
>   * Writes are handled via callbacks.
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index b1a6cae6f5..744d03bc02 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -1701,6 +1701,18 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr,
>  mr->readonly = true;
>  }
>  
> +void memory_region_init_rom_nomigrate_resizable(MemoryRegion *mr,
> +struct Object *owner,
> +  

Re: [PATCH v8 1/8] memory: prevent dma-reentracy issues

2023-04-25 Thread Thomas Huth

On 21/04/2023 16.27, Alexander Bulekov wrote:

Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
This flag is set/checked prior to calling a device's MemoryRegion
handlers, and set when device code initiates DMA.  The purpose of this
flag is to prevent two types of DMA-based reentrancy issues:

1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case

These issues have led to problems such as stack-exhaustion and
use-after-frees.

Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/cafeaca_23vc7he3iam-jva6w38lk4hjowae5kcknhprd5fp...@mail.gmail.com

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
Resolves: CVE-2023-0330

Signed-off-by: Alexander Bulekov 
---
  include/exec/memory.h  |  2 ++
  include/hw/qdev-core.h |  7 +++
  softmmu/memory.c   | 14 ++
  softmmu/trace-events   |  1 +
  4 files changed, 24 insertions(+)


Reviewed-by: Thomas Huth 





[PATCH v3] hw/riscv/virt: Add a second UART for secure world

2023-04-25 Thread Yong Li
The virt machine can have two UARTs and the second UART
can be used by the secure payload, firmware or OS residing
in secure world. Will include the UART device to FDT in a
separate patch.

Signed-off-by: Yong Li 
Reviewed-by: LIU Zhiwei 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/riscv/virt.c | 4 
 include/hw/riscv/virt.h | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4e3efbee16..8e11c4b9b3 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -88,6 +88,7 @@ static const MemMapEntry virt_memmap[] = {
 [VIRT_APLIC_S] =  {  0xd00, APLIC_SIZE(VIRT_CPUS_MAX) },
 [VIRT_UART0] ={ 0x1000, 0x100 },
 [VIRT_VIRTIO] =   { 0x10001000,0x1000 },
+[VIRT_UART1] ={ 0x10002000, 0x100 },
 [VIRT_FW_CFG] =   { 0x1010,  0x18 },
 [VIRT_FLASH] ={ 0x2000, 0x400 },
 [VIRT_IMSIC_M] =  { 0x2400, VIRT_IMSIC_MAX_SIZE },
@@ -1506,6 +1507,9 @@ static void virt_machine_init(MachineState *machine)
 serial_mm_init(system_memory, memmap[VIRT_UART0].base,
 0, qdev_get_gpio_in(DEVICE(mmio_irqchip), UART0_IRQ), 399193,
 serial_hd(0), DEVICE_LITTLE_ENDIAN);
+serial_mm_init(system_memory, memmap[VIRT_UART1].base,
+0, qdev_get_gpio_in(DEVICE(mmio_irqchip), UART1_IRQ), 399193,
+serial_hd(1), DEVICE_LITTLE_ENDIAN);
 
 sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base,
 qdev_get_gpio_in(DEVICE(mmio_irqchip), RTC_IRQ));
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index e5c474b26e..8d2f8f225d 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -74,6 +74,7 @@ enum {
 VIRT_APLIC_S,
 VIRT_UART0,
 VIRT_VIRTIO,
+VIRT_UART1,
 VIRT_FW_CFG,
 VIRT_IMSIC_M,
 VIRT_IMSIC_S,
@@ -88,6 +89,7 @@ enum {
 enum {
 UART0_IRQ = 10,
 RTC_IRQ = 11,
+UART1_IRQ = 12,
 VIRTIO_IRQ = 1, /* 1 to 8 */
 VIRTIO_COUNT = 8,
 PCIE_IRQ = 0x20, /* 32 to 35 */
-- 
2.25.1




Re: [PATCH v8 5/8] memory: Allow disabling re-entrancy checking per-MR

2023-04-25 Thread Thomas Huth

On 21/04/2023 16.27, Alexander Bulekov wrote:

Signed-off-by: Alexander Bulekov 
Reviewed-by: Thomas Huth 
Reviewed-by: Darren Kenny 
---
  include/exec/memory.h | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 6c0a5e68d3..4e9531bd8a 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -793,6 +793,9 @@ struct MemoryRegion {
  unsigned ioeventfd_nb;
  MemoryRegionIoeventfd *ioeventfds;
  RamDiscardManager *rdm; /* Only for RAM */
+
+/* For devices designed to perform re-entrant IO into their own IO MRs */
+bool disable_reentrancy_guard;
  };
  
  struct IOMMUMemoryRegion {


Oh, wait, that check for "!mr->disable_reentrancy_guard" has been squashed 
into the first patch now ... that's bad, I think you should squash this into 
the first patch now, too!


 Thomas




Re: [PATCH] pci: make ROM memory resizable

2023-04-25 Thread Michael S. Tsirkin
On Tue, Apr 25, 2023 at 03:26:54AM -0400, Michael S. Tsirkin wrote:
> On Mon, Apr 24, 2023 at 11:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > On migration, on target we load local ROM file. But actual ROM content
> > migrates through migration channel. Original ROM content from local
> > file doesn't matter. But when size mismatch - we have an error like
> > 
> >  Size mismatch: :00:03.0/virtio-net-pci.rom: 0x4 != 0x8: 
> > Invalid argument
> 
> 
> Oh, this is this old bug then:
> https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1713490
> 
> People seem to be "fixing" this by downgrading ROMs.
> 
> Actually, I think the fix is different: we need to build
> versions of ROMs for old machine types that can fit
> in the old BAR size.
> 
> Gerd, Laszlo what's your take on all this?

Actually, ignore this - we do keep old ROMs around specifically to avoid
ROM size changes and have been for ever. E.g.:

commit c45e5b5b30ac1f5505725a7b36e68cedfce4f01f
Author: Gerd Hoffmann 
Date:   Tue Feb 26 17:46:11 2013 +0100

Switch to efi-enabled nic roms by default

All PCI nics are switched to EFI-enabled roms by default.  They are
composed from three images (legacy, efi ia32 & efi x86), so classic
pxe booting will continue to work.

Exception: eepro100 is not switched, it uses a single rom for all
emulated eepro100 variants, then goes patch the rom header on the
fly with the correct PCI IDs.  I doubt that will work as-is with
the efi roms.

Keep old roms for 1.4+older machine types via compat properties,
needed because the efi-enabled roms are larger so the pci rom bar
size would change.

Signed-off-by: Gerd Hoffmann 


So it's downstream messing up with things, overriding the
rom file then changing its size.


On fedora I find both pxe virtio and efi virtio so it gets it right.


> 
> 
> > Let's just allow resizing of ROM memory. This way migration does not
> > rely on the local ROM file on the target node, which is loaded by default
> > but is not actually needed.
> > 
> > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > ---
> >  hw/pci/pci.c  |  7 +--
> >  include/exec/memory.h | 26 ++
> >  softmmu/memory.c  | 39 +++
> >  3 files changed, 70 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index def5000e7b..72ee8f6aea 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -59,6 +59,8 @@
> >  # define PCI_DPRINTF(format, ...)   do { } while (0)
> >  #endif
> >  
> > +#define MAX_ROM_SIZE (2 * GiB)
> > +
> >  bool pci_available = true;
> >  
> >  static char *pcibus_get_dev_path(DeviceState *dev);
> > @@ -2341,7 +2343,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
> > is_default_rom,
> >  error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
> >  g_free(path);
> >  return;
> > -} else if (size > 2 * GiB) {
> > +} else if (size > MAX_ROM_SIZE) {
> >  error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 
> > GiB)",
> > pdev->romfile);
> >  g_free(path);
> > @@ -2366,7 +2368,8 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
> > is_default_rom,
> >  snprintf(name, sizeof(name), "%s.rom", 
> > object_get_typename(OBJECT(pdev)));
> >  }
> >  pdev->has_rom = true;
> > -memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, 
> > &error_fatal);
> > +memory_region_init_rom_resizable(&pdev->rom, OBJECT(pdev), name,
> > + pdev->romsize, MAX_ROM_SIZE, 
> > &error_fatal);
> >  ptr = memory_region_get_ram_ptr(&pdev->rom);
> >  if (load_image_size(path, ptr, size) < 0) {
> >  error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index 15ade918ba..ed1e5d9126 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -1453,6 +1453,19 @@ void memory_region_init_rom_nomigrate(MemoryRegion 
> > *mr,
> >uint64_t size,
> >Error **errp);
> >  
> > +/*
> > + * memory_region_init_rom_nomigrate_resizable: same as
> > + * memory_region_init_rom_nomigrate(), but initialize resizable memory 
> > region.
> > + *
> > + * @max_size maximum allowed size.
> > + */
> > +void memory_region_init_rom_nomigrate_resizable(MemoryRegion *mr,
> > +struct Object *owner,
> > +const char *name,
> > +uint64_t size,
> > +uint64_t max_size,
> > +Error **errp);
> > +
> >  /**
> >   * memory_region_init_rom_device_nomigrate:  Initialize a ROM memory 
> > region.
> >   *   

Re: [PATCH v8 7/8] bcm2835_property: disable reentrancy detection for iomem

2023-04-25 Thread Thomas Huth

On 21/04/2023 16.27, Alexander Bulekov wrote:

As the code is designed for re-entrant calls from bcm2835_property to
bcm2835_mbox and back into bcm2835_property, mark iomem as
reentrancy-safe.

Signed-off-by: Alexander Bulekov 
---
  hw/misc/bcm2835_property.c | 7 +++
  1 file changed, 7 insertions(+)


Reviewed-by: Thomas Huth 




[PULL 00/31] virtio,pc,pci: fixes, features, cleanups

2023-04-25 Thread Michael S. Tsirkin
The following changes since commit c1eb2ddf0f8075faddc5f7c3d39feae3e8e9d6b4:

  Update version for v8.0.0 release (2023-04-19 17:27:13 +0100)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to c28db9e0002df2abf88283b41dce0be17e8b0888:

  hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from TYPE_PXB_DEV 
(2023-04-24 22:56:55 -0400)


virtio,pc,pci: fixes, features, cleanups

Mostly just fixes, cleanups all over the place.
Some optimizations.
More control over slot_reserved_mask.
More feature bits supported for SVQ.

Signed-off-by: Michael S. Tsirkin 


Akihiko Odaki (1):
  docs: Remove obsolete descriptions of SR-IOV support

Ani Sinha (3):
  Add my old and new work email mapping and use work email to support 
biosbits
  Add my old and new work email mapping and use work email to support acpi
  hw/acpi: limit warning on acpi table size to pc machines older than 
version 2.3

Carlos López (1):
  virtio: refresh vring region cache after updating a virtqueue size

Chuck Zmudzinski (1):
  pci: avoid accessing slot_reserved_mask directly outside of pci.c

Cornelia Huck (1):
  hw: Add compat machines for 8.1

Eugenio Pérez (2):
  vdpa: accept VIRTIO_NET_F_SPEED_DUPLEX in SVQ
  MAINTAINERS: Add Eugenio Pérez as vhost-shadow-virtqueue reviewer

Igor Mammedov (1):
  acpi: pcihp: allow repeating hot-unplug requests

Jason Wang (1):
  intel_iommu: refine iotlb hash calculation

Jonathan Cameron (2):
  hw/pci-bridge: pci_expander_bridge fix type in pxb_cxl_dev_reset()
  hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from TYPE_PXB_DEV

Paolo Bonzini (1):
  tests: bios-tables-test: replace memset with initializer

Peter Maydell (3):
  docs/specs/pci-ids: Convert from txt to rST
  docs/specs: Convert pci-serial.txt to rst
  docs/specs: Convert pci-testdev.txt to rst

Peter Xu (1):
  vhost: Drop unused eventfd_add|del hooks

Philippe Mathieu-Daudé (6):
  MAINTAINERS: Mark AMD-Vi emulation as orphan
  hw/i386/amd_iommu: Explicit use of AMDVI_BASE_ADDR in amdvi_init
  hw/i386/amd_iommu: Remove intermediate AMDVIState::devid field
  hw/i386/amd_iommu: Move capab_offset from AMDVIState to AMDVIPCIState
  hw/i386/amd_iommu: Set PCI static/const fields via PCIDeviceClass
  hw/i386/amd_iommu: Factor amdvi_pci_realize out of amdvi_sysbus_realize

Stefan Weil (1):
  docs/cxl: Fix sentence

Thomas Huth (1):
  meson_options.txt: Enable qom-cast-debug by default again

Viresh Kumar (3):
  docs: vhost-user: Define memory region separately
  docs: vhost-user: Add Xen specific memory mapping support
  virtio: i2c: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX

Vladimir Sementsov-Ogievskiy (1):
  vhost-user-blk-server: notify client about disk resize

Yangming (1):
  virtio-balloon: optimize the virtio-balloon on the ARM platform

 docs/pcie_sriov.txt   |   5 +-
 docs/specs/pci-ids.txt|  70 -
 docs/specs/pci-serial.txt |  34 -
 docs/specs/pci-testdev.txt|  31 
 meson_options.txt |   2 +-
 hw/i386/amd_iommu.h   |   9 +--
 hw/i386/intel_iommu_internal.h|   6 +-
 include/hw/boards.h   |   5 ++
 include/hw/cxl/cxl.h  |   4 +-
 include/hw/i386/pc.h  |   6 ++
 include/hw/pci/pci.h  |   3 +
 include/hw/pci/pci_bridge.h   |  30 +---
 include/hw/virtio/virtio.h|   1 +
 subprojects/libvhost-user/libvhost-user.h |   2 +
 block/export/vhost-user-blk-server.c  |  24 ++
 hw/acpi/cxl.c |  11 +--
 hw/acpi/pcihp.c   |  10 +++
 hw/arm/virt.c |  11 ++-
 hw/char/serial-pci-multi.c|   2 +-
 hw/char/serial-pci.c  |   2 +-
 hw/core/machine.c |   3 +
 hw/cxl/cxl-host.c |   4 +-
 hw/i386/acpi-build.c  |  12 ++-
 hw/i386/amd_iommu.c   |  74 ++
 hw/i386/intel_iommu.c |   9 ++-
 hw/i386/pc.c  |   4 +
 hw/i386/pc_piix.c |  17 -
 hw/i386/pc_q35.c  |  14 +++-
 hw/m68k/virt.c|  11 ++-
 hw/mem/pc-dimm.c  |   7 ++
 hw/pci-bridge/pci_expander_bridge.c   |  61 +--
 hw/pci/pci.c  |  15 
 hw/ppc/spapr.c|  17 -
 hw/s390x/s390-virtio-ccw.c|  14 +++-
 hw/s390x/virtio-ccw.c  

[PULL 01/31] virtio: refresh vring region cache after updating a virtqueue size

2023-04-25 Thread Michael S. Tsirkin
From: Carlos López 

When a virtqueue size is changed by the guest via
virtio_queue_set_num(), its region cache is not automatically updated.
If the size was increased, this could lead to accessing the cache out
of bounds. For example, in vring_get_used_event():

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
return vring_avail_ring(vq, vq->vring.num);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, ring[i]);

if (!caches) {
return 0;
}

return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

vq->vring.num will be greater than caches->avail.len, which will
trigger a failed assertion down the call path of
virtio_lduw_phys_cached().

Fix this by calling virtio_init_region_cache() after
virtio_queue_set_num() if we are not already calling
virtio_queue_set_rings(). In the legacy path this is already done by
virtio_queue_update_rings().

Signed-off-by: Carlos López 
Message-Id: <20230317002749.27379-1-clo...@suse.de>
Reviewed-by: Cornelia Huck 
Acked-by: Halil Pasic 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio.h | 1 +
 hw/s390x/virtio-ccw.c  | 1 +
 hw/virtio/virtio-mmio.c| 1 +
 hw/virtio/virtio-pci.c | 1 +
 hw/virtio/virtio.c | 2 +-
 5 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index f236e94ca6..f6b38f7e9c 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -309,6 +309,7 @@ int virtio_get_num_queues(VirtIODevice *vdev);
 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
 hwaddr avail, hwaddr used);
 void virtio_queue_update_rings(VirtIODevice *vdev, int n);
+void virtio_init_region_cache(VirtIODevice *vdev, int n);
 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index e33e5207ab..f44de1a8c1 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -237,6 +237,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock 
*info,
 return -EINVAL;
 }
 virtio_queue_set_num(vdev, index, num);
+virtio_init_region_cache(vdev, index);
 } else if (virtio_queue_get_num(vdev, index) > num) {
 /* Fail if we don't have a big enough queue. */
 return -EINVAL;
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 23ba625eb6..c2c6d85475 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -354,6 +354,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 if (proxy->legacy) {
 virtio_queue_update_rings(vdev, vdev->queue_sel);
 } else {
+virtio_init_region_cache(vdev, vdev->queue_sel);
 proxy->vqs[vdev->queue_sel].num = value;
 }
 break;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 247325c193..02fb84a8fa 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1554,6 +1554,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr 
addr,
 proxy->vqs[vdev->queue_sel].num = val;
 virtio_queue_set_num(vdev, vdev->queue_sel,
  proxy->vqs[vdev->queue_sel].num);
+virtio_init_region_cache(vdev, vdev->queue_sel);
 break;
 case VIRTIO_PCI_COMMON_Q_MSIX:
 vector = virtio_queue_vector(vdev, vdev->queue_sel);
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 98c4819fcc..272d930721 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -226,7 +226,7 @@ static void virtio_virtqueue_reset_region_cache(struct 
VirtQueue *vq)
 }
 }
 
-static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+void virtio_init_region_cache(VirtIODevice *vdev, int n)
 {
 VirtQueue *vq = &vdev->vq[n];
 VRingMemoryRegionCaches *old = vq->vring.caches;
-- 
MST




[PULL 05/31] vhost: Drop unused eventfd_add|del hooks

2023-04-25 Thread Michael S. Tsirkin
From: Peter Xu 

These hooks were introduced in:

80a1ea3748 ("memory: move ioeventfd ops to MemoryListener", 2012-02-29)

But they seem to be never used.  Drop them.

Cc: Richard Henderson 
Signed-off-by: Peter Xu 
Message-Id: <20230306193209.516011-1-pet...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Jason Wang 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost.c | 14 --
 1 file changed, 14 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a266396576..746d130c74 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1291,18 +1291,6 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
0, virtio_queue_get_desc_size(vdev, idx));
 }
 
-static void vhost_eventfd_add(MemoryListener *listener,
-  MemoryRegionSection *section,
-  bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void vhost_eventfd_del(MemoryListener *listener,
-  MemoryRegionSection *section,
-  bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
 static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
 int n, uint32_t timeout)
 {
@@ -1457,8 +1445,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 .log_sync = vhost_log_sync,
 .log_global_start = vhost_log_global_start,
 .log_global_stop = vhost_log_global_stop,
-.eventfd_add = vhost_eventfd_add,
-.eventfd_del = vhost_eventfd_del,
 .priority = 10
 };
 
-- 
MST




[PULL 07/31] docs: vhost-user: Add Xen specific memory mapping support

2023-04-25 Thread Michael S. Tsirkin
From: Viresh Kumar 

The current model of memory mapping at the back-end works fine where a
standard call to mmap() (for the respective file descriptor) is enough
before the front-end can start accessing the guest memory.

There are other complex cases though where the back-end needs more
information and simple mmap() isn't enough. For example Xen, a type-1
hypervisor, currently supports memory mapping via two different methods,
foreign-mapping (via /dev/privcmd) and grant-dev (via /dev/gntdev). In
both these cases, the back-end needs to call mmap() and ioctl(), with
extra information like the Xen domain-id of the guest whose memory we
are trying to map.

Add a new protocol feature, 'VHOST_USER_PROTOCOL_F_XEN_MMAP', which lets
the back-end know about the additional memory mapping requirements.
When this feature is negotiated, the front-end will send the additional
information within the memory regions themselves.

Signed-off-by: Viresh Kumar 
Message-Id: 
<6d0bd7f0e1aeec3ddb603ae4ff334c75c7d0d7b3.1678351495.git.viresh.ku...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alex Bennée 
---
 docs/interop/vhost-user.rst | 21 +
 1 file changed, 21 insertions(+)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 1720d68126..5a070adbc1 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -145,6 +145,26 @@ Memory region description
 
 :mmap offset: 64-bit offset where region starts in the mapped memory
 
+When the ``VHOST_USER_PROTOCOL_F_XEN_MMAP`` protocol feature has been
+successfully negotiated, the memory region description contains two extra
+fields at the end.
+
++---+--+--+-++---+
+| guest address | size | user address | mmap offset | xen mmap flags | domid |
++---+--+--+-++---+
+
+:xen mmap flags: 32-bit bit field
+
+- Bit 0 is set for Xen foreign memory mapping.
+- Bit 1 is set for Xen grant memory mapping.
+- Bit 8 is set if the memory region can not be mapped in advance, and memory
+  areas within this region must be mapped / unmapped only when required by the
+  back-end. The back-end shouldn't try to map the entire region at once, as the
+  front-end may not allow it. The back-end should rather map only the required
+  amount of memory at once and unmap it after it is used.
+
+:domid: a 32-bit Xen hypervisor specific domain id.
+
 Single memory region description
 
 
@@ -864,6 +884,7 @@ Protocol features
   #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
   #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS  15
   #define VHOST_USER_PROTOCOL_F_STATUS   16
+  #define VHOST_USER_PROTOCOL_F_XEN_MMAP 17
 
 Front-end message types
 ---
-- 
MST




[PULL 08/31] virtio-balloon: optimize the virtio-balloon on the ARM platform

2023-04-25 Thread Michael S. Tsirkin
From: Yangming 

Optimize the virtio-balloon feature on the ARM platform by adding
a variable to keep track of the current hot-plugged pc-dimm size,
instead of traversing the virtual machine's memory modules to count
the current RAM size during the balloon inflation or deflation
process. This variable can be updated only when plugging or unplugging
the device, which results in an approximately 60% improvement in the
efficiency of the balloon process on the ARM platform.

We tested the total amount of time required for the balloon inflation process 
on ARM:
inflate the balloon to 64GB of a 128GB guest under stress.
Before: 102 seconds
After: 42 seconds

Signed-off-by: Qi Xi 
Signed-off-by: Ming Yang yangmin...@huawei.com
Message-Id: 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Acked-by: David Hildenbrand 
---
 include/hw/boards.h|  2 ++
 hw/mem/pc-dimm.c   |  7 +++
 hw/virtio/virtio-balloon.c | 33 +
 3 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6fbbfd56c8..f840f88d54 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -292,10 +292,12 @@ struct MachineClass {
  * @base: address in guest physical address space where the memory
  * address space for memory devices starts
  * @mr: address space container for memory devices
+ * @dimm_size: the sum of plugged DIMMs' sizes
  */
 typedef struct DeviceMemoryState {
 hwaddr base;
 MemoryRegion mr;
+uint64_t dimm_size;
 } DeviceMemoryState;
 
 /**
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 50ef83215c..37f1f4ccfd 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -81,6 +81,10 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine)
 
 memory_device_plug(MEMORY_DEVICE(dimm), machine);
 vmstate_register_ram(vmstate_mr, DEVICE(dimm));
+/* count only "real" DIMMs, not NVDIMMs */
+if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) {
+machine->device_memory->dimm_size += memory_region_size(vmstate_mr);
+}
 }
 
 void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
@@ -90,6 +94,9 @@ void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
 
 memory_device_unplug(MEMORY_DEVICE(dimm), machine);
 vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
+if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) {
+machine->device_memory->dimm_size -= memory_region_size(vmstate_mr);
+}
 }
 
 static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 746f07c4d2..2814a47cb1 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -729,37 +729,14 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, 
uint8_t *config_data)
 memcpy(config_data, &config, virtio_balloon_config_size(dev));
 }
 
-static int build_dimm_list(Object *obj, void *opaque)
-{
-GSList **list = opaque;
-
-if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
-DeviceState *dev = DEVICE(obj);
-if (dev->realized) { /* only realized DIMMs matter */
-*list = g_slist_prepend(*list, dev);
-}
-}
-
-object_child_foreach(obj, build_dimm_list, opaque);
-return 0;
-}
-
 static ram_addr_t get_current_ram_size(void)
 {
-GSList *list = NULL, *item;
-ram_addr_t size = current_machine->ram_size;
-
-build_dimm_list(qdev_get_machine(), &list);
-for (item = list; item; item = g_slist_next(item)) {
-Object *obj = OBJECT(item->data);
-if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
-size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
-&error_abort);
-}
+MachineState *machine = MACHINE(qdev_get_machine());
+if (machine->device_memory) {
+return machine->ram_size + machine->device_memory->dimm_size;
+} else {
+return machine->ram_size;
 }
-g_slist_free(list);
-
-return size;
 }
 
 static bool virtio_balloon_page_poison_support(void *opaque)
-- 
MST




[PULL 16/31] pci: avoid accessing slot_reserved_mask directly outside of pci.c

2023-04-25 Thread Michael S. Tsirkin
From: Chuck Zmudzinski 

This patch provides accessor functions as replacements for direct
access to slot_reserved_mask according to the comment at the top
of include/hw/pci/pci_bus.h which advises that data structures for
PCIBus should not be directly accessed but instead be accessed using
accessor functions in pci.h.

Three accessor functions can conveniently replace all direct accesses
of slot_reserved_mask. With this patch, the new accessor functions are
used in hw/sparc64/sun4u.c and hw/xen/xen_pt.c and pci_bus.h is removed
from the included header files of the same two files.

No functional change intended.

Signed-off-by: Chuck Zmudzinski 
Message-Id: 

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Mark Cave-Ayland 
Tested-by: Mark Cave-Ayland  [sun4u]
---
 include/hw/pci/pci.h |  3 +++
 hw/pci/pci.c | 15 +++
 hw/sparc64/sun4u.c   |  7 +++
 hw/xen/xen_pt.c  |  7 +++
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d5a40cd058..935b4b91b4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -287,6 +287,9 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
 void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq);
 void pci_bus_irqs_cleanup(PCIBus *bus);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus);
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask);
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask);
 /* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
 static inline int pci_swizzle(int slot, int pin)
 {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index def5000e7b..8a87ccc8b0 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1116,6 +1116,21 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int 
devfn)
 return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn));
 }
 
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus)
+{
+return bus->slot_reserved_mask;
+}
+
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+bus->slot_reserved_mask |= mask;
+}
+
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+bus->slot_reserved_mask &= ~mask;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
  const char *name, int devfn,
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index a25e951f9d..eae7589462 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -31,7 +31,6 @@
 #include "hw/irq.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bridge.h"
-#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_host.h"
 #include "hw/qdev-properties.h"
 #include "hw/pci-host/sabre.h"
@@ -608,9 +607,9 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
 /* Only in-built Simba APBs can exist on the root bus, slot 0 on busA is
reserved (leaving no slots free after on-board devices) however slots
0-3 are free on busB */
-pci_bus->slot_reserved_mask = 0xfffc;
-pci_busA->slot_reserved_mask = 0xfff1;
-pci_busB->slot_reserved_mask = 0xfff0;
+pci_bus_set_slot_reserved_mask(pci_bus, 0xfffc);
+pci_bus_set_slot_reserved_mask(pci_busA, 0xfff1);
+pci_bus_set_slot_reserved_mask(pci_busB, 0xfff0);
 
 ebus = pci_new_multifunction(PCI_DEVFN(1, 0), true, TYPE_EBUS);
 qdev_prop_set_uint64(DEVICE(ebus), "console-serial-base",
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 2d33d178ad..a540149639 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -57,7 +57,6 @@
 #include 
 
 #include "hw/pci/pci.h"
-#include "hw/pci/pci_bus.h"
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
 #include "xen_pt.h"
@@ -951,7 +950,7 @@ void xen_igd_reserve_slot(PCIBus *pci_bus)
 }
 
 XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n");
-pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK;
+pci_bus_set_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK);
 }
 
 static void xen_igd_clear_slot(DeviceState *qdev, Error **errp)
@@ -971,7 +970,7 @@ static void xen_igd_clear_slot(DeviceState *qdev, Error 
**errp)
 return;
 }
 
-if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) {
+if (!(pci_bus_get_slot_reserved_mask(pci_bus) & XEN_PCI_IGD_SLOT_MASK)) {
 xpdc->pci_qdev_realize(qdev, errp);
 return;
 }
@@ -982,7 +981,7 @@ static void xen_igd_clear_slot(DeviceState *qdev, Error 
**errp)
 s->real_device.dev == XEN_PCI_IGD_DEV &&
 s->real_device.func == XEN_PCI_IGD_FN &&
 s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) {
-pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK;
+pci_bus_clear_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK);
 XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n");
 }
 xpdc->pci_qdev_realize(qdev, errp);
-- 
MST




[PULL 12/31] hw/i386/amd_iommu: Move capab_offset from AMDVIState to AMDVIPCIState

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

The 'PCI capability offset' is a *PCI* notion. Since AMDVIPCIState
inherits PCIDevice and holds PCI-related fields, move capab_offset
from AMDVIState to AMDVIPCIState.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-5-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  |  2 +-
 hw/i386/acpi-build.c |  2 +-
 hw/i386/amd_iommu.c  | 14 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 5eccaad790..1c0cb54bd4 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -308,6 +308,7 @@ typedef struct AMDVIAddressSpace AMDVIAddressSpace;
 /* functions to steal PCI config space */
 typedef struct AMDVIPCIState {
 PCIDevice dev;   /* The PCI device itself*/
+uint32_t capab_offset;   /* capability offset pointer*/
 } AMDVIPCIState;
 
 struct AMDVIState {
@@ -315,7 +316,6 @@ struct AMDVIState {
 AMDVIPCIState pci;  /* IOMMU PCI device */
 
 uint32_t version;
-uint32_t capab_offset;   /* capability offset pointer*/
 
 uint64_t mmio_addr;
 
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a27bc33956..7f211e1f48 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2399,7 +2399,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
   object_property_get_int(OBJECT(&s->pci), "addr",
   &error_abort), 2);
 /* Capability offset */
-build_append_int_noprefix(table_data, s->capab_offset, 2);
+build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
 /* IOMMU base address */
 build_append_int_noprefix(table_data, s->mmio.addr, 8);
 /* PCI Segment Group */
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 19f57e6318..9f6622e11f 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1516,15 +1516,15 @@ static void amdvi_init(AMDVIState *s)
 pci_config_set_class(s->pci.dev.config, 0x0806);
 
 /* reset AMDVI specific capabilities, all r/o */
-pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
-pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
+pci_set_long(s->pci.dev.config + s->pci.capab_offset, 
AMDVI_CAPAB_FEATURES);
+pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_BAR_LOW,
  AMDVI_BASE_ADDR & ~(0x));
-pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
+pci_set_long(s->pci.dev.config + s->pci.capab_offset + 
AMDVI_CAPAB_BAR_HIGH,
 (AMDVI_BASE_ADDR & ~(0x)) >> 16);
-pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
+pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_RANGE,
  0xff00);
-pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
-pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
+pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, 
0);
+pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC,
 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
 }
 
@@ -1557,7 +1557,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error 
**errp)
 if (ret < 0) {
 return;
 }
-s->capab_offset = ret;
+s->pci.capab_offset = ret;
 
 ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
  AMDVI_CAPAB_REG_SIZE, errp);
-- 
MST




[PULL 17/31] vhost-user-blk-server: notify client about disk resize

2023-04-25 Thread Michael S. Tsirkin
From: Vladimir Sementsov-Ogievskiy 

Currently block_resize qmp command is simply ignored by vhost-user-blk
export. So, the block-node is successfully resized, but virtio config
is unchanged and guest doesn't see that disk is resized.

Let's handle the resize by modifying the config and notifying the guest
appropriately.

After this commit, lsblk in linux guest with attached
vhost-user-blk-pci device shows new size immediately after block_resize
QMP command on vhost-user exported block node.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20230321201323.3695923-1-vsement...@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 subprojects/libvhost-user/libvhost-user.h |  2 ++
 block/export/vhost-user-blk-server.c  | 24 +++
 subprojects/libvhost-user/libvhost-user.c | 10 ++
 3 files changed, 36 insertions(+)

diff --git a/subprojects/libvhost-user/libvhost-user.h 
b/subprojects/libvhost-user/libvhost-user.h
index 8c5a2719e3..49208cceaa 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -585,6 +585,8 @@ bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
  */
 void vu_queue_notify(VuDev *dev, VuVirtq *vq);
 
+void vu_config_change_msg(VuDev *dev);
+
 /**
  * vu_queue_notify_sync:
  * @dev: a VuDev context
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 3409d9e02e..e56b92f2e2 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -10,6 +10,7 @@
  * later.  See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "block/block.h"
 #include "subprojects/libvhost-user/libvhost-user.h" /* only for the type 
definitions */
 #include "standard-headers/linux/virtio_blk.h"
@@ -251,6 +252,27 @@ static void vu_blk_exp_request_shutdown(BlockExport *exp)
 vhost_user_server_stop(&vexp->vu_server);
 }
 
+static void vu_blk_exp_resize(void *opaque)
+{
+VuBlkExport *vexp = opaque;
+BlockDriverState *bs = blk_bs(vexp->handler.blk);
+int64_t new_size = bdrv_getlength(bs);
+
+if (new_size < 0) {
+error_printf("Failed to get length of block node '%s'",
+ bdrv_get_node_name(bs));
+return;
+}
+
+vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);
+
+vu_config_change_msg(&vexp->vu_server.vu_dev);
+}
+
+static const BlockDevOps vu_blk_dev_ops = {
+.resize_cb = vu_blk_exp_resize,
+};
+
 static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
  Error **errp)
 {
@@ -292,6 +314,8 @@ static int vu_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
  vexp);
 
+blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);
+
 if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
  num_queues, &vu_blk_iface, errp)) {
 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 0200b78e8e..0abd898a52 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -2455,6 +2455,16 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
 _vu_queue_notify(dev, vq, true);
 }
 
+void vu_config_change_msg(VuDev *dev)
+{
+VhostUserMsg vmsg = {
+.request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG,
+.flags = VHOST_USER_VERSION,
+};
+
+vu_message_write(dev, dev->slave_fd, &vmsg);
+}
+
 static inline void
 vring_used_flags_set_bit(VuVirtq *vq, int mask)
 {
-- 
MST




[PULL 10/31] hw/i386/amd_iommu: Explicit use of AMDVI_BASE_ADDR in amdvi_init

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

By accessing MemoryRegion internals, amdvi_init() gives the false
idea that the PCI BAR can be modified. However this isn't true
(at least the model isn't ready for that): the device's BAR is
explicitly mapped at the fixed AMDVI_BASE_ADDR address in
amdvi_sysbus_realize(). Since the SysBus API isn't designed to
remap regions, directly use the fixed address in amdvi_init().

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-3-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index bcd016f5c5..3813b341ec 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1519,9 +1519,9 @@ static void amdvi_init(AMDVIState *s)
 /* reset AMDVI specific capabilities, all r/o */
 pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
- s->mmio.addr & ~(0x));
+ AMDVI_BASE_ADDR & ~(0x));
 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
-(s->mmio.addr & ~(0x)) >> 16);
+(AMDVI_BASE_ADDR & ~(0x)) >> 16);
 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
  0xff00);
 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
-- 
MST




[PULL 06/31] docs: vhost-user: Define memory region separately

2023-04-25 Thread Michael S. Tsirkin
From: Viresh Kumar 

The same layout is defined twice, once in "single memory region
description" and then in "memory regions description".

Separate out details of memory region from these two and reuse the same
definition later on.

While at it, also rename "memory regions description" to "multiple
memory regions description", to avoid potential confusion around similar
names. And define single region before multiple ones.

This is just a documentation optimization, the protocol remains the same.

Signed-off-by: Viresh Kumar 
Message-Id: 
<7c3718e5eb99178b22696682ae73aca6df1899c7.1678351495.git.viresh.ku...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alex Bennée 
---
 docs/interop/vhost-user.rst | 39 +
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 8a5924ea75..1720d68126 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -130,18 +130,8 @@ A vring address description
 Note that a ring address is an IOVA if ``VIRTIO_F_IOMMU_PLATFORM`` has
 been negotiated. Otherwise it is a user address.
 
-Memory regions description
-^^
-
-+-+-+-+-+-+
-| num regions | padding | region0 | ... | region7 |
-+-+-+-+-+-+
-
-:num regions: a 32-bit number of regions
-
-:padding: 32-bit
-
-A region is:
+Memory region description
+^
 
 +---+--+--+-+
 | guest address | size | user address | mmap offset |
@@ -158,19 +148,26 @@ A region is:
 Single memory region description
 
 
-+-+---+--+--+-+
-| padding | guest address | size | user address | mmap offset |
-+-+---+--+--+-+
++-++
+| padding | region |
++-++
 
 :padding: 64-bit
 
-:guest address: a 64-bit guest address of the region
+A region is represented by Memory region description.
 
-:size: a 64-bit size
+Multiple Memory regions description
+^^^
 
-:user address: a 64-bit user address
++-+-+-+-+-+
+| num regions | padding | region0 | ... | region7 |
++-+-+-+-+-+
 
-:mmap offset: 64-bit offset where region starts in the mapped memory
+:num regions: a 32-bit number of regions
+
+:padding: 32-bit
+
+A region is represented by Memory region description.
 
 Log description
 ^^^
@@ -952,8 +949,8 @@ Front-end message types
 ``VHOST_USER_SET_MEM_TABLE``
   :id: 5
   :equivalent ioctl: ``VHOST_SET_MEM_TABLE``
-  :request payload: memory regions description
-  :reply payload: (postcopy only) memory regions description
+  :request payload: multiple memory regions description
+  :reply payload: (postcopy only) multiple memory regions description
 
   Sets the memory map regions on the back-end so it can translate the
   vring addresses. In the ancillary data there is an array of file
-- 
MST




[PULL 02/31] Add my old and new work email mapping and use work email to support biosbits

2023-04-25 Thread Michael S. Tsirkin
From: Ani Sinha 

Update mailmap to indicate a...@anisinha.ca and anisi...@redhat.com are one and
the same person. Additionally update MAINTAINERS and bits documentation to use
my work (redhat) email.

Signed-off-by: Ani Sinha 
Message-Id: <20230320114233.90638-1-anisi...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 .mailmap | 1 +
 MAINTAINERS  | 2 +-
 docs/devel/acpi-bits.rst | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 7677047950..bbe6d3fd69 100644
--- a/.mailmap
+++ b/.mailmap
@@ -54,6 +54,7 @@ Aleksandar Markovic  

 Aleksandar Rikalo  
 Aleksandar Rikalo  
 Alexander Graf  
+Ani Sinha  
 Anthony Liguori  Anthony Liguori 
 Christian Borntraeger  
 Damien Hedde  
diff --git a/MAINTAINERS b/MAINTAINERS
index 2c2068ea5c..0bd3d1830e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1931,7 +1931,7 @@ F: hw/acpi/viot.c
 F: hw/acpi/viot.h
 
 ACPI/AVOCADO/BIOSBITS
-M: Ani Sinha 
+M: Ani Sinha 
 M: Michael S. Tsirkin 
 S: Supported
 F: tests/avocado/acpi-bits/*
diff --git a/docs/devel/acpi-bits.rst b/docs/devel/acpi-bits.rst
index 9eb4b9e666..22e2580200 100644
--- a/docs/devel/acpi-bits.rst
+++ b/docs/devel/acpi-bits.rst
@@ -135,7 +135,7 @@ Under ``tests/avocado/`` as the root we have:
(c) They need not be loaded by avocado framework when running tests.
 
 
-Author: Ani Sinha 
+Author: Ani Sinha 
 
 References:
 ---
-- 
MST




[PULL 20/31] tests: bios-tables-test: replace memset with initializer

2023-04-25 Thread Michael S. Tsirkin
From: Paolo Bonzini 

Coverity complains that memset() writes over a const field.  Use
an initializer instead, so that the const field is left to zero.
Tests that have to write the const field already use an initializer
for the whole struct, here I am choosing the smallest possible
patch (which is not that small already).

Cc: Michael S. Tsirkin 
Signed-off-by: Paolo Bonzini 
Message-Id: <20230330131109.47856-1-pbonz...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Thomas Huth 
---
 tests/qtest/bios-tables-test.c | 123 -
 1 file changed, 43 insertions(+), 80 deletions(-)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 8733589437..464f87382e 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -438,10 +438,9 @@ static void test_acpi_asl(test_data *data)
 {
 int i;
 AcpiSdtTable *sdt, *exp_sdt;
-test_data exp_data;
+test_data exp_data = {};
 gboolean exp_err, err, all_tables_match = true;
 
-memset(&exp_data, 0, sizeof(exp_data));
 exp_data.tables = load_expected_aml(data);
 dump_aml_files(data, false);
 for (i = 0; i < data->tables->len; ++i) {
@@ -853,12 +852,11 @@ static uint8_t base_required_struct_types[] = {
 
 static void test_acpi_piix4_tcg(void)
 {
-test_data data;
+test_data data = {};
 
 /* Supplying -machine accel argument overrides the default (qtest).
  * This is to make guest actually run.
  */
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.required_struct_types = base_required_struct_types;
 data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types);
@@ -868,9 +866,8 @@ static void test_acpi_piix4_tcg(void)
 
 static void test_acpi_piix4_tcg_bridge(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.variant = ".bridge";
 data.required_struct_types = base_required_struct_types;
@@ -906,9 +903,8 @@ static void test_acpi_piix4_tcg_bridge(void)
 
 static void test_acpi_piix4_no_root_hotplug(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.variant = ".roothp";
 data.required_struct_types = base_required_struct_types;
@@ -923,9 +919,8 @@ static void test_acpi_piix4_no_root_hotplug(void)
 
 static void test_acpi_piix4_no_bridge_hotplug(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.variant = ".hpbridge";
 data.required_struct_types = base_required_struct_types;
@@ -940,9 +935,8 @@ static void test_acpi_piix4_no_bridge_hotplug(void)
 
 static void test_acpi_piix4_no_acpi_pci_hotplug(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.variant = ".hpbrroot";
 data.required_struct_types = base_required_struct_types;
@@ -962,9 +956,8 @@ static void test_acpi_piix4_no_acpi_pci_hotplug(void)
 
 static void test_acpi_q35_tcg(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_Q35;
 data.required_struct_types = base_required_struct_types;
 data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types);
@@ -994,9 +987,8 @@ static void test_acpi_q35_tcg_core_count2(void)
 
 static void test_acpi_q35_tcg_bridge(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_Q35;
 data.variant = ".bridge";
 data.required_struct_types = base_required_struct_types;
@@ -1009,9 +1001,8 @@ static void test_acpi_q35_tcg_bridge(void)
 
 static void test_acpi_q35_tcg_no_acpi_hotplug(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_Q35;
 data.variant = ".noacpihp";
 data.required_struct_types = base_required_struct_types;
@@ -1105,9 +1096,8 @@ static void test_acpi_q35_tcg_mmio64(void)
 
 static void test_acpi_piix4_tcg_cphp(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_PC;
 data.variant = ".cphp";
 test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6"
@@ -1121,9 +,8 @@ static void test_acpi_piix4_tcg_cphp(void)
 
 static void test_acpi_q35_tcg_cphp(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_Q35;
 data.variant = ".cphp";
 test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6"
@@ -1141,9 +1130,8 @@ static uint8_t ipmi_required_struct_types[] = {
 
 static void test_acpi_q35_tcg_ipmi(void)
 {
-test_data data;
+test_data data = {};
 
-memset(&data, 0, sizeof(data));
 data.machine = MACHINE_Q35;
 data.

[PULL 14/31] hw/i386/amd_iommu: Factor amdvi_pci_realize out of amdvi_sysbus_realize

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

Aside from the Frankenstein model of a SysBusDevice realizing a PCIDevice,
QOM parents shouldn't access children internals. In this particular
case, amdvi_sysbus_realize() is just open-coding TYPE_AMD_IOMMU_PCI's
DeviceRealize() handler. Factor it out.

Declare QOM-cast macros with OBJECT_DECLARE_SIMPLE_TYPE() so we can
cast the AMDVIPCIState in amdvi_pci_realize().

Note this commit removes the single use in the repository of
pci_add_capability() and msi_init() on a *realized* QDev instance.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-7-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h |  5 ++--
 hw/i386/amd_iommu.c | 62 ++---
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 1c0cb54bd4..6da893ee57 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -300,16 +300,17 @@ struct irte_ga {
 OBJECT_DECLARE_SIMPLE_TYPE(AMDVIState, AMD_IOMMU_DEVICE)
 
 #define TYPE_AMD_IOMMU_PCI "AMDVI-PCI"
+OBJECT_DECLARE_SIMPLE_TYPE(AMDVIPCIState, AMD_IOMMU_PCI)
 
 #define TYPE_AMD_IOMMU_MEMORY_REGION "amd-iommu-iommu-memory-region"
 
 typedef struct AMDVIAddressSpace AMDVIAddressSpace;
 
 /* functions to steal PCI config space */
-typedef struct AMDVIPCIState {
+struct AMDVIPCIState {
 PCIDevice dev;   /* The PCI device itself*/
 uint32_t capab_offset;   /* capability offset pointer*/
-} AMDVIPCIState;
+};
 
 struct AMDVIState {
 X86IOMMUState iommu;/* IOMMU bus device */
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 8e4ce63f8e..9c77304438 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1509,20 +1509,48 @@ static void amdvi_init(AMDVIState *s)
 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
 0xffef, 0);
 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
+}
+
+static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
+{
+AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
+int ret;
+
+ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE, errp);
+if (ret < 0) {
+return;
+}
+s->capab_offset = ret;
+
+ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
+ AMDVI_CAPAB_REG_SIZE, errp);
+if (ret < 0) {
+return;
+}
+ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
+ AMDVI_CAPAB_REG_SIZE, errp);
+if (ret < 0) {
+return;
+}
+
+if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
+return;
+}
 
 /* reset device ident */
-pci_config_set_prog_interface(s->pci.dev.config, 00);
+pci_config_set_prog_interface(pdev->config, 0);
 
 /* reset AMDVI specific capabilities, all r/o */
-pci_set_long(s->pci.dev.config + s->pci.capab_offset, 
AMDVI_CAPAB_FEATURES);
-pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_BAR_LOW,
+pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
+pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
  AMDVI_BASE_ADDR & ~(0x));
-pci_set_long(s->pci.dev.config + s->pci.capab_offset + 
AMDVI_CAPAB_BAR_HIGH,
+pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
 (AMDVI_BASE_ADDR & ~(0x)) >> 16);
-pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_RANGE,
+pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
  0xff00);
-pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, 
0);
-pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC,
+pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
+pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
 }
 
@@ -1536,7 +1564,6 @@ static void amdvi_sysbus_reset(DeviceState *dev)
 
 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
 {
-int ret = 0;
 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
 MachineState *ms = MACHINE(qdev_get_machine());
 PCMachineState *pcms = PC_MACHINE(ms);
@@ -1550,23 +1577,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error 
**errp)
 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
 return;
 }
-ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
- AMDVI_CAPAB_SIZE, errp);
-if (ret < 0) {
-return;
-}
-s->pci.capab_offset = ret;
-
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
-if (ret < 0) {
-return;
-}
-ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
-  

[PULL 04/31] meson_options.txt: Enable qom-cast-debug by default again

2023-04-25 Thread Michael S. Tsirkin
From: Thomas Huth 

This switch had been disabled by default by accident in commit
c55cf6ab03f. But we should enable it by default instead to avoid
regressions in the QOM device hierarchy.

Fixes: c55cf6ab03 ("configure, meson: move some default-disabled options to 
meson_options.txt")
Signed-off-by: Thomas Huth 
Message-Id: <20230417130037.236747-3-th...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Maydell 
Reported-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 meson_options.txt | 2 +-
 scripts/meson-buildoptions.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..2471dd02da 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -315,7 +315,7 @@ option('debug_mutex', type: 'boolean', value: false,
description: 'mutex debugging support')
 option('debug_stack_usage', type: 'boolean', value: false,
description: 'measure coroutine stack usage')
-option('qom_cast_debug', type: 'boolean', value: false,
+option('qom_cast_debug', type: 'boolean', value: true,
description: 'cast debugging support')
 option('gprof', type: 'boolean', value: false,
description: 'QEMU profiling with gprof',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..d4369a3ad8 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -14,6 +14,7 @@ meson_options_help() {
   printf "%s\n" '   use idef-parser to automatically 
generate TCG'
   printf "%s\n" '   code for the Hexagon frontend'
   printf "%s\n" '  --disable-install-blobs  install provided firmware blobs'
+  printf "%s\n" '  --disable-qom-cast-debug cast debugging support'
   printf "%s\n" '  --docdir=VALUE   Base directory for documentation 
installation'
   printf "%s\n" '   (can be empty) [share/doc]'
   printf "%s\n" '  --enable-block-drv-whitelist-in-tools'
@@ -35,7 +36,6 @@ meson_options_help() {
   printf "%s\n" '  --enable-module-upgrades try to load modules from alternate 
paths for'
   printf "%s\n" '   upgrades'
   printf "%s\n" '  --enable-profilerprofiler support'
-  printf "%s\n" '  --enable-qom-cast-debug  cast debugging support'
   printf "%s\n" '  --enable-rng-nonedummy RNG, avoid using 
/dev/(u)random and'
   printf "%s\n" '   getrandom()'
   printf "%s\n" '  --enable-strip   Strip targets on install'
-- 
MST




[PULL 19/31] hw/acpi: limit warning on acpi table size to pc machines older than version 2.3

2023-04-25 Thread Michael S. Tsirkin
From: Ani Sinha 

i440fx machine versions 2.3 and newer support dynamic RAM
resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks 
resizeable").
All currently supported q35 machine types (versions 2.4 and newer) support
resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table
size exceeds a pre-defined value does not apply to those machine versions.
Add a check limiting the warning message to only those machines that do not
support expandable RAM blocks (that is, i440fx machines with version 2.2
and older).

Signed-off-by: Ani Sinha 
Message-Id: <20230329045726.14028-1-anisi...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/i386/pc.h | 3 +++
 hw/i386/acpi-build.c | 6 --
 hw/i386/pc.c | 1 +
 hw/i386/pc_piix.c| 1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index eb668e9034..84935fc958 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -127,6 +127,9 @@ struct PCMachineClass {
 
 /* create kvmclock device even when KVM PV features are not exposed */
 bool kvmclock_create_always;
+
+/* resizable acpi blob compat */
+bool resizable_acpi_blob;
 };
 
 #define TYPE_PC_MACHINE "generic-pc-machine"
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 7f211e1f48..512162003b 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2697,7 +2697,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
*machine)
 int legacy_table_size =
 ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
  ACPI_BUILD_ALIGN_SIZE);
-if (tables_blob->len > legacy_table_size) {
+if ((tables_blob->len > legacy_table_size) &&
+!pcmc->resizable_acpi_blob) {
 /* Should happen only with PCI bridges and -M pc-i440fx-2.0.  */
 warn_report("ACPI table size %u exceeds %d bytes,"
 " migration may not work",
@@ -2708,7 +2709,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
*machine)
 g_array_set_size(tables_blob, legacy_table_size);
 } else {
 /* Make sure we have a buffer in case we need to resize the tables. */
-if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
+if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
+!pcmc->resizable_acpi_blob) {
 /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
 */
 warn_report("ACPI table size %u exceeds %d bytes,"
 " migration may not work",
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 615e1d3d06..d761c8c775 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1946,6 +1946,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 pcmc->acpi_data_size = 0x2 + 0x8000;
 pcmc->pvh_enabled = true;
 pcmc->kvmclock_create_always = true;
+pcmc->resizable_acpi_blob = true;
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = pc_get_hotplug_handler;
 mc->hotplug_allowed = pc_hotplug_allowed;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 21591dad8d..66a849d279 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -756,6 +756,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m)
 compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
 compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
 pcmc->rsdp_in_ram = false;
+pcmc->resizable_acpi_blob = false;
 }
 
 DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
-- 
MST




[PULL 21/31] MAINTAINERS: Add Eugenio Pérez as vhost-shadow-virtqueue reviewer

2023-04-25 Thread Michael S. Tsirkin
From: Eugenio Pérez 

I'd like to be notified on SVQ patches and review them.

Signed-off-by: Eugenio Pérez 
Message-Id: <20230331150410.2627214-1-epere...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
---
 MAINTAINERS | 4 
 1 file changed, 4 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e365a7a47e..5e2d4b2c2c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2075,6 +2075,10 @@ F: backends/vhost-user.c
 F: include/sysemu/vhost-user-backend.h
 F: subprojects/libvhost-user/
 
+vhost-shadow-virtqueue
+R: Eugenio Pérez 
+F: hw/virtio/vhost-shadow-virtqueue.*
+
 virtio
 M: Michael S. Tsirkin 
 S: Supported
-- 
MST




[PULL 22/31] docs/cxl: Fix sentence

2023-04-25 Thread Michael S. Tsirkin
From: Stefan Weil 

Signed-off-by: Stefan Weil 
Message-Id: <20230409201828.1159568-1...@weilnetz.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/system/devices/cxl.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index f25783a4ec..4c38223069 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -111,7 +111,7 @@ Interfaces provided include:
 
 CXL Root Ports (CXL RP)
 ~~~
-A CXL Root Port servers te same purpose as a PCIe Root Port.
+A CXL Root Port serves the same purpose as a PCIe Root Port.
 There are a number of CXL specific Designated Vendor Specific
 Extended Capabilities (DVSEC) in PCIe Configuration Space
 and associated component register access via PCI bars.
-- 
MST




[PULL 18/31] Add my old and new work email mapping and use work email to support acpi

2023-04-25 Thread Michael S. Tsirkin
From: Ani Sinha 

Updating mailmap to indicate a...@anisinha.ca and anisi...@redhat.com are one
and the same person. Also updating my email in MAINTAINERS for all my acpi work
(reviewing patches and biosbits) to my work email. Also doing the same for
bios bits test framework documentation.

Signed-off-by: Ani Sinha 
Message-Id: <20230329040834.11973-1-anisi...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 83c0373872..e365a7a47e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1894,7 +1894,7 @@ F: hw/pci/pcie_doe.c
 ACPI/SMBIOS
 M: Michael S. Tsirkin 
 M: Igor Mammedov 
-R: Ani Sinha 
+R: Ani Sinha 
 S: Supported
 F: include/hw/acpi/*
 F: include/hw/firmware/smbios.h
-- 
MST




[PULL 25/31] virtio: i2c: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX

2023-04-25 Thread Michael S. Tsirkin
From: Viresh Kumar 

Since the driver doesn't support interrupts, we must return early when
index is set to VIRTIO_CONFIG_IRQ_IDX.

Fixes: 544f0278afca ("virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX")
Signed-off-by: Viresh Kumar 
Message-Id: 

Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-user-i2c.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
index 60eaf0d95b..4eef3f0633 100644
--- a/hw/virtio/vhost-user-i2c.c
+++ b/hw/virtio/vhost-user-i2c.c
@@ -128,6 +128,14 @@ static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, 
int idx, bool mask)
 {
 VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
 
+/*
+ * We don't support interrupts, return early if index is set to
+ * VIRTIO_CONFIG_IRQ_IDX.
+ */
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return;
+}
+
 vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask);
 }
 
@@ -135,6 +143,14 @@ static bool vu_i2c_guest_notifier_pending(VirtIODevice 
*vdev, int idx)
 {
 VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
 
+/*
+ * We don't support interrupts, return early if index is set to
+ * VIRTIO_CONFIG_IRQ_IDX.
+ */
+if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+return false;
+}
+
 return vhost_virtqueue_pending(&i2c->vhost_dev, idx);
 }
 
-- 
MST




[PULL 30/31] hw/pci-bridge: pci_expander_bridge fix type in pxb_cxl_dev_reset()

2023-04-25 Thread Michael S. Tsirkin
From: Jonathan Cameron 

Reproduce issue with

configure --enable-qom-cast-debug ...

qemu-system-x86_64 -display none -machine q35,cxl=on -device pxb-cxl,bus=pcie.0

  hw/pci-bridge/pci_expander_bridge.c:54:PXB_DEV: Object 0x5570e0b1ada0 is not 
an instance of type pxb
  Aborted

The type conversion results in the right state structure, but PXB_DEV is
not a parent of PXB_CXL_DEV hence the error. Rather than directly
cleaning up the inheritance, this is the minimal fix which will be
followed by the cleanup.

Fixes: 154070eaf6 ("hw/pxb-cxl: Support passthrough HDM Decoders unless 
overridden")
Reported-by: Peter Maydell 
Signed-off-by: Jonathan Cameron 
Message-Id: <20230420142750.6950-2-jonathan.came...@huawei.com>
Reviewed-by: Thomas Huth 
Cc: qemu-sta...@nongnu.org
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-bridge/pci_expander_bridge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci-bridge/pci_expander_bridge.c 
b/hw/pci-bridge/pci_expander_bridge.c
index ead33f0c05..a78327b5f2 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -311,7 +311,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
  * The CXL specification allows for host bridges with no HDM decoders
  * if they only have a single root port.
  */
-if (!PXB_DEV(dev)->hdm_for_passthrough) {
+if (!PXB_CXL_DEV(dev)->hdm_for_passthrough) {
 dsp_count = pcie_count_ds_ports(hb->bus);
 }
 /* Initial reset will have 0 dsp so wait until > 0 */
-- 
MST




[PULL 26/31] acpi: pcihp: allow repeating hot-unplug requests

2023-04-25 Thread Michael S. Tsirkin
From: Igor Mammedov 

With Q35 using ACPI PCI hotplug by default, a user's request to unplug
a device is ignored when it is issued before the guest OS has booted.
Any additional attempt to request device hot-unplug afterwards
results in the following error:

  "Device XYZ is already in the process of unplug"

Arguably this can be considered a regression introduced by [2],
before which it was possible to issue an unplug request multiple
times.

Accept new unplug requests after a timeout (1ms). This brings ACPI PCI
hotplug on par with native PCIe unplug behavior [1] and allows the user
to repeat unplug requests at proper times.
Set the expire timeout to an arbitrary 1msec so the user won't be able to
flood the guest with SCI interrupts by calling device_del in a tight loop.

PS:
The ACPI spec doesn't mandate what OSPM can do with GPEx.status
bits set before it's booted => it's implementation dependent.
Status bits may be retained (I tested with one Windows version)
or cleared (Linux since 2.6 kernel times) during the guest's ACPI
subsystem initialization.
Clearing status bits (though not wrong per se) hides the unplug
event from the guest, and it's up to the user to repeat device_del later
when the guest is able to handle unplug requests.

1) 18416c62e3 ("pcie: expire pending delete")
2)
Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del")
Signed-off-by: Igor Mammedov 
Acked-by: Gerd Hoffmann 
CC: m...@redhat.com
CC: anisi...@redhat.com
CC: jus...@redhat.com
CC: kra...@redhat.com
Message-Id: <20230418090449.2155757-1-imamm...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Ani Sinha 
---
 hw/acpi/pcihp.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index dcfb779a7a..cdd6f775a1 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler 
*hotplug_dev,
  * acpi_pcihp_eject_slot() when the operation is completed.
  */
 pdev->qdev.pending_deleted_event = true;
+/* if unplug was requested before OSPM is initialized,
+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively
+ * hides unplug event. And then followup qmp_device_del() calls remain
+ * blocked by above flag permanently.
+ * Unblock qmp_device_del() by setting expire limit, so user can
+ * repeat unplug request later when OSPM has been booted.
+ */
+pdev->qdev.pending_deleted_expires_ms =
+qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
+
 s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
 acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
 }
-- 
MST




[PULL 15/31] hw: Add compat machines for 8.1

2023-04-25 Thread Michael S. Tsirkin
From: Cornelia Huck 

Add 8.1 machine types for arm/i440fx/m68k/q35/s390x/spapr.

Signed-off-by: Cornelia Huck 
Message-Id: <20230314173009.152667-1-coh...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/boards.h|  3 +++
 include/hw/i386/pc.h   |  3 +++
 hw/arm/virt.c  | 11 +--
 hw/core/machine.c  |  3 +++
 hw/i386/pc.c   |  3 +++
 hw/i386/pc_piix.c  | 16 +---
 hw/i386/pc_q35.c   | 14 --
 hw/m68k/virt.c | 11 +--
 hw/ppc/spapr.c | 17 ++---
 hw/s390x/s390-virtio-ccw.c | 14 +-
 10 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index f840f88d54..f4117fdb9a 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -383,6 +383,9 @@ struct MachineState {
 } \
 type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_8_0[];
+extern const size_t hw_compat_8_0_len;
+
 extern GlobalProperty hw_compat_7_2[];
 extern const size_t hw_compat_7_2_len;
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 8206d5405a..eb668e9034 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -195,6 +195,9 @@ void pc_madt_cpu_entry(int uid, const CPUArchIdList 
*apic_ids,
 /* sgx.c */
 void pc_machine_init_sgx_epc(PCMachineState *pcms);
 
+extern GlobalProperty pc_compat_8_0[];
+extern const size_t pc_compat_8_0_len;
+
 extern GlobalProperty pc_compat_7_2[];
 extern const size_t pc_compat_7_2_len;
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ac626b3bef..267fe56fae 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3232,10 +3232,17 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
-static void virt_machine_8_0_options(MachineClass *mc)
+static void virt_machine_8_1_options(MachineClass *mc)
 {
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(8, 0)
+DEFINE_VIRT_MACHINE_AS_LATEST(8, 1)
+
+static void virt_machine_8_0_options(MachineClass *mc)
+{
+virt_machine_8_1_options(mc);
+compat_props_add(mc->compat_props, hw_compat_8_0, hw_compat_8_0_len);
+}
+DEFINE_VIRT_MACHINE(8, 0)
 
 static void virt_machine_7_2_options(MachineClass *mc)
 {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index cd13b8b0a3..2ce97a5d3b 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -39,6 +39,9 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
 
+GlobalProperty hw_compat_8_0[] = {};
+const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
+
 GlobalProperty hw_compat_7_2[] = {
 { "e1000e", "migrate-timadj", "off" },
 { "virtio-mem", "x-early-migration", "false" },
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1489abf010..615e1d3d06 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -116,6 +116,9 @@
 { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
 { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
 
+GlobalProperty pc_compat_8_0[] = {};
+const size_t pc_compat_8_0_len = G_N_ELEMENTS(pc_compat_8_0);
+
 GlobalProperty pc_compat_7_2[] = {
 { "ICH9-LPC", "noreboot", "true" },
 };
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 30eedd62a3..21591dad8d 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -454,21 +454,31 @@ static void pc_i440fx_machine_options(MachineClass *m)
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
 }
 
-static void pc_i440fx_8_0_machine_options(MachineClass *m)
+static void pc_i440fx_8_1_machine_options(MachineClass *m)
 {
 pc_i440fx_machine_options(m);
 m->alias = "pc";
 m->is_default = true;
 }
 
+DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL,
+  pc_i440fx_8_1_machine_options);
+
+static void pc_i440fx_8_0_machine_options(MachineClass *m)
+{
+pc_i440fx_8_1_machine_options(m);
+m->alias = NULL;
+m->is_default = false;
+compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
+compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
+}
+
 DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL,
   pc_i440fx_8_0_machine_options);
 
 static void pc_i440fx_7_2_machine_options(MachineClass *m)
 {
 pc_i440fx_8_0_machine_options(m);
-m->alias = NULL;
-m->is_default = false;
 compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len);
 compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 797ba347fd..f02919d92c 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -373,19 +373,29 @@ static void pc_q35_machine_options(MachineClass *m)
 m->max_cpus = 288;
 }
 
-static void pc_q35_8_0_machine_options(MachineClass *m)
+static void pc_q35_8_1_machine_options(MachineClass *m)
 {
 pc_q35_machine_options(m);
 m->alias = "q35";
 }
 
+DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL,
+ 

[PULL 29/31] docs/specs: Convert pci-testdev.txt to rst

2023-04-25 Thread Michael S. Tsirkin
From: Peter Maydell 

Convert pci-testdev.txt to reStructuredText. Includes
some minor wordsmithing.

Signed-off-by: Peter Maydell 
Message-Id: <20230420160334.1048224-4-peter.mayd...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/specs/pci-testdev.txt | 31 --
 docs/specs/index.rst   |  1 +
 docs/specs/pci-ids.rst |  2 +-
 docs/specs/pci-testdev.rst | 39 ++
 4 files changed, 41 insertions(+), 32 deletions(-)
 delete mode 100644 docs/specs/pci-testdev.txt
 create mode 100644 docs/specs/pci-testdev.rst

diff --git a/docs/specs/pci-testdev.txt b/docs/specs/pci-testdev.txt
deleted file mode 100644
index 4280a1e73c..00
--- a/docs/specs/pci-testdev.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-pci-test is a device used for testing low level IO
-
-device implements up to three BARs: BAR0, BAR1 and BAR2.
-Each of BAR 0+1 can be memory or IO. Guests must detect
-BAR types and act accordingly.
-
-BAR 0+1 size is up to 4K bytes each.
-BAR 0+1 starts with the following header:
-
-typedef struct PCITestDevHdr {
-uint8_t test;  <- write-only, starts a given test number
-uint8_t width_type; <- read-only, type and width of access for a given 
test.
-   1,2,4 for byte,word or long write.
-   any other value if test not supported on this BAR
-uint8_t pad0[2];
-uint32_t offset; <- read-only, offset in this BAR for a given test
-uint32_t data;<- read-only, data to use for a given test
-uint32_t count;  <- for debugging. number of writes detected.
-uint8_t name[]; <- for debugging. 0-terminated ASCII string.
-} PCITestDevHdr;
-
-All registers are little endian.
-
-device is expected to always implement tests 0 to N on each BAR, and to add new
-tests with higher numbers.  In this way a guest can scan test numbers until it
-detects an access type that it does not support on this BAR, then stop.
-
-BAR2 is a 64bit memory bar, without backing storage.  It is disabled
-by default and can be enabled using the membar= property.  This
-can be used to test whether guests handle pci bars of a specific
-(possibly quite large) size correctly.
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index dfa136073c..e58be38c41 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -10,6 +10,7 @@ guest hardware that is specific to QEMU.
 
pci-ids
pci-serial
+   pci-testdev
ppc-xive
ppc-spapr-xive
ppc-spapr-numa
diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst
index 7bc7168fb6..e302bea484 100644
--- a/docs/specs/pci-ids.rst
+++ b/docs/specs/pci-ids.rst
@@ -69,7 +69,7 @@ PCI devices (other than virtio):
 1b36:0004
   PCI Quad-port 16550A adapter (:doc:`pci-serial`)
 1b36:0005
-  PCI test device (``docs/specs/pci-testdev.txt``)
+  PCI test device (:doc:`pci-testdev`)
 1b36:0006
   PCI Rocker Ethernet switch device
 1b36:0007
diff --git a/docs/specs/pci-testdev.rst b/docs/specs/pci-testdev.rst
new file mode 100644
index 00..4b6d36543b
--- /dev/null
+++ b/docs/specs/pci-testdev.rst
@@ -0,0 +1,39 @@
+
+QEMU PCI test device
+
+
+``pci-testdev`` is a device used for testing low level IO.
+
+The device implements up to three BARs: BAR0, BAR1 and BAR2.
+Each of BAR 0+1 can be memory or IO. Guests must detect
+BAR types and act accordingly.
+
+BAR 0+1 size is up to 4K bytes each.
+BAR 0+1 starts with the following header:
+
+.. code-block:: c
+
+  typedef struct PCITestDevHdr {
+  uint8_t test;/* write-only, starts a given test number */
+  uint8_t width_type;  /*
+* read-only, type and width of access for a given 
test.
+* 1,2,4 for byte,word or long write.
+* any other value if test not supported on this BAR
+*/
+  uint8_t pad0[2];
+  uint32_t offset; /* read-only, offset in this BAR for a given test */
+  uint32_t data;   /* read-only, data to use for a given test */
+  uint32_t count;  /* for debugging. number of writes detected. */
+  uint8_t name[];  /* for debugging. 0-terminated ASCII string. */
+  } PCITestDevHdr;
+
+All registers are little endian.
+
+The device is expected to always implement tests 0 to N on each BAR, and to 
add new
+tests with higher numbers.  In this way a guest can scan test numbers until it
+detects an access type that it does not support on this BAR, then stop.
+
+BAR2 is a 64bit memory BAR, without backing storage.  It is disabled
+by default and can be enabled using the ``membar=`` property.  This
+can be used to test whether guests handle PCI BARs of a specific
+(possibly quite large) size correctly.
-- 
MST




[PULL 28/31] docs/specs: Convert pci-serial.txt to rst

2023-04-25 Thread Michael S. Tsirkin
From: Peter Maydell 

Convert pci-serial.txt to reStructuredText. This includes
some wordsmithing, and the correction of the docs to note
that the Windows inf file includes 2x and 4x support
(as it has done since commit dc9528fdf9f61 in 2014).

Signed-off-by: Peter Maydell 
Message-Id: <20230420160334.1048224-3-peter.mayd...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
---
 docs/specs/pci-serial.txt  | 34 --
 hw/char/serial-pci-multi.c |  2 +-
 hw/char/serial-pci.c   |  2 +-
 docs/specs/index.rst   |  1 +
 docs/specs/pci-ids.rst |  6 +++---
 docs/specs/pci-serial.rst  | 37 +
 6 files changed, 43 insertions(+), 39 deletions(-)
 delete mode 100644 docs/specs/pci-serial.txt
 create mode 100644 docs/specs/pci-serial.rst

diff --git a/docs/specs/pci-serial.txt b/docs/specs/pci-serial.txt
deleted file mode 100644
index 66c761f2b4..00
--- a/docs/specs/pci-serial.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-
-QEMU pci serial devices
-===
-
-There is one single-port variant and two muliport-variants.  Linux
-guests out-of-the box with all cards.  There is a Windows inf file
-(docs/qemupciserial.inf) to setup the single-port card in Windows
-guests.
-
-
-single-port card
-
-
-Name:   pci-serial
-PCI ID: 1b36:0002
-
-PCI Region 0:
-   IO bar, 8 bytes long, with the 16550 uart mapped to it.
-   Interrupt is wired to pin A.
-
-
-multiport cards

-
-Name:   pci-serial-2x
-PCI ID: 1b36:0003
-
-Name:   pci-serial-4x
-PCI ID: 1b36:0004
-
-PCI Region 0:
-   IO bar, with two/four 16550 uart mapped after each other.
-   The first is at offset 0, second at offset 8, ...
-   Interrupt is wired to pin A.
diff --git a/hw/char/serial-pci-multi.c b/hw/char/serial-pci-multi.c
index f18b8dcce5..5d65c534cb 100644
--- a/hw/char/serial-pci-multi.c
+++ b/hw/char/serial-pci-multi.c
@@ -25,7 +25,7 @@
  * THE SOFTWARE.
  */
 
-/* see docs/specs/pci-serial.txt */
+/* see docs/specs/pci-serial.rst */
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c
index 801b769aba..087da3059a 100644
--- a/hw/char/serial-pci.c
+++ b/hw/char/serial-pci.c
@@ -23,7 +23,7 @@
  * THE SOFTWARE.
  */
 
-/* see docs/specs/pci-serial.txt */
+/* see docs/specs/pci-serial.rst */
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 8aa0fcb77a..dfa136073c 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -9,6 +9,7 @@ guest hardware that is specific to QEMU.
:maxdepth: 2
 
pci-ids
+   pci-serial
ppc-xive
ppc-spapr-xive
ppc-spapr-numa
diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst
index e1cf022006..7bc7168fb6 100644
--- a/docs/specs/pci-ids.rst
+++ b/docs/specs/pci-ids.rst
@@ -63,11 +63,11 @@ PCI devices (other than virtio):
 1b36:0001
   PCI-PCI bridge
 1b36:0002
-  PCI serial port (16550A) adapter (``docs/specs/pci-serial.txt``)
+  PCI serial port (16550A) adapter (:doc:`pci-serial`)
 1b36:0003
-  PCI Dual-port 16550A adapter (``docs/specs/pci-serial.txt``)
+  PCI Dual-port 16550A adapter (:doc:`pci-serial`)
 1b36:0004
-  PCI Quad-port 16550A adapter (``docs/specs/pci-serial.txt``)
+  PCI Quad-port 16550A adapter (:doc:`pci-serial`)
 1b36:0005
   PCI test device (``docs/specs/pci-testdev.txt``)
 1b36:0006
diff --git a/docs/specs/pci-serial.rst b/docs/specs/pci-serial.rst
new file mode 100644
index 00..8d916a3669
--- /dev/null
+++ b/docs/specs/pci-serial.rst
@@ -0,0 +1,37 @@
+===
+QEMU PCI serial devices
+===
+
+QEMU implements some PCI serial devices which are simple PCI
+wrappers around one or more 16550 UARTs.
+
+There is one single-port variant and two multiport-variants.  Linux
+guests work out-of-the box with all cards.  There is a Windows inf file
+(``docs/qemupciserial.inf``) to set up the cards in Windows guests.
+
+
+Single-port card
+
+
+Name:
+  ``pci-serial``
+PCI ID:
+  1b36:0002
+PCI Region 0:
+   IO bar, 8 bytes long, with the 16550 UART mapped to it.
+Interrupt:
+   Wired to pin A.
+
+
+Multiport cards
+---
+
+Name:
+  ``pci-serial-2x``, ``pci-serial-4x``
+PCI ID:
+  1b36:0003 (``-2x``) and 1b36:0004 (``-4x``)
+PCI Region 0:
+   IO bar, with two or four 16550 UARTs mapped after each other.
+   The first is at offset 0, the second at offset 8, and so on.
+Interrupt:
+   Wired to pin A.
-- 
MST




[PULL 27/31] docs/specs/pci-ids: Convert from txt to rST

2023-04-25 Thread Michael S. Tsirkin
From: Peter Maydell 

Convert the pci-ids document from plain text to reStructuredText.

I opted to use definition-lists here because rST tables are
super-clunky, and actually formatting these as tables didn't
seem necessary.

Signed-off-by: Peter Maydell 
Message-Id: <20230420160334.1048224-2-peter.mayd...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/specs/pci-ids.txt | 70 --
 docs/specs/index.rst   |  1 +
 docs/specs/pci-ids.rst | 98 ++
 3 files changed, 99 insertions(+), 70 deletions(-)
 delete mode 100644 docs/specs/pci-ids.txt
 create mode 100644 docs/specs/pci-ids.rst

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
deleted file mode 100644
index e463c4cb3a..00
--- a/docs/specs/pci-ids.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-
-PCI IDs for qemu
-
-
-Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices.  The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
-
-Contact Gerd Hoffmann  to get a device ID assigned
-for your devices.
-
-1af4 vendor ID
---
-
-The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
-Note that this allocation separate from the virtio device IDs, which are
-maintained as part of the virtio specification.
-
-1af4:1000  network device (legacy)
-1af4:1001  block device (legacy)
-1af4:1002  balloon device (legacy)
-1af4:1003  console device (legacy)
-1af4:1004  SCSI host bus adapter device (legacy)
-1af4:1005  entropy generator device (legacy)
-1af4:1009  9p filesystem device (legacy)
-1af4:1012  vsock device (bug compatibility)
-
-1af4:1040  Start of ID range for modern virtio devices.  The PCI device
-   to  ID is calculated from the virtio device ID by adding the
-1af4:10ef  0x1040 offset.  The virtio IDs are defined in the virtio
-   specification.  The Linux kernel has a header file with
-   defines for all virtio IDs (linux/virtio_ids.h), qemu has a
-   copy in include/standard-headers/.
-
-1af4:10f0  Available for experimental usage without registration.  Must get
-   to  official ID when the code leaves the test lab (i.e. when seeking
-1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
-
-1af4:1100  Used as PCI Subsystem ID for existing hardware devices emulated
-   by qemu.
-
-1af4:1110  ivshmem device (shared memory, docs/specs/ivshmem-spec.txt)
-
-All other device IDs are reserved.
-
-1b36 vendor ID
---
-
-The  -> 00ff device ID range is used as follows for QEMU-specific
-PCI devices (other than virtio):
-
-1b36:0001  PCI-PCI bridge
-1b36:0002  PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
-1b36:0003  PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
-1b36:0004  PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
-1b36:0005  PCI test device (docs/specs/pci-testdev.txt)
-1b36:0006  PCI Rocker Ethernet switch device
-1b36:0007  PCI SD Card Host Controller Interface (SDHCI)
-1b36:0008  PCIe host bridge
-1b36:0009  PCI Expander Bridge (-device pxb)
-1b36:000a  PCI-PCI bridge (multiseat)
-1b36:000b  PCIe Expander Bridge (-device pxb-pcie)
-1b36:000d  PCI xhci usb host adapter
-1b36:000f  mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
-1b36:0010  PCIe NVMe device (-device nvme)
-1b36:0011  PCI PVPanic device (-device pvpanic-pci)
-1b36:0012  PCI ACPI ERST device (-device acpi-erst)
-
-All these devices are documented in docs/specs.
-
-The 0100 device ID is used for the QXL video card device.
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index a58d9311cb..8aa0fcb77a 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -8,6 +8,7 @@ guest hardware that is specific to QEMU.
 .. toctree::
:maxdepth: 2
 
+   pci-ids
ppc-xive
ppc-spapr-xive
ppc-spapr-numa
diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst
new file mode 100644
index 00..e1cf022006
--- /dev/null
+++ b/docs/specs/pci-ids.rst
@@ -0,0 +1,98 @@
+
+PCI IDs for QEMU
+
+
+Red Hat, Inc. donates a part of its device ID range to QEMU, to be used for
+virtual devices.  The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
+
+Contact Gerd Hoffmann  to get a device ID assigned
+for your devices.
+
+1af4 vendor ID
+--
+
+The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
+Note that this allocation is separate from the virtio device IDs, which are
+maintained as part of the virtio specification.
+
+1af4:1000
+  network device (legacy)
+1af4:1001
+  block device (legacy)
+1af4:1002
+  balloon device (legacy)
+1af4:1003
+  console device (legacy)
+1af4:1004
+  SCSI host bus adapter device (legacy)
+1af4:1005
+  entropy generator device (legacy)
+1af4:1009
+  9p filesystem device (legacy)
+1af4:1012
+  vsock device (bug compatibility)
+
+1af4:1040 to 1af4:10ef
+  ID range for m

Re: [PATCH v8 8/8] memory: abort on re-entrancy in debug builds

2023-04-25 Thread Thomas Huth

On 21/04/2023 16.27, Alexander Bulekov wrote:

This is useful for using unit-tests/fuzzing to detect bugs introduced by
the re-entrancy guard mechanism into devices that are intentionally
re-entrant.

Signed-off-by: Alexander Bulekov 
---
  softmmu/memory.c | 3 +++
  util/async.c | 3 +++
  2 files changed, 6 insertions(+)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index a11ee3e30d..5390f91db6 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -547,6 +547,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
  !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
  if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
  trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
+#ifdef DEBUG
+abort();
+#endif
  return MEMTX_ACCESS_ERROR;
  }
  mr->dev->mem_reentrancy_guard.engaged_in_io = true;
diff --git a/util/async.c b/util/async.c
index a9b528c370..2dc9389e0d 100644
--- a/util/async.c
+++ b/util/async.c
@@ -160,6 +160,9 @@ void aio_bh_call(QEMUBH *bh)
  last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
  if (bh->reentrancy_guard->engaged_in_io) {
  trace_reentrant_aio(bh->ctx, bh->name);
+#ifdef DEBUG
+abort();
+#endif
  }
  bh->reentrancy_guard->engaged_in_io = true;
  }


Reviewed-by: Thomas Huth 




[PULL 13/31] hw/i386/amd_iommu: Set PCI static/const fields via PCIDeviceClass

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

Set PCI static/const fields once in amdvi_pci_class_init.
They will be propagated via DeviceClassRealize handler via
pci_qdev_realize() -> do_pci_register_device() -> pci_config_set*().

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-6-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 9f6622e11f..8e4ce63f8e 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1511,9 +1511,7 @@ static void amdvi_init(AMDVIState *s)
 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
 
 /* reset device ident */
-pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
 pci_config_set_prog_interface(s->pci.dev.config, 00);
-pci_config_set_class(s->pci.dev.config, 0x0806);
 
 /* reset AMDVI specific capabilities, all r/o */
 pci_set_long(s->pci.dev.config + s->pci.capab_offset, 
AMDVI_CAPAB_FEATURES);
@@ -1623,6 +1621,10 @@ static const TypeInfo amdvi_sysbus = {
 static void amdvi_pci_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
+PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+k->vendor_id = PCI_VENDOR_ID_AMD;
+k->class_id = 0x0806;
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
-- 
MST




[PULL 03/31] vdpa: accept VIRTIO_NET_F_SPEED_DUPLEX in SVQ

2023-04-25 Thread Michael S. Tsirkin
From: Eugenio Pérez 

There is no reason to block it as it has nothing to do with the vrings.
All the support of the feature comes via config space.

Signed-off-by: Eugenio Pérez 
Suggested-by: Alvaro Karsz 
Message-Id: <20230307170018.260557-1-epere...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 net/vhost-vdpa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 99904a0da7..37cdc84562 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -104,7 +104,8 @@ static const uint64_t vdpa_svq_device_features =
 /* VHOST_F_LOG_ALL is exposed by SVQ */
 BIT_ULL(VHOST_F_LOG_ALL) |
 BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
-BIT_ULL(VIRTIO_NET_F_STANDBY);
+BIT_ULL(VIRTIO_NET_F_STANDBY) |
+BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
 
 #define VHOST_VDPA_NET_CVQ_ASID 1
 
-- 
MST




[PULL 09/31] MAINTAINERS: Mark AMD-Vi emulation as orphan

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

hw/i386/amd_iommu.c seems unmaintained:
After commit 1c7955c450 ("x86-iommu: introduce parent class",
2016-07-14), almost no features have been added and only 2 bugs
have been fixed; the other changes are generic tree-wide API cleanups.

Cc: Roman Kapl 
Cc: Wei Huang 
Cc: Brijesh Singh 
Cc: David Kiarie 
Cc: Jean-Philippe Brucker 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-2-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 MAINTAINERS | 4 
 1 file changed, 4 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0bd3d1830e..83c0373872 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3367,6 +3367,10 @@ F: hw/i386/intel_iommu.c
 F: hw/i386/intel_iommu_internal.h
 F: include/hw/i386/intel_iommu.h
 
+AMD-Vi Emulation
+S: Orphan
+F: hw/i386/amd_iommu.?
+
 OpenSBI Firmware
 M: Bin Meng 
 S: Supported
-- 
MST




[PULL 23/31] intel_iommu: refine iotlb hash calculation

2023-04-25 Thread Michael S. Tsirkin
From: Jason Wang 

Commit 1b2b12376c8 ("intel-iommu: PASID support") takes PASID into
account when calculating iotlb hash like:

static guint vtd_iotlb_hash(gconstpointer v)
{
const struct vtd_iotlb_key *key = v;

return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) |
   (key->level) << VTD_IOTLB_LVL_SHIFT |
   (key->pasid) << VTD_IOTLB_PASID_SHIFT;
}

This turns out to be problematic since:

- the shift will lose bits if not converting to uint64_t
- level should be off by one in order to fit into 2 bits
- VTD_IOTLB_PASID_SHIFT is 30 but PASID is 20 bits which will waste
  some bits
- the hash result is uint64_t so we will lose bits when converting to
  guint

So this patch fixes them by

- converting the keys into uint64_t before doing the shift
- off level by one to make it fit into two bits
- change the sid, lvl and pasid shift to 26, 42 and 44 in order to
  take the full width of uint64_t
- perform an XOR to the top 32bit with the bottom 32bit for the final
  result to fit guint

Fixes: Coverity CID 1508100
Fixes: 1b2b12376c8 ("intel-iommu: PASID support")
Signed-off-by: Jason Wang 
Message-Id: <20230412073510.7158-1-jasow...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
---
 hw/i386/intel_iommu_internal.h | 6 +++---
 hw/i386/intel_iommu.c  | 9 +
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f090e61e11..2e61eec2f5 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -114,9 +114,9 @@
  VTD_INTERRUPT_ADDR_FIRST + 1)
 
 /* The shift of source_id in the key of IOTLB hash table */
-#define VTD_IOTLB_SID_SHIFT 20
-#define VTD_IOTLB_LVL_SHIFT 28
-#define VTD_IOTLB_PASID_SHIFT   30
+#define VTD_IOTLB_SID_SHIFT 26
+#define VTD_IOTLB_LVL_SHIFT 42
+#define VTD_IOTLB_PASID_SHIFT   44
 #define VTD_IOTLB_MAX_SIZE  1024/* Max size of the hash table */
 
 /* IOTLB_REG */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a62896759c..94d52f4205 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -64,8 +64,8 @@ struct vtd_as_key {
 struct vtd_iotlb_key {
 uint64_t gfn;
 uint32_t pasid;
-uint32_t level;
 uint16_t sid;
+uint8_t level;
 };
 
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
@@ -221,10 +221,11 @@ static gboolean vtd_iotlb_equal(gconstpointer v1, 
gconstpointer v2)
 static guint vtd_iotlb_hash(gconstpointer v)
 {
 const struct vtd_iotlb_key *key = v;
+uint64_t hash64 = key->gfn | ((uint64_t)(key->sid) << VTD_IOTLB_SID_SHIFT) 
|
+(uint64_t)(key->level - 1) << VTD_IOTLB_LVL_SHIFT |
+(uint64_t)(key->pasid) << VTD_IOTLB_PASID_SHIFT;
 
-return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) |
-   (key->level) << VTD_IOTLB_LVL_SHIFT |
-   (key->pasid) << VTD_IOTLB_PASID_SHIFT;
+return (guint)((hash64 >> 32) ^ (hash64 & 0xffffffffU));
 }
 
 static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
-- 
MST




[PULL 11/31] hw/i386/amd_iommu: Remove intermediate AMDVIState::devid field

2023-04-25 Thread Michael S. Tsirkin
From: Philippe Mathieu-Daudé 

AMDVIState::devid is only accessed by build_amd_iommu() which
has access to the PCIDevice state. Directly get the property
calling object_property_get_int() there.

Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20230313153031.86107-4-phi...@linaro.org>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/amd_iommu.h  | 2 --
 hw/i386/acpi-build.c | 4 +++-
 hw/i386/amd_iommu.c  | 2 --
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 79d38a3e41..5eccaad790 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -319,8 +319,6 @@ struct AMDVIState {
 
 uint64_t mmio_addr;
 
-uint32_t devid;  /* auto-assigned devid  */
-
 bool enabled;/* IOMMU enabled*/
 bool ats_enabled;/* address translation enabled  */
 bool cmdbuf_enabled; /* command buffer enabled   */
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..a27bc33956 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2395,7 +2395,9 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, 
const char *oem_id,
 /* IVHD length */
 build_append_int_noprefix(table_data, ivhd_table_len, 2);
 /* DeviceID */
-build_append_int_noprefix(table_data, s->devid, 2);
+build_append_int_noprefix(table_data,
+  object_property_get_int(OBJECT(&s->pci), "addr",
+  &error_abort), 2);
 /* Capability offset */
 build_append_int_noprefix(table_data, s->capab_offset, 2);
 /* IOMMU base address */
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 3813b341ec..19f57e6318 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1513,7 +1513,6 @@ static void amdvi_init(AMDVIState *s)
 /* reset device ident */
 pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
 pci_config_set_prog_interface(s->pci.dev.config, 00);
-pci_config_set_device_id(s->pci.dev.config, s->devid);
 pci_config_set_class(s->pci.dev.config, 0x0806);
 
 /* reset AMDVI specific capabilities, all r/o */
@@ -1581,7 +1580,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error 
**errp)
 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
 pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
-s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort);
 msi_init(&s->pci.dev, 0, 1, true, false, errp);
 amdvi_init(s);
 }
-- 
MST




[PULL 31/31] hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from TYPE_PXB_DEV

2023-04-25 Thread Michael S. Tsirkin
From: Jonathan Cameron 

Previously, PXB_CXL_DEVICE, PXB_PCIE_DEVICE and PXB_DEVICE all
have PCI_DEVICE as their direct parent but share a common state
struct PXBDev. convert_to_pxb() is used to get the PXBDev
instance from whichever of these types it is called on.

This patch switches to an explicit hierarchy based on shared
functionality.  To allow use of OBJECT_DECLARE_SIMPLE_TYPE()
whilst minimizing code changes, all types are renamed to have
the postfix _DEV rather than _DEVICE.  The new hierarchy
has PXB_CXL_DEV with parent PXB_PCIE_DEV which in turn
has parent PXB_DEV which continues to have parent PCI_DEVICE.

This allows simple use of PXB_DEV() etc rather than a custom function
+ removal of duplicated properties and moving the CXL specific
elements out of struct PXBDev.

Signed-off-by: Jonathan Cameron 
Message-Id: <20230420142750.6950-3-jonathan.came...@huawei.com>
Reviewed-by: Thomas Huth 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl.h|  4 +-
 include/hw/pci/pci_bridge.h | 30 ++-
 hw/acpi/cxl.c   | 11 +++---
 hw/cxl/cxl-host.c   |  4 +-
 hw/pci-bridge/pci_expander_bridge.c | 59 ++---
 5 files changed, 50 insertions(+), 58 deletions(-)

diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index b2cffbb364..c453983e83 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -23,12 +23,12 @@
 
 #define CXL_WINDOW_MAX 10
 
-typedef struct PXBDev PXBDev;
+typedef struct PXBCXLDev PXBCXLDev;
 
 typedef struct CXLFixedWindow {
 uint64_t size;
 char **targets;
-PXBDev *target_hbs[8];
+PXBCXLDev *target_hbs[8];
 uint8_t num_targets;
 uint8_t enc_int_ways;
 uint8_t enc_int_gran;
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 1677176b2a..01670e9e65 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -84,7 +84,7 @@ struct PCIBridge {
 #define PCI_BRIDGE_DEV_PROP_SHPC   "shpc"
 typedef struct CXLHost CXLHost;
 
-struct PXBDev {
+typedef struct PXBDev {
 /*< private >*/
 PCIDevice parent_obj;
 /*< public >*/
@@ -92,15 +92,27 @@ struct PXBDev {
 uint8_t bus_nr;
 uint16_t numa_node;
 bool bypass_iommu;
-bool hdm_for_passthrough;
-struct cxl_dev {
-CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
-} cxl;
-};
+} PXBDev;
 
-#define TYPE_PXB_CXL_DEVICE "pxb-cxl"
-DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
- TYPE_PXB_CXL_DEVICE)
+typedef struct PXBPCIEDev {
+/*< private >*/
+PXBDev parent_obj;
+} PXBPCIEDev;
+
+#define TYPE_PXB_DEV "pxb"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
+
+typedef struct PXBCXLDev {
+/*< private >*/
+PXBPCIEDev parent_obj;
+/*< public >*/
+
+bool hdm_for_passthrough;
+CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
+} PXBCXLDev;
+
+#define TYPE_PXB_CXL_DEV "pxb-cxl"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBCXLDev, PXB_CXL_DEV)
 
 int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
   uint16_t svid, uint16_t ssid,
diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
index 2bf8c07993..92b46bc932 100644
--- a/hw/acpi/cxl.c
+++ b/hw/acpi/cxl.c
@@ -30,9 +30,10 @@
 #include "qapi/error.h"
 #include "qemu/uuid.h"
 
-static void cedt_build_chbs(GArray *table_data, PXBDev *cxl)
+static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl)
 {
-SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge);
+PXBDev *pxb = PXB_DEV(cxl);
+SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl_host_bridge);
 struct MemoryRegion *mr = sbd->mmio[0].memory;
 
 /* Type */
@@ -45,7 +46,7 @@ static void cedt_build_chbs(GArray *table_data, PXBDev *cxl)
 build_append_int_noprefix(table_data, 32, 2);
 
 /* UID - currently equal to bus number */
-build_append_int_noprefix(table_data, cxl->bus_nr, 4);
+build_append_int_noprefix(table_data, pxb->bus_nr, 4);
 
 /* Version */
 build_append_int_noprefix(table_data, 1, 4);
@@ -112,7 +113,7 @@ static void cedt_build_cfmws(GArray *table_data, CXLState 
*cxls)
 /* Host Bridge List (list of UIDs - currently bus_nr) */
 for (i = 0; i < fw->num_targets; i++) {
 g_assert(fw->target_hbs[i]);
-build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 
4);
+build_append_int_noprefix(table_data, 
PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
 }
 }
 }
@@ -121,7 +122,7 @@ static int cxl_foreach_pxb_hb(Object *obj, void *opaque)
 {
 Aml *cedt = opaque;
 
-if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) {
+if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEV)) {
 cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj));
 }
 
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index 6e923ceeaf..034c7805b3 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -84,7 +84,7 @@ void cxl_fmws_link_targets(CXLState *cxl_state, 

RE: [PATCH v3 44/47] igb: Notify only new interrupts

2023-04-25 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Monday, 24 April 2023 13:50
> To: Sriram Yagnaraman 
> Cc: Jason Wang ; Dmitry Fleytman
> ; Michael S . Tsirkin ; Alex
> Bennée ; Philippe Mathieu-Daudé
> ; Thomas Huth ; Wainer dos Santos
> Moschetta ; Beraldo Leal ;
> Cleber Rosa ; Laurent Vivier ; Paolo
> Bonzini ; qemu-devel@nongnu.org; Tomasz Dzieciol
> 
> Subject: Re: [PATCH v3 44/47] igb: Notify only new interrupts
> 
> On 2023/04/24 20:41, Sriram Yagnaraman wrote:
> >
> >
> >> -Original Message-
> >> From: Akihiko Odaki 
> >> Sent: Sunday, 23 April 2023 06:19
> >> Cc: Sriram Yagnaraman ; Jason Wang
> >> ; Dmitry Fleytman ;
> >> Michael S . Tsirkin ; Alex Bennée
> >> ; Philippe Mathieu-Daudé ;
> >> Thomas Huth ; Wainer dos Santos Moschetta
> >> ; Beraldo Leal ; Cleber Rosa
> >> ; Laurent Vivier ; Paolo
> >> Bonzini ; qemu-devel@nongnu.org; Tomasz
> Dzieciol
> >> ; Akihiko Odaki
> >> 
> >> Subject: [PATCH v3 44/47] igb: Notify only new interrupts
> >>
> >> This follows the corresponding change for e1000e. This fixes:
> >> tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb
> >>
> >> Signed-off-by: Akihiko Odaki 
> >> ---
> >>   hw/net/igb_core.c | 201 --
> >>   hw/net/trace-events   |  11 +-
> >>   .../org.centos/stream/8/x86_64/test-avocado   |   1 +
> >>   tests/avocado/netdev-ethtool.py   |   4 -
> >>   4 files changed, 87 insertions(+), 130 deletions(-)
> >>
> >
> > This is a good change, makes a clear distinction on whether we are setting
> EICR or ICR or MBVFICR.
> >
> >> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> >> 1519a90aa6..96b7335b31 100644
> >> --- a/hw/net/igb_core.c
> >> +++ b/hw/net/igb_core.c
> >> @@ -94,10 +94,7 @@ static ssize_t
> >>   igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
> >>bool has_vnet, bool *external_tx);
> >>
> >> -static inline void
> >> -igb_set_interrupt_cause(IGBCore *core, uint32_t val);
> >> -
> >> -static void igb_update_interrupt_state(IGBCore *core);
> >> +static void igb_raise_interrupts(IGBCore *core, size_t index,
> >> +uint32_t causes);
> >>   static void igb_reset(IGBCore *core, bool sw);
> >>
> >>   static inline void
> >> @@ -913,8 +910,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing
> *txr)
> >>   }
> >>
> >>   if (eic) {
> >> -core->mac[EICR] |= eic;
> >> -igb_set_interrupt_cause(core, E1000_ICR_TXDW);
> >> +igb_raise_interrupts(core, EICR, eic);
> >> +igb_raise_interrupts(core, ICR, E1000_ICR_TXDW);
> >>   }
> >>
> >>   net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci,
> >> d); @@ -
> >> 1686,6 +1683,7 @@ igb_receive_internal(IGBCore *core, const struct
> >> iovec *iov, int iovcnt,  {
> >>   uint16_t queues = 0;
> >>   uint32_t causes = 0;
> >> +uint32_t ecauses = 0;
> >>   union {
> >>   L2Header l2_header;
> >>   uint8_t octets[ETH_ZLEN];
> >> @@ -1788,13 +1786,14 @@ igb_receive_internal(IGBCore *core, const
> >> struct iovec *iov, int iovcnt,
> >>   causes |= E1000_ICS_RXDMT0;
> >>   }
> >>
> >> -core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
> >> +ecauses |= igb_rx_wb_eic(core, rxr.i->idx);
> >>
> >>   trace_e1000e_rx_written_to_guest(rxr.i->idx);
> >>   }
> >>
> >>   trace_e1000e_rx_interrupt_set(causes);
> >> -igb_set_interrupt_cause(core, causes);
> >> +igb_raise_interrupts(core, EICR, ecauses);
> >> +igb_raise_interrupts(core, ICR, causes);
> >>
> >>   return orig_size;
> >>   }
> >> @@ -1854,7 +1853,7 @@ void igb_core_set_link_status(IGBCore *core)
> >>   }
> >>
> >>   if (core->mac[STATUS] != old_status) {
> >> -igb_set_interrupt_cause(core, E1000_ICR_LSC);
> >> +igb_raise_interrupts(core, ICR, E1000_ICR_LSC);
> >>   }
> >>   }
> >>
> >> @@ -1934,13 +1933,6 @@ igb_set_rx_control(IGBCore *core, int index,
> >> uint32_t val)
> >>   }
> >>   }
> >>
> >> -static inline void
> >> -igb_clear_ims_bits(IGBCore *core, uint32_t bits) -{
> >> -trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] &
> >> ~bits);
> >> -core->mac[IMS] &= ~bits;
> >> -}
> >> -
> >>   static inline bool
> >>   igb_postpone_interrupt(IGBIntrDelayTimer *timer)  { @@ -1963,9
> >> +1955,8 @@ igb_eitr_should_postpone(IGBCore *core, int idx)
> >>   return igb_postpone_interrupt(&core->eitr[idx]);
> >>   }
> >>
> >> -static void igb_send_msix(IGBCore *core)
> >> +static void igb_send_msix(IGBCore *core, uint32_t causes)
> >>   {
> >> -uint32_t causes = core->mac[EICR] & core->mac[EIMS];
> >>   int vector;
> >>
> >>   for (vector = 0; vector < IGB_INTR_NUM; ++vector) { @@
> >> -1988,124
> >> +1979,116 @@ igb_fix_icr_asserted(IGBCore *core)
> >>   trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
> >>   }
> >>
> >> -static void
> >> -igb_update_interrupt_state(IGBCore *core)
> >>

[PULL 24/31] docs: Remove obsolete descriptions of SR-IOV support

2023-04-25 Thread Michael S. Tsirkin
From: Akihiko Odaki 

The documentation used to say there is no device implemented with
SR-IOV, but igb and nvme support SR-IOV today.

Signed-off-by: Akihiko Odaki 
Message-Id: <20230414090441.23156-1-akihiko.od...@daynix.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/pcie_sriov.txt | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index 11158dbf88..7eff7f2703 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -9,10 +9,7 @@ virtual functions (VFs) for the main purpose of eliminating 
software
 overhead in I/O from virtual machines.
 
 QEMU now implements the basic common functionality to enable an emulated device
-to support SR/IOV. Yet no fully implemented devices exists in QEMU, but a
-proof-of-concept hack of the Intel igb can be found here:
-
-git://github.com/knuto/qemu.git sriov_patches_v5
+to support SR/IOV.
 
 Implementation
 ==
-- 
MST




Re: [PULL 18/31] Add my old and new work email mapping and use work email to support acpi

2023-04-25 Thread Ani Sinha
On Tue, Apr 25, 2023 at 1:15 PM Michael S. Tsirkin  wrote:
>
> From: Ani Sinha 
>
> Updating mailmap to indicate a...@anisinha.ca and anisi...@redhat.com are one
> and the same person. Also updating my email in MAINTAINERS for all my acpi 
> work
> (reviewing patches and biosbits) to my work email. Also doing the same for
> bios bits test framework documentation.
>
> Signed-off-by: Ani Sinha 
> Message-Id: <20230329040834.11973-1-anisi...@redhat.com>
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 

I think you can squash this one with patch #2 in the PR.

> ---
>  MAINTAINERS | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 83c0373872..e365a7a47e 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1894,7 +1894,7 @@ F: hw/pci/pcie_doe.c
>  ACPI/SMBIOS
>  M: Michael S. Tsirkin 
>  M: Igor Mammedov 
> -R: Ani Sinha 
> +R: Ani Sinha 
>  S: Supported
>  F: include/hw/acpi/*
>  F: include/hw/firmware/smbios.h
> --
> MST
>



Re: [RFC] hw/arm/virt: Provide DT binding generation for PCI eXpander Bridges

2023-04-25 Thread Peter Maydell
On Mon, 24 Apr 2023 at 22:56, Jonathan Cameron
 wrote:
>
> On Mon, 24 Apr 2023 16:45:48 +0100
> Peter Maydell  wrote:
> > On the other hand, having QEMU enumerate PCI devices is *also* a
> > very different model from today, where we assume that the guest
> > code is the one that is going to deal with enumerating PCI devices.
> > To my mind one of the major advantages of PCI is exactly that it
> > is entirely probeable and discoverable, so that there is no need
> > for the dtb to include a lot of information that the kernel can
> > find out for itself by directly asking the hardware...
>
> I absolutely agree that QEMU enumerating PCI seems a silly level of complexity
> to introduce. So easy route is to just use the bus numbers to partition
> resources. We have those available without any complexity. It's not the
> same as how it's done with ACPI, but then the alternatives are either
> (though maybe they are closer).  Note current proposed algorithm may be
> too simplistic (perhaps some alignment forcing should adjust the division
> of the resources to start on round number addresses)

I think we definitely need to talk about this later this week,
but my initial view is that if:
 (1) the guest kernel can get the information it needs to do this
 by probing the hardware
 (2) doing it in QEMU gives you "this isn't a great allocation"
 "we don't really have the info we need to do it optimally"
 "this is more of a policy decision" effects
 (which is what it's sounding like to me)

this is a really strong argument for "guest software should be
doing this". DTB-booting kernels has always meant the kernel
doing a lot of work that under ACPI/UEFI/x86-PC is typically
done by firmware, and this seems similar to me.

thanks
-- PMM



Re: [PATCH] pci: make ROM memory resizable

2023-04-25 Thread Vladimir Sementsov-Ogievskiy

On 25.04.23 10:43, Michael S. Tsirkin wrote:

On Tue, Apr 25, 2023 at 03:26:54AM -0400, Michael S. Tsirkin wrote:

On Mon, Apr 24, 2023 at 11:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote:

On migration, on target we load local ROM file. But actual ROM content
migrates through migration channel. Original ROM content from local
file doesn't matter. But when the sizes mismatch, we get an error like

  Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid 
argument


Oh, this is this old bug then:
https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1713490

People seem to be "fixing" this by downgrading ROMs.

Actually, I think the fix is different: we need to build
versions of ROMs for old machine types that can fit
in the old BAR size.

Gerd, Laszlo what's your take on all this?

Actually, ignore this - we do keep old ROMs around specifically to avoid
ROM size changes and have been for ever. E.g.:

commit c45e5b5b30ac1f5505725a7b36e68cedfce4f01f
Author: Gerd Hoffmann
Date:   Tue Feb 26 17:46:11 2013 +0100

 Switch to efi-enabled nic roms by default
 
 All PCI nics are switched to EFI-enabled roms by default.  They are

 composed from three images (legacy, efi ia32 & efi x86), so classic
 pxe booting will continue to work.
 
 Exception: eepro100 is not switched, it uses a single rom for all

 emulated eepro100 variants, then goes patch the rom header on the
 fly with the correct PCI IDs.  I doubt that will work as-is with
 the efi roms.
 
 Keep old roms for 1.4+older machine types via compat properties,

 needed because the efi-enabled roms are larger so the pci rom bar
 size would change.
 
 Signed-off-by: Gerd Hoffmann



So it's downstream messing up with things, overriding the
rom file then changing its size.


On fedora I find both pxe virtio and efi virtio so it gets it right.




Yes I understand that distribution may work-around the problem just having all 
needed roms on target and specifying correct romfile= argument.

But this is not ideal: having the file only to get its size, to not mismatch 
with incoming RAM block. There should be a way to migrate ROMs automatically 
without extra files on target.

--
Best regards,
Vladimir




Re: [PATCH v19 02/21] s390x/cpu topology: add topology entries on CPU hotplug

2023-04-25 Thread Pierre Morel



On 4/24/23 17:32, Nina Schoetterl-Glausch wrote:

On Fri, 2023-04-21 at 12:20 +0200, Pierre Morel wrote:

On 4/20/23 10:59, Nina Schoetterl-Glausch wrote:

On Mon, 2023-04-03 at 18:28 +0200, Pierre Morel wrote:

[..]

In the next version with entitlement being an enum it is right.

However, deleting this means that the default value for entitlement
depends on dedication.

If we have only low, medium, high and default for entitlement is medium.

If the user specifies the dedication true without specifying entitlement
we could force entitlement to high.

But we can not distinguish this from the user specifying dedication true
with a medium entitlement, which is wrong.

So three solutions:

1) We ignore what the user says if dedication is specified as true

2) We specify that both dedication and entitlement must be specified if
dedication is true

3) We set an impossible default to distinguish default from medium
entitlement


For me the solution 3 is the best one, it is more flexible for the user.

Solution 1 is obviously bad.

Solution 2 forces the user to specify entitlement high and only high if
it specifies dedication true.

AFAIU, you prefer the solution 2, forcing user to specify both
dedication and entitlement to suppress a default value in the enum.
Why is it bad to have a default value in the enum that we do not use to
specify that the value must be calculated?

Yes, I'd prefer solution 2. I don't like adapting the internal state where only
the three values make sense for the user interface.
It also keeps things simple and requires less code.
I also don't think it's a bad thing for the user, as it's not a thing done 
manually often.
I'm also not a fan of a value implicitly being changed even though it 
doesn't look
like it from the command.

However, what I really don't like is the additional state and naming it 
"horizontal",



No problem to use another name like "auto" as you propose later.



not so much the adjustment if dedication is switched to true without an 
entitlement given.
For the monitor command there is no problem, you currently have:



That is clear, the has_xxx does the job.

[..]



So you can just set it if (!has_entitlement).
There are also ways to set the value for cpus defined on the command line, e.g.:



Yes, thanks, I already said I find your proposition to use a 
DEFINE_PROP_CPUS390ENTITLEMENT good and will use it.





diff --git a/include/hw/qdev-properties-system.h 
b/include/hw/qdev-properties-system.h
index 0ac327ae60..41a605c5a7 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -22,6 +22,7 @@ extern const PropertyInfo qdev_prop_audiodev;
  extern const PropertyInfo qdev_prop_off_auto_pcibar;
  extern const PropertyInfo qdev_prop_pcie_link_speed;
  extern const PropertyInfo qdev_prop_pcie_link_width;
+extern const PropertyInfo qdev_prop_cpus390entitlement;
  
  #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)   \

  DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
@@ -73,5 +74,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
  #define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) \
  DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID)
  
+#define DEFINE_PROP_CPUS390ENTITLEMENT(_n, _s, _f) \

+DEFINE_PROP(_n, _s, _f, qdev_prop_cpus390entitlement, int)
+
  
  #endif

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 54541d2230..01308e0b94 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -135,7 +135,7 @@ struct CPUArchState {
  int32_t book_id;
  int32_t drawer_id;
  bool dedicated;
-uint8_t entitlement;/* Used only for vertical polarization */
+int entitlement;/* Used only for vertical polarization */



Isn't it better to use:

+    CpuS390Entitlement entitlement; /* Used only for vertical 
polarization */




  uint64_t cpuid;
  #endif
  
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c

index d42493f630..db5c3d4fe6 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -1143,3 +1143,14 @@ const PropertyInfo qdev_prop_uuid = {
  .set   = set_uuid,
  .set_default_value = set_default_uuid_auto,
  };
+
+/* --- s390x cpu topology entitlement --- */
+
+QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int));
+
+const PropertyInfo qdev_prop_cpus390entitlement = {
+.name = "CpuS390Entitlement",
+.enum_table = &CpuS390Entitlement_lookup,
+.get   = qdev_propinfo_get_enum,
+.set   = qdev_propinfo_set_enum,
+};
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index b8a292340c..1b3f5c61ae 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -199,8 +199,7 @@ static void s390_topology_cpu_default(S390CPU *cpu, Error 
**errp)
   * is not dedicated.
   * A dedicated CPU always receives a high entitlement.
   */
-if (env->entitlement >= S390_CPU_ENTITLEMENT__MAX ||
-e

Re: [PATCH] kvm: Merge kvm_check_extension() and kvm_vm_check_extension()

2023-04-25 Thread Jean-Philippe Brucker
On Mon, Apr 24, 2023 at 03:01:54PM +0200, Cornelia Huck wrote:
> > @@ -2480,6 +2471,7 @@ static int kvm_init(MachineState *ms)
> >  }
> >  
> >  s->vmfd = ret;
> > +s->check_extension_vm = kvm_check_extension(s, 
> > KVM_CAP_CHECK_EXTENSION_VM);
> 
> Hm, it's a bit strange to set s->check_extension_vm by calling a
> function that already checks for the value of
> s->check_extension_vm... would it be better to call kvm_ioctl() directly
> on this line?

Yes that's probably best. I'll use kvm_vm_ioctl() since the doc suggests
to check KVM_CAP_CHECK_EXTENSION_VM on the vm fd.

Thanks,
Jean

> 
> I think it would be good if some ppc folks could give this a look, but
> in general, it looks fine to me.
> 



git-format-patch useAutoBase (was Re: [PATCH v2] migration: Handle block ... )

2023-04-25 Thread Juan Quintela
Eric Blake  wrote:
> On Thu, Apr 20, 2023 at 12:41:25PM +0200, Juan Quintela wrote:
>> Eric Blake  wrote:
>
> ...lots of lines...
>
>> > ---
>> >  migration/migration.c | 5 ++---
>> >  1 file changed, 2 insertions(+), 3 deletions(-)
>
> ...describing a tiny change ;)
>
>> >
>> > diff --git a/migration/migration.c b/migration/migration.c
>> > index bda47891933..cb0d42c0610 100644
>> > --- a/migration/migration.c
>> > +++ b/migration/migration.c
>> > @@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s)
>> >  MIGRATION_STATUS_DEVICE);
>> >  }
>> >  if (ret >= 0) {
>> > +s->block_inactive = inactivate;
>> >  qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
>> >  ret = qemu_savevm_state_complete_precopy(s->to_dst_file, 
>> > false,
>> >   inactivate);
>> >  }
>> > -if (inactivate && ret >= 0) {
>> > -s->block_inactive = true;
>> > -}
>> >  }
>> >  qemu_mutex_unlock_iothread();
>> 
>> And I still have to look at the file to understand this "simple" patch.
>> (simple in size, not in what it means).
>
> Indeed - hence the long commit message!
>
>> 
>> I will add this to my queue, but if you are in the "mood", I would like
>> to remove the declaration of inactivate and change this to something like:
>> 
>>  if (ret >= 0) {
>>  /* Colo don't stop disks in normal operation */
>>  s->block_inactive = !migrate_colo_enabled();
>>  qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
>>  ret = qemu_savevm_state_complete_precopy(s->to_dst_file, 
>> false,
>>   s->block_inactive);
>>  }
>> 
>> Or something around that lines?
>
> Yes, that looks like a trivial refactoring that preserves the same
> semantics.
>
>> 
>> > @@ -3522,6 +3520,7 @@ fail_invalidate:
>> >  bdrv_activate_all(&local_err);
>> >  if (local_err) {
>> >  error_report_err(local_err);
>> > +s->block_inactive = true;
>> >  } else {
>> >  s->block_inactive = false;
>> >  }
>> > base-commit: 7dbd6f8a27e30fe14adb3d5869097cddf24038d6
>> 
>> Just wondering, what git magic creates this line?
>
> git send-email --base=COMMIT_ID
>
> or even 'git config format.useAutoBase whenAble' to try and automate
> the use of this.  (If my own git habits were cleaner, of always
> sticking patches in fresh branches, --base=auto is handy; but in
> practice, I tend to send one-off patches like this in the middle of
> 'git rebase' of a larger series, at which point I'm not on a clean
> branch where --base=auto works, so I end up having to manually specify
> one at the command line.  Either way, including the base-commit: info
> can be very informative for applying a patch at the branch point then
> rebasing it locally, when attempting to apply the patch sent through
> email hits merge conflicts when applying it directly to changes on
> master in the meantime; I believe 'git am -3' is even able to exploit
> the comment when present to make smarter decisions about which parent
> commit it tries for doing 3-way patch resolution)

Thanks a lot.

It does the right thing for "trivial" stuff, i.e. when I sent a single
patch or a series against qemu/master.

I am not completely sure that it does the right thing when I send a
series on top of my previous pull request.

097387873b (HEAD -> atomic_counters) migration: Make dirty_bytes_last_sync 
atomic
3f699a13b2 migration: Make dirty_pages_rate atomic
276a275895 (next) migration: Move qmp_migrate_set_parameters() to options.c
ab13b47801 migration: Move migrate_use_tls() to options.c
ecf5c18eac MAINTAINERS: Add Leonardo and Peter as reviewers
6e5dda696c migration: Disable postcopy + multifd migration
ac5f7bf8e2 (qemu/staging, qemu/master, qemu/HEAD) Merge tag 
'migration-20230424-pull-request' of https://gitlab.com/juan.quintela/qemu into 
staging

where the branches are:

qemu/master: obvious
next: whatever is going to be on next migration PULL request, I will
  rename this to migration-$date and send this series to the list
  1st. I.e. assume they are on list but still not on master.
HEAD/atomic_counters: This is the series that I am sending

I have done:


>>  ret = qemu_savevm_state_complete_precopy(s->to_dst_file, 
>> false,
>>   s->block_inactive);
>>  }
>> 
>> Or something along those lines?
>
> Yes, that looks like a trivial refactoring that preserves the same
> semantics.
>
>> 
>> > @@ -3522,6 +3520,7 @@ fail_invalidate:
>> >  bdrv_activate_all(&local_err);
>> >  if (local_err) {
>> >  error_report_err(local_err);
>> > +s->block_inactive

Re: [PATCH] aspeed/hace: Initialize g_autofree pointer

2023-04-25 Thread Thomas Huth

On 21/04/2023 15.15, Cédric Le Goater wrote:

As mentioned in docs/devel/style.rst "Automatic memory deallocation":

* Variables declared with g_auto* MUST always be initialized,
   otherwise the cleanup function will use uninitialized stack memory

This prevents QEMU from dumping core when running the "hash test" command
under Zephyr.

Cc: Steven Lee 
Cc: Joel Stanley 
Fixes: c5475b3f9a ("hw: Model ASPEED's Hash and Crypto Engine")
Signed-off-by: Cédric Le Goater 
---
  hw/misc/aspeed_hace.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index 12a761f1f5..b07506ec04 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -189,7 +189,7 @@ static void do_hash_operation(AspeedHACEState *s, int algo, 
bool sg_mode,
bool acc_mode)
  {
  struct iovec iov[ASPEED_HACE_MAX_SG];
-g_autofree uint8_t *digest_buf;
+g_autofree uint8_t *digest_buf = NULL;


We maybe need a checkpatch.pl rule to catch such bugs...

Reviewed-by: Thomas Huth 




Re: [PATCH v19 02/21] s390x/cpu topology: add topology entries on CPU hotplug

2023-04-25 Thread Nina Schoetterl-Glausch
On Tue, 2023-04-25 at 10:45 +0200, Pierre Morel wrote:
> On 4/24/23 17:32, Nina Schoetterl-Glausch wrote:
> > On Fri, 2023-04-21 at 12:20 +0200, Pierre Morel wrote:
> > > > On 4/20/23 10:59, Nina Schoetterl-Glausch wrote:
> > > > > > On Mon, 2023-04-03 at 18:28 +0200, Pierre Morel wrote:
> [..]
> > > > In the next version with entitlement being an enum it is right.
> > > > 
> > > > However, deleting this means that the default value for entitlement
> > > > depends on dedication.
> > > > 
> > > > If we have only low, medium, high and default for entitlement is medium.
> > > > 
> > > > If the user specifies the dedication true without specifying entitlement
> > > > we could force entitlement to high.
> > > > 
> > > > But we can not distinguish this from the user specifying dedication true
> > > > with a medium entitlement, which is wrong.
> > > > 
> > > > So there are three solutions:
> > > > 
> > > > 1) We ignore what the user says if dedication is specified as true
> > > > 
> > > > 2) We specify that both dedication and entitlement must be specified if
> > > > dedication is true
> > > > 
> > > > 3) We set an impossible default to distinguish default from medium
> > > > entitlement
> > > > 
> > > > 
> > > > For me the solution 3 is the best one, it is more flexible for the user.
> > > > 
> > > > Solution 1 is obviously bad.
> > > > 
> > > > Solution 2 forces the user to specify entitlement high and only high if
> > > > it specifies dedication true.
> > > > 
> > > > AFAIU, you prefer the solution 2, forcing user to specify both
> > > > dedication and entitlement to suppress a default value in the enum.
> > > > Why is it bad to have a default value in the enum that we do not use to
> > > > specify that the value must be calculated?
> > Yes, I'd prefer solution 2. I don't like adapting the internal state where 
> > only
> > the three values make sense for the user interface.
> > It also keeps things simple and requires less code.
> > I also don't think it's a bad thing for the user, as it's not a thing done 
> > manually often.
> > I'm also not a fan of a value being implicitly changed even though it 
> > doesn't look
> > like it from the command.
> > 
> > However, what I really don't like is the additional state and naming it 
> > "horizontal",
> 
> 
> No problem to use another name like "auto" as you propose later.
> 
> 
> > not so much the adjustment if dedication is switched to true without an 
> > entitlement given.
> > For the monitor command there is no problem, you currently have:
> 
> 
> That is clear, the has_xxx does the job.
> 
> [..]
> 
> 
> > So you can just set it if (!has_entitlement).
> > There are also ways to set the value for cpus defined on the command line, 
> > e.g.:
> 
> 
> Yes, thanks, I already said I find your proposition to use a 
> DEFINE_PROP_CPUS390ENTITLEMENT good and will use it.
> 
> 
> > 
> > diff --git a/include/hw/qdev-properties-system.h 
> > b/include/hw/qdev-properties-system.h
> > index 0ac327ae60..41a605c5a7 100644
> > --- a/include/hw/qdev-properties-system.h
> > +++ b/include/hw/qdev-properties-system.h
> > @@ -22,6 +22,7 @@ extern const PropertyInfo qdev_prop_audiodev;
> >   extern const PropertyInfo qdev_prop_off_auto_pcibar;
> >   extern const PropertyInfo qdev_prop_pcie_link_speed;
> >   extern const PropertyInfo qdev_prop_pcie_link_width;
> > +extern const PropertyInfo qdev_prop_cpus390entitlement;
> >   
> >   #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)   \
> >   DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
> > @@ -73,5 +74,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
> >   #define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) \
> >   DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID)
> >   
> > +#define DEFINE_PROP_CPUS390ENTITLEMENT(_n, _s, _f) \
> > +DEFINE_PROP(_n, _s, _f, qdev_prop_cpus390entitlement, int)
> > +
> >   
> >   #endif
> > diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> > index 54541d2230..01308e0b94 100644
> > --- a/target/s390x/cpu.h
> > +++ b/target/s390x/cpu.h
> > @@ -135,7 +135,7 @@ struct CPUArchState {
> >   int32_t book_id;
> >   int32_t drawer_id;
> >   bool dedicated;
> > -uint8_t entitlement;/* Used only for vertical polarization */
> > +int entitlement;/* Used only for vertical polarization */
> 
> 
> Isn't it better to use:
> 
> +    CpuS390Entitlement entitlement; /* Used only for vertical 
> polarization */
> 
> 
> >   uint64_t cpuid;
> >   #endif
> >   
> > diff --git a/hw/core/qdev-properties-system.c 
> > b/hw/core/qdev-properties-system.c
> > index d42493f630..db5c3d4fe6 100644
> > --- a/hw/core/qdev-properties-system.c
> > +++ b/hw/core/qdev-properties-system.c
> > @@ -1143,3 +1143,14 @@ const PropertyInfo qdev_prop_uuid = {
> >   .set   = set_uuid,
> >   .set_default_value = set_default_uuid_auto,
> >   };
> > +
> > +/* --- s390x cpu topology entitlement --- */
> > +
> > +QEMU_BUILD_BUG_ON(sizeof(Cpu

Re: [PATCH v8 0/8] memory: prevent dma-reentracy issues

2023-04-25 Thread Thomas Huth

On 21/04/2023 16.27, Alexander Bulekov wrote:

v7 -> v8:
 - Disable reentrancy checks for bcm2835_property's iomem (Patch 7)
 - Cache DeviceState* in the MemoryRegion to avoid dynamic cast for
   each MemoryRegion access. (Patch 1)
 - Make re-entrancy fatal for debug-builds (Patch 8)


 Hi Alexander,

I just put your series into a run with the gitlab-CI and it seems this now 
introduced another failure in one of the avocado tests:


 https://gitlab.com/thuth/qemu/-/jobs/4171448248#L318

The "IbmPrep40pMachine.test_openbios_and_netbsd" test is failing now.

You can reproduce it manually quite easily:

 wget 
https://archive.netbsd.org/pub/NetBSD-archive/NetBSD-7.1.2/iso/NetBSD-7.1.2-prep.iso


 ./qemu-system-ppc -nographic -M 40p -boot d -cdrom NetBSD-7.1.2-prep.iso

Without your patches, this prints out "NetBSD/prep BOOT, Revision 1.9" in 
the console, but with your patches, the message does not appear anymore.


Could you please have a look?

 Thanks,
  Thomas




Re: [PATCH] pci: make ROM memory resizable

2023-04-25 Thread Laszlo Ersek
On 4/25/23 09:26, Michael S. Tsirkin wrote:
> On Mon, Apr 24, 2023 at 11:36:47PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On migration, on target we load local ROM file. But actual ROM content
>> migrates through migration channel. Original ROM content from local
>> file doesn't matter. But when size mismatch - we have an error like
>>
>>  Size mismatch: :00:03.0/virtio-net-pci.rom: 0x4 != 0x8: Invalid 
>> argument
> 
> 
> Oh, this is this old bug then:
> https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1713490
> 
> People seem to be "fixing" this by downgrading ROMs.
> 
> Actually, I think the fix is different: we need to build
> versions of ROMs for old machine types that can fit
> in the old BAR size.

My working memory has been that we'd build the ROM, such as SeaBIOS, in
multiple configs (resulting in different sizes -- a smaller size for the
old machine type, and a larger size for the new machine type). The new
(large) build would stay "bios.bin", and the old (small) binary would
get a new name. Then the old machine type's compat knobs would include a
setting for loading the old (= small) ROM. This would cause the
destination QEMU to size the ROM area as "small", which would
accommodate the incoming stream just fine.

However, my memory has not been (entirely) correct. Commit bcf2b7d2af7c
("pc: switch 2.0 machine types to large seabios binary", 2013-12-06)
indicates that the *new* (large) binary gets the new name. That confuses
me; it does not seem consistent with how compat knobs usually tie down
old machine types.

It does not change the mechanism, I think, but naming the ROM files (on
host distros) gets more complicated, perhaps. I think Gerd will know
more history.

Laszlo

> 
> Gerd, Laszlo what's your take on all this?
> 
> 
> 
>> Let's just allow resizing of ROM memory. This way migration does not
>> rely on the local ROM file on the target node, which is loaded by default
>> but is not actually needed.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
>>  hw/pci/pci.c  |  7 +--
>>  include/exec/memory.h | 26 ++
>>  softmmu/memory.c  | 39 +++
>>  3 files changed, 70 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index def5000e7b..72ee8f6aea 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -59,6 +59,8 @@
>>  # define PCI_DPRINTF(format, ...)   do { } while (0)
>>  #endif
>>  
>> +#define MAX_ROM_SIZE (2 * GiB)
>> +
>>  bool pci_available = true;
>>  
>>  static char *pcibus_get_dev_path(DeviceState *dev);
>> @@ -2341,7 +2343,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
>> is_default_rom,
>>  error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
>>  g_free(path);
>>  return;
>> -} else if (size > 2 * GiB) {
>> +} else if (size > MAX_ROM_SIZE) {
>>  error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 
>> GiB)",
>> pdev->romfile);
>>  g_free(path);
>> @@ -2366,7 +2368,8 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
>> is_default_rom,
>>  snprintf(name, sizeof(name), "%s.rom", 
>> object_get_typename(OBJECT(pdev)));
>>  }
>>  pdev->has_rom = true;
>> -memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, 
>> &error_fatal);
>> +memory_region_init_rom_resizable(&pdev->rom, OBJECT(pdev), name,
>> + pdev->romsize, MAX_ROM_SIZE, 
>> &error_fatal);
>>  ptr = memory_region_get_ram_ptr(&pdev->rom);
>>  if (load_image_size(path, ptr, size) < 0) {
>>  error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index 15ade918ba..ed1e5d9126 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -1453,6 +1453,19 @@ void memory_region_init_rom_nomigrate(MemoryRegion 
>> *mr,
>>uint64_t size,
>>Error **errp);
>>  
>> +/*
>> + * memory_region_init_rom_nomigrate_resizable: same as
>> + * memory_region_init_rom_nomigrate(), but initialize resizable memory 
>> region.
>> + *
>> + * @max_size maximum allowed size.
>> + */
>> +void memory_region_init_rom_nomigrate_resizable(MemoryRegion *mr,
>> +struct Object *owner,
>> +const char *name,
>> +uint64_t size,
>> +uint64_t max_size,
>> +Error **errp);
>> +
>>  /**
>>   * memory_region_init_rom_device_nomigrate:  Initialize a ROM memory region.
>>   * Writes are handled via callbacks.
>> @@ -1562,6 +1575,19 @@ void memory_region_init_rom(MemoryRegion *mr,
>>  uint64_t size,

[PATCH v2] hw/riscv: virt: Assume M-mode FW in pflash0 only when "-bios none"

2023-04-25 Thread Sunil V L
Currently, virt machine supports two pflash instances each with
32MB size. However, the first pflash is always assumed to
contain M-mode firmware and reset vector is set to this if
enabled. Hence, for S-mode payloads like EDK2, only one pflash
instance is available for use. This means both code and NV variables
of EDK2 will need to use the same pflash.

The OS distros keep the EDK2 FW code as readonly. When non-volatile
variables also need to share the same pflash, it is not possible
to keep it as readonly since variables need write access.

To resolve this issue, the code and NV variables need to be separated.
But in that case we need an extra flash. Hence, modify the convention
such that pflash0 will contain the M-mode FW only when "-bios none"
option is used. Otherwise, pflash0 will contain the S-mode payload FW.
This enables both pflash instances available for EDK2 use.

Example usage:
1) pflash0 containing M-mode FW
qemu-system-riscv64 -bios none -pflash  -machine virt
or
qemu-system-riscv64 -bios none \
-drive file=,if=pflash,format=raw,unit=0 -machine virt

2) pflash0 containing S-mode payload like EDK2
qemu-system-riscv64 -pflash  -pflash  -machine  
virt
or
qemu-system-riscv64 -bios  \
-pflash  \
-pflash  \
-machine  virt
or
qemu-system-riscv64 -bios  \
-drive file=,if=pflash,format=raw,unit=0 \
-drive file=,if=pflash,format=raw,unit=1,readonly=on  \
-machine virt

Signed-off-by: Sunil V L 
Reported-by: Heinrich Schuchardt 
---
The issue is reported at
https://salsa.debian.org/qemu-team/edk2/-/commit/c345655a0149f64c5020bfc1e53c619ce60587f6

The patch is based on Alistair's riscv-to-apply.next branch.

Changes since v1:
1) Simplified the fix such that it doesn't break current EDK2.

 hw/riscv/virt.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4e3efbee16..ca445d3d02 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1296,10 +1296,11 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
 kernel_entry = 0;
 }
 
-if (drive_get(IF_PFLASH, 0, 0)) {
+if (drive_get(IF_PFLASH, 0, 0) &&
+ machine->firmware && !strcmp(machine->firmware, "none")) {
 /*
- * Pflash was supplied, let's overwrite the address we jump to after
- * reset to the base of the flash.
+ * Pflash0 was supplied with "-bios none", let's overwrite the address
+ * we jump to after reset to the base of the flash.
  */
 start_addr = virt_memmap[VIRT_FLASH].base;
 }
-- 
2.34.1




  1   2   3   4   5   >