On 8/28/23 08:19, Jiajie Chen wrote:
Signed-off-by: Jiajie Chen <c...@jia.je>
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc     | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
@@ -1624,6 +1624,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     TCGReg temp = TCG_REG_TMP0;
     int32_t offset;
 
+    static const LoongArchInsn cmp_vec_insn[16][4] = {
+        [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
+        [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
+        [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
+        [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
+        [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
+    };
+    LoongArchInsn insn;
+
     a0 = args[0];
     a1 = args[1];
     a2 = args[2];
@@ -1656,6 +1665,21 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         tcg_out_opc_vld(s, a0, base, offset);
         break;
+    case INDEX_op_cmp_vec:
+        {
+            TCGCond cond = args[3];
+            insn = cmp_vec_insn[cond][vece];
+            if (insn == 0) {
+                /* No GT/GE(U) encodings: swap operands, mirror the cond. */
+                TCGArg t;
+                t = a1, a1 = a2, a2 = t;
+                cond = tcg_swap_cond(cond);
+                insn = cmp_vec_insn[cond][vece];
+                tcg_debug_assert(insn != 0);
+            }
+            tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
+        }
+        break;
     case INDEX_op_dupm_vec:
         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
         break;
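For reference, the operand swap in the cmp_vec case relies on tcg_swap_cond() from tcg/tcg-cond.h mirroring the comparison, which is why only the EQ/LE(U)/LT(U) rows of cmp_vec_insn[] carry real encodings:

    /* Identities behind the operand swap (gloss, not part of the patch):
     *   a >  b   <=>  b <  a     tcg_swap_cond(TCG_COND_GT)  == TCG_COND_LT
     *   a >= b   <=>  b <= a     tcg_swap_cond(TCG_COND_GE)  == TCG_COND_LE
     *   a >u b   <=>  b <u a     tcg_swap_cond(TCG_COND_GTU) == TCG_COND_LTU
     *   a >=u b  <=>  b <=u a    tcg_swap_cond(TCG_COND_GEU) == TCG_COND_LEU
     */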
@@ -1671,6 +1695,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_st_vec:
     case INDEX_op_dup_vec:
     case INDEX_op_dupm_vec:
+    case INDEX_op_cmp_vec:
         return 1;
     default:
         return 0;
@@ -1832,6 +1857,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_st_vec:
         return C_O0_I2(w, r);
+    case INDEX_op_cmp_vec:
+        return C_O1_I2(w, w, w);
Further improvement can be done with the VS*I forms (vseqi/vslei/vslti) and their 5-bit immediates. You can't swap operands for GT/GE when the second operand is an immediate, but you can invert the condition, e.g. GT -> LE, then invert the result using VNOR.
Comparing vs zero is especially common...
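Something along these lines could slot into the INDEX_op_cmp_vec case above (untested sketch, reusing cond/insn/a0/a1 from that context; the vseqi/vslei/vslti opcode names and the encode_vdvjsk5_insn/encode_vdvjuk5_insn/tcg_out_opc_vnor_v helpers are assumed from the generated LSX encodings, and the third operand would also need a constraint that accepts such constants):

    /* Beside cmp_vec_insn[]: the immediate forms, sketch only. */
    static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
        [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
        [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
        [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
        [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
        [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
    };

    if (const_args[2]) {
        int64_t value = sextract64(a2, 0, 8 << vece);
        TCGCond icond = cond;
        bool invert = false;

        if (cmp_vec_imm_insn[icond][vece] == 0) {
            /* GT/GE(U) have no immediate form and the constant must
             * stay in the second source slot, so the operands cannot
             * be swapped: invert the condition (GT -> LE, GE -> LT)
             * and invert the resulting mask below. */
            icond = tcg_invert_cond(cond);
            invert = true;
        }
        if (is_unsigned_cond(icond)
            ? (value >= 0 && value <= 0x1f)       /* ui5 */
            : (value >= -0x10 && value <= 0xf)) { /* si5 */
            insn = cmp_vec_imm_insn[icond][vece];
            tcg_out32(s, is_unsigned_cond(icond)
                         ? encode_vdvjuk5_insn(insn, a0, a1, value)
                         : encode_vdvjsk5_insn(insn, a0, a1, value));
            if (invert) {
                tcg_out_opc_vnor_v(s, a0, a0, a0);   /* a0 = ~a0 */
            }
            break;
        }
        /* Out-of-range constant: materialize it into a temp vector
         * register and fall through to the vseq/vsle/vslt path. */
    }

Zero fits either range, so the compare-against-zero case always takes the immediate form.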
r~