On 2024/8/14 17:39, Richard Henderson wrote:
On 8/13/24 21:34, LIU Zhiwei wrote:
From: TANG Tiancheng <tangtiancheng....@alibaba-inc.com>

1. Address immediate value constraints in the RISC-V Vector Extension 1.0 for
comparison instructions.

2. Extend comparison results from mask registers to SEW-width elements,
   following the recommendations in the RISC-V spec, Volume I (Version 20240411).

This aligns with TCG's cmp_vec behavior by expanding compare results to
full element width: all 1s for true, all 0s for false.
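A minimal sketch (editor's illustration, not part of the patch) of the
per-element result cmp_vec requires, with int64_t standing in for one
SEW-width element and EQ as the example predicate:

#include <stddef.h>
#include <stdint.h>

/* dst[i] becomes all 1s when the comparison holds, all 0s otherwise. */
static void cmp_vec_eq_semantics(int64_t *dst, const int64_t *a,
                                 const int64_t *b, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        dst[i] = (a[i] == b[i]) ? -1 : 0;
    }
}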

Signed-off-by: TANG Tiancheng <tangtiancheng....@alibaba-inc.com>
Reviewed-by: Liu Zhiwei <zhiwei_...@linux.alibaba.com>
---
  tcg/riscv/tcg-target-con-set.h |   2 +
  tcg/riscv/tcg-target-con-str.h |   1 +
  tcg/riscv/tcg-target.c.inc     | 188 +++++++++++++++++++++++++++++++++
  tcg/riscv/tcg-target.opc.h     |   3 +
  4 files changed, 194 insertions(+)

diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index 8a0de18257..23b391dd07 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -22,5 +22,7 @@ C_N1_I2(r, r, rM)
  C_O1_I4(r, r, rI, rM, rM)
  C_O2_I4(r, r, rZ, rZ, rM, rM)
  C_O0_I2(v, r)
+C_O0_I2(v, vK)
  C_O1_I1(v, r)
  C_O1_I2(v, v, v)
+C_O1_I2(v, v, vK)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
index b2b3211bcb..0aaad7b753 100644
--- a/tcg/riscv/tcg-target-con-str.h
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -17,6 +17,7 @@ REGS('v', ALL_VECTOR_REGS)
   */
  CONST('I', TCG_CT_CONST_S12)
  CONST('J', TCG_CT_CONST_J12)
+CONST('K', TCG_CT_CONST_S5)
  CONST('N', TCG_CT_CONST_N12)
  CONST('M', TCG_CT_CONST_M12)
  CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 650b5eff1a..3f1e215e90 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -113,6 +113,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
  #define TCG_CT_CONST_N12   0x400
  #define TCG_CT_CONST_M12   0x800
  #define TCG_CT_CONST_J12  0x1000
+#define TCG_CT_CONST_S5   0x2000

  #define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
  #define ALL_VECTOR_REGS    MAKE_64BIT_MASK(33, 31)
@@ -160,6 +161,13 @@ static bool tcg_target_const_match(int64_t val, int ct,
      if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
          return 1;
      }
+    /*
+     * Sign extended from 5 bits: [-0x10, 0x0f].
+     * Used for vector-immediate.
+     */
+    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
+        return 1;
+    }
      return 0;
  }

@@ -289,12 +297,39 @@ typedef enum {
      OPC_VSE32_V = 0x6027 | V_SUMOP,
      OPC_VSE64_V = 0x7027 | V_SUMOP,

+    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
+    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,
+    OPC_VMNAND_MM = 0x74000057 | V_OPMVV,
+
      OPC_VADD_VV = 0x57 | V_OPIVV,
      OPC_VSUB_VV = 0x8000057 | V_OPIVV,
      OPC_VAND_VV = 0x24000057 | V_OPIVV,
      OPC_VOR_VV = 0x28000057 | V_OPIVV,
      OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
  +    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
+    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
+    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
+    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
+    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
+    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,
+
+    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
+    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
+    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
+    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
+    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
+    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
+    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
+    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,
+
+    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
+    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
+    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
+    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
+    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
+    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,
+
      OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
      OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
      OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
@@ -575,6 +610,15 @@ static void tcg_out_opc_vec_config(TCGContext *s, RISCVInsn opc,
  #define tcg_out_opc_vi(s, opc, vd, vs2, imm, vm) \
      tcg_out_opc_reg_vec_i(s, opc, vd, imm, vs2, vm);

+#define tcg_out_opc_vim_mask(s, opc, vd, vs2, imm) \
+    tcg_out_opc_reg_vec_i(s, opc, vd, imm, vs2, false);
+
+#define tcg_out_opc_vvm_mask(s, opc, vd, vs2, vs1) \
+    tcg_out_opc_reg_vec(s, opc, vd, vs1, vs2, false);
+
+#define tcg_out_opc_mvv(s, opc, vd, vs2, vs1, vm) \
+    tcg_out_opc_reg_vec(s, opc, vd, vs1, vs2, vm);
+
  #define tcg_out_opc_vconfig(s, opc, rd, avl, vtypei) \
      tcg_out_opc_vec_config(s, opc, rd, avl, vtypei);

@@ -1037,6 +1081,22 @@ static const struct {
      [TCG_COND_GTU] = { OPC_BLTU, true  }
  };

+static const struct {
+    RISCVInsn opc;
+    bool swap;
+} tcg_cmpcond_to_rvv_vv[] = {
+    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
+    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
+    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
+    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
+    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
+    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
+    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
+    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
+    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
+    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
+};
+
  static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                             TCGReg arg2, TCGLabel *l)
  {
@@ -1054,6 +1114,79 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
      tcg_out_opc_branch(s, op, arg1, arg2, 0);
  }

+static const struct {
+    RISCVInsn op;
+    bool expand;

invert is probably a better name.

OK.

Why are these tables so far apart?
+static void tcg_out_cmp_vec_vx(TCGContext *s, TCGCond cond, TCGReg arg1,
+                               tcg_target_long arg2)
+{
+    RISCVInsn op;
+
+    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_cmpcond_to_rvv_vx));
+    op = tcg_cmpcond_to_rvv_vx[cond].op;
+    tcg_debug_assert(op != 0);
+
+    tcg_out_opc_vx(s, op, TCG_REG_V0, arg1, arg2, true);
+    if (tcg_cmpcond_to_rvv_vx[cond].expand) {
+        tcg_out_opc_mvv(s, OPC_VMNAND_MM, TCG_REG_V0, TCG_REG_V0,
+                        TCG_REG_V0, false);
+    }
+}

I think you'll be better served by handling the invert during expand, because you can always swap the sense of the predicate in the user.
OK. We have implemented this IR in the expand path, where we call rvv_cmp_vv/rvv_cmp_vx/rvv_cmp_vi according to the type and value of arg2.

Compare tcg/i386 expand_vec_cmp_noinv.
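For illustration only, a sketch of that dispatch with plain stand-in types
rather than QEMU's; the 5-bit range matches the new 'K'/TCG_CT_CONST_S5
constraint above, and the enum, function and string names are invented for
the example:

#include <stdint.h>

/* Stand-in for how arg2 is supplied; not a QEMU type. */
typedef enum { ARG2_VECTOR, ARG2_SCALAR, ARG2_CONST } Arg2Kind;

/* Pick the compare form from the type and value of arg2. */
static const char *pick_rvv_cmp_form(Arg2Kind kind, int64_t value)
{
    switch (kind) {
    case ARG2_CONST:
        /* vmsXX.vi takes a 5-bit signed immediate: [-0x10, 0x0f]. */
        if (value >= -0x10 && value <= 0x0f) {
            return "rvv_cmp_vi";
        }
        /* Out-of-range constants need one of the register forms;
           which one is a detail the thread does not spell out. */
        return "rvv_cmp_vx or rvv_cmp_vv";
    case ARG2_SCALAR:
        return "rvv_cmp_vx";
    default:
        return "rvv_cmp_vv";
    }
}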

+            tcg_gen_mov_vec(v0, tcg_constant_vec_matching(v0, vece, 0));

You don't need to copy to v0; just use the tcg_constant_vec directly as

+ vec_gen_3(INDEX_op_rvv_merge_vec, type, vece,
+                      tcgv_vec_arg(v0), tcgv_vec_arg(v0),
+                      tcgv_i64_arg(tcg_constant_i64(-1)));

the first source operand.

You can swap 0 and -1 if the comparison instruction requires the predicate to be inverted.

OK.
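For illustration, a sketch of that shape (not the actual patch code, written
as if it lived in tcg/riscv/tcg-target.c.inc): INDEX_op_rvv_merge_vec and the
vec_gen_3/tcg_constant_* calls come from the discussion above, while
emit_rvv_cmp_mask is an assumed helper standing in for whatever emits the
mask-producing compare and reports whether its predicate sense was inverted.

/* Sketch only; emit_rvv_cmp_mask() is assumed, not from the patch. */
static void expand_cmp_merge_sketch(TCGType type, unsigned vece,
                                    TCGv_vec d, TCGv_vec a, TCGv_vec b,
                                    TCGCond cond)
{
    /* Emit vmsXX.* into the mask register; returns true when only the
       opposite predicate is available in hardware. */
    bool invert = emit_rvv_cmp_mask(type, vece, a, b, cond);

    /* d = mask ? -1 : 0, with 0 and -1 swapped when inverted; the constant
       vector is used directly as the merge source, no copy to d first. */
    vec_gen_3(INDEX_op_rvv_merge_vec, type, vece,
              tcgv_vec_arg(d),
              tcgv_vec_arg(tcg_constant_vec_matching(d, vece,
                                                     invert ? -1 : 0)),
              tcgv_i64_arg(tcg_constant_i64(invert ? 0 : -1)));
}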

Thanks,
Zhiwei


r~
