From: TANG Tiancheng <tangtiancheng....@alibaba-inc.com>

In RISC-V, vector operations require initial configuration using
the vset{i}vl{i} instruction.

This instruction:
  1. Sets the vector length (vl) in bytes
  2. Configures the vtype register, which includes:
    SEW (Single Element Width)
    LMUL (vector register group multiplier)
    Other vector operation parameters

This configuration is crucial for defining subsequent vector
operation behavior. To optimize performance, the configuration
process is managed dynamically:
  1. Reconfiguration using vset{i}vl{i} is necessary when SEW
     or vector register group width changes.
  2. The vset instruction can be omitted when configuration
     remains unchanged.

This optimization is only effective within a single TB.
Each TB requires reconfiguration at its start, as the current
state cannot be obtained from hardware.

Signed-off-by: TANG Tiancheng <tangtiancheng....@alibaba-inc.com>
Signed-off-by: Weiwei Li <liwei1...@gmail.com>
Reviewed-by: Liu Zhiwei <zhiwei_...@linux.alibaba.com>
---
 include/tcg/tcg.h          |   3 +
 tcg/riscv/tcg-target.c.inc | 128 +++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 21d5884741..267e6ff95c 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -566,6 +566,9 @@ struct TCGContext {
 
     /* Exit to translator on overflow. */
     sigjmp_buf jmp_trans;
+
+    /* For host-specific values. */
+    int riscv_host_vtype;
 };
 
 static inline bool temp_readonly(TCGTemp *ts)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index c3f018ff0c..df96d350a3 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -165,6 +165,26 @@ static bool tcg_target_const_match(int64_t val, int ct,
  * RISC-V Base ISA opcodes (IM)
  */
 
+#define V_OPIVV (0x0 << 12)
+#define V_OPFVV (0x1 << 12)
+#define V_OPMVV (0x2 << 12)
+#define V_OPIVI (0x3 << 12)
+#define V_OPIVX (0x4 << 12)
+#define V_OPFVF (0x5 << 12)
+#define V_OPMVX (0x6 << 12)
+#define V_OPCFG (0x7 << 12)
+
+typedef enum {
+    VLMUL_M1 = 0, /* LMUL=1 */
+    VLMUL_M2,     /* LMUL=2 */
+    VLMUL_M4,     /* LMUL=4 */
+    VLMUL_M8,     /* LMUL=8 */
+    VLMUL_RESERVED,
+    VLMUL_MF8,    /* LMUL=1/8 */
+    VLMUL_MF4,    /* LMUL=1/4 */
+    VLMUL_MF2,    /* LMUL=1/2 */
+} RISCVVlmul;
+
 typedef enum {
     OPC_ADD = 0x33,
     OPC_ADDI = 0x13,
@@ -260,6 +280,11 @@ typedef enum {
     /* Zicond: integer conditional operations */
     OPC_CZERO_EQZ = 0x0e005033,
     OPC_CZERO_NEZ = 0x0e007033,
+
+    /* V: Vector extension 1.0 */
+    OPC_VSETVLI  = 0x57 | V_OPCFG,
+    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
+    OPC_VSETVL   = 0x80000057 | V_OPCFG,
 } RISCVInsn;
 
 /*
@@ -370,6 +395,26 @@ static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
            (s2 & 0x1f) << 20 | (vm << 25);
 }
 
+/* Vector Configuration */
+
+static uint32_t encode_vtype(bool vta, bool vma,
+                            MemOp vsew, RISCVVlmul vlmul)
+{
+    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
+}
+
+static int32_t encode_vcfg(RISCVInsn opc, TCGReg rd,
+                           TCGArg rs1, uint32_t vtype)
+{
+    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
+}
+
+static int32_t encode_vcfgi(RISCVInsn opc, TCGReg rd,
+                            uint32_t uimm, uint32_t vtype)
+{
+    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 
20;
+}
+
 /*
  * RISC-V instruction emitters
  */
@@ -519,6 +564,88 @@ static void tcg_out_opc_ldst_vec(TCGContext *s, RISCVInsn 
opc, TCGReg data,
     tcg_out32(s, encode_v(opc, data, rs1, 0, vm));
 }
 
+static void tcg_out_opc_vec_config(TCGContext *s, RISCVInsn opc,
+                                  TCGReg rd, TCGReg rs1, int32_t vtype)
+{
+    tcg_out32(s, encode_vcfg(opc, rd, rs1, vtype));
+}
+
+static void tcg_out_opc_vec_configi(TCGContext *s, RISCVInsn opc,
+                                  TCGReg rd, uint32_t avl, int32_t vtype)
+{
+    tcg_out32(s, encode_vcfgi(opc, rd, avl, vtype));
+}
+
+static void tcg_out_vset(TCGContext *s, uint32_t avl, int vtype)
+{
+    if (avl < 32) {
+        vtype = sextreg(vtype, 0, 10);
+        tcg_out_opc_vec_configi(s, OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
+    } else {
+        vtype = sextreg(vtype, 0, 11);
+        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
+        tcg_out_opc_vec_config(s, OPC_VSETVLI, TCG_REG_ZERO,
+                               TCG_REG_TMP0, vtype);
+    }
+}
+
+/* LMUL_MAX = 8, vlmax = vlen / sew * LMUL_MAX. */
+static unsigned get_vlmax(MemOp vsew)
+{
+    return riscv_vlen / (8 << vsew) * 8;
+}
+
+static unsigned get_vec_type_bytes(TCGType type)
+{
+    tcg_debug_assert(type >= TCG_TYPE_V64);
+    return 8 << (type - TCG_TYPE_V64);
+}
+
+static RISCVVlmul calc_vlmul(MemOp vsew, unsigned oprsz)
+{
+    if (oprsz > riscv_vlenb) {
+        return ctzl(oprsz / riscv_vlenb);
+    } else {
+        if (vsew < MO_64) {
+            switch (riscv_vlenb / oprsz) {
+            case 2:
+                return VLMUL_MF2;
+            case 4:
+                return VLMUL_MF4;
+            case 8:
+                return VLMUL_MF8;
+            default:
+                break;
+            }
+        }
+    }
+    return VLMUL_M1;
+}
+
+static void tcg_target_set_vec_config(TCGContext *s, TCGType type,
+                                      MemOp vsew)
+{
+    unsigned oprsz, avl;
+    int vtype;
+    RISCVVlmul vlmul;
+
+    tcg_debug_assert(vsew <= MO_64);
+
+    oprsz = get_vec_type_bytes(type);
+    avl = oprsz / (1 << vsew);
+    vlmul = calc_vlmul(vsew, oprsz);
+
+    tcg_debug_assert(avl <= get_vlmax(vsew));
+    tcg_debug_assert(vlmul <= VLMUL_MF2);
+
+    vtype = encode_vtype(false, false, vsew, vlmul);
+
+    if (vtype != s->riscv_host_vtype) {
+        s->riscv_host_vtype = vtype;
+        tcg_out_vset(s, avl, vtype);
+    }
+}
+
 /*
  * TCG intrinsics
  */
@@ -2167,6 +2294,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_out_tb_start(TCGContext *s)
 {
+    s->riscv_host_vtype = -1;
     /* nothing to do */
 }
 
-- 
2.43.0


Reply via email to