On 2024/10/8 00:24, Richard Henderson wrote:
On 10/6/24 19:56, LIU Zhiwei wrote:
+static void probe_frac_lmul(void)
+{
+    unsigned long vlmax[3];
+
+    for (int i = MO_8; i <= MO_64; ++i) {
+        switch (i) {
+        case MO_8:
+            asm volatile(
+                "vsetvli %0, zero, e8, mf2\n\t"
+                "vsetvli %1, zero, e8, mf4\n\t"
+                "vsetvli %2, zero, e8, mf8"
+                : "=r"(vlmax[0]), "=r"(vlmax[1]), "=r"(vlmax[2])
+            );
+            break;
+        case MO_16:
+            asm volatile(
+                "vsetvli %0, zero, e16, mf2\n\t"
+                "vsetvli %1, zero, e16, mf4\n\t"
+                "vsetvli %2, zero, e16, mf8"
+                : "=r"(vlmax[0]), "=r"(vlmax[1]), "=r"(vlmax[2])
+            );
+            break;
+        case MO_32:
+            asm volatile(
+                "vsetvli %0, zero, e32, mf2\n\t"
+                "vsetvli %1, zero, e32, mf4\n\t"
+                "vsetvli %2, zero, e32, mf8"
+                : "=r"(vlmax[0]), "=r"(vlmax[1]), "=r"(vlmax[2])
+            );
+            break;
+        case MO_64:
+            asm volatile(
+                "vsetvli %0, zero, e64, mf2\n\t"
+                "vsetvli %1, zero, e64, mf4\n\t"
+                "vsetvli %2, zero, e64, mf8"
+                : "=r"(vlmax[0]), "=r"(vlmax[1]), "=r"(vlmax[2])
+            );
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        vaild_frac_lmul[i][1] = vlmax[0] != 0;
+        vaild_frac_lmul[i][2] = vlmax[1] != 0;
+        vaild_frac_lmul[i][3] = vlmax[2] != 0;
+    }
  }

This fails to build on debian with default cflags:

/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc: Assembler messages:
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2880: Error: unrecognized opcode `vsetvli a3,zero,e8,mf2', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2881: Error: unrecognized opcode `vsetvli a4,zero,e8,mf4', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2882: Error: unrecognized opcode `vsetvli a5,zero,e8,mf8', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2888: Error: unrecognized opcode `vsetvli a3,zero,e16,mf2', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2889: Error: unrecognized opcode `vsetvli a4,zero,e16,mf4', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2890: Error: unrecognized opcode `vsetvli a5,zero,e16,mf8', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2896: Error: unrecognized opcode `vsetvli a3,zero,e32,mf2', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2897: Error: unrecognized opcode `vsetvli a4,zero,e32,mf4', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2898: Error: unrecognized opcode `vsetvli a5,zero,e32,mf8', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2904: Error: unrecognized opcode `vsetvli a3,zero,e64,mf2', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2905: Error: unrecognized opcode `vsetvli a4,zero,e64,mf4', extension `v' or `zve64x' or `zve32x' required
/home/rth/qemu/src/tcg/riscv/tcg-target.c.inc:2906: Error: unrecognized opcode `vsetvli a5,zero,e64,mf8', extension `v' or `zve64x' or `zve32x' required

Rather than expanding this with a switch over immediate encodings, it would perhaps be better to feed encode_vtype() to the .insn encoding of vsetvl.

OK.



@@ -2160,6 +2483,7 @@ static void tcg_target_init(TCGContext *s)
         tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
         break;
     }
+    probe_frac_lmul();

You need to avoid this call if the host does not support vectors.
Agree.

It occurs to me that, rather than caching valid_frac_lmul[][], we can pre-compute encode_vtype and lmul_eq_avl.

Do you mean caching vtype and lmul_eq_avl for the different (lmul, sew) pairs instead of valid_frac_lmul?

Thanks,
Zhiwei

  It's not much of a savings within set_vtype(), but perhaps it is clearer.


r~

Reply via email to