Current implementation is based on x86's way of allocating VMIDs:
VMIDs partition the physical TLB. In the current implementation VMIDs
are introduced to reduce the number of TLB flushes. Each time the
guest's virtual address space changes, instead of flushing the TLB, a
new VMID is assigned. This reduces the number of TLB flushes to at
most one per #VMIDs address-space changes. The biggest advantage is
that hot parts of the hypervisor's code and data remain in the TLB.

VMIDs are a hart-local resource. As preemption of VMIDs is not
possible, VMIDs are assigned in a round-robin scheme. To minimize the
overhead of VMID invalidation, at the time of a TLB flush, VMIDs are
tagged with a 64-bit generation. Only on a generation overflow does
the code need to invalidate all VMID information stored in the vCPUs
that run on the specific physical processor. Such an overflow takes
about 2^80 host processor cycles (2^64 generations, each covering up
to 2^16 VMIDs, with every VM entry costing at least one cycle), so
this case is not optimized; VMID usage is simply disabled to retain
correctness.

Only minor changes are made compared to the x86 implementation. These
include using RISC-V-specific terminology, adding a check to ensure
the type used for storing the VMID has enough bits to hold VMIDLEN,
and introducing a new function vmidlen_detect() to determine the
VMIDLEN value.
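To illustrate the scheme, here is a stand-alone sketch (plain C,
illustrative only and not part of the patch; MAX_VMID stands in for
the detected maximum) of how a generation-tagged VMID is checked and
re-assigned on VM entry:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_VMID 3 /* pretend VMIDLEN == 2, i.e. VMIDs 1..3 usable */

    /* Per-hart allocator state; generation 0 and VMID 0 mean "invalid". */
    static uint64_t hart_generation = 1;
    static uint16_t next_vmid = 1;

    struct vcpu_vmid {
        uint64_t generation;
        uint16_t vmid;
    };

    /* Returns true when the caller must flush the guest TLB. */
    static bool handle_vmenter(struct vcpu_vmid *v)
    {
        /* Tag still matches the hart's generation: VMID stays valid. */
        if (v->generation == hart_generation)
            return false;

        /* VMIDs exhausted: open a new generation; all old tags mismatch. */
        if (next_vmid > MAX_VMID) {
            hart_generation++;
            next_vmid = 1;
        }

        v->vmid = next_vmid++;
        v->generation = hart_generation;

        /* VMID 1 starts a generation: stale translations must be flushed. */
        return v->vmid == 1;
    }

    int main(void)
    {
        struct vcpu_vmid v = { 0, 0 };

        for (int i = 0; i < 8; i++) {
            bool flush;

            /* Simulate a guest address-space change by dropping the tag. */
            v.generation = 0;
            flush = handle_vmenter(&v);
            printf("entry %d: vmid=%u flush=%d\n", i, (unsigned)v.vmid, flush);
        }

        return 0;
    }

Running it shows VMIDs 1..3 handed out round-robin, with flush=1
exactly when VMID 1 opens a new generation.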
Signed-off-by: Oleksii Kurochko <oleksii.kuroc...@gmail.com>
---
Changes in V3:
 - Reimplement VMID allocation similar to what x86 has implemented.
---
Changes in V2:
 - New patch.
---
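Note on intended use (nothing in this patch calls
vmid_handle_vmenter() yet): the allocated VMID eventually has to be
programmed into the VMID field of hgatp when switching to a guest.
Below is a hypothetical sketch of such a VM-entry path; only
CSR_HGATP, HGATP_VMID_MASK and the functions added by this patch are
taken from the patch, while HGATP_MODE_SV39X4, HGATP_MODE_MASK and the
p2m_root_pfn parameter are assumed names:

    /*
     * Hypothetical sketch: program the allocated VMID into hgatp when
     * switching to a guest. HGATP_MODE_SV39X4, HGATP_MODE_MASK and
     * p2m_root_pfn are assumed names, not definitions from this patch.
     */
    static void ctxt_switch_to_guest(struct vcpu *v, unsigned long p2m_root_pfn)
    {
        /* true means VMID 1 opened a new generation: flush stale entries. */
        if ( vmid_handle_vmenter(&v->arch.vmid) )
            local_hfence_gvma_all();

        csr_write(CSR_HGATP,
                  MASK_INSR(HGATP_MODE_SV39X4, HGATP_MODE_MASK) |
                  MASK_INSR(v->arch.vmid.vmid, HGATP_VMID_MASK) |
                  p2m_root_pfn);
    }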
 xen/arch/riscv/Makefile               |   1 +
 xen/arch/riscv/include/asm/domain.h   |   6 +
 xen/arch/riscv/include/asm/flushtlb.h |   5 +
 xen/arch/riscv/include/asm/vmid.h     |   8 ++
 xen/arch/riscv/setup.c                |   3 +
 xen/arch/riscv/vmid.c                 | 165 ++++++++++++++++++++++++++
 6 files changed, 188 insertions(+)
 create mode 100644 xen/arch/riscv/include/asm/vmid.h
 create mode 100644 xen/arch/riscv/vmid.c

diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile
index e2b8aa42c8..745a85e116 100644
--- a/xen/arch/riscv/Makefile
+++ b/xen/arch/riscv/Makefile
@@ -16,6 +16,7 @@ obj-y += smpboot.o
 obj-y += stubs.o
 obj-y += time.o
 obj-y += traps.o
+obj-y += vmid.o
 obj-y += vm_event.o
 
 $(TARGET): $(TARGET)-syms
diff --git a/xen/arch/riscv/include/asm/domain.h b/xen/arch/riscv/include/asm/domain.h
index c3d965a559..aac1040658 100644
--- a/xen/arch/riscv/include/asm/domain.h
+++ b/xen/arch/riscv/include/asm/domain.h
@@ -5,6 +5,11 @@
 #include <xen/xmalloc.h>
 
 #include <public/hvm/params.h>
 
+struct vcpu_vmid {
+    uint64_t generation;
+    uint16_t vmid;
+};
+
 struct hvm_domain
 {
     uint64_t params[HVM_NR_PARAMS];
@@ -14,6 +19,7 @@ struct arch_vcpu_io {
 };
 
 struct arch_vcpu {
+    struct vcpu_vmid vmid;
 };
 
 struct arch_domain {
diff --git a/xen/arch/riscv/include/asm/flushtlb.h b/xen/arch/riscv/include/asm/flushtlb.h
index 51c8f753c5..f391ae6eb7 100644
--- a/xen/arch/riscv/include/asm/flushtlb.h
+++ b/xen/arch/riscv/include/asm/flushtlb.h
@@ -7,6 +7,11 @@
 
 #include <asm/sbi.h>
 
+static inline void local_hfence_gvma_all(void)
+{
+    asm volatile ( "hfence.gvma zero, zero" ::: "memory" );
+}
+
 /* Flush TLB of local processor for address va. */
 static inline void flush_tlb_one_local(vaddr_t va)
 {
diff --git a/xen/arch/riscv/include/asm/vmid.h b/xen/arch/riscv/include/asm/vmid.h
new file mode 100644
index 0000000000..2f1f7ec9a2
--- /dev/null
+++ b/xen/arch/riscv/include/asm/vmid.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef ASM_RISCV_VMID_H
+#define ASM_RISCV_VMID_H
+
+void vmid_init(void);
+
+#endif /* ASM_RISCV_VMID_H */
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 483cdd7e17..549228d73f 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -25,6 +25,7 @@
 #include <asm/sbi.h>
 #include <asm/setup.h>
 #include <asm/traps.h>
+#include <asm/vmid.h>
 
 /* Xen stack for bringing up the first CPU. */
 unsigned char __initdata cpu0_boot_stack[STACK_SIZE]
@@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
 
     console_init_postirq();
 
+    vmid_init();
+
     printk("All set up\n");
 
     machine_halt();
diff --git a/xen/arch/riscv/vmid.c b/xen/arch/riscv/vmid.c
new file mode 100644
index 0000000000..7ad1b91ee2
--- /dev/null
+++ b/xen/arch/riscv/vmid.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <xen/domain.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/param.h>
+#include <xen/percpu.h>
+#include <xen/sections.h>
+
+#include <asm/atomic.h>
+#include <asm/csr.h>
+#include <asm/flushtlb.h>
+
+/* Xen command-line option to enable VMIDs */
+static bool __read_mostly opt_vmid_enabled = true;
+boolean_param("vmid", opt_vmid_enabled);
+
+/*
+ * VMIDs partition the physical TLB. In the current implementation VMIDs are
+ * introduced to reduce the number of TLB flushes. Each time the guest's
+ * virtual address space changes, instead of flushing the TLB, a new VMID is
+ * assigned. This reduces the number of TLB flushes to at most one per
+ * #VMIDs address-space changes. The biggest advantage is that hot parts of
+ * the hypervisor's code and data remain in the TLB.
+ *
+ * Sketch of the Implementation:
+ *
+ * VMIDs are a hart-local resource. As preemption of VMIDs is not possible,
+ * VMIDs are assigned in a round-robin scheme. To minimize the overhead of
+ * VMID invalidation, at the time of a TLB flush, VMIDs are tagged with a
+ * 64-bit generation. Only on a generation overflow does the code need to
+ * invalidate all VMID information stored in the vCPUs that run on the
+ * specific physical processor. This overflow appears after about 2^80
+ * host processor cycles, so we do not optimize this case, but simply
+ * disable VMID usage to retain correctness.
+ */
+
+/* Per-hart VMID management. */
+struct vmid_data {
+    uint64_t hart_vmid_generation;
+    uint16_t next_vmid;
+    uint16_t max_vmid;
+    bool disabled;
+};
+
+static DEFINE_PER_CPU(struct vmid_data, vmid_data);
+
+static unsigned long vmidlen_detect(void)
+{
+    unsigned long vmid_bits;
+    unsigned long old;
+
+    /* Figure out the number of VMID bits implemented in hardware. */
+    old = csr_read(CSR_HGATP);
+
+    csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
+    vmid_bits = csr_read(CSR_HGATP);
+    vmid_bits = MASK_EXTR(vmid_bits, HGATP_VMID_MASK);
+    vmid_bits = flsl(vmid_bits);
+    csr_write(CSR_HGATP, old);
+
+    /*
+     * We polluted the local TLB, so flush all guest TLB entries, as
+     * a speculative access can happen at any time.
+     */
+    local_hfence_gvma_all();
+
+    return vmid_bits;
+}
+
+void vmid_init(void)
+{
+    static bool g_disabled = false;
+
+    unsigned long vmid_len = vmidlen_detect();
+    struct vmid_data *data = &this_cpu(vmid_data);
+    unsigned long max_available_bits = sizeof(data->max_vmid) << 3;
+
+    if ( vmid_len > max_available_bits )
+        panic("%s: VMIDLEN %lu is bigger than the %lu bits of the type representing a VMID\n",
+              __func__, vmid_len, max_available_bits);
+
+    data->max_vmid = BIT(vmid_len, U) - 1;
+    data->disabled = !opt_vmid_enabled || (vmid_len <= 1);
+
+    if ( g_disabled != data->disabled )
+    {
+        printk("%s: VMIDs %sabled.\n", __func__,
+               data->disabled ? "dis" : "en");
+        if ( !g_disabled )
+            g_disabled = data->disabled;
+    }
+
+    /* Zero indicates 'invalid generation', so we start the count at one. */
+    data->hart_vmid_generation = 1;
+
+    /* Zero indicates 'VMIDs disabled', so we start the count at one. */
+    data->next_vmid = 1;
+}
+
+void vcpu_vmid_flush_vcpu(struct vcpu *v)
+{
+    write_atomic(&v->arch.vmid.generation, 0);
+}
+
+void vmid_flush_hart(void)
+{
+    struct vmid_data *data = &this_cpu(vmid_data);
+
+    if ( data->disabled )
+        return;
+
+    if ( likely(++data->hart_vmid_generation != 0) )
+        return;
+
+    /*
+     * VMID generations are 64 bit. Overflow of generations never happens.
+     * For safety, we simply disable VMIDs, so correctness is established;
+     * it only runs a bit slower.
+     */
+    printk("%s: VMID generation overrun. Disabling VMIDs.\n", __func__);
+    data->disabled = true;
+}
+
+bool vmid_handle_vmenter(struct vcpu_vmid *vmid)
+{
+    struct vmid_data *data = &this_cpu(vmid_data);
+
+    /* Test if the vCPU's VMID is still valid. */
+    if ( read_atomic(&vmid->generation) == data->hart_vmid_generation )
+        return false;
+
+    /* If there are no free VMIDs, need to go to a new generation. */
+    if ( unlikely(data->next_vmid > data->max_vmid) )
+    {
+        vmid_flush_hart();
+        data->next_vmid = 1;
+        if ( data->disabled )
+            goto disabled;
+    }
+
+    /* Now guaranteed to be a free VMID. */
+    vmid->vmid = data->next_vmid++;
+    write_atomic(&vmid->generation, data->hart_vmid_generation);
+
+    /*
+     * When we assign VMID 1, flush all TLB entries as we are starting a new
+     * generation, and all old VMID allocations are now stale.
+     */
+    return (vmid->vmid == 1);
+
+ disabled:
+    vmid->vmid = 0;
+    return false;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.50.1