From: Fred Griffoul <fgri...@amazon.co.uk>

Introduce a selftest to verify the nested VMX APIC virtualization page
cache invalidation and refresh mechanisms of the pfncache implementation.

The test exercises the nested VMX APIC cache invalidation path through:

- L2 guest setup: creates a nested environment where L2 accesses the
  APIC access page that is cached by KVM using pfncache.

- Cache invalidation triggers: a separate update thread periodically
  invalidates the cached pages using either:
   - madvise(MADV_DONTNEED) to trigger MMU notifications.
   - vm_mem_region_move() to trigger memslot changes.

The test validates that:
- L2 can successfully access the APIC page before and after invalidation.
- KVM properly handles cache refresh without guest-visible errors.
- Both MMU notification and memslot change invalidation paths work
  correctly.

Signed-off-by: Fred Griffoul <fgri...@amazon.co.uk>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../selftests/kvm/x86/vmx_apic_update_test.c  | 302 ++++++++++++++++++
 2 files changed, 303 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86/vmx_apic_update_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 90f03f00cb04..5d4505c7f6f0 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -136,6 +136,7 @@ TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
 TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
 TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
 TEST_GEN_PROGS_x86 += x86/aperfmperf_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_update_test
 TEST_GEN_PROGS_x86 += access_tracking_perf_test
 TEST_GEN_PROGS_x86 += coalesced_io_test
 TEST_GEN_PROGS_x86 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c
new file mode 100644
index 000000000000..22f82cf6dd0c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_update_test
+ *
+ * Copyright (C) 2025, Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Test L2 guest APIC access page writes with concurrent MMU
+ * notifications and memslot move updates.
+ */
+#include <pthread.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VAPIC_GPA      0xc0000000       /* guest physical address of the APIC access page */
+#define VAPIC_SLOT     1                /* id of the memslot created at VAPIC_GPA */
+
+#define L2_GUEST_STACK_SIZE 64          /* L2 stack size, counted in unsigned longs */
+
+#define L2_DELAY       (100)            /* milliseconds L2 waits after each APIC page store */
+
+static void l2_guest_code(void)
+{      /* L2 guest: store to the APIC access page every L2_DELAY ms, then vmcall. */
+       uint32_t *vapic_addr = (uint32_t *) (VAPIC_GPA + 0x80); /* offset 0x80 into the page at VAPIC_GPA */
+
+       /* Unroll the loop to avoid any compiler side effect */
+
+       WRITE_ONCE(*vapic_addr, 1 << 0);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 1);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 2);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 3);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 4);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 5);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 1 << 6);
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       WRITE_ONCE(*vapic_addr, 0);     /* last store of the sequence writes 0 */
+       udelay(msecs_to_usecs(L2_DELAY));
+
+       /* Exit to L1 */
+       vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{      /* L1 guest: set up VMX and the VMCS, run L2, and step L2 past each APIC-access exit. */
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control, exit_reason;
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);     /* one past the end of the stack array */
+
+       /* Enable APIC access virtualization for L2, with VAPIC_GPA as the APIC access address */
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+       control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+       vmwrite(APIC_ACCESS_ADDR, VAPIC_GPA);
+
+       GUEST_SYNC1(0);         /* sync(0): the host starts the update thread when it sees this */
+       GUEST_ASSERT(!vmlaunch());
+again:
+       exit_reason = vmreadz(VM_EXIT_REASON);
+       if (exit_reason == EXIT_REASON_APIC_ACCESS) {   /* APIC-access exit from L2 */
+               uint64_t guest_rip = vmreadz(GUEST_RIP);
+               uint64_t instr_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+
+               vmwrite(GUEST_RIP, guest_rip + instr_len);      /* skip the instruction that exited */
+               GUEST_ASSERT(!vmresume());
+               goto again;
+       }
+
+       GUEST_SYNC1(exit_reason);       /* report the final L2 exit reason to the host */
+       GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+static const char *progname;                   /* argv[0]; prefix for every message printed by the test */
+static int update_period_ms = L2_DELAY / 4;    /* ms between two updates; overridden by -p */
+
+struct update_control {        /* state shared between run() and its update thread */
+       pthread_mutex_t mutex;          /* held while reading/writing 'started' for the start handshake */
+       pthread_cond_t start_cond;      /* signalled when 'started' is set */
+       struct kvm_vm *vm;              /* VM the update thread operates on */
+       bool running;                   /* update thread loops while true; cleared by run() */
+       bool started;                   /* set once by signal_thread_start() */
+       int updates;                    /* number of updates performed by the update thread */
+};
+
+static void wait_for_start_signal(struct update_control *ctrl)
+{      /* Block the calling thread until signal_thread_start() has set ctrl->started. */
+       pthread_mutex_lock(&ctrl->mutex);
+       while (!ctrl->started)  /* re-check the flag after every wakeup */
+               pthread_cond_wait(&ctrl->start_cond, &ctrl->mutex);
+
+       pthread_mutex_unlock(&ctrl->mutex);
+       printf("%s: starting update\n", progname);
+}
+
+static bool is_running(struct update_control *ctrl)
+{      /* Return the current value of ctrl->running (the update threads' loop condition). */
+       return READ_ONCE(ctrl->running);
+}
+
+static void set_running(struct update_control *ctrl, bool running)
+{      /* Store 'running' into ctrl->running, the flag polled through is_running(). */
+       WRITE_ONCE(ctrl->running, running);
+}
+
+static void signal_thread_start(struct update_control *ctrl)
+{      /* Set ctrl->started and wake the thread blocked in wait_for_start_signal(). */
+       pthread_mutex_lock(&ctrl->mutex);
+       if (!ctrl->started) {           /* calls after the first one do nothing */
+               ctrl->started = true;
+               pthread_cond_signal(&ctrl->start_cond);
+       }
+       pthread_mutex_unlock(&ctrl->mutex);
+}
+
+static void *update_madvise(void *arg)
+{      /* Update thread: madvise(MADV_DONTNEED) the page backing VAPIC_GPA every update_period_ms. */
+       struct update_control *ctrl = arg;
+       void *hva;
+
+       wait_for_start_signal(ctrl);
+
+       hva = addr_gpa2hva(ctrl->vm, VAPIC_GPA);        /* host virtual address backing VAPIC_GPA */
+       memset(hva, 0x45, ctrl->vm->page_size);         /* fill the page with 0x45 bytes */
+
+       while (is_running(ctrl)) {
+               usleep(update_period_ms * 1000);        /* ms -> us */
+               madvise(hva, ctrl->vm->page_size, MADV_DONTNEED);       /* return value is not checked */
+               ctrl->updates++;
+       }
+
+       return NULL;
+}
+
+static void *update_move_memslot(void *arg)
+{      /* Update thread: move memslot VAPIC_SLOT to a new GPA every update_period_ms. */
+       struct update_control *ctrl = arg;
+       uint64_t gpa = VAPIC_GPA;
+
+       wait_for_start_signal(ctrl);
+
+       while (is_running(ctrl)) {
+               usleep(update_period_ms * 1000);        /* ms -> us */
+               gpa += 0x10000;         /* always 0x10000 above the previous GPA; never back to VAPIC_GPA */
+               vm_mem_region_move(ctrl->vm, VAPIC_SLOT, gpa);
+               ctrl->updates++;
+       }
+
+       return NULL;
+}
+
+static void run(void * (*update)(void *), const char *name)
+{      /* Run one test: boot L1/L2 in a fresh VM while thread 'update' runs; 'name' labels the output. */
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       struct vmx_pages *vmx;
+       struct update_control ctrl;
+       struct ucall uc;
+       vm_vaddr_t vmx_pages_gva;
+       pthread_t update_thread;
+       bool done = false;
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       /* Allocate VMX pages */
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+
+       /* Allocate memory and create VAPIC memslot */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, VAPIC_GPA,
+                                   VAPIC_SLOT, 1, 0);
+
+       /* Map guest virtual VAPIC_GPA to guest physical VAPIC_GPA (one page) */
+       virt_map(vm, VAPIC_GPA, VAPIC_GPA, 1);
+
+       /* Set up nested EPT */
+       prepare_eptp(vmx, vm, 0);
+       nested_map_memslot(vmx, vm, 0);
+       nested_map_memslot(vmx, vm, VAPIC_SLOT);
+       nested_map(vmx, vm, VAPIC_GPA, VAPIC_GPA, vm->page_size);
+
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);  /* argument of l1_guest_code() */
+
+       pthread_mutex_init(&ctrl.mutex, NULL);
+       pthread_cond_init(&ctrl.start_cond, NULL);
+       ctrl.vm = vm;
+       ctrl.running = true;
+       ctrl.started = false;
+       ctrl.updates = 0;
+
+       pthread_create(&update_thread, NULL, update, &ctrl);    /* thread blocks until sync(0) below */
+
+       printf("%s: running %s (tsc_khz %lu)\n", progname, name, guest_tsc_khz);
+
+       while (!done) {
+               vcpu_run(vcpu);
+
+               switch (vcpu->run->exit_reason) {
+               case KVM_EXIT_IO:
+                       switch (get_ucall(vcpu, &uc)) {
+                       case UCALL_SYNC:
+                               printf("%s: sync(%ld)\n", progname, uc.args[0]);
+                               if (uc.args[0] == 0)    /* L1 is about to launch L2 */
+                                       signal_thread_start(&ctrl);
+                               break;
+                       case UCALL_ABORT:
+                               REPORT_GUEST_ASSERT(uc);
+                               /* NOT REACHED */
+                       case UCALL_DONE:
+                               done = true;
+                               break;
+                       default:
+                               TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+                       }
+                       break;
+               case KVM_EXIT_MMIO:
+                       /* APIC MMIO access after memslot move: only logged, then the vCPU is re-entered */
+                       printf
+                           ("%s: APIC MMIO access at 0x%llx (memslot move effect)\n",
+                            progname, vcpu->run->mmio.phys_addr);
+                       break;
+               default:
+                       TEST_FAIL("%s: Unexpected exit reason: %d (flags 0x%x)",
+                                 progname,
+                                 vcpu->run->exit_reason, vcpu->run->flags);
+               }
+       }
+
+       set_running(&ctrl, false);      /* ask the update thread to leave its loop */
+       if (!ctrl.started)              /* never saw sync(0): release the thread so the join can return */
+               signal_thread_start(&ctrl);
+       pthread_join(update_thread, NULL);
+       printf("%s: completed with %d updates\n", progname, ctrl.updates);
+
+       pthread_mutex_destroy(&ctrl.mutex);
+       pthread_cond_destroy(&ctrl.start_cond);
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{      /* Options: -a madvise test, -m memslot move test, -p <ms> update period; no arguments runs both tests. */
+       int opt_madvise = 0;
+       int opt_memslot_move = 0;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_cpu_has_ept());
+
+       if (argc == 1) {
+               opt_madvise = 1;
+               opt_memslot_move = 1;
+       } else {
+               int opt;
+
+               while ((opt = getopt(argc, argv, "amp:")) != -1) {
+                       switch (opt) {
+                       case 'a':
+                               opt_madvise = 1;
+                               break;
+                       case 'm':
+                               opt_memslot_move = 1;
+                               break;
+                       case 'p':       /* reject non-numeric, zero or negative periods instead of passing them to usleep() */
+                               update_period_ms = atoi_positive("Update period", optarg);
+                               break;
+                       default:
+                               exit(1);
+                       }
+               }
+       }
+
+       TEST_ASSERT(opt_madvise
+                   || opt_memslot_move, "No update test configured");
+
+       progname = argv[0];
+
+       if (opt_madvise)
+               run(update_madvise, "madvise");
+
+       if (opt_memslot_move)
+               run(update_move_memslot, "move memslot");
+
+       return 0;
+}
-- 
2.51.0


Reply via email to