On 3/28/25 15:30, Lendacky, Thomas wrote: > A page state change is typically followed by an access of the page(s) and > results in another VMEXIT in order to map the page into the nested page > table. Depending on the size of the page state change request, this can > generate a number of additional VMEXITs. For example, under SNP, when > Linux is utilizing lazy memory acceptance, memory is typically accepted in > 4M chunks. A page state change request is submitted to mark the pages as > private, followed by validation of the memory. Since the guest_memfd > currently only supports 4K pages, each page validation will result in > a VMEXIT to map the page, resulting in 1024 additional exits. > > When performing a page state change, invoke KVM_PRE_FAULT_MEMORY for the > size of the page state change in order to pre-map the pages and avoid the > additional VMEXITs. This helps speed up boot times.
Ping... > > Signed-off-by: Tom Lendacky <thomas.lenda...@amd.com> > --- > accel/kvm/kvm-all.c | 2 ++ > include/system/kvm.h | 1 + > target/i386/kvm/kvm.c | 31 ++++++++++++++++++++++++++----- > 3 files changed, 29 insertions(+), 5 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index f89568bfa3..0cd487cea7 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -93,6 +93,7 @@ bool kvm_allowed; > bool kvm_readonly_mem_allowed; > bool kvm_vm_attributes_allowed; > bool kvm_msi_use_devid; > +bool kvm_pre_fault_memory_supported; > static bool kvm_has_guest_debug; > static int kvm_sstep_flags; > static bool kvm_immediate_exit; > @@ -2732,6 +2733,7 @@ static int kvm_init(MachineState *ms) > kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && > kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && > (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); > + kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, > KVM_CAP_PRE_FAULT_MEMORY); > > if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { > s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? > ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; > diff --git a/include/system/kvm.h b/include/system/kvm.h > index ab17c09a55..492ea8a383 100644 > --- a/include/system/kvm.h > +++ b/include/system/kvm.h > @@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed; > extern bool kvm_gsi_direct_mapping; > extern bool kvm_readonly_mem_allowed; > extern bool kvm_msi_use_devid; > +extern bool kvm_pre_fault_memory_supported; > > #define kvm_enabled() (kvm_allowed) > /** > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index 6c749d4ee8..7c39d30c5f 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -5999,9 +5999,11 @@ static bool host_supports_vmx(void) > * because private/shared page tracking is already provided through other > * means, these 2 use-cases should be treated as being mutually-exclusive. 
> */ > -static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) > +static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run) > { > + struct kvm_pre_fault_memory mem; > uint64_t gpa, size, attributes; > + int ret; > > if (!machine_require_guest_memfd(current_machine)) > return -EINVAL; > @@ -6012,13 +6014,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run > *run) > > trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); > > - return kvm_convert_memory(gpa, size, attributes & > KVM_MAP_GPA_RANGE_ENCRYPTED); > + ret = kvm_convert_memory(gpa, size, attributes & > KVM_MAP_GPA_RANGE_ENCRYPTED); > + if (ret || !kvm_pre_fault_memory_supported) { > + return ret; > + } > + > + /* > + * Opportunistically pre-fault memory in. Failures are ignored so that > any > + * errors in faulting in the memory will get captured in KVM page fault > + * path when the guest first accesses the page. > + */ > + memset(&mem, 0, sizeof(mem)); > + mem.gpa = gpa; > + mem.size = size; > + while (mem.size) { > + if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) { > + break; > + } > + } > + > + return 0; > } > > -static int kvm_handle_hypercall(struct kvm_run *run) > +static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run) > { > if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) > - return kvm_handle_hc_map_gpa_range(run); > + return kvm_handle_hc_map_gpa_range(cpu, run); > > return -EINVAL; > } > @@ -6118,7 +6139,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run > *run) > break; > #endif > case KVM_EXIT_HYPERCALL: > - ret = kvm_handle_hypercall(run); > + ret = kvm_handle_hypercall(cpu, run); > break; > default: > fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); > > base-commit: 0f15892acaf3f50ecc20c6dad4b3ebdd701aa93e