[RFC PATCH v2 11/13] KVM: x86: Handle moving of memory context for intra-host migration

Ryan Afranji Fri, 16 May 2025 12:23:35 -0700

From: Ackerley Tng <ackerley...@google.com>

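Migrating the memory context moves lpage_info and mem_attr_array from
the source VM to the destination VM. The move is only performed when the
memslot configurations of both VMs (address space 0) match, and the
source VM is marked dead once its memory context has been moved.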


Co-developed-by: Sagi Shahar <sa...@google.com>
Signed-off-by: Sagi Shahar <sa...@google.com>
Co-developed-by: Vishal Annapurve <vannapu...@google.com>
Signed-off-by: Vishal Annapurve <vannapu...@google.com>
Signed-off-by: Ackerley Tng <ackerley...@google.com>
Signed-off-by: Ryan Afranji <afra...@google.com>
---
 arch/x86/kvm/x86.c       | 110 +++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h |  17 ++++++
 virt/kvm/guest_memfd.c   |  25 +++++++++
 3 files changed, 152 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a7e05c47aa8..887702781465 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4564,6 +4564,40 @@ void kvm_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_unlock_two_vms);
 
+/*
+ * Take the slots_lock of both VMs, destination first.
+ *
+ * The source lock is taken while the destination lock is already held, hence
+ * the SINGLE_DEPTH_NESTING annotation on the second acquisition.
+ *
+ * Return: 0 with both locks held, -EINVAL if @dst_kvm and @src_kvm are the
+ * same VM, or -EINTR if either killable lock attempt fails.  No lock is held
+ * when an error is returned.
+ */
+static int kvm_lock_vm_memslots(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+       if (dst_kvm == src_kvm)
+               return -EINVAL;
+
+       if (mutex_lock_killable(&dst_kvm->slots_lock))
+               return -EINTR;
+
+       if (mutex_lock_killable_nested(&src_kvm->slots_lock,
+                                      SINGLE_DEPTH_NESTING)) {
+               mutex_unlock(&dst_kvm->slots_lock);
+               return -EINTR;
+       }
+
+       return 0;
+}
+
+/* Release both slots_locks in the reverse order of kvm_lock_vm_memslots(). */
+static void kvm_unlock_vm_memslots(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+       mutex_unlock(&src_kvm->slots_lock);
+       mutex_unlock(&dst_kvm->slots_lock);
+}
+
 /*
  * Read or write a bunch of msrs. All parameters are kernel addresses.
  *
@@ -6597,6 +6624,105 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
        return 0;
 }
 
+/*
+ * Check that @src_slots and @dst_slots describe the same memslot layout.
+ *
+ * Every pair of slots visited by kvm_for_each_memslot_pair() must agree on
+ * base_gfn, npages and flags, and slots that can be private must also pass
+ * kvm_gmem_params_match().
+ */
+static bool memslot_configurations_match(struct kvm_memslots *src_slots,
+                                        struct kvm_memslots *dst_slots)
+{
+       struct kvm_memslot_iter src_iter;
+       struct kvm_memslot_iter dst_iter;
+
+       kvm_for_each_memslot_pair(&src_iter, src_slots, &dst_iter, dst_slots) {
+               struct kvm_memory_slot *src_slot = src_iter.slot;
+               struct kvm_memory_slot *dst_slot = dst_iter.slot;
+
+               if (src_slot->base_gfn != dst_slot->base_gfn ||
+                   src_slot->npages != dst_slot->npages ||
+                   src_slot->flags != dst_slot->flags)
+                       return false;
+
+               if (kvm_slot_can_be_private(dst_slot) &&
+                   !kvm_gmem_params_match(src_slot, dst_slot))
+                       return false;
+       }
+
+       /*
+        * The walk stops as soon as either side runs out of slots, so both
+        * iterators must be exhausted for the configurations to match.
+        */
+       return !src_iter.node && !dst_iter.node;
+}
+
+/*
+ * Move lpage_info and the memory attribute array from @src to @dst.
+ *
+ * Only the memslots of address space 0 are considered.  Returns -EINVAL,
+ * before anything has been moved, if the memslot configurations of the two
+ * VMs differ; otherwise returns 0 after marking @src dead with kvm_vm_dead().
+ */
+static int kvm_move_memory_ctxt_from(struct kvm *dst, struct kvm *src)
+{
+       struct kvm_memslot_iter src_iter;
+       struct kvm_memslot_iter dst_iter;
+       struct kvm_memslots *src_slots, *dst_slots;
+       int i;
+
+       /* TODO: Do we also need to check consistency for as_id == SMM? */
+       src_slots = __kvm_memslots(src, 0);
+       dst_slots = __kvm_memslots(dst, 0);
+
+       if (!memslot_configurations_match(src_slots, dst_slots))
+               return -EINVAL;
+
+       /*
+        * Transferring lpage_info is an optimization, lpage_info can be rebuilt
+        * by the destination VM.
+        */
+       kvm_for_each_memslot_pair(&src_iter, src_slots, &dst_iter, dst_slots) {
+               struct kvm_memory_slot *src_slot = src_iter.slot;
+               struct kvm_memory_slot *dst_slot = dst_iter.slot;
+               unsigned long ugfn = dst_slot->userspace_addr >> PAGE_SHIFT;
+
+               for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
+                       int level = i + 1;
+
+                       /*
+                        * If the gfn and userspace address are not aligned
+                        * wrt each other, skip migrating lpage_info.
+                        */
+                       if ((dst_slot->base_gfn ^ ugfn) &
+                           (KVM_PAGES_PER_HPAGE(level) - 1))
+                               continue;
+
+                       kvfree(dst_slot->arch.lpage_info[i - 1]);
+                       dst_slot->arch.lpage_info[i - 1] =
+                               src_slot->arch.lpage_info[i - 1];
+                       src_slot->arch.lpage_info[i - 1] = NULL;
+               }
+       }
+
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+       /*
+        * For VMs that don't use private memory, this will just be moving an
+        * empty xarray pointer.
+        *
+        * Free whatever @dst already tracks first; overwriting xa_head would
+        * otherwise leak those nodes.
+        */
+       xa_destroy(&dst->mem_attr_array);
+       dst->mem_attr_array.xa_head = src->mem_attr_array.xa_head;
+       src->mem_attr_array.xa_head = NULL;
+#endif
+
+       kvm_vm_dead(src);
+       return 0;
+}
+
 static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 {
        int r;
@@ -6624,6 +6723,14 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
        if (r)
                goto out_mark_migration_done;
 
+       r = kvm_lock_vm_memslots(kvm, source_kvm);
+       if (r)
+               goto out_unlock;
+
+       r = kvm_move_memory_ctxt_from(kvm, source_kvm);
+       if (r)
+               goto out_unlock_memslots;
+
        /*
         * Different types of VMs will allow userspace to define if moving
         * encryption context should be required.
@@ -6633,6 +6740,9 @@ static int kvm_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
                r = kvm_x86_call(vm_move_enc_context_from)(kvm, source_kvm);
        }
 
+out_unlock_memslots:
+       kvm_unlock_vm_memslots(kvm, source_kvm);
+out_unlock:
        kvm_unlock_two_vms(kvm, source_kvm);
 out_mark_migration_done:
        kvm_mark_migration_done(kvm, source_kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0c1d637a6e7d..99abe9879856 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1197,6 +1197,19 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
 struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+/*
+ * Iterate over a pair of memslot trees in lockstep, in gfn order, until one
+ * of the trees ends.  Callers that need both trees to hold the same number of
+ * slots must check the node member of both iterators after the loop.
+ */
+#define kvm_for_each_memslot_pair(iter1, slots1, iter2, slots2)        \
+       for (kvm_memslot_iter_start(iter1, slots1, 0),                  \
+                    kvm_memslot_iter_start(iter2, slots2, 0);          \
+            kvm_memslot_iter_is_valid(iter1, U64_MAX) &&               \
+                    kvm_memslot_iter_is_valid(iter2, U64_MAX);         \
+            kvm_memslot_iter_next(iter1),                              \
+                    kvm_memslot_iter_next(iter2))
+
 /*
  * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
  * - create a new memory slot
@@ -2521,6 +2531,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
                     gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
                     int *max_order);
+bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+                          struct kvm_memory_slot *slot2);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
                                   struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2530,6 +2542,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
        KVM_BUG_ON(1, kvm);
        return -EIO;
 }
+/* Stub for builds without CONFIG_KVM_PRIVATE_MEM: slots never match. */
+static inline bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+                                        struct kvm_memory_slot *slot2)
+{
+       return false;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index d76bd1119198..1a4198c4a4dd 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -778,6 +778,40 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
+/**
+ * kvm_gmem_params_match() - compare the guest_memfd bindings of two memslots
+ * @slot1: first memslot
+ * @slot2: second memslot
+ *
+ * Return: true if both slots use the same gmem.pgoff and their guest_memfd
+ * files resolve to the same inode; false otherwise, including when either
+ * file cannot be obtained.
+ */
+bool kvm_gmem_params_match(struct kvm_memory_slot *slot1,
+                          struct kvm_memory_slot *slot2)
+{
+       struct file *file1;
+       struct file *file2;
+       bool match;
+
+       if (slot1->gmem.pgoff != slot2->gmem.pgoff)
+               return false;
+
+       file1 = kvm_gmem_get_file(slot1);
+       file2 = kvm_gmem_get_file(slot2);
+
+       match = file1 && file2 && file_inode(file1) == file_inode(file2);
+
+       /* Pair each successful kvm_gmem_get_file() with an fput(). */
+       if (file1)
+               fput(file1);
+       if (file2)
+               fput(file2);
+
+       return match;
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_params_match);
+
 #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
                       kvm_gmem_populate_cb post_populate, void *opaque)
-- 
2.49.0.1101.gccaa498523-goog

[RFC PATCH v2 11/13] KVM: x86: Handle moving of memory context for intra-host migration

Reply via email to