On 5/27/2025 11:32 PM, Fuad Tabba wrote:
> This patch enables support for shared memory in guest_memfd, including
> mapping that memory at the host userspace. This support is gated by the
> configuration option KVM_GMEM_SHARED_MEM, and toggled by the guest_memfd
> flag GUEST_MEMFD_FLAG_SUPPORT_SHARED, which can be set when creating a
> guest_memfd instance.
> 
> Co-developed-by: Ackerley Tng <ackerley...@google.com>
> Signed-off-by: Ackerley Tng <ackerley...@google.com>
> Signed-off-by: Fuad Tabba <ta...@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 10 ++++
>  arch/x86/kvm/x86.c              |  3 +-
>  include/linux/kvm_host.h        | 13 ++++++
>  include/uapi/linux/kvm.h        |  1 +
>  virt/kvm/Kconfig                |  5 ++
>  virt/kvm/guest_memfd.c          | 81 +++++++++++++++++++++++++++++++++
>  6 files changed, 112 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 709cc2a7ba66..ce9ad4cd93c5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2255,8 +2255,18 @@ void kvm_configure_mmu(bool enable_tdp, int 
> tdp_forced_root_level,
>  
>  #ifdef CONFIG_KVM_GMEM
>  #define kvm_arch_supports_gmem(kvm) ((kvm)->arch.supports_gmem)
> +
> +/*
> + * CoCo VMs with hardware support that use guest_memfd only for backing 
> private
> + * memory, e.g., TDX, cannot use guest_memfd with userspace mapping enabled.
> + */
> +#define kvm_arch_supports_gmem_shared_mem(kvm)                       \
> +     (IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) &&                      \
> +      ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM ||             \
> +       (kvm)->arch.vm_type == KVM_X86_DEFAULT_VM))
>  #else
>  #define kvm_arch_supports_gmem(kvm) false
> +#define kvm_arch_supports_gmem_shared_mem(kvm) false
>  #endif
>  
>  #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 035ced06b2dd..2a02f2457c42 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12718,7 +12718,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
> type)
>               return -EINVAL;
>  
>       kvm->arch.vm_type = type;
> -     kvm->arch.supports_gmem = (type == KVM_X86_SW_PROTECTED_VM);
> +     kvm->arch.supports_gmem =
> +             type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;


I've been testing this patch-series. I did not saw failure with guest_memfd 
selftests but encountered a regression on my system with KVM_X86_DEFAULT_VM.

I'm getting below error in QEMU:
Issue #1 - QEMU fails to start with KVM_X86_DEFAULT_VM, showing:

qemu-system-x86_64: kvm_set_user_memory_region: KVM_SET_USER_MEMORY_REGION2 
failed, slot=65536, start=0x0, size=0x80000000, flags=0x0, guest_memfd=-1, 
guest_memfd_offset=0x0: Invalid argument
kvm_set_phys_mem: error registering slot: Invalid argument

I did some digging to find out,
In kvm_set_memory_region as_id >= kvm_arch_nr_memslot_as_ids(kvm) now returns 
true.
(as_id:1 kvm_arch_nr_memslot_as_ids(kvm):1 id:0 KVM_MEM_SLOTS_NUM:32767)

/* SMM is currently unsupported for guests with guest_memfd (esp private) 
memory. */
# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_supports_gmem(kvm) ? 1 : 2)
evaluates to be 1

I'm still debugging to find answer to these question
Why slot=65536 and (as_id = mem->slot >> 16 = 1) is requested for 
KVM_X86_DEFAULT_VM case
which is making it fail for above check.
Was this change intentional for KVM_X86_DEFAULT_VM? Should this be considered 
as KVM regression or QEMU[1] compatibility issue?

---
Issue #2: Testing challenges with QEMU changes[2] and mmap Implementation:
Currently, QEMU only enables guest_memfd for SEV_SNP_GUEST (KVM_X86_SNP_VM) by 
setting require_guest_memfd=true. However, the new mmap implementation doesn't 
support SNP guests per kvm_arch_supports_gmem_shared_mem().

static void
sev_snp_guest_instance_init(Object *obj)
{
    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
    SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);

    cgs->require_guest_memfd = true;


To bypass this, I did two things and failed:
1. Enabling guest_memfd for KVM_X86_DEFAULT_VM in QEMU: Hits Issue #1 above
2. Adding KVM_X86_SNP_VM to kvm_arch_supports_gmem_shared_mem(): mmap() 
succeeds but QEMU stuck during boot.



My NUMA policy support for guest-memfd patch[3] depends on mmap() support and 
extends
kvm_gmem_vm_ops with get_policy/set_policy operations.
Since NUMA policy applies to both shared and private memory scenarios, what 
checks should
be included in the mmap() implementation, and what's the recommended approach 
for
integrating with your shared memory restrictions?


[1] https://github.com/qemu/qemu
[2] Snippet to QEMU changes to add mmap

+                new_block->guest_memfd = kvm_create_guest_memfd(
+                                           new_block->max_length, /*0 
*/GUEST_MEMFD_FLAG_SUPPORT_SHARED, errp);
+                if (new_block->guest_memfd < 0) {
+                        qemu_mutex_unlock_ramlist();
+                        goto out_free;
+                }
+                new_block->ptr_memfd = mmap(NULL, new_block->max_length,
+                                            PROT_READ | PROT_WRITE,
+                                            MAP_SHARED,
+                                            new_block->guest_memfd, 0);
+                if (new_block->ptr_memfd == MAP_FAILED) {
+                    error_report("Failed to mmap guest_memfd");
+                    qemu_mutex_unlock_ramlist();
+                    goto out_free;
+                }
+                printf("mmap successful\n");
+            }
[3] https://lore.kernel.org/linux-mm/20250408112402.181574-1-shiva...@amd.com



>       /* Decided by the vendor code for other VM types.  */
>       kvm->arch.pre_fault_allowed =
>               type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 80371475818f..ba83547e62b0 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm 
> *kvm)
>  }
>  #endif
>  
> +/*
> + * Returns true if this VM supports shared mem in guest_memfd.
> + *
> + * Arch code must define kvm_arch_supports_gmem_shared_mem if support for
> + * guest_memfd is enabled.
> + */
> +#if !defined(kvm_arch_supports_gmem_shared_mem) && 
> !IS_ENABLED(CONFIG_KVM_GMEM)
> +static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm)
> +{
> +     return false;
> +}
> +#endif
> +
>  #ifndef kvm_arch_has_readonly_mem
>  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>  {
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b6ae8ad8934b..c2714c9d1a0e 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1566,6 +1566,7 @@ struct kvm_memory_attributes {
>  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>  
>  #define KVM_CREATE_GUEST_MEMFD       _IOWR(KVMIO,  0xd4, struct 
> kvm_create_guest_memfd)
> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED      (1ULL << 0)
>  
>  struct kvm_create_guest_memfd {
>       __u64 size;
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 559c93ad90be..df225298ab10 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -128,3 +128,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
>  config HAVE_KVM_ARCH_GMEM_INVALIDATE
>         bool
>         depends on KVM_GMEM
> +
> +config KVM_GMEM_SHARED_MEM
> +       select KVM_GMEM
> +       bool
> +       prompt "Enable support for non-private (shared) memory in guest_memfd"
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 6db515833f61..5d34712f64fc 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,7 +312,81 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot 
> *slot, gfn_t gfn)
>       return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +static bool kvm_gmem_supports_shared(struct inode *inode)
> +{
> +     u64 flags;
> +
> +     if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> +             return false;
> +
> +     flags = (u64)inode->i_private;
> +
> +     return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +}
> +
> +
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
> +{
> +     struct inode *inode = file_inode(vmf->vma->vm_file);
> +     struct folio *folio;
> +     vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +     if (IS_ERR(folio)) {
> +             int err = PTR_ERR(folio);
> +
> +             if (err == -EAGAIN)
> +                     return VM_FAULT_RETRY;
> +
> +             return vmf_error(err);
> +     }
> +
> +     if (WARN_ON_ONCE(folio_test_large(folio))) {
> +             ret = VM_FAULT_SIGBUS;
> +             goto out_folio;
> +     }
> +
> +     if (!folio_test_uptodate(folio)) {
> +             clear_highpage(folio_page(folio, 0));
> +             kvm_gmem_mark_prepared(folio);
> +     }
> +
> +     vmf->page = folio_file_page(folio, vmf->pgoff);
> +
> +out_folio:
> +     if (ret != VM_FAULT_LOCKED) {
> +             folio_unlock(folio);
> +             folio_put(folio);
> +     }
> +
> +     return ret;
> +}
> +
> +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> +     .fault = kvm_gmem_fault_shared,
> +};
> +
> +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +     if (!kvm_gmem_supports_shared(file_inode(file)))
> +             return -ENODEV;
> +
> +     if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> +         (VM_SHARED | VM_MAYSHARE)) {
> +             return -EINVAL;
> +     }
> +
> +     vma->vm_ops = &kvm_gmem_vm_ops;
> +
> +     return 0;
> +}
> +#else
> +#define kvm_gmem_mmap NULL
> +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
> +
>  static struct file_operations kvm_gmem_fops = {
> +     .mmap           = kvm_gmem_mmap,
>       .open           = generic_file_open,
>       .release        = kvm_gmem_release,
>       .fallocate      = kvm_gmem_fallocate,
> @@ -463,6 +537,9 @@ int kvm_gmem_create(struct kvm *kvm, struct 
> kvm_create_guest_memfd *args)
>       u64 flags = args->flags;
>       u64 valid_flags = 0;
>  
> +     if (kvm_arch_supports_gmem_shared_mem(kvm))
> +             valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +
>       if (flags & ~valid_flags)
>               return -EINVAL;
>  
> @@ -501,6 +578,10 @@ int kvm_gmem_bind(struct kvm *kvm, struct 
> kvm_memory_slot *slot,
>           offset + size > i_size_read(inode))
>               goto err;
>  
> +     if (kvm_gmem_supports_shared(inode) &&
> +         !kvm_arch_supports_gmem_shared_mem(kvm))
> +             goto err;
> +
>       filemap_invalidate_lock(inode->i_mapping);
>  
>       start = offset >> PAGE_SHIFT;








Reply via email to