Add kvm_follow_pfn.refcounted_page as an output for the "to pfn" APIs to
"return" the struct page that is associated with the returned pfn (if KVM
acquired a reference to the page).  This will eventually allow removing
KVM's hacky kvm_pfn_to_refcounted_page() code, which is error prone and
can't detect pfns that are valid, but aren't (currently) refcounted.

Tested-by: Alex Bennée <alex.ben...@linaro.org>
Signed-off-by: Sean Christopherson <sea...@google.com>
---
 virt/kvm/kvm_main.c | 99 +++++++++++++++++++++------------------------
 virt/kvm/kvm_mm.h   |  9 +++++
 2 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d3e48fcc4fb0..e29f78ed6f48 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2746,6 +2746,46 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu 
*vcpu, gfn_t gfn, bool *w
        return gfn_to_hva_memslot_prot(slot, gfn, writable);
 }
 
+static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
+                                struct follow_pfnmap_args *map, bool writable)
+{
+       kvm_pfn_t pfn;
+
+       WARN_ON_ONCE(!!page == !!map);
+
+       if (kfp->map_writable)
+               *kfp->map_writable = writable;
+
+       /*
+        * FIXME: Remove this once KVM no longer blindly calls put_page() on
+        *        every pfn that points at a struct page.
+        *
+        * Get a reference for follow_pte() pfns if they happen to point at a
+        * struct page, as KVM will ultimately call kvm_release_pfn_clean() on
+        * the returned pfn, i.e. KVM expects to have a reference.
+        *
+        * Certain IO or PFNMAP mappings can be backed with valid struct pages,
+        * but be allocated without refcounting, e.g. tail pages of
+        * non-compound higher order allocations.  Grabbing and putting a
+        * reference to such pages would cause KVM to prematurely free a page
+        * it doesn't own (KVM gets and puts the one and only reference).
+        * Don't allow those pages until the FIXME is resolved.
+        */
+       if (map) {
+               pfn = map->pfn;
+               page = kvm_pfn_to_refcounted_page(pfn);
+               if (page && !get_page_unless_zero(page))
+                       return KVM_PFN_ERR_FAULT;
+       } else {
+               pfn = page_to_pfn(page);
+       }
+
+       if (kfp->refcounted_page)
+               *kfp->refcounted_page = page;
+
+       return pfn;
+}
+
 /*
  * The fast path to get the writable pfn which will be stored in @pfn,
  * true indicates success, otherwise false is returned.
@@ -2763,9 +2803,7 @@ static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, 
kvm_pfn_t *pfn)
                return false;
 
        if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page)) {
-               *pfn = page_to_pfn(page);
-               if (kfp->map_writable)
-                       *kfp->map_writable = true;
+               *pfn = kvm_resolve_pfn(kfp, page, NULL, true);
                return true;
        }
 
@@ -2797,23 +2835,15 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, 
kvm_pfn_t *pfn)
        if (npages != 1)
                return npages;
 
-       if (!kfp->map_writable)
-               goto out;
-
-       if (kfp->flags & FOLL_WRITE) {
-               *kfp->map_writable = true;
-               goto out;
-       }
-
        /* map read fault as writable if possible */
-       if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
-               *kfp->map_writable = true;
+       if (!(flags & FOLL_WRITE) && kfp->map_writable &&
+           get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
                put_page(page);
                page = wpage;
+               flags |= FOLL_WRITE;
        }
 
-out:
-       *pfn = page_to_pfn(page);
+       *pfn = kvm_resolve_pfn(kfp, page, NULL, flags & FOLL_WRITE);
        return npages;
 }
 
@@ -2828,22 +2858,11 @@ static bool vma_is_valid(struct vm_area_struct *vma, 
bool write_fault)
        return true;
 }
 
-static int kvm_try_get_pfn(kvm_pfn_t pfn)
-{
-       struct page *page = kvm_pfn_to_refcounted_page(pfn);
-
-       if (!page)
-               return 1;
-
-       return get_page_unless_zero(page);
-}
-
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                               struct kvm_follow_pfn *kfp, kvm_pfn_t *p_pfn)
 {
        struct follow_pfnmap_args args = { .vma = vma, .address = kfp->hva };
        bool write_fault = kfp->flags & FOLL_WRITE;
-       kvm_pfn_t pfn;
        int r;
 
        r = follow_pfnmap_start(&args);
@@ -2867,37 +2886,13 @@ static int hva_to_pfn_remapped(struct vm_area_struct 
*vma,
        }
 
        if (write_fault && !args.writable) {
-               pfn = KVM_PFN_ERR_RO_FAULT;
+               *p_pfn = KVM_PFN_ERR_RO_FAULT;
                goto out;
        }
 
-       if (kfp->map_writable)
-               *kfp->map_writable = args.writable;
-       pfn = args.pfn;
-
-       /*
-        * Get a reference here because callers of *hva_to_pfn* and
-        * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
-        * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
-        * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
-        * simply do nothing for reserved pfns.
-        *
-        * Whoever called remap_pfn_range is also going to call e.g.
-        * unmap_mapping_range before the underlying pages are freed,
-        * causing a call to our MMU notifier.
-        *
-        * Certain IO or PFNMAP mappings can be backed with valid
-        * struct pages, but be allocated without refcounting e.g.,
-        * tail pages of non-compound higher order allocations, which
-        * would then underflow the refcount when the caller does the
-        * required put_page. Don't allow those pages here.
-        */
-       if (!kvm_try_get_pfn(pfn))
-               r = -EFAULT;
+       *p_pfn = kvm_resolve_pfn(kfp, NULL, &args, args.writable);
 out:
        follow_pfnmap_end(&args);
-       *p_pfn = pfn;
-
        return r;
 }
 
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index d5a215958f06..d3ac1ba8ba66 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -35,6 +35,15 @@ struct kvm_follow_pfn {
         * Set to true if a writable mapping was obtained.
         */
        bool *map_writable;
+
+       /*
+        * Optional output.  Set to a valid "struct page" if the returned pfn
+        * is for a refcounted or pinned struct page, NULL if the returned pfn
+        * has no struct page or if the struct page is not being refcounted
+        * (e.g. tail pages of non-compound higher order allocations from
+        * IO/PFNMAP mappings).
+        */
+       struct page **refcounted_page;
 };
 
 kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *kfp);
-- 
2.47.0.rc1.288.g06298d1525-goog


Reply via email to