e500 KVM tries to bypass __kvm_faultin_pfn() in order to map VM_PFNMAP
VMAs as huge pages.  This is a Bad Idea because VM_PFNMAP VMAs could
become noncontiguous as a result of calls to remap_pfn_range().

Instead, use the already existing host PTE lookup to retrieve a
valid host-side mapping level after __kvm_faultin_pfn() has
returned.  Then find the largest size that will satisfy the
guest's request while staying within a single host PTE.

Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 arch/powerpc/kvm/e500_mmu_host.c | 180 ++++++++++++-------------------
 1 file changed, 70 insertions(+), 110 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 7752b7f24c51..0457bbc2526f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -326,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
        struct tlbe_ref *ref)
 {
        struct kvm_memory_slot *slot;
-       unsigned long pfn = 0; /* silence GCC warning */
+       unsigned int psize;
+       unsigned long pfn;
        struct page *page = NULL;
        unsigned long hva;
-       int pfnmap = 0;
        int tsize = BOOK3E_PAGESZ_4K;
        int ret = 0;
        unsigned long mmu_seq;
        struct kvm *kvm = vcpu_e500->vcpu.kvm;
-       unsigned long tsize_pages = 0;
        pte_t *ptep;
        unsigned int wimg = 0;
        pgd_t *pgdir;
@@ -361,111 +360,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
        hva = gfn_to_hva_memslot(slot, gfn);
 
-       if (tlbsel == 1) {
-               struct vm_area_struct *vma;
-               mmap_read_lock(kvm->mm);
-
-               vma = find_vma(kvm->mm, hva);
-               if (vma && hva >= vma->vm_start &&
-                   (vma->vm_flags & VM_PFNMAP)) {
-                       /*
-                        * This VMA is a physically contiguous region (e.g.
-                        * /dev/mem) that bypasses normal Linux page
-                        * management.  Find the overlap between the
-                        * vma and the memslot.
-                        */
-
-                       unsigned long start, end;
-                       unsigned long slot_start, slot_end;
-
-                       pfnmap = 1;
-                       writable = vma->vm_flags & VM_WRITE;
-
-                       start = vma->vm_pgoff;
-                       end = start +
-                             vma_pages(vma);
-
-                       pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
-
-                       slot_start = pfn - (gfn - slot->base_gfn);
-                       slot_end = slot_start + slot->npages;
-
-                       if (start < slot_start)
-                               start = slot_start;
-                       if (end > slot_end)
-                               end = slot_end;
-
-                       tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
-                               MAS1_TSIZE_SHIFT;
-
-                       /*
-                        * e500 doesn't implement the lowest tsize bit,
-                        * or 1K pages.
-                        */
-                       tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
-
-                       /*
-                        * Now find the largest tsize (up to what the guest
-                        * requested) that will cover gfn, stay within the
-                        * range, and for which gfn and pfn are mutually
-                        * aligned.
-                        */
-
-                       for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
-                               unsigned long gfn_start, gfn_end;
-                               tsize_pages = 1UL << (tsize - 2);
-
-                               gfn_start = gfn & ~(tsize_pages - 1);
-                               gfn_end = gfn_start + tsize_pages;
-
-                               if (gfn_start + pfn - gfn < start)
-                                       continue;
-                               if (gfn_end + pfn - gfn > end)
-                                       continue;
-                               if ((gfn & (tsize_pages - 1)) !=
-                                   (pfn & (tsize_pages - 1)))
-                                       continue;
-
-                               gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
-                               pfn &= ~(tsize_pages - 1);
-                               break;
-                       }
-               } else if (vma && hva >= vma->vm_start &&
-                          is_vm_hugetlb_page(vma)) {
-                       unsigned long psize = vma_kernel_pagesize(vma);
-
-                       tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
-                               MAS1_TSIZE_SHIFT;
-
-                       /*
-                        * Take the largest page size that satisfies both host
-                        * and guest mapping
-                        */
-                       tsize = min(__ilog2(psize) - 10, tsize);
-
-                       /*
-                        * e500 doesn't implement the lowest tsize bit,
-                        * or 1K pages.
-                        */
-                       tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
-               }
-
-               mmap_read_unlock(kvm->mm);
-       }
-
-       if (likely(!pfnmap)) {
-               tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
-               pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
-               if (is_error_noslot_pfn(pfn)) {
-                       if (printk_ratelimit())
-                               pr_err("%s: real page not found for gfn %lx\n",
-                                      __func__, (long)gfn);
-                       return -EINVAL;
-               }
-
-               /* Align guest and physical address to page map boundaries */
-               pfn &= ~(tsize_pages - 1);
-               gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+       pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
+       if (is_error_noslot_pfn(pfn)) {
+               if (printk_ratelimit())
+                       pr_err("%s: real page not found for gfn %lx\n",
+                              __func__, (long)gfn);
+               return -EINVAL;
        }
 
        spin_lock(&kvm->mmu_lock);
@@ -483,7 +383,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
         * can't run hence pfn won't change.
         */
        local_irq_save(flags);
-       ptep = find_linux_pte(pgdir, hva, NULL, NULL);
+       ptep = find_linux_pte(pgdir, hva, NULL, &psize);
        if (ptep) {
                pte_t pte = READ_ONCE(*ptep);
 
@@ -500,6 +400,66 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
                }
        }
 
+       if (psize && tlbsel == 1) {
+               unsigned long psize_pages, tsize_pages;
+               unsigned long start, end;
+               unsigned long slot_start, slot_end;
+
+               psize_pages = 1UL << (psize - PAGE_SHIFT);
+               start = pfn & ~(psize_pages - 1);
+               end = start + psize_pages;
+
+               slot_start = pfn - (gfn - slot->base_gfn);
+               slot_end = slot_start + slot->npages;
+
+               if (start < slot_start)
+                       start = slot_start;
+               if (end > slot_end)
+                       end = slot_end;
+
+               tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+                       MAS1_TSIZE_SHIFT;
+
+               /*
+                * Any page size that doesn't satisfy the host mapping
+                * will fail the start and end tests.
+                */
+               tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize);
+
+               /*
+                * e500 doesn't implement the lowest tsize bit,
+                * or 1K pages.
+                */
+               tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+               /*
+                * Now find the largest tsize (up to what the guest
+                * requested) that will cover gfn, stay within the
+                * range, and for which gfn and pfn are mutually
+                * aligned.
+                */
+
+               for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+                       unsigned long gfn_start, gfn_end;
+                       tsize_pages = 1UL << (tsize - 2);
+
+                       gfn_start = gfn & ~(tsize_pages - 1);
+                       gfn_end = gfn_start + tsize_pages;
+
+                       if (gfn_start + pfn - gfn < start)
+                               continue;
+                       if (gfn_end + pfn - gfn > end)
+                               continue;
+                       if ((gfn & (tsize_pages - 1)) !=
+                           (pfn & (tsize_pages - 1)))
+                               continue;
+
+                       gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+                       pfn &= ~(tsize_pages - 1);
+                       break;
+               }
+       }
+
        kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable);
        kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
                                ref, gvaddr, stlbe);
-- 
2.47.1


Reply via email to