Some minor comments/questions below. Overall, the patches look
fine to me.

> +#include <linux/pagemap.h>
> +#include <linux/migrate.h>
> +#include <linux/kvm_host.h>
> +#include <asm/ultravisor.h>
> +
> +static struct dev_pagemap kvmppc_devm_pgmap;
> +static unsigned long *kvmppc_devm_pfn_bitmap;
> +static DEFINE_SPINLOCK(kvmppc_devm_pfn_lock);

Is this lock protecting just the pfn_bitmap?

> +
> +struct kvmppc_devm_page_pvt {
> +     unsigned long *rmap;
> +     unsigned int lpid;
> +     unsigned long gpa;
> +};
> +
> +/*
> + * Get a free device PFN from the pool
> + *
> + * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
> + * PFN will be used to keep track of the secure page on HV side.
> + *
> + * @rmap here is the slot in the rmap array that corresponds to @gpa.
> + * Thus a non-zero rmap entry indicates that the corresponding guest
> + * page has become secure, and is not mapped on the HV side.
> + *
> + * NOTE: In this and subsequent functions, we pass around and access
> + * individual elements of kvm_memory_slot->arch.rmap[] without any
> + * protection. Should we use lock_rmap() here?
> + */
> +static struct page *kvmppc_devm_get_page(unsigned long *rmap, unsigned long 
> gpa,
> +                                      unsigned int lpid)
> +{
> +     struct page *dpage = NULL;
> +     unsigned long bit, devm_pfn;
> +     unsigned long flags;
> +     struct kvmppc_devm_page_pvt *pvt;
> +     unsigned long pfn_last, pfn_first;
> +
> +     if (kvmppc_rmap_is_devm_pfn(*rmap))
> +             return NULL;
> +
> +     pfn_first = kvmppc_devm_pgmap.res.start >> PAGE_SHIFT;
> +     pfn_last = pfn_first +
> +                (resource_size(&kvmppc_devm_pgmap.res) >> PAGE_SHIFT);
> +     spin_lock_irqsave(&kvmppc_devm_pfn_lock, flags);

Blank lines around spin_lock() would help.

> +     bit = find_first_zero_bit(kvmppc_devm_pfn_bitmap, pfn_last - pfn_first);
> +     if (bit >= (pfn_last - pfn_first))
> +             goto out;
> +
> +     bitmap_set(kvmppc_devm_pfn_bitmap, bit, 1);
> +     devm_pfn = bit + pfn_first;

Can we drop the &kvmppc_devm_pfn_lock here or after the trylock_page()?
Or does it also protect the ->zone_device_data' assignment below as well?
If so, maybe drop the 'pfn_' from the name of the lock?

Besides, we don't seem to hold this lock when accessing ->zone_device_data
in kvmppc_share_page(). Maybe &kvmppc_devm_pfn_lock just protects the bitmap?


> +     dpage = pfn_to_page(devm_pfn);

Does this code and hence CONFIG_PPC_UV depend on a specific model like
CONFIG_SPARSEMEM_VMEMMAP?
> +
> +     if (!trylock_page(dpage))
> +             goto out_clear;
> +
> +     *rmap = devm_pfn | KVMPPC_RMAP_DEVM_PFN;
> +     pvt = kzalloc(sizeof(*pvt), GFP_ATOMIC);
> +     if (!pvt)
> +             goto out_unlock;
> +     pvt->rmap = rmap;
> +     pvt->gpa = gpa;
> +     pvt->lpid = lpid;
> +     dpage->zone_device_data = pvt;

->zone_device_data is set after locking the dpage here, but in
kvmppc_share_page() and kvmppc_devm_fault_migrate_alloc_and_copy()
it is accessed without locking the page?

> +     spin_unlock_irqrestore(&kvmppc_devm_pfn_lock, flags);
> +
> +     get_page(dpage);
> +     return dpage;
> +
> +out_unlock:
> +     unlock_page(dpage);
> +out_clear:
> +     bitmap_clear(kvmppc_devm_pfn_bitmap, devm_pfn - pfn_first, 1);
> +out:
> +     spin_unlock_irqrestore(&kvmppc_devm_pfn_lock, flags);
> +     return NULL;
> +}
> +
> +/*
> + * Alloc a PFN from private device memory pool and copy page from normal
> + * memory to secure memory.
> + */
> +static int
> +kvmppc_devm_migrate_alloc_and_copy(struct migrate_vma *mig,
> +                                unsigned long *rmap, unsigned long gpa,
> +                                unsigned int lpid, unsigned long page_shift)
> +{
> +     struct page *spage = migrate_pfn_to_page(*mig->src);
> +     unsigned long pfn = *mig->src >> MIGRATE_PFN_SHIFT;
> +     struct page *dpage;
> +
> +     *mig->dst = 0;
> +     if (!spage || !(*mig->src & MIGRATE_PFN_MIGRATE))
> +             return 0;
> +
> +     dpage = kvmppc_devm_get_page(rmap, gpa, lpid);
> +     if (!dpage)
> +             return -EINVAL;
> +
> +     if (spage)
> +             uv_page_in(lpid, pfn << page_shift, gpa, 0, page_shift);
> +
> +     *mig->dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +     return 0;
> +}
> +
> +/*
> + * Move page from normal memory to secure memory.
> + */
> +unsigned long
> +kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
> +                  unsigned long flags, unsigned long page_shift)
> +{
> +     unsigned long addr, end;
> +     unsigned long src_pfn, dst_pfn;

These are the host frame numbers correct? Trying to distinguish them
from 'gfn' and 'gpa' used in the function.

> +     struct migrate_vma mig;
> +     struct vm_area_struct *vma;
> +     int srcu_idx;
> +     unsigned long gfn = gpa >> page_shift;
> +     struct kvm_memory_slot *slot;
> +     unsigned long *rmap;
> +     int ret;
> +
> +     if (page_shift != PAGE_SHIFT)
> +             return H_P3;
> +
> +     if (flags)
> +             return H_P2;
> +
> +     ret = H_PARAMETER;
> +     down_read(&kvm->mm->mmap_sem);
> +     srcu_idx = srcu_read_lock(&kvm->srcu);
> +     slot = gfn_to_memslot(kvm, gfn);

Can slot be NULL? could be a bug in UV...

> +     rmap = &slot->arch.rmap[gfn - slot->base_gfn];
> +     addr = gfn_to_hva(kvm, gpa >> page_shift);

Use 'gfn' as the second parameter? 

Nit. for consistency with gpa and gfn, maybe rename 'addr' to
'hva' or to match 'end' maybe to 'start'.

Also, can we check 'kvmppc_rmap_is_devm_pfn(*rmap)' here and bail out
if its already shared? We currently do it further down the call chain
in kvmppc_devm_get_page() after doing more work.


> +     if (kvm_is_error_hva(addr))
> +             goto out;
> +
> +     end = addr + (1UL << page_shift);
> +     vma = find_vma_intersection(kvm->mm, addr, end);
> +     if (!vma || vma->vm_start > addr || vma->vm_end < end)
> +             goto out;
> +
> +     memset(&mig, 0, sizeof(mig));
> +     mig.vma = vma;
> +     mig.start = addr;
> +     mig.end = end;
> +     mig.src = &src_pfn;
> +     mig.dst = &dst_pfn;
> +
> +     if (migrate_vma_setup(&mig))
> +             goto out;
> +
> +     if (kvmppc_devm_migrate_alloc_and_copy(&mig, rmap, gpa,
> +                                            kvm->arch.lpid, page_shift))
> +             goto out_finalize;
> +
> +     migrate_vma_pages(&mig);
> +     ret = H_SUCCESS;
> +out_finalize:
> +     migrate_vma_finalize(&mig);
> +out:
> +     srcu_read_unlock(&kvm->srcu, srcu_idx);
> +     up_read(&kvm->mm->mmap_sem);
> +     return ret;
> +}
> +
> +/*
> + * Provision a new page on HV side and copy over the contents
> + * from secure memory.
> + */
> +static int
> +kvmppc_devm_fault_migrate_alloc_and_copy(struct migrate_vma *mig,
> +                                      unsigned long page_shift)
> +{
> +     struct page *dpage, *spage;
> +     struct kvmppc_devm_page_pvt *pvt;
> +     unsigned long pfn;
> +     int ret;
> +
> +     spage = migrate_pfn_to_page(*mig->src);
> +     if (!spage || !(*mig->src & MIGRATE_PFN_MIGRATE))
> +             return 0;
> +     if (!is_zone_device_page(spage))
> +             return 0;

What does it mean if its not a zone_device page at this point? Caller
would then proceed to migrage_vma_pages() if we return 0 right?

> +
> +     dpage = alloc_page_vma(GFP_HIGHUSER, mig->vma, mig->start);
> +     if (!dpage)
> +             return -EINVAL;
> +     lock_page(dpage);
> +     pvt = spage->zone_device_data;
> +
> +     pfn = page_to_pfn(dpage);
> +     ret = uv_page_out(pvt->lpid, pfn << page_shift, pvt->gpa, 0,
> +                       page_shift);
> +     if (ret == U_SUCCESS)
> +             *mig->dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
> +     else {
> +             unlock_page(dpage);
> +             __free_page(dpage);
> +     }
> +     return ret;
> +}
> +
> +/*
> + * Fault handler callback when HV touches any page that has been
> + * moved to secure memory, we ask UV to give back the page by
> + * issuing a UV_PAGE_OUT uvcall.
> + *
> + * This eventually results in dropping of device PFN and the newly
> + * provisioned page/PFN gets populated in QEMU page tables.
> + */
> +static vm_fault_t kvmppc_devm_migrate_to_ram(struct vm_fault *vmf)
> +{
> +     unsigned long src_pfn, dst_pfn = 0;
> +     struct migrate_vma mig;
> +     int ret = 0;
> +
> +     memset(&mig, 0, sizeof(mig));
> +     mig.vma = vmf->vma;
> +     mig.start = vmf->address;
> +     mig.end = vmf->address + PAGE_SIZE;
> +     mig.src = &src_pfn;
> +     mig.dst = &dst_pfn;
> +
> +     if (migrate_vma_setup(&mig)) {
> +             ret = VM_FAULT_SIGBUS;
> +             goto out;
> +     }
> +
> +     if (kvmppc_devm_fault_migrate_alloc_and_copy(&mig, PAGE_SHIFT)) {
> +             ret = VM_FAULT_SIGBUS;
> +             goto out_finalize;
> +     }
> +
> +     migrate_vma_pages(&mig);
> +out_finalize:
> +     migrate_vma_finalize(&mig);
> +out:
> +     return ret;
> +}
> +
> +/*
> + * Release the device PFN back to the pool
> + *
> + * Gets called when secure page becomes a normal page during UV_PAGE_OUT.

Nit: Should that be H_SVM_PAGE_OUT?

> + */
> +static void kvmppc_devm_page_free(struct page *page)
> +{
> +     unsigned long pfn = page_to_pfn(page);
> +     unsigned long flags;
> +     struct kvmppc_devm_page_pvt *pvt;
> +
> +     spin_lock_irqsave(&kvmppc_devm_pfn_lock, flags);
> +     pvt = page->zone_device_data;
> +     page->zone_device_data = NULL;

If the pfn_lock only protects the bitmap, would be better to move
it here?

> +
> +     bitmap_clear(kvmppc_devm_pfn_bitmap,
> +                  pfn - (kvmppc_devm_pgmap.res.start >> PAGE_SHIFT), 1);
> +     *pvt->rmap = 0;
> +     spin_unlock_irqrestore(&kvmppc_devm_pfn_lock, flags);
> +     kfree(pvt);
> +}
> +
> +static const struct dev_pagemap_ops kvmppc_devm_ops = {
> +     .page_free = kvmppc_devm_page_free,
> +     .migrate_to_ram = kvmppc_devm_migrate_to_ram,
> +};
> +
> +/*
> + * Move page from secure memory to normal memory.
> + */
> +unsigned long
> +kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
> +                   unsigned long flags, unsigned long page_shift)
> +{
> +     struct migrate_vma mig;
> +     unsigned long addr, end;
> +     struct vm_area_struct *vma;
> +     unsigned long src_pfn, dst_pfn = 0;
> +     int srcu_idx;
> +     int ret;

Nit: Not sure its a coding style requirement, but many functions seem
to "sort" these local variables in descending order of line length for
appearance :-)  (eg: migrate_vma* functions).

> +
> +     if (page_shift != PAGE_SHIFT)
> +             return H_P3;
> +
> +     if (flags)
> +             return H_P2;
> +
> +     ret = H_PARAMETER;
> +     down_read(&kvm->mm->mmap_sem);
> +     srcu_idx = srcu_read_lock(&kvm->srcu);
> +     addr = gfn_to_hva(kvm, gpa >> page_shift);
> +     if (kvm_is_error_hva(addr))
> +             goto out;
> +
> +     end = addr + (1UL << page_shift);
> +     vma = find_vma_intersection(kvm->mm, addr, end);
> +     if (!vma || vma->vm_start > addr || vma->vm_end < end)
> +             goto out;
> +
> +     memset(&mig, 0, sizeof(mig));
> +     mig.vma = vma;
> +     mig.start = addr;
> +     mig.end = end;
> +     mig.src = &src_pfn;
> +     mig.dst = &dst_pfn;
> +     if (migrate_vma_setup(&mig))
> +             goto out;
> +
> +     ret = kvmppc_devm_fault_migrate_alloc_and_copy(&mig, page_shift);
> +     if (ret)
> +             goto out_finalize;
> +
> +     migrate_vma_pages(&mig);
> +     ret = H_SUCCESS;

Nit: Blank line here?

> +out_finalize:
> +     migrate_vma_finalize(&mig);
> +out:
> +     srcu_read_unlock(&kvm->srcu, srcu_idx);
> +     up_read(&kvm->mm->mmap_sem);
> +     return ret;
> +}
> +
> +static u64 kvmppc_get_secmem_size(void)
> +{
> +     struct device_node *np;
> +     int i, len;
> +     const __be32 *prop;
> +     u64 size = 0;
> +
> +     np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
> +     if (!np)
> +             goto out;
> +
> +     prop = of_get_property(np, "secure-memory-ranges", &len);
> +     if (!prop)
> +             goto out_put;
> +
> +     for (i = 0; i < len / (sizeof(*prop) * 4); i++)
> +             size += of_read_number(prop + (i * 4) + 2, 2);
> +
> +out_put:
> +     of_node_put(np);
> +out:
> +     return size;
> +}
> +
> +int kvmppc_devm_init(void)
> +{
> +     int ret = 0;
> +     unsigned long size;
> +     struct resource *res;
> +     void *addr;
> +     unsigned long pfn_last, pfn_first;
> +
> +     size = kvmppc_get_secmem_size();
> +     if (!size) {
> +             ret = -ENODEV;
> +             goto out;
> +     }
> +
> +     res = request_free_mem_region(&iomem_resource, size, "kvmppc_devm");
> +     if (IS_ERR(res)) {
> +             ret = PTR_ERR(res);
> +             goto out;
> +     }
> +
> +     kvmppc_devm_pgmap.type = MEMORY_DEVICE_PRIVATE;
> +     kvmppc_devm_pgmap.res = *res;
> +     kvmppc_devm_pgmap.ops = &kvmppc_devm_ops;
> +     addr = memremap_pages(&kvmppc_devm_pgmap, -1);
> +     if (IS_ERR(addr)) {
> +             ret = PTR_ERR(addr);
> +             goto out_free_region;
> +     }
> +
> +     pfn_first = res->start >> PAGE_SHIFT;
> +     pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
> +     kvmppc_devm_pfn_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
> +                                      sizeof(unsigned long), GFP_KERNEL);
> +     if (!kvmppc_devm_pfn_bitmap) {
> +             ret = -ENOMEM;
> +             goto out_unmap;
> +     }
> +
> +     pr_info("KVMPPC-DEVM: Secure Memory size 0x%lx\n", size);
> +     return ret;

Nit: Blank line here?

> +out_unmap:
> +     memunmap_pages(&kvmppc_devm_pgmap);
> +out_free_region:
> +     release_mem_region(res->start, size);
> +out:
> +     return ret;
> +}
> +
> +void kvmppc_devm_free(void)
> +{
> +     memunmap_pages(&kvmppc_devm_pgmap);
> +     release_mem_region(kvmppc_devm_pgmap.res.start,
> +                        resource_size(&kvmppc_devm_pgmap.res));
> +     kfree(kvmppc_devm_pfn_bitmap);
> +}
> -- 
> 2.21.0

Reply via email to