Am 2021-11-15 um 2:30 p.m. schrieb Alex Sierra:
> Device Coherent type uses device memory that is coherently accesible by
> the CPU. This could be shown as SP (special purpose) memory range
> at the BIOS-e820 memory enumeration. If no SP memory is supported in
> system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.
>
> Currently, test_hmm only supports two different SP ranges of at least
> 256MB size. This could be specified in the kernel parameter variable
> efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 &
> 0x140000000 physical address. Ex.
> efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
>
> Signed-off-by: Alex Sierra <alex.sie...@amd.com>
> ---
>  lib/test_hmm.c      | 191 +++++++++++++++++++++++++++++++++-----------
>  lib/test_hmm_uapi.h |  15 ++--
>  2 files changed, 153 insertions(+), 53 deletions(-)
>
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index 45334df28d7b..9e2cc0cd4412 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -471,6 +471,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
> *mdevice,
>       unsigned long pfn_first;
>       unsigned long pfn_last;
>       void *ptr;
> +     int ret = -ENOMEM;
>  
>       devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
>       if (!devmem)
> @@ -553,7 +554,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
> *mdevice,
>       }
>       spin_unlock(&mdevice->lock);
>  
> -     return true;
> +     return 0;

You're changing the meaning of the return value, but you're not updating
the callers. I think at least dmirror_devmem_alloc_page will be broken
by this change.


>  
>  err_release:
>       mutex_unlock(&mdevice->devmem_lock);
> @@ -562,7 +563,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
> *mdevice,
>  err_devmem:
>       kfree(devmem);
>  
> -     return false;
> +     return ret;
>  }
>  
>  static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
> @@ -571,8 +572,10 @@ static struct page *dmirror_devmem_alloc_page(struct 
> dmirror_device *mdevice)
>       struct page *rpage;
>  
>       /*
> -      * This is a fake device so we alloc real system memory to store
> -      * our device memory.
> +      * For ZONE_DEVICE private type, this is a fake device so we alloc real
> +      * system memory to store our device memory.
> +      * For ZONE_DEVICE coherent type we use the actual dpage to store the 
> data
> +      * and ignore rpage.
>        */
>       rpage = alloc_page(GFP_HIGHUSER);
>       if (!rpage)
> @@ -623,12 +626,17 @@ static void dmirror_migrate_alloc_and_copy(struct 
> migrate_vma *args,
>                * unallocated pte_none() or read-only zero page.
>                */
>               spage = migrate_pfn_to_page(*src);
> +             if (spage && is_zone_device_page(spage))
> +                     pr_err("page already in device spage pfn: 0x%lx\n",
> +                             page_to_pfn(spage));
> +             WARN_ON(spage && is_zone_device_page(spage));
You can write WARN_ON inside the if-condition:

        if (WARN_ON(spage && is_zone_device_page(spage))
                ...

But I don't see why you need both pr_err and WARN_ON. You can add a
custom message by using WARN instead of WARN_ON:

        WARN(spage && is_zone_device_page(spage),
             "page already in device spage pfn: 0x%lx\n",
             page_to_pfn(spage));


>  
>               dpage = dmirror_devmem_alloc_page(mdevice);
>               if (!dpage)
>                       continue;
>  
> -             rpage = dpage->zone_device_data;
> +             rpage = is_device_private_page(dpage) ? dpage->zone_device_data 
> :
> +                                                     dpage;
>               if (spage)
>                       copy_highpage(rpage, spage);
>               else
> @@ -640,8 +648,11 @@ static void dmirror_migrate_alloc_and_copy(struct 
> migrate_vma *args,
>                * the simulated device memory and that page holds the pointer
>                * to the mirror.
>                */
> +             rpage = dpage->zone_device_data;
>               rpage->zone_device_data = dmirror;
>  
> +             pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 
> 0x%lx\n",
> +                      page_to_pfn(spage), page_to_pfn(dpage));
>               *dst = migrate_pfn(page_to_pfn(dpage)) |
>                           MIGRATE_PFN_LOCKED;
>               if ((*src & MIGRATE_PFN_WRITE) ||
> @@ -721,10 +732,13 @@ static int dmirror_migrate_finalize_and_map(struct 
> migrate_vma *args,
>                       continue;
>  
>               /*
> -              * Store the page that holds the data so the page table
> -              * doesn't have to deal with ZONE_DEVICE private pages.
> +              * For ZONE_DEVICE private pages we store the page that
> +              * holds the data so the page table doesn't have to deal it.
> +              * For ZONE_DEVICE coherent pages we store the actual page, 
> since
> +              * the CPU has coherent access to the page.
>                */

I find this explanation confusing. The comment in
dmirror_devmem_alloc_page is much clearer. I think all we need here is
that dpage->zone_device_data points to the backing page for
device_private pages. Device_coherent struct pages don't have a backing
page because they represent a real CPU-accessible page already.


> -             entry = dpage->zone_device_data;
> +             entry = is_device_private_page(dpage) ? dpage->zone_device_data 
> :
> +                                                     dpage;

I see this in a few places. Maybe better to wrap that in a helper
function or macro. Something like this:

#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
                            (page)->zone_device_data : (page))


>               if (*dst & MIGRATE_PFN_WRITE)
>                       entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
>               entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
> @@ -804,8 +818,110 @@ static int dmirror_exclusive(struct dmirror *dmirror,
>       return ret;
>  }
>  
> -static int dmirror_migrate(struct dmirror *dmirror,
> -                        struct hmm_dmirror_cmd *cmd)
> +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma 
> *args,
> +                                                   struct dmirror *dmirror)
> +{
> +     const unsigned long *src = args->src;
> +     unsigned long *dst = args->dst;
> +     unsigned long start = args->start;
> +     unsigned long end = args->end;
> +     unsigned long addr;
> +
> +     for (addr = start; addr < end; addr += PAGE_SIZE,
> +                                    src++, dst++) {
> +             struct page *dpage, *spage;
> +
> +             spage = migrate_pfn_to_page(*src);
> +             if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> +                     continue;
> +
> +             WARN_ON(!is_device_page(spage));
> +             spage = is_device_private_page(spage) ? spage->zone_device_data 
> :
> +                                                     spage;
> +             dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> +             if (!dpage)
> +                     continue;
> +             pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 
> 0x%lx\n",
> +                      page_to_pfn(spage), page_to_pfn(dpage));
> +
> +             lock_page(dpage);
> +             xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> +             copy_highpage(dpage, spage);
> +             *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +             if (*src & MIGRATE_PFN_WRITE)
> +                     *dst |= MIGRATE_PFN_WRITE;
> +     }
> +     return 0;
> +}
> +
> +static int dmirror_migrate_to_system(struct dmirror *dmirror,
> +                                  struct hmm_dmirror_cmd *cmd)
> +{
> +     unsigned long start, end, addr;
> +     unsigned long size = cmd->npages << PAGE_SHIFT;
> +     struct mm_struct *mm = dmirror->notifier.mm;
> +     struct vm_area_struct *vma;
> +     unsigned long src_pfns[64];
> +     unsigned long dst_pfns[64];
> +     struct migrate_vma args;
> +     unsigned long next;
> +     int ret;
> +
> +     start = cmd->addr;
> +     end = start + size;
> +     if (end < start)
> +             return -EINVAL;
> +
> +     /* Since the mm is for the mirrored process, get a reference first. */
> +     if (!mmget_not_zero(mm))
> +             return -EINVAL;
> +
> +     mmap_read_lock(mm);
> +     for (addr = start; addr < end; addr = next) {
> +             vma = find_vma(mm, addr);
> +             if (!vma || addr < vma->vm_start ||
> +                 !(vma->vm_flags & VM_READ)) {
> +                     ret = -EINVAL;
> +                     goto out;
> +             }
> +             next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
> +             if (next > vma->vm_end)
> +                     next = vma->vm_end;
> +
> +             args.vma = vma;
> +             args.src = src_pfns;
> +             args.dst = dst_pfns;
> +             args.start = addr;
> +             args.end = next;
> +             args.pgmap_owner = dmirror->mdevice;
> +             args.flags = (dmirror->mdevice->zone_device_type ==
> +                           HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
> +                           MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
> +                           MIGRATE_VMA_SELECT_DEVICE_COHERENT;
> +
> +             ret = migrate_vma_setup(&args);
> +             if (ret)
> +                     goto out;
> +
> +             pr_debug("Migrating from device mem to sys mem\n");
> +             dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
> +
> +             migrate_vma_pages(&args);
> +             migrate_vma_finalize(&args);
> +     }
> +     mmap_read_unlock(mm);
> +     mmput(mm);
> +
> +     return ret;
> +
> +out:
> +     mmap_read_unlock(mm);
> +     mmput(mm);
> +     return ret;
> +}
> +
> +static int dmirror_migrate_to_device(struct dmirror *dmirror,
> +                             struct hmm_dmirror_cmd *cmd)
>  {
>       unsigned long start, end, addr;
>       unsigned long size = cmd->npages << PAGE_SHIFT;
> @@ -849,6 +965,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
>               if (ret)
>                       goto out;
>  
> +             pr_debug("Migrating from sys mem to device mem\n");
>               dmirror_migrate_alloc_and_copy(&args, dmirror);
>               migrate_vma_pages(&args);
>               dmirror_migrate_finalize_and_map(&args, dmirror);
> @@ -857,7 +974,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
>       mmap_read_unlock(mm);
>       mmput(mm);
>  
> -     /* Return the migrated data for verification. */
> +     /* Return the migrated data for verification. only for pages in device 
> zone */
>       ret = dmirror_bounce_init(&bounce, start, size);
>       if (ret)
>               return ret;
> @@ -894,9 +1011,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, 
> struct hmm_range *range,
>       }
>  
>       page = hmm_pfn_to_page(entry);
> -     if (is_device_private_page(page)) {
> -             /* Is the page migrated to this device or some other? */
> -             if (dmirror->mdevice == dmirror_page_to_device(page))
> +     if (is_device_page(page)) {
> +             /* Is page ZONE_DEVICE coherent? */
> +             if (!is_device_private_page(page))
> +                     *perm = HMM_DMIRROR_PROT_DEV_COHERENT;

Does it make sense to distinguish between DEV_COHERENT_LOCAL and
DEV_COHERENT_REMOTE as well?


> +             /*
> +              * Is page ZONE_DEVICE private migrated to
> +              * this device or some other?
> +              */
> +             else if (dmirror->mdevice == dmirror_page_to_device(page))
>                       *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
>               else
>                       *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
> @@ -1096,8 +1219,12 @@ static long dmirror_fops_unlocked_ioctl(struct file 
> *filp,
>               ret = dmirror_write(dmirror, &cmd);
>               break;
>  
> -     case HMM_DMIRROR_MIGRATE:
> -             ret = dmirror_migrate(dmirror, &cmd);
> +     case HMM_DMIRROR_MIGRATE_TO_DEV:
> +             ret = dmirror_migrate_to_device(dmirror, &cmd);
> +             break;
> +
> +     case HMM_DMIRROR_MIGRATE_TO_SYS:
> +             ret = dmirror_migrate_to_system(dmirror, &cmd);
>               break;
>  
>       case HMM_DMIRROR_EXCLUSIVE:
> @@ -1152,38 +1279,6 @@ static void dmirror_devmem_free(struct page *page)
>       spin_unlock(&mdevice->lock);
>  }
>  
> -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma 
> *args,
> -                                                   struct dmirror *dmirror)
> -{
> -     const unsigned long *src = args->src;
> -     unsigned long *dst = args->dst;
> -     unsigned long start = args->start;
> -     unsigned long end = args->end;
> -     unsigned long addr;
> -
> -     for (addr = start; addr < end; addr += PAGE_SIZE,
> -                                    src++, dst++) {
> -             struct page *dpage, *spage;
> -
> -             spage = migrate_pfn_to_page(*src);
> -             if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> -                     continue;
> -             spage = spage->zone_device_data;
> -
> -             dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> -             if (!dpage)
> -                     continue;
> -
> -             lock_page(dpage);
> -             xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> -             copy_highpage(dpage, spage);
> -             *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> -             if (*src & MIGRATE_PFN_WRITE)
> -                     *dst |= MIGRATE_PFN_WRITE;
> -     }
> -     return 0;
> -}
> -
>  static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
>  {
>       struct migrate_vma args;
> diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
> index 77f81e6314eb..af15c35a90f4 100644
> --- a/lib/test_hmm_uapi.h
> +++ b/lib/test_hmm_uapi.h
> @@ -19,6 +19,7 @@
>   * @npages: (in) number of pages to read/write
>   * @cpages: (out) number of pages copied
>   * @faults: (out) number of device page faults seen
> + * @zone_device_type: (out) zone device memory type
>   */
>  struct hmm_dmirror_cmd {
>       __u64           addr;
> @@ -32,11 +33,12 @@ struct hmm_dmirror_cmd {
>  /* Expose the address space of the calling process through hmm device file */
>  #define HMM_DMIRROR_READ             _IOWR('H', 0x00, struct hmm_dmirror_cmd)
>  #define HMM_DMIRROR_WRITE            _IOWR('H', 0x01, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_MIGRATE          _IOWR('H', 0x02, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_SNAPSHOT         _IOWR('H', 0x03, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_EXCLUSIVE                _IOWR('H', 0x04, struct 
> hmm_dmirror_cmd)
> -#define HMM_DMIRROR_CHECK_EXCLUSIVE  _IOWR('H', 0x05, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_GET_MEM_DEV_TYPE _IOWR('H', 0x06, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_MIGRATE_TO_DEV   _IOWR('H', 0x02, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_MIGRATE_TO_SYS   _IOWR('H', 0x03, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_SNAPSHOT         _IOWR('H', 0x04, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_EXCLUSIVE                _IOWR('H', 0x05, struct 
> hmm_dmirror_cmd)
> +#define HMM_DMIRROR_CHECK_EXCLUSIVE  _IOWR('H', 0x06, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_GET_MEM_DEV_TYPE _IOWR('H', 0x07, struct hmm_dmirror_cmd)
>  
>  /*
>   * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
> @@ -51,6 +53,8 @@ struct hmm_dmirror_cmd {
>   *                                   device the ioctl() is made
>   * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
>   *                                   other device
> + * HMM_DMIRROR_PROT_DEV_COHERENT: Migrate device coherent page on the device
> + *                             the ioctl() is made
>   */
>  enum {
>       HMM_DMIRROR_PROT_ERROR                  = 0xFF,
> @@ -62,6 +66,7 @@ enum {
>       HMM_DMIRROR_PROT_ZERO                   = 0x10,
>       HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL      = 0x20,
>       HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE     = 0x30,
> +     HMM_DMIRROR_PROT_DEV_COHERENT           = 0x40,

Does it make sense to distinguish between DEV_COHERENT_LOCAL and
DEV_COHERENT_REMOTE as well?

Regards,
  Felix


>  };
>  
>  enum {

Reply via email to