Re: [PATCH v1 2/4] nouveau/dmem: HMM P2P DMA for private dev pages

Alistair Popple Tue, 15 Oct 2024 22:30:30 -0700


Yonatan Maman <yma...@nvidia.com> writes:


> From: Yonatan Maman <yma...@nvidia.com>
>
> Enabling Peer-to-Peer DMA (P2P DMA) access in GPU-centric applications
> is crucial for minimizing data transfer overhead (e.g., for the RDMA
> use case).
>
> This change aims to enable that capability for Nouveau over HMM device
> private pages. P2P DMA for private device pages allows the GPU to
> directly exchange data with other devices (e.g., NICs) without needing
> to traverse system RAM.
>
> To fully support Peer-to-Peer for device private pages, the following
> changes are made:
>
>  - Introduce struct nouveau_dmem_hmm_p2p within struct nouveau_dmem
>    to manage BAR1 PCI P2P memory. p2p_start_addr holds the virtual
>    address allocated with pci_alloc_p2pmem(), and p2p_size represents
>    the allocated size of the PCI P2P memory.
>
>  - nouveau_dmem_init - Ensure BAR1 accessibility and assign struct
>    pages (PCI_P2P_PAGE) for all BAR1 pages. Introduce
>    nouveau_alloc_bar1_pci_p2p_mem in nouveau_dmem to expose BAR1 for
>    use as P2P memory via pci_p2pdma_add_resource and implement static
>    allocation and assignment of struct pages using pci_alloc_p2pmem.
>    This function will be called from nouveau_dmem_init, and failure
>    triggers a warning message instead of driver failure.
>
>  - nouveau_dmem_fini - Ensure BAR1 PCI P2P memory is properly
>    destroyed during driver cleanup. Introduce
>    nouveau_destroy_bar1_pci_p2p_mem to handle freeing of PCI P2P
>    memory associated with Nouveau BAR1. Modify nouveau_dmem_fini to
>    call nouveau_destroy_bar1_pci_p2p_mem.
>
>  - Implement Nouveau `p2p_page` callback function - Implement BAR1
>    mapping for the chunk using `io_mem_reserve` if no mapping exists.
>    Retrieve the pre-allocated P2P virtual address and size from
>    `hmm_p2p`. Calculate the page offset within BAR1 and return the
>    corresponding P2P page.
>
> Signed-off-by: Yonatan Maman <yma...@nvidia.com>
> Reviewed-by: Gal Shalom <galsha...@nvidia.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_dmem.c | 117 ++++++++++++++++++++++++-
>  1 file changed, 115 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 1a072568cef6..13fb8671f212 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -40,6 +40,9 @@
>  #include <linux/hmm.h>
>  #include <linux/memremap.h>
>  #include <linux/migrate.h>
> +#include <linux/pci-p2pdma.h>
> +#include <nvkm/core/pci.h>
> +
>  
>  /*
>   * FIXME: this is ugly right now we are using TTM to allocate vram and we pin
> @@ -77,9 +80,15 @@ struct nouveau_dmem_migrate {
>       struct nouveau_channel *chan;
>  };
>  
> +struct nouveau_dmem_hmm_p2p {
> +     size_t p2p_size;
> +     void *p2p_start_addr;
> +};
> +
>  struct nouveau_dmem {
>       struct nouveau_drm *drm;
>       struct nouveau_dmem_migrate migrate;
> +     struct nouveau_dmem_hmm_p2p hmm_p2p;
>       struct list_head chunks;
>       struct mutex mutex;
>       struct page *free_pages;
> @@ -158,6 +167,61 @@ static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage,
>       return 0;
>  }
>  
> +static int nouveau_dmem_bar1_mapping(struct nouveau_bo *nvbo,
> +                                  unsigned long long *bus_addr)
> +{
> +     int ret;
> +     struct ttm_resource *mem = nvbo->bo.resource;
> +
> +     if (mem->bus.offset) {
> +             *bus_addr = mem->bus.offset;
> +             return 0;
> +     }
> +
> +     if (PFN_UP(nvbo->bo.base.size) > PFN_UP(nvbo->bo.resource->size))
> +             return -EINVAL;
> +
> +     ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
> +     if (ret)
> +             return ret;
> +
> +     ret = nvbo->bo.bdev->funcs->io_mem_reserve(nvbo->bo.bdev, mem);
> +     *bus_addr = mem->bus.offset;
> +
> +     ttm_bo_unreserve(&nvbo->bo);
> +     return ret;
> +}
> +
> +static struct page *nouveau_dmem_get_dma_page(struct page *private_page)
> +{
> +     int ret;
> +     unsigned long long offset_in_chunk, offset_in_bar1;
> +     unsigned long long chunk_bus_addr, page_bus_addr;
> +     unsigned long long bar1_base_addr;
> +     struct nouveau_drm *drm = page_to_drm(private_page);
> +     struct nouveau_bo *nvbo = nouveau_page_to_chunk(private_page)->bo;
> +     struct nvkm_device *nv_device = nvxx_device(drm);
> +     void *p2p_start_addr = drm->dmem->hmm_p2p.p2p_start_addr;
> +     size_t p2p_size = drm->dmem->hmm_p2p.p2p_size;
> +
> +     bar1_base_addr = nv_device->func->resource_addr(nv_device, 1);
> +     offset_in_chunk =
> +             (page_to_pfn(private_page) << PAGE_SHIFT) -
> +             nouveau_page_to_chunk(private_page)->pagemap.range.start;
> +
> +     ret = nouveau_dmem_bar1_mapping(nvbo, &chunk_bus_addr);
> +     if (ret)
> +             return ERR_PTR(ret);
> +
> +     page_bus_addr = chunk_bus_addr + offset_in_chunk;
> +     if (!p2p_size || page_bus_addr > bar1_base_addr + p2p_size ||
> +         page_bus_addr < bar1_base_addr)
> +             return ERR_PTR(-ENOMEM);
> +
> +     offset_in_bar1 = page_bus_addr - bar1_base_addr;
> +     return virt_to_page(p2p_start_addr + offset_in_bar1);

This conversion looks a bit complicated. Once you have page_bus_addr, I
think you can just return pfn_to_page(page_bus_addr >> PAGE_SHIFT).

> +}
> +
>  static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>  {
>       struct nouveau_drm *drm = page_to_drm(vmf->page);
> @@ -219,8 +283,9 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>  }
>  
>  static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
> -     .page_free              = nouveau_dmem_page_free,
> -     .migrate_to_ram         = nouveau_dmem_migrate_to_ram,
> +     .page_free               = nouveau_dmem_page_free,
> +     .migrate_to_ram          = nouveau_dmem_migrate_to_ram,
> +     .get_dma_page_for_device = nouveau_dmem_get_dma_page,
>  };
>  
>  static int
> @@ -413,14 +478,31 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
>       kvfree(dma_addrs);
>  }
>  
> +static void nouveau_destroy_bar1_pci_p2p_mem(struct nouveau_drm *drm,
> +                                          struct pci_dev *pdev,
> +                                          void *p2p_start_addr,
> +                                          size_t p2p_size)
> +{
> +     if (p2p_size)
> +             pci_free_p2pmem(pdev, p2p_start_addr, p2p_size);
> +
> +     NV_INFO(drm, "PCI P2P memory freed(%p)\n", p2p_start_addr);
> +}
> +
>  void
>  nouveau_dmem_fini(struct nouveau_drm *drm)
>  {
>       struct nouveau_dmem_chunk *chunk, *tmp;
> +     struct nvkm_device *nv_device = nvxx_device(drm);
>  
>       if (drm->dmem == NULL)
>               return;
>  
> +     nouveau_destroy_bar1_pci_p2p_mem(drm,
> +                                      nv_device->func->pci(nv_device)->pdev,
> +                                      drm->dmem->hmm_p2p.p2p_start_addr,
> +                                      drm->dmem->hmm_p2p.p2p_size);
> +
>       mutex_lock(&drm->dmem->mutex);
>  
>       list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
> @@ -586,10 +668,30 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm)
>       return -ENODEV;
>  }
>  
> +static int nouveau_alloc_bar1_pci_p2p_mem(struct nouveau_drm *drm,
> +                                       struct pci_dev *pdev, size_t size,
> +                                       void **pp2p_start_addr,
> +                                       size_t *pp2p_size)
> +{
> +     int ret;
> +
> +     ret = pci_p2pdma_add_resource(pdev, 1, size, 0);
> +     if (ret)
> +             return ret;
> +
> +     *pp2p_start_addr = pci_alloc_p2pmem(pdev, size);
> +     *pp2p_size = (*pp2p_start_addr) ? size : 0;

Why return the size here? Personally I think it would be clearer to have
the caller directly initialise/clear whatever struct values it needs.

> +
> +     NV_INFO(drm, "PCI P2P memory allocated(%p)\n", *pp2p_start_addr);
> +     return 0;
> +}
> +
>  void
>  nouveau_dmem_init(struct nouveau_drm *drm)
>  {
>       int ret;
> +     struct nvkm_device *nv_device = nvxx_device(drm);
> +     size_t bar1_size;
>  
>       /* This only make sense on PASCAL or newer */
>       if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
> @@ -610,6 +712,17 @@ nouveau_dmem_init(struct nouveau_drm *drm)
>               kfree(drm->dmem);
>               drm->dmem = NULL;
>       }
> +
> +     /* Expose BAR1 for HMM P2P Memory */
> +     bar1_size = nv_device->func->resource_size(nv_device, 1);
> +     ret = nouveau_alloc_bar1_pci_p2p_mem(drm,
> +                                          nv_device->func->pci(nv_device)->pdev,
> +                                          bar1_size,
> +                                          &drm->dmem->hmm_p2p.p2p_start_addr,
> +                                          &drm->dmem->hmm_p2p.p2p_size);
> +     if (ret)
> +             NV_WARN(drm,
> +                     "PCI P2P memory allocation failed, HMM P2P won't be supported\n");
>  }
>  
>  static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,

Re: [PATCH v1 2/4] nouveau/dmem: HMM P2P DMA for private dev pages

Reply via email to