On Wed, 2025-09-03 at 15:30 -0700, Vivek Kasireddy wrote:
> For BOs of type ttm_bo_type_sg that are backed by PCI BAR addresses
> associated with a VF, we need to adjust and translate these addresses
> to LMEM addresses to make the BOs usable by the PF. Otherwise, the
> BOs (i.e., PCI BAR addresses) are only accessible by the CPU and not
> by the GPU.
> 
> In order to do the above, we first need to identify if the addresses
> associated with an imported BO (type ttm_bo_type_sg) belong to System
> RAM, a VF, or other PCI devices. After we confirm that they belong to
> a VF, we convert the BAR addresses to DPAs and create a new dma_addr
> array (of type drm_pagemap_addr) and populate it with the new
> addresses along with the segment sizes.
> 
> v2:
> - Use dma_addr array instead of sg table to store translated addresses
>   (Matt)
> 
> v3:
> - Remove the usage of iommu_iova_to_phys() as the imported BO would no
>   longer contain IOVAs and would instead have BAR addresses.
> 
> Cc: Matthew Brost <matthew.br...@intel.com>
> Cc: Thomas Hellström <thomas.hellst...@linux.intel.com>
> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c       | 98 +++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++
>  2 files changed, 109 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 4faf15d5fa6d..6987bffb7aa7 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -21,11 +21,13 @@
>  
>  #include <trace/events/gpu_mem.h>
>  
> +#include "regs/xe_bars.h"
>  #include "xe_device.h"
>  #include "xe_dma_buf.h"
>  #include "xe_drm_client.h"
>  #include "xe_ggtt.h"
>  #include "xe_gt.h"
> +#include "xe_gt_sriov_pf_config.h"
>  #include "xe_map.h"
>  #include "xe_migrate.h"
>  #include "xe_pm.h"
> @@ -33,6 +35,7 @@
>  #include "xe_pxp.h"
>  #include "xe_res_cursor.h"
>  #include "xe_shrinker.h"
> +#include "xe_sriov_pf_helpers.h"
>  #include "xe_sriov_vf_ccs.h"
>  #include "xe_trace_bo.h"
>  #include "xe_ttm_stolen_mgr.h"
> @@ -677,6 +680,88 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
>       return ret;
>  }
>  
> +static struct pci_dev *xe_find_vf_dev(struct xe_device *xe,
> +                                   phys_addr_t phys)
> +{
> +     struct pci_dev *pdev, *pf_pdev = to_pci_dev(xe->drm.dev);
> +     resource_size_t io_start, io_size;
> +
> +     list_for_each_entry(pdev, &pf_pdev->bus->devices, bus_list) {
> +             if (pdev->is_physfn)
> +                     continue;
> +
> +             io_start = pci_resource_start(pdev, LMEM_BAR);
> +             io_size = pci_resource_len(pdev, LMEM_BAR);
> +
> +             if (phys >= io_start &&
> +                 phys < (io_start + io_size - PAGE_SIZE))
> +                     return pdev;
> +     }
> +
> +     return NULL;
> +}
> +
> +
> +static void xe_bo_translate_io_to_dpa(struct xe_bo *bo, struct sg_table *sg,
> +                                   resource_size_t io_start, int vfid)
> +{
> +     struct xe_device *xe = xe_bo_device(bo);
> +     struct xe_gt *gt = xe_root_mmio_gt(xe);
> +     struct scatterlist *sgl;
> +     struct xe_bo *lmem_bo;
> +     phys_addr_t phys;
> +     dma_addr_t addr;
> +     u64 offset, i;
> +
> +     lmem_bo = xe_gt_sriov_pf_config_get_lmem_obj(gt, ++vfid);
> +
> +     for_each_sgtable_dma_sg(sg, sgl, i) {
> +             phys = sg_dma_address(sgl);
> +             offset = phys - io_start;
> +             addr = xe_bo_addr(lmem_bo, offset, sg_dma_len(sgl));

Umm, isn't this O(npages²) complexity? xe_bo_addr() re-walks the LMEM
object's blocks from the start on every call, so calling it once per
segment is quadratic. Should be using a resource cursor for this
rather than xe_bo_addr().

Also, as mentioned separately, a malicious guest KMD can hand us
arbitrary BAR addresses, so the resulting address must be
sanity-checked against the VF's LMEM allocation?
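Something along these lines, perhaps (a rough, untested sketch: it
assumes the segments arrive in ascending BO-offset order, that the
helper is reworked to return an error, and that bounding against the
LMEM object's size is the right sanity check):

	struct xe_res_cursor cur;
	u64 pos = 0;

	/* Single linear walk over the VF's LMEM object. */
	xe_res_first(lmem_bo->ttm.resource, 0, xe_bo_size(lmem_bo), &cur);

	for_each_sgtable_dma_sg(sg, sgl, i) {
		offset = sg_dma_address(sgl) - io_start;

		/* Reject addresses outside the VF's LMEM allocation. */
		if (offset + sg_dma_len(sgl) > xe_bo_size(lmem_bo))
			return -EINVAL;

		/* Advance the cursor instead of re-walking from zero. */
		xe_res_next(&cur, offset - pos);
		pos = offset;
		addr = cur.start;
		...
	}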

> +
> +             bo->dma_addr[i] = drm_pagemap_addr_encode(addr,
> +                                             DRM_INTERCONNECT_DRIVER,

Please use the XE variant here, XE_INTERCONNECT_VRAM. The DRM define
just says that it's the first free enum value that drivers can use for
their private interconnects.
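i.e. for this call site, just the interconnect argument swapped:

	bo->dma_addr[i] = drm_pagemap_addr_encode(addr,
						  XE_INTERCONNECT_VRAM,
						  get_order(sg_dma_len(sgl)),
						  DMA_BIDIRECTIONAL);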

> +                                             get_order(sg_dma_len(sgl)),
> +                                             DMA_BIDIRECTIONAL);
> +     }
> +}
> +
> +static int xe_bo_sg_to_dma_addr_array(struct sg_table *sg, struct xe_bo *bo)
> +{
> +     struct xe_device *xe = xe_bo_device(bo);
> +     resource_size_t io_start;
> +     struct pci_dev *pdev;
> +     phys_addr_t phys;
> +     int vfid;
> +
> +     if (!IS_SRIOV_PF(xe))
> +             return 0;
> +
> +     phys = sg_dma_address(sg->sgl);
> +     if (page_is_ram(PFN_DOWN(phys)))
> +             return 0;
> +
> +     pdev = xe_find_vf_dev(xe, phys);
> +     if (!pdev)
> +             return 0;
> +
> +     vfid = pci_iov_vf_id(pdev);
> +     if (vfid < 0)
> +             return 0;
> +
> +     bo->dma_addr = kmalloc_array(sg->nents, sizeof(*bo->dma_addr),
> +                                  GFP_KERNEL);
> +     if (!bo->dma_addr)
> +             return -ENOMEM;
> +
> +     bo->is_devmem_external = true;
> +     io_start = pci_resource_start(pdev, LMEM_BAR);
> +     xe_bo_translate_io_to_dpa(bo, sg, io_start, vfid);
> +
> +     return 0;
> +}
> +
>  /*
>   * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
>   * Note that unmapping the attachment is deferred to the next
> @@ -695,6 +780,7 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
>       struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
>       bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
>       struct sg_table *sg;
> +     int ret = 0;
>  
>       xe_assert(xe, attach);
>       xe_assert(xe, ttm_bo->ttm);
> @@ -719,13 +805,19 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
>       if (IS_ERR(sg))
>               return PTR_ERR(sg);
>  
> +     ret = xe_bo_sg_to_dma_addr_array(sg, ttm_to_xe_bo(ttm_bo));

It looks like this is called for *all* imported dma-bufs.
Shouldn't this be checked for is_devmem_external?
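e.g. something like this at the call site (sketch only; it presumes
is_devmem_external gets determined at import/attach time, since right
now it is only set inside xe_bo_sg_to_dma_addr_array() itself):

	/* Only VF-LMEM-backed imports need the DPA translation. */
	if (ttm_to_xe_bo(ttm_bo)->is_devmem_external) {
		ret = xe_bo_sg_to_dma_addr_array(sg, ttm_to_xe_bo(ttm_bo));
		if (ret < 0) {
			dma_buf_unmap_attachment(attach, sg,
						 DMA_BIDIRECTIONAL);
			return ret;
		}
	}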

> +     if (ret < 0) {
> +             dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
> +             return ret;
> +     }
> +
>       ttm_bo->sg = sg;
>       xe_tt->sg = sg;
>  
>  out:
>       ttm_bo_move_null(ttm_bo, new_res);
>  
> -     return 0;
> +     return ret;
>  }
>  
>  /**
> @@ -1540,6 +1632,10 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
>  
>               dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
>                                        DMA_BIDIRECTIONAL);
> +
> +             if (bo->is_devmem_external) {
> +                     kfree(bo->dma_addr);
> +             }
>               ttm_bo->sg = NULL;
>               xe_tt->sg = NULL;
>       }
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 314652afdca7..371eee3f975f 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -88,6 +88,18 @@ struct xe_bo {
>       /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */
>       struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
>  
> +     /**
> +      * @is_devmem_external: Whether this BO is an imported dma-buf that
> +      * is LMEM based.
> +      */
> +     bool is_devmem_external;
> +
> +     /**
> +      * @dma_addr: An array to store DMA addresses (DPAs) for imported
> +      * dmabuf BOs that are LMEM based.
> +      */
> +     struct drm_pagemap_addr *dma_addr;
> +
>       /**
>        * @cpu_caching: CPU caching mode. Currently only used for userspace
>        * objects. Exceptions are system memory on DGFX, which is always
