On Tue, Sep 16, 2025 at 10:25:29PM +0200, Thomas Hellström wrote:
> On Wed, 2025-09-03 at 15:30 -0700, Vivek Kasireddy wrote:
> > For BOs of type ttm_bo_type_sg, that are backed by PCI BAR addresses
> > associated with a VF, we need to adjust and translate these addresses
> > to LMEM addresses to make the BOs usable by the PF. Otherwise, the
> > BOs (i.e, PCI BAR addresses) are only accessible by the CPU and not
> > by the GPU.
> > 
> > In order to do the above, we first need to identify if the addresses
> > associated with an imported BO (type ttm_bo_type_sg) belong to System
> > RAM or a VF or other PCI devices. After we confirm that they belong to
> > a VF, we convert the BAR addresses to DPAs and create a new dma_addr
> > array (of type drm_pagemap_dma_addr) and populate it with the new
> > addresses along with the segment sizes.
> > 
> > v2:
> > - Use dma_addr array instead of sg table to store translated addresses
> >   (Matt)
> > 
> > v3:
> > - Remove the usage of iommu_iova_to_phys() as the imported BO would no
> >   longer contain IOVAs and would instead have BAR addresses.
> > 
> > Cc: Matthew Brost <matthew.br...@intel.com>
> > Cc: Thomas Hellström <thomas.hellst...@linux.intel.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_bo.c       | 98 +++++++++++++++++++++++++++++++-
> >  drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++
> >  2 files changed, 109 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > index 4faf15d5fa6d..6987bffb7aa7 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.c
> > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > @@ -21,11 +21,13 @@
> >  
> >  #include <trace/events/gpu_mem.h>
> >  
> > +#include "regs/xe_bars.h"
> >  #include "xe_device.h"
> >  #include "xe_dma_buf.h"
> >  #include "xe_drm_client.h"
> >  #include "xe_ggtt.h"
> >  #include "xe_gt.h"
> > +#include "xe_gt_sriov_pf_config.h"
> >  #include "xe_map.h"
> >  #include "xe_migrate.h"
> >  #include "xe_pm.h"
> > @@ -33,6 +35,7 @@
> >  #include "xe_pxp.h"
> >  #include "xe_res_cursor.h"
> >  #include "xe_shrinker.h"
> > +#include "xe_sriov_pf_helpers.h"
> >  #include "xe_sriov_vf_ccs.h"
> >  #include "xe_trace_bo.h"
> >  #include "xe_ttm_stolen_mgr.h"
> > @@ -677,6 +680,88 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
> >  	return ret;
> >  }
> >  
> > +static struct pci_dev *xe_find_vf_dev(struct xe_device *xe,
> > +				      phys_addr_t phys)
> > +{
> > +	struct pci_dev *pdev, *pf_pdev = to_pci_dev(xe->drm.dev);
> > +	resource_size_t io_start, io_size;
> > +
> > +	list_for_each_entry(pdev, &pf_pdev->bus->devices, bus_list) {
> > +		if (pdev->is_physfn)
> > +			continue;
> > +
> > +		io_start = pci_resource_start(pdev, LMEM_BAR);
> > +		io_size = pci_resource_len(pdev, LMEM_BAR);
> > +
> > +		if (phys >= io_start &&
> > +		    phys < (io_start + io_size - PAGE_SIZE))
> > +			return pdev;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +
> > +static void xe_bo_translate_io_to_dpa(struct xe_bo *bo, struct sg_table *sg,
> > +				      resource_size_t io_start, int vfid)
> > +{
> > +	struct xe_device *xe = xe_bo_device(bo);
> > +	struct xe_gt *gt = xe_root_mmio_gt(xe);
> > +	struct scatterlist *sgl;
> > +	struct xe_bo *lmem_bo;
> > +	phys_addr_t phys;
> > +	dma_addr_t addr;
> > +	u64 offset, i;
> > +
> > +	lmem_bo = xe_gt_sriov_pf_config_get_lmem_obj(gt, ++vfid);
> > +
> > +	for_each_sgtable_dma_sg(sg, sgl, i) {
> > +		phys = sg_dma_address(sgl);
> > +		offset = phys - io_start;
> > +		addr = xe_bo_addr(lmem_bo, offset, sg_dma_len(sgl));
> 
> Umm, isn't this O(npages²) complexity? Should be using a resource
> cursor for this rather than xe_bo_addr().
> 
Yeah, the complexity isn't great, but the cursor as-is won't work. The
cursor assumes a continuous walk in increasing order, and the sg list
could be out of order. We could find a happy medium and try to use the
cursor if entries show up in increasing order, reinitializing it
whenever one shows up out of order. See the untested sketch at the
bottom of this mail.

Matt

> Also as mentioned separately for malicious guest KMDs, the resulting
> address must be sanity checked?
> 
> > +
> > +		bo->dma_addr[i] = drm_pagemap_addr_encode(addr,
> > +							  DRM_INTERCONNECT_DRIVER,
> 
> Please use the XE variant here, XE_INTERCONNECT_VRAM. The DRM define
> just says that it's the first free enum that drivers can use for their
> private interconnects.
> 
> > +							  get_order(sg_dma_len(sgl)),
> > +							  DMA_BIDIRECTIONAL);
> > +	}
> > +}
> > +
> > +static int xe_bo_sg_to_dma_addr_array(struct sg_table *sg, struct xe_bo *bo)
> > +{
> > +	struct xe_device *xe = xe_bo_device(bo);
> > +	resource_size_t io_start;
> > +	struct pci_dev *pdev;
> > +	phys_addr_t phys;
> > +	int vfid;
> > +
> > +	if (!IS_SRIOV_PF(xe))
> > +		return 0;
> > +
> > +	phys = sg_dma_address(sg->sgl);
> > +	if (page_is_ram(PFN_DOWN(phys)))
> > +		return 0;
> > +
> > +	pdev = xe_find_vf_dev(xe, phys);
> > +	if (!pdev)
> > +		return 0;
> > +
> > +	vfid = pci_iov_vf_id(pdev);
> > +	if (vfid < 0)
> > +		return 0;
> > +
> > +	bo->dma_addr = kmalloc_array(sg->nents, sizeof(*bo->dma_addr),
> > +				     GFP_KERNEL);
> > +	if (!bo->dma_addr)
> > +		return -ENOMEM;
> > +
> > +	bo->is_devmem_external = true;
> > +	io_start = pci_resource_start(pdev, LMEM_BAR);
> > +	xe_bo_translate_io_to_dpa(bo, sg, io_start, vfid);
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
> >   * Note that unmapping the attachment is deferred to the next
> > @@ -695,6 +780,7 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
> >  	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> >  	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
> >  	struct sg_table *sg;
> > +	int ret = 0;
> >  
> >  	xe_assert(xe, attach);
> >  	xe_assert(xe, ttm_bo->ttm);
> > @@ -719,13 +805,19 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
> >  	if (IS_ERR(sg))
> >  		return PTR_ERR(sg);
> >  
> > +	ret = xe_bo_sg_to_dma_addr_array(sg, ttm_to_xe_bo(ttm_bo));
> 
> It looks like this is called for *all* imported dma-bufs.
> Shouldn't this be checked for is_devmem_external?
> 
> > +	if (ret < 0) {
> > +		dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
> > +		return ret;
> > +	}
> > +
> >  	ttm_bo->sg = sg;
> >  	xe_tt->sg = sg;
> >  
> >  out:
> >  	ttm_bo_move_null(ttm_bo, new_res);
> >  
> > -	return 0;
> > +	return ret;
> >  }
> >  
> >  /**
> > @@ -1540,6 +1632,10 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
> >  
> >  		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
> >  					 DMA_BIDIRECTIONAL);
> > +
> > +		if (bo->is_devmem_external) {
> > +			kfree(bo->dma_addr);
> > +		}
> >  		ttm_bo->sg = NULL;
> >  		xe_tt->sg = NULL;
> >  	}
> > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> > index 314652afdca7..371eee3f975f 100644
> > --- a/drivers/gpu/drm/xe/xe_bo_types.h
> > +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> > @@ -88,6 +88,18 @@ struct xe_bo {
> >  	/** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */
> >  	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
> >  
> > +	/**
> > +	 * @is_devmem_external: Whether this BO is an imported dma-buf that
> > +	 * is LMEM based.
> > +	 */
> > +	bool is_devmem_external;
> > +
> > +	/**
> > +	 * @dma_addr: An array to store DMA addresses (DPAs) for imported
> > +	 * dmabuf BOs that are LMEM based.
> > +	 */
> > +	struct drm_pagemap_addr *dma_addr;
> > +
> >  	/**
> >  	 * @cpu_caching: CPU caching mode. Currently only used for userspace
> >  	 * objects. Exceptions are system memory on DGFX, which is always
> 
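For the record, here is the untested sketch of the cursor idea I
mentioned above. It is a rough cut, not a definitive implementation: it
assumes the xe_res_first() / xe_res_next() semantics from
xe_res_cursor.h, that xe_bo_size() and vram_region_gpu_offset() are
visible from this spot in xe_bo.c, and that each sg segment fits within
one contiguous LMEM block (the same assumption the xe_bo_addr() version
makes). It also folds in a bounds check against the VF's LMEM object as
a first stab at the sanity check for malicious guest KMDs, and switches
to XE_INTERCONNECT_VRAM per Thomas's other comment. Note the function
now returns an error, so the caller would need to handle that.

static int xe_bo_translate_io_to_dpa(struct xe_bo *bo, struct sg_table *sg,
				     resource_size_t io_start, int vfid)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	struct xe_res_cursor cur;
	struct scatterlist *sgl;
	struct xe_bo *lmem_bo;
	u64 offset, walked = 0, i;
	dma_addr_t addr;

	lmem_bo = xe_gt_sriov_pf_config_get_lmem_obj(gt, ++vfid);

	/*
	 * Walk the VF's LMEM object with a single cursor instead of
	 * restarting one inside xe_bo_addr() for every segment.
	 */
	xe_res_first(lmem_bo->ttm.resource, 0, xe_bo_size(lmem_bo), &cur);

	for_each_sgtable_dma_sg(sg, sgl, i) {
		offset = sg_dma_address(sgl) - io_start;

		/*
		 * Reject (possibly malicious) VF-provided addresses that
		 * fall outside the VF's LMEM allocation.
		 */
		if (offset + sg_dma_len(sgl) > xe_bo_size(lmem_bo))
			return -EINVAL;

		if (offset < walked)
			/* Out-of-order entry: reinit the cursor at offset */
			xe_res_first(lmem_bo->ttm.resource, offset,
				     xe_bo_size(lmem_bo) - offset, &cur);
		else if (offset > walked)
			/* In-order entry: just advance the cursor */
			xe_res_next(&cur, offset - walked);
		walked = offset;

		/* Same DPA computation __xe_bo_addr() does for VRAM BOs */
		addr = cur.start +
			vram_region_gpu_offset(lmem_bo->ttm.resource);

		bo->dma_addr[i] = drm_pagemap_addr_encode(addr,
							  XE_INTERCONNECT_VRAM,
							  get_order(sg_dma_len(sgl)),
							  DMA_BIDIRECTIONAL);
	}

	return 0;
}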