> -----Original Message-----
> From: Ding, Xuan <xuan.d...@intel.com>
> Sent: Monday, November 15, 2021 8:32 PM
> To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>; Ding, Xuan
> <xuan.d...@intel.com>
> Subject: [PATCH v4] vhost: fix physical address mapping
>
> When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> which requires page by page mapping for DMA devices. To be consistent,
> this patch implements page by page mapping instead of mapping at the
> region granularity for both IOVA as VA and PA mode.
>
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
>
> Signed-off-by: Xuan Ding <xuan.d...@intel.com>
> ---
> v4:
> * Remove unnecessary ENOTSUP check.
> * Adjust return type.
>
> v3:
> * Fix commit title.
>
> v2:
> * Fix a format issue.
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
>  2 files changed, 54 insertions(+), 58 deletions(-)
>
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..d246538ca5 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>  struct guest_page {
>  	uint64_t guest_phys_addr;
>  	uint64_t host_phys_addr;

This name confused me while reviewing: it should be host_iova. Could you
rename it in the patch? (A small sketch of what I mean is at the end of this
mail.)

> +	uint64_t host_user_addr;
>  	uint64_t size;
>  };
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index a781346c4d..0a9dc3350f 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
>  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>  }
>
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> -	uint64_t host_iova;
>  	int ret = 0;
> -
> -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +	uint32_t i;
> +	struct guest_page *page;
>
>  	if (do_map) {
> -		/* Add mapped region into the default container of DPDK. */
> -		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						 region->host_user_addr,
> -						 host_iova,
> -						 region->size);
> -		if (ret) {
> -			/*
> -			 * DMA device may bind with kernel driver, in this case,
> -			 * we don't need to program IOMMU manually. However, if no
> -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> -			 * module is loaded, the API will still be called and return
> -			 * with ENODEV/ENOSUP.
> -			 *
> -			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
> -			 * situations(vfio either unsupported, or supported
> -			 * but no devices found). Either way, no mappings could be
> -			 * performed. We treat it as normal case in async path.
> -			 */
> -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -			return 0;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							 page->host_user_addr,
> +							 page->host_phys_addr,
> +							 page->size);
> +			if (ret) {
> +				/*
> +				 * DMA device may bind with kernel driver, in this case,
> +				 * we don't need to program IOMMU manually. However, if no
> +				 * device is bound with vfio/uio in DPDK, and vfio kernel
> +				 * module is loaded, the API will still be called and return
> +				 * with ENODEV.
> +				 *
> +				 * DPDK vfio only returns ENODEV in very similar situations
> +				 * (vfio either unsupported, or supported but no devices found).
> +				 * Either way, no mappings could be performed. We treat it as
> +				 * normal case in async path. This is a workaround.
> +				 */
> +				if (rte_errno == ENODEV)
> +					return;
> +
> +				/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */

A comment from the v3 review was missed here: VHST -> VHOST

Thanks,
Chenbo

> +				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +			}
>  		}
>
>  	} else {
> -		/* Remove mapped region from the default container of DPDK. */
> -		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						   region->host_user_addr,
> -						   host_iova,
> -						   region->size);
> -		if (ret) {
> -			/* like DMA map, ignore the kernel driver case when unmap. */
> -			if (rte_errno == EINVAL)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -			return ret;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							   page->host_user_addr,
> +							   page->host_phys_addr,
> +							   page->size);
> +			if (ret) {
> +				/* like DMA map, ignore the kernel driver case when unmap. */
> +				if (rte_errno == EINVAL)
> +					return;
> +
> +				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +			}
>  		}
>  	}
> -
> -	return ret;
>  }
>
>  static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>  	if (!dev || !dev->mem)
>  		return;
>
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, false);
> +
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -				async_dma_map(reg, false);
> -
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
>
>  static int
>  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -		   uint64_t host_phys_addr, uint64_t size)
> +		   uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
>  	struct guest_page *old_pages;
> @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>  	page = &dev->guest_pages[dev->nr_guest_pages++];
>  	page->guest_phys_addr = guest_phys_addr;
>  	page->host_phys_addr = host_phys_addr;
> +	page->host_user_addr = host_user_addr;
>  	page->size = size;
>
>  	return 0;
> @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>  	size = page_size - (guest_phys_addr & (page_size - 1));
>  	size = RTE_MIN(size, reg_size);
>
> -	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> +	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> +			       host_user_addr, size) < 0)
>  		return -1;
>
>  	host_user_addr += size;
> @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>  		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
>  				host_user_addr);
>  		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> -				size) < 0)
> +				host_user_addr, size) < 0)
>  			return -1;
>
>  		host_user_addr += size;
> @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> -	int ret;
>
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
>  			return -1;
>  		}
> -
> -		if (rte_vfio_is_enabled("vfio")) {
> -			ret = async_dma_map(region, true);
> -			if (ret) {
> -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> -				return -1;
> -			}
> -		}
>  	}
>
>  	VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
>  		dev->mem->nregions++;
>  	}
>
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, true);
> +
>  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>  		goto free_mem_table;
>
> --
> 2.17.1
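
To illustrate the host_iova comment above, here is a rough sketch of how I
would expect struct guest_page to read after the rename. It is only an
illustration against this patch, not a tested change; every call site that
touches host_phys_addr (e.g. add_one_guest_page() and the new loops in
async_dma_map()) would need the same rename.

#include <stdint.h>

/*
 * Sketch only: the field set below is taken from this patch, the single
 * change is the suggested rename host_phys_addr -> host_iova. The value
 * comes from rte_mem_virt2iova(), so it is an IOVA and only equals the
 * physical address when running in IOVA-as-PA mode.
 */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;	/* was: host_phys_addr */
	uint64_t host_user_addr;
	uint64_t size;
};

The rte_vfio_container_dma_map()/unmap() calls in async_dma_map() would then
pass page->host_iova instead of page->host_phys_addr.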