Hi Chenbo,

> -----Original Message-----
> From: Xia, Chenbo <chenbo....@intel.com>
> Sent: November 16, 2021 15:48
> To: Ding, Xuan <xuan.d...@intel.com>; maxime.coque...@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>
> Subject: RE: [PATCH v4] vhost: fix physical address mapping
>
> > -----Original Message-----
> > From: Ding, Xuan <xuan.d...@intel.com>
> > Sent: Monday, November 15, 2021 8:32 PM
> > To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> > <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>; Ding,
> > Xuan <xuan.d...@intel.com>
> > Subject: [PATCH v4] vhost: fix physical address mapping
> >
> > When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> > which requires page by page mapping for DMA devices. To be consistent,
> > this patch implements page by page mapping instead of mapping at the
> > region granularity for both IOVA as VA and PA mode.
> >
> > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> >
> > Signed-off-by: Xuan Ding <xuan.d...@intel.com>
> > ---
> > v4:
> > * Remove unnecessary ENOSUP check.
> > * Adjust return type.
> >
> > v3:
> > * Fix commit title.
> >
> > v2:
> > * Fix a format issue.
> > ---
> >  lib/vhost/vhost.h      |   1 +
> >  lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
> >  2 files changed, 54 insertions(+), 58 deletions(-)
> >
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> > index 7085e0885c..d246538ca5 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
> >  struct guest_page {
> >     uint64_t guest_phys_addr;
> >     uint64_t host_phys_addr;
>
> This name confused me when doing review: this should be host_iova.
>
> Could you help rename it in the patch?

Yes, host_phys_addr is actually an IOVA; what it holds depends on the DPDK
IOVA mode.
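To make what I mean concrete, here is a small sketch (my own illustration
for this discussion, not part of the patch; show_host_iova is a made-up
name): the field stores whatever rte_mem_virt2iova() returns, so its
meaning follows the EAL IOVA mode.

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_eal.h>
    #include <rte_memory.h>

    /* The value stored in guest_page.host_phys_addr comes from
     * rte_mem_virt2iova(), so it is really an IOVA: the virtual address
     * itself under IOVA-as-VA, a physical address under IOVA-as-PA. */
    static void
    show_host_iova(const void *addr)
    {
        rte_iova_t iova = rte_mem_virt2iova(addr);

        if (rte_eal_iova_mode() == RTE_IOVA_VA)
            printf("IOVA-as-VA: iova 0x%" PRIx64 " equals va %p\n",
                   (uint64_t)iova, addr);
        else
            printf("IOVA-as-PA: iova 0x%" PRIx64 " is a physical address\n",
                   (uint64_t)iova);
    }

So I agree host_iova is the accurate name.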
> > +   uint64_t host_user_addr;
> >     uint64_t size;
> >  };
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> > index a781346c4d..0a9dc3350f 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -143,57 +143,56 @@ get_blk_size(int fd)
> >     return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
> >  }
> >
> > -static int
> > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> > +static void
> > +async_dma_map(struct virtio_net *dev, bool do_map)
> >  {
> > -   uint64_t host_iova;
> >     int ret = 0;
> > -
> > -   host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> > +   uint32_t i;
> > +   struct guest_page *page;
> >
> >     if (do_map) {
> > -       /* Add mapped region into the default container of DPDK. */
> > -       ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -                    region->host_user_addr,
> > -                    host_iova,
> > -                    region->size);
> > -       if (ret) {
> > -           /*
> > -            * DMA device may bind with kernel driver, in this case,
> > -            * we don't need to program IOMMU manually. However, if no
> > -            * device is bound with vfio/uio in DPDK, and vfio kernel
> > -            * module is loaded, the API will still be called and return
> > -            * with ENODEV/ENOSUP.
> > -            *
> > -            * DPDK vfio only returns ENODEV/ENOSUP in very similar
> > -            * situations(vfio either unsupported, or supported
> > -            * but no devices found). Either way, no mappings could be
> > -            * performed. We treat it as normal case in async path.
> > -            */
> > -           if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> > -               return 0;
> > -
> > -           VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > -           /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> > -           return 0;
> > +       for (i = 0; i < dev->nr_guest_pages; i++) {
> > +           page = &dev->guest_pages[i];
> > +           ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +                        page->host_user_addr,
> > +                        page->host_phys_addr,
> > +                        page->size);
> > +           if (ret) {
> > +               /*
> > +                * DMA device may bind with kernel driver, in this case,
> > +                * we don't need to program IOMMU manually. However, if no
> > +                * device is bound with vfio/uio in DPDK, and vfio kernel
> > +                * module is loaded, the API will still be called and return
> > +                * with ENODEV.
> > +                *
> > +                * DPDK vfio only returns ENODEV in very similar situations
> > +                * (vfio either unsupported, or supported but no devices found).
> > +                * Either way, no mappings could be performed. We treat it as
> > +                * normal case in async path. This is a workaround.
> > +                */
> > +               if (rte_errno == ENODEV)
> > +                   return;
> > +
> > +               /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
>
> A comment in v3 is missed:
>
> VHST -> VHOST

Thanks for the catch. Will fix it in v5.

Thanks,
Xuan

> Thanks,
> Chenbo

> > +               VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > +           }
> >         }
> >
> >     } else {
> > -       /* Remove mapped region from the default container of DPDK. */
> > -       ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -                      region->host_user_addr,
> > -                      host_iova,
> > -                      region->size);
> > -       if (ret) {
> > -           /* like DMA map, ignore the kernel driver case when unmap. */
> > -           if (rte_errno == EINVAL)
> > -               return 0;
> > -
> > -           VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> > -           return ret;
> > +       for (i = 0; i < dev->nr_guest_pages; i++) {
> > +           page = &dev->guest_pages[i];
> > +           ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +                          page->host_user_addr,
> > +                          page->host_phys_addr,
> > +                          page->size);
> > +           if (ret) {
> > +               /* like DMA map, ignore the kernel driver case when unmap. */
> > +               if (rte_errno == EINVAL)
> > +                   return;
> > +
> > +               VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> > +           }
> >         }
> >     }
> > -
> > -   return ret;
> >  }
> >
> >  static void
> > @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
> >     if (!dev || !dev->mem)
> >         return;
> >
> > +   if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +       async_dma_map(dev, false);
> > +
> >     for (i = 0; i < dev->mem->nregions; i++) {
> >         reg = &dev->mem->regions[i];
> >         if (reg->host_user_addr) {
> > -           if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > -               async_dma_map(reg, false);
> > -
> >             munmap(reg->mmap_addr, reg->mmap_size);
> >             close(reg->fd);
> >         }
> > @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
> >
> >  static int
> >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> > -       uint64_t host_phys_addr, uint64_t size)
> > +       uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
> >  {
> >     struct guest_page *page, *last_page;
> >     struct guest_page *old_pages;
> > @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> >     page = &dev->guest_pages[dev->nr_guest_pages++];
> >     page->guest_phys_addr = guest_phys_addr;
> >     page->host_phys_addr = host_phys_addr;
> > +   page->host_user_addr = host_user_addr;
> >     page->size = size;
> >
> >     return 0;
> > @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> >     size = page_size - (guest_phys_addr & (page_size - 1));
> >     size = RTE_MIN(size, reg_size);
> >
> > -   if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> > +   if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > +                  host_user_addr, size) < 0)
> >         return -1;
> >
> >     host_user_addr += size;
> > @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> >         host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
> >                            host_user_addr);
> >         if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > -               size) < 0)
> > +               host_user_addr, size) < 0)
> >             return -1;
> >
> >         host_user_addr += size;
> > @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >     uint64_t mmap_size;
> >     uint64_t alignment;
> >     int populate;
> > -   int ret;
> >
> >     /* Check for memory_size + mmap_offset overflow */
> >     if (mmap_offset >= -region->size) {
> > @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >             VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
> >             return -1;
> >         }
> > -
> > -       if (rte_vfio_is_enabled("vfio")) {
> > -           ret = async_dma_map(region, true);
> > -           if (ret) {
> > -               VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> > -               return -1;
> > -           }
> > -       }
> >     }
> >
> >     VHOST_LOG_CONFIG(INFO,
> > @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> >         dev->mem->nregions++;
> >     }
> >
> > +   if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +       async_dma_map(dev, true);
> > +
> >     if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> >         goto free_mem_table;
> >
> > --
> > 2.17.1
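One more note, to make the rationale in the commit message concrete: under
IOVA-as-PA, two virtually adjacent pages usually land on different physical
pages, so mapping a whole region with a single IOVA base can be wrong, and
the pages must be mapped one by one. A minimal sketch of the check (my own
illustration only; iova_is_contiguous is a made-up helper, not a DPDK API):

    #include <stdbool.h>
    #include <stddef.h>
    #include <rte_memory.h>

    /* Returns true only if [va, va + len) maps to one contiguous IOVA
     * range, probing at every page boundary. Under IOVA-as-PA this
     * usually breaks at some page, which is why the patch maps guest
     * memory page by page instead of per region. */
    static bool
    iova_is_contiguous(const char *va, size_t len, size_t pg_sz)
    {
        rte_iova_t base = rte_mem_virt2iova(va);
        size_t off;

        for (off = pg_sz; off < len; off += pg_sz) {
            if (rte_mem_virt2iova(va + off) != base + off)
                return false;
        }
        return true;
    }

Under IOVA-as-VA this always holds (the IOVA is the VA), which is why the
region-level mapping used to work in that mode; mapping page by page in
both modes simply keeps the two paths consistent.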