> -----Original Message-----
> From: Ding, Xuan <xuan.d...@intel.com>
> Sent: Monday, November 15, 2021 8:32 PM
> To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>; Ding, Xuan
> <xuan.d...@intel.com>
> Subject: [PATCH v4] vhost: fix physical address mapping
> 
> When IOVA as PA mode is chosen, the IOVA ranges are likely to be
> discontiguous, which requires page-by-page mapping for DMA devices.
> To be consistent, this patch implements page-by-page mapping instead
> of mapping at region granularity for both IOVA as VA and PA modes.
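For context: with IOVA as VA, a region's IOVA range is contiguous, so one
DMA map call per region suffices; with IOVA as PA, each guest page can sit
at a different host physical address. A minimal sketch of the per-page
mapping loop this patch switches to, reusing the names from the diff below
(error handling omitted):

    uint32_t i;
    struct guest_page *page;

    /* One VFIO DMA map call per guest page, since host IOVAs may be
     * discontiguous in IOVA as PA mode.
     */
    for (i = 0; i < dev->nr_guest_pages; i++) {
            page = &dev->guest_pages[i];
            rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
                                       page->host_user_addr,
                                       page->host_phys_addr,
                                       page->size);
    }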
> 
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> 
> Signed-off-by: Xuan Ding <xuan.d...@intel.com>
> ---
> v4:
> * Remove unnecessary ENOTSUP check.
> * Adjust return type.
> 
> v3:
> * Fix commit title.
> 
> v2:
> * Fix a format issue.
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
>  2 files changed, 54 insertions(+), 58 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 7085e0885c..d246538ca5 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>  struct guest_page {
>       uint64_t guest_phys_addr;
>       uint64_t host_phys_addr;

This name confused me during review: it should be host_iova, since it
stores the IOVA returned by rte_mem_virt2iova(), not necessarily a
physical address.

Could you help rename it in the patch? A sketch of what I mean is below.
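Something along these lines; host_iova below is just the suggested name
applied for illustration (the rename would need to cover every use of the
field):

    struct guest_page {
            uint64_t guest_phys_addr;
            uint64_t host_iova;      /* IOVA from rte_mem_virt2iova() */
            uint64_t host_user_addr;
            uint64_t size;
    };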

> +     uint64_t host_user_addr;
>       uint64_t size;
>  };
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index a781346c4d..0a9dc3350f 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
>       return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>  }
> 
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> -     uint64_t host_iova;
>       int ret = 0;
> -
> -     host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +     uint32_t i;
> +     struct guest_page *page;
> +
>       if (do_map) {
> -             /* Add mapped region into the default container of DPDK. */
> -             ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -                                              region->host_user_addr,
> -                                              host_iova,
> -                                              region->size);
> -             if (ret) {
> -                     /*
> -                      * DMA device may bind with kernel driver, in this case,
> -                      * we don't need to program IOMMU manually. However, if no
> -                      * device is bound with vfio/uio in DPDK, and vfio kernel
> -                      * module is loaded, the API will still be called and return
> -                      * with ENODEV/ENOTSUP.
> -                      *
> -                      * DPDK vfio only returns ENODEV/ENOTSUP in very similar
> -                      * situations (vfio either unsupported, or supported
> -                      * but no devices found). Either way, no mappings could be
> -                      * performed. We treat it as normal case in async path.
> -                      */
> -                     if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> -                             return 0;
> -
> -                     VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -                     /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -                     return 0;
> +             for (i = 0; i < dev->nr_guest_pages; i++) {
> +                     page = &dev->guest_pages[i];
> +                     ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +                                                      page->host_user_addr,
> +                                                      page->host_phys_addr,
> +                                                      page->size);
> +                     if (ret) {
> +                             /*
> +                              * DMA device may bind with kernel driver, in this case,
> +                              * we don't need to program IOMMU manually. However, if no
> +                              * device is bound with vfio/uio in DPDK, and vfio kernel
> +                              * module is loaded, the API will still be called and return
> +                              * with ENODEV.
> +                              *
> +                              * DPDK vfio only returns ENODEV in very similar situations
> +                              * (vfio either unsupported, or supported but no devices found).
> +                              * Either way, no mappings could be performed. We treat it as
> +                              * normal case in async path. This is a workaround.
> +                              */
> +                             if (rte_errno == ENODEV)
> +                                     return;
> +
> +                             /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */

A review comment on v3 was missed:

VHST -> VHOST
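That is, the comment should read:

    /* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */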

Thanks,
Chenbo

> +                             VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +                     }
>               }
> 
>       } else {
> -             /* Remove mapped region from the default container of DPDK. */
> -             ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -                                                region->host_user_addr,
> -                                                host_iova,
> -                                                region->size);
> -             if (ret) {
> -                     /* like DMA map, ignore the kernel driver case when unmap. */
> -                     if (rte_errno == EINVAL)
> -                             return 0;
> -
> -                     VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -                     return ret;
> +             for (i = 0; i < dev->nr_guest_pages; i++) {
> +                     page = &dev->guest_pages[i];
> +                     ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +                                                        page->host_user_addr,
> +                                                        page->host_phys_addr,
> +                                                        page->size);
> +                     if (ret) {
> +                             /* like DMA map, ignore the kernel driver case when unmap. */
> +                             if (rte_errno == EINVAL)
> +                                     return;
> +
> +                             VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +                     }
>               }
>       }
> -
> -     return ret;
>  }
> 
>  static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>       if (!dev || !dev->mem)
>               return;
> 
> +     if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +             async_dma_map(dev, false);
> +
>       for (i = 0; i < dev->mem->nregions; i++) {
>               reg = &dev->mem->regions[i];
>               if (reg->host_user_addr) {
> -                     if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -                             async_dma_map(reg, false);
> -
>                       munmap(reg->mmap_addr, reg->mmap_size);
>                       close(reg->fd);
>               }
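One point worth making explicit (my reading of this hunk): the unmap now
has to happen before the munmap() loop, because the VFIO unmap is keyed on
the pages' host_user_addr, which is only valid while the regions are still
mapped. Roughly:

    /* Order matters: VFIO DMA unmap first, then release the VA range. */
    async_dma_map(dev, false);               /* walks page->host_user_addr */
    munmap(reg->mmap_addr, reg->mmap_size);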
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
> 
>  static int
>  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -                uint64_t host_phys_addr, uint64_t size)
> +                uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
>  {
>       struct guest_page *page, *last_page;
>       struct guest_page *old_pages;
> @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>       page = &dev->guest_pages[dev->nr_guest_pages++];
>       page->guest_phys_addr = guest_phys_addr;
>       page->host_phys_addr  = host_phys_addr;
> +     page->host_user_addr = host_user_addr;
>       page->size = size;
> 
>       return 0;
> @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>       size = page_size - (guest_phys_addr & (page_size - 1));
>       size = RTE_MIN(size, reg_size);
> 
> -     if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> +     if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> +                            host_user_addr, size) < 0)
>               return -1;
> 
>       host_user_addr  += size;
> @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>               host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
>                                                 host_user_addr);
>               if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> -                             size) < 0)
> +                                    host_user_addr, size) < 0)
>                       return -1;
> 
>               host_user_addr  += size;
> @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>       uint64_t mmap_size;
>       uint64_t alignment;
>       int populate;
> -     int ret;
> 
>       /* Check for memory_size + mmap_offset overflow */
>       if (mmap_offset >= -region->size) {
> @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>                       VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
>                       return -1;
>               }
> -
> -             if (rte_vfio_is_enabled("vfio")) {
> -                     ret = async_dma_map(region, true);
> -                     if (ret) {
> -                             VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> -                             return -1;
> -                     }
> -             }
>       }
> 
>       VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
>               dev->mem->nregions++;
>       }
> 
> +     if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +             async_dma_map(dev, true);
> +
>       if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>               goto free_mem_table;
> 
> --
> 2.17.1
