Hi Chenbo,

> -----Original Message-----
> From: Xia, Chenbo <chenbo....@intel.com>
> Sent: November 16, 2021 15:48
> To: Ding, Xuan <xuan.d...@intel.com>; maxime.coque...@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>
> Subject: RE: [PATCH v4] vhost: fix physical address mapping
> 
> > -----Original Message-----
> > From: Ding, Xuan <xuan.d...@intel.com>
> > Sent: Monday, November 15, 2021 8:32 PM
> > To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Wang, YuanX
> > <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com>; Ding,
> > Xuan <xuan.d...@intel.com>
> > Subject: [PATCH v4] vhost: fix physical address mapping
> >
> > In IOVA as PA mode, a region's IOVAs are likely to be discontiguous,
> > which requires page-by-page mapping for DMA devices. For consistency,
> > this patch implements page-by-page mapping instead of mapping at
> > region granularity for both IOVA as VA and IOVA as PA modes.
> >
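(To make the motivation concrete, here is a minimal sketch of the failure
mode, not patch code; the helper name and the page-size parameter are
illustrative assumptions:)

#include <stdbool.h>
#include <stdint.h>
#include <rte_memory.h>

/* Illustrative check: is the next host page IOVA-contiguous with the
 * current one? In IOVA as VA mode this always holds, so a single
 * region-sized DMA map happens to work; in IOVA as PA mode it can fail
 * on any page boundary, hence the page-by-page mapping in this patch. */
static bool
next_page_iova_contig(uint64_t host_user_addr, uint64_t page_size)
{
	uint64_t iova0 = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
	uint64_t iova1 = rte_mem_virt2iova(
			(void *)(uintptr_t)(host_user_addr + page_size));

	return iova1 == iova0 + page_size;
}
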
> > Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> >
> > Signed-off-by: Xuan Ding <xuan.d...@intel.com>
> > ---
> > v4:
> > * Remove unnecessary ENOTSUP check.
> > * Adjust return type.
> >
> > v3:
> > * Fix commit title.
> >
> > v2:
> > * Fix a format issue.
> > ---
> >  lib/vhost/vhost.h      |   1 +
> >  lib/vhost/vhost_user.c | 111 ++++++++++++++++++++---------------------
> >  2 files changed, 54 insertions(+), 58 deletions(-)
> >
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> > index 7085e0885c..d246538ca5 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
> >  struct guest_page {
> >     uint64_t guest_phys_addr;
> >     uint64_t host_phys_addr;
> 
> This name confused me during review: it should be host_iova.
> 
> Could you help rename it in the patch?
 
Yes, host_phys_addr is actually an IOVA: which address it holds depends on the DPDK IOVA mode.
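A minimal sketch of what I mean (the helper and the explicit mode check
are illustrative, not part of the patch):

#include <stdint.h>
#include <rte_eal.h>    /* rte_eal_iova_mode() */
#include <rte_memory.h> /* rte_mem_virt2iova() */

/* What guest_page stores is the IOVA DPDK assigned to the host
 * virtual address: the VA itself in IOVA as VA mode, the page's
 * physical address in IOVA as PA mode. The branch is redundant --
 * rte_mem_virt2iova() already behaves this way -- and only spells
 * out the two cases. */
static uint64_t
guest_page_iova(uint64_t host_user_addr)
{
	if (rte_eal_iova_mode() == RTE_IOVA_VA)
		return host_user_addr;

	return rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
}

So renaming the field to host_iova would match what it actually holds.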

> 
> > +   uint64_t host_user_addr;
> >     uint64_t size;
> >  };
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> > index a781346c4d..0a9dc3350f 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -143,57 +143,56 @@ get_blk_size(int fd)
> >     return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
> >  }
> >
> > -static int
> > -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> > +static void
> > +async_dma_map(struct virtio_net *dev, bool do_map)
> >  {
> > -   uint64_t host_iova;
> >     int ret = 0;
> > -
> > -   host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> > +   uint32_t i;
> > +   struct guest_page *page;
> >
> >     if (do_map) {
> > -           /* Add mapped region into the default container of DPDK. */
> > -           ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -                                            region->host_user_addr,
> > -                                            host_iova,
> > -                                            region->size);
> > -           if (ret) {
> > -                   /*
> > -                    * DMA device may bind with kernel driver, in this case,
> > -                    * we don't need to program IOMMU manually. However, if no
> > -                    * device is bound with vfio/uio in DPDK, and vfio kernel
> > -                    * module is loaded, the API will still be called and return
> > -                    * with ENODEV/ENOSUP.
> > -                    *
> > -                    * DPDK vfio only returns ENODEV/ENOSUP in very similar
> > -                    * situations(vfio either unsupported, or supported
> > -                    * but no devices found). Either way, no mappings could be
> > -                    * performed. We treat it as normal case in async path.
> > -                    */
> > -                   if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> > -                           return 0;
> > -
> > -                   VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > -                   /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> > -                   return 0;
> > +           for (i = 0; i < dev->nr_guest_pages; i++) {
> > +                   page = &dev->guest_pages[i];
> > +                   ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +                                                    page->host_user_addr,
> > +                                                    page->host_phys_addr,
> > +                                                    page->size);
> > +                   if (ret) {
> > +                           /*
> > +                            * DMA device may bind with kernel driver, in this case,
> > +                            * we don't need to program IOMMU manually. However, if no
> > +                            * device is bound with vfio/uio in DPDK, and vfio kernel
> > +                            * module is loaded, the API will still be called and return
> > +                            * with ENODEV.
> > +                            *
> > +                            * DPDK vfio only returns ENODEV in very similar situations
> > +                            * (vfio either unsupported, or supported but no devices found).
> > +                            * Either way, no mappings could be performed. We treat it as
> > +                            * normal case in async path. This is a workaround.
> > +                            */
> > +                           if (rte_errno == ENODEV)
> > +                                   return;
> > +
> > +                           /* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> 
> A comment in v3 is missed:
> 
> VHST -> VHOST
> 
> Thanks,
> Chenbo

Thanks for the catch. Will fix it in v5.

Thanks,
Xuan

> > +                           VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> > +                   }
> >             }
> >
> >     } else {
> > -           /* Remove mapped region from the default container of DPDK. */
> > -           ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > -                                              region->host_user_addr,
> > -                                              host_iova,
> > -                                              region->size);
> > -           if (ret) {
> > -                   /* like DMA map, ignore the kernel driver case when unmap. */
> > -                   if (rte_errno == EINVAL)
> > -                           return 0;
> > -
> > -                   VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> > -                   return ret;
> > +           for (i = 0; i < dev->nr_guest_pages; i++) {
> > +                   page = &dev->guest_pages[i];
> > +                   ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +                                                      page->host_user_addr,
> > +                                                      page->host_phys_addr,
> > +                                                      page->size);
> > +                   if (ret) {
> > +                           /* like DMA map, ignore the kernel driver case when unmap. */
> > +                           if (rte_errno == EINVAL)
> > +                                   return;
> > +
> > +                           VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> > +                   }
> >             }
> >     }
> > -
> > -   return ret;
> >  }
> >
> >  static void
> > @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
> >     if (!dev || !dev->mem)
> >             return;
> >
> > +   if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +           async_dma_map(dev, false);
> > +
> >     for (i = 0; i < dev->mem->nregions; i++) {
> >             reg = &dev->mem->regions[i];
> >             if (reg->host_user_addr) {
> > -                   if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > -                           async_dma_map(reg, false);
> > -
> >                     munmap(reg->mmap_addr, reg->mmap_size);
> >                     close(reg->fd);
> >             }
> > @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
> >
> >  static int
> >  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> > -              uint64_t host_phys_addr, uint64_t size)
> > +              uint64_t host_phys_addr, uint64_t host_user_addr, uint64_t size)
> >  {
> >     struct guest_page *page, *last_page;
> >     struct guest_page *old_pages;
> > @@ -1009,6 +1008,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> >     page = &dev->guest_pages[dev->nr_guest_pages++];
> >     page->guest_phys_addr = guest_phys_addr;
> >     page->host_phys_addr  = host_phys_addr;
> > +   page->host_user_addr = host_user_addr;
> >     page->size = size;
> >
> >     return 0;
> > @@ -1028,7 +1028,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> >     size = page_size - (guest_phys_addr & (page_size - 1));
> >     size = RTE_MIN(size, reg_size);
> >
> > -   if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
> > +   if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > +                          host_user_addr, size) < 0)
> >             return -1;
> >
> >     host_user_addr  += size;
> > @@ -1040,7 +1041,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> >             host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
> >                                               host_user_addr);
> >             if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
> > -                           size) < 0)
> > +                                  host_user_addr, size) < 0)
> >                     return -1;
> >
> >             host_user_addr  += size;
> > @@ -1215,7 +1216,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >     uint64_t mmap_size;
> >     uint64_t alignment;
> >     int populate;
> > -   int ret;
> >
> >     /* Check for memory_size + mmap_offset overflow */
> >     if (mmap_offset >= -region->size) {
> > @@ -1274,14 +1274,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
> >                     VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
> >                     return -1;
> >             }
> > -
> > -           if (rte_vfio_is_enabled("vfio")) {
> > -                   ret = async_dma_map(region, true);
> > -                   if (ret) {
> > -                           VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> > -                           return -1;
> > -                   }
> > -           }
> >     }
> >
> >     VHOST_LOG_CONFIG(INFO,
> > @@ -1420,6 +1412,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> >             dev->mem->nregions++;
> >     }
> >
> > +   if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +           async_dma_map(dev, true);
> > +
> >     if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
> >             goto free_mem_table;
> >
> > --
> > 2.17.1
