On 9/23/21 16:39, Hu, Jiayu wrote:
Hi Xuan,
-----Original Message-----
From: Ding, Xuan <xuan.d...@intel.com>
Sent: Friday, September 17, 2021 1:26 PM
To: dev@dpdk.org; Burakov, Anatoly <anatoly.bura...@intel.com>;
maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com>
Cc: Hu, Jiayu <jiayu...@intel.com>; Jiang, Cheng1 <cheng1.ji...@intel.com>;
Richardson, Bruce <bruce.richard...@intel.com>; Pai G, Sunil
<sunil.pa...@intel.com>; Wang, Yinan <yinan.w...@intel.com>; Yang,
YvonneX <yvonnex.y...@intel.com>; Ding, Xuan <xuan.d...@intel.com>
Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
The use of IOMMU has many advantages, such as isolation and address
translation. This patch extends the capability of the DMA engine to use IOMMU
if the DMA engine is bound to vfio.
When the memory table is set, the guest memory is mapped into the default
container of DPDK.
Signed-off-by: Xuan Ding <xuan.d...@intel.com>
---
lib/vhost/rte_vhost.h | 1 +
lib/vhost/vhost_user.c | 57
+++++++++++++++++++++++++++++++++++++++++-
2 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
8d875e9322..e0537249f3 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
void *mmap_addr;
uint64_t mmap_size;
int fd;
+ uint64_t dma_map_success;
How about using bool for dma_map_success?
The bigger problem here is that you are breaking the ABI.
};
/**
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
29a4c9af60..7d1d592b86 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
#include "iotlb.h"
#include "vhost.h"
@@ -141,6 +143,46 @@ get_blk_size(int fd)
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; }
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map) {
+ int ret = 0;
+ uint64_t host_iova;
+ host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
host_user_addr);
+ if (do_map) {
+ /* Add mapped region into the default container of DPDK. */
+ ret =
rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ region->dma_map_success = ret == 0;
+ if (ret) {
+ if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+ VHOST_LOG_CONFIG(ERR, "DMA engine map
failed\n");
+ return ret;
+ }
+ return 0;
Why return 0, if ret is -1 here?
Thanks,
Jiayu
+ }
+ return ret;
+ } else {
+ /* No need to do vfio unmap if the map failed. */
+ if (!region->dma_map_success)
+ return 0;
+
+ /* Remove mapped region from the default container of
DPDK. */
+ ret =
rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ if (ret) {
+ VHOST_LOG_CONFIG(ERR, "DMA engine unmap
failed\n");
+ return ret;
+ }
+ region->dma_map_success = 0;
+ }
+ return ret;
+}
+
static void
free_mem_region(struct virtio_net *dev) { @@ -153,6 +195,9 @@
free_mem_region(struct virtio_net *dev)
for (i = 0; i < dev->mem->nregions; i++) {
reg = &dev->mem->regions[i];
if (reg->host_user_addr) {
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(reg, false);
+
munmap(reg->mmap_addr, reg->mmap_size);
close(reg->fd);
}
@@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
uint64_t mmap_size;
uint64_t alignment;
int populate;
+ int ret;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -region->size) {
@@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
mmap_offset;
- if (dev->async_copy)
+ if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
VHOST_LOG_CONFIG(ERR,
"adding guest pages to region
failed.\n");
return -1;
}
+ if (rte_vfio_is_enabled("vfio")) {
+ ret = async_dma_map(region, true);
+ if (ret < 0) {
+ VHOST_LOG_CONFIG(ERR, "Configure
IOMMU for DMA engine failed\n");
+ return -1;
+ }
+ }
+ }
+
VHOST_LOG_CONFIG(INFO,
"guest memory region size: 0x%" PRIx64 "\n"
"\t guest physical addr: 0x%" PRIx64 "\n"
--
2.17.1