Hi Maxime, The responses are inline. I have also submitted patch-set v12 with the changes.
Internal Use - Confidential From: Maxime Coquelin <[email protected]> Sent: Tuesday, May 5, 2026 2:48 AM To: Bathija, Pravin <[email protected]> Cc: [email protected]; [email protected]; [email protected]; [email protected] Subject: Re: [PATCH v11 3/5] vhost_user: support function defines for back-end [EXTERNAL EMAIL] On Tue, May 5, 2026 at 7:53 AM <[email protected]<mailto:[email protected]>> wrote: From: Pravin M Bathija <[email protected]<mailto:[email protected]>> Here we define support functions which are called from the various vhost-user back-end message functions like set memory table, get memory slots, add memory region, remove memory region. These are essentially common functions to initialize memory, unmap a set of memory regions, perform register copy, align memory addresses, dma map/unmap a single memory region and remove guest pages by removing all entries belonging to a given memory region. Signed-off-by: Pravin M Bathija <[email protected]<mailto:[email protected]>> --- lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 10 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 4bfb13fb98..1f96ecf963 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -171,6 +171,52 @@ get_blk_size(int fd) return ret == -1 ? 
(uint64_t)-1 : (uint64_t)stat.st_blksize; } +static int +async_dma_map_region(struct virtio_net *dev, struct rte_vhost_mem_region *reg, bool do_map) +{ + uint32_t i; + int ret; + uint64_t reg_start = reg->host_user_addr; + uint64_t reg_end = reg_start + reg->size; + + for (i = 0; i < dev->nr_guest_pages; i++) { + struct guest_page *page = &dev->guest_pages[i]; + + /* Only process pages belonging to this region */ + if (page->host_user_addr < reg_start || + page->host_user_addr >= reg_end) + continue; + + if (do_map) { + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, + page->host_user_addr, + page->host_iova, + page->size); + if (ret) { + if (rte_errno == ENODEV) + return 0; + + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine map failed"); + return -1; + } + } else { + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, + page->host_user_addr, + page->host_iova, + page->size); + if (ret) { + if (rte_errno == EINVAL) + return 0; + + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine unmap failed"); + return -1; + } + } + } + + return 0; +} + static void async_dma_map(struct virtio_net *dev, bool do_map) { @@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool do_map) } I think async_dma_map and async_dma_map_region should be refactored to avoid code duplication, What about something like this: static void async_dma_map(struct virtio_net *dev, bool do_map) { uint32_t i; struct rte_vhost_mem_region *reg; for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr == 0) continue; async_dma_map_region(dev, reg, do_map); } } Also, duplicating code and stripping comments is not ideal as they are important (i.e. to understand why we can ignore ENODEV and EINVAL) DMA refactoring: async_dma_map() now delegates to async_dma_map_region(), eliminating the duplicated DMA map/unmap logic. The original comments explaining ENODEV/EINVAL handling have been restored in async_dma_map_region(). 
static void -free_mem_region(struct virtio_net *dev) +free_mem_region(struct rte_vhost_mem_region *reg) +{ + if (reg != NULL && reg->mmap_addr) { + munmap(reg->mmap_addr, reg->mmap_size); + close(reg->fd); + memset(reg, 0, sizeof(struct rte_vhost_mem_region)); + } +} + +static void +free_all_mem_regions(struct virtio_net *dev) { uint32_t i; struct rte_vhost_mem_region *reg; @@ -236,12 +292,10 @@ free_mem_region(struct virtio_net *dev) if (dev->async_copy && rte_vfio_is_enabled("vfio")) async_dma_map(dev, false); - for (i = 0; i < dev->mem->nregions; i++) { + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { reg = &dev->mem->regions[i]; - if (reg->host_user_addr) { - munmap(reg->mmap_addr, reg->mmap_size); - close(reg->fd); - } + if (reg->mmap_addr) + free_mem_region(reg); } } @@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev) vdpa_dev->ops->dev_cleanup(dev->vid); if (dev->mem) { - free_mem_region(dev); + free_all_mem_regions(dev); rte_free(dev->mem); dev->mem = NULL; } @@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq) vhost_devices[dev->vid] = dev; mem_size = sizeof(struct rte_vhost_memory) + - sizeof(struct rte_vhost_mem_region) * dev->mem->nregions; + sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS; mem = rte_realloc_socket(dev->mem, mem_size, 0, node); if (!mem) { VHOST_CONFIG_LOG(dev->ifname, ERR, @@ -808,8 +862,10 @@ hua_to_alignment(struct rte_vhost_memory *mem, void *ptr) uint32_t i; uintptr_t hua = (uintptr_t)ptr; - for (i = 0; i < mem->nregions; i++) { + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { r = &mem->regions[i]; + if (r->host_user_addr == 0) + continue; if (hua >= r->host_user_addr && hua < r->host_user_addr + r->size) { return get_blk_size(r->fd); @@ -1136,6 +1192,24 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, return 0; } +static void +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg) +{ + uint64_t reg_start = 
reg->host_user_addr; + uint64_t reg_end = reg_start + reg->size; + uint32_t i, j = 0; + + for (i = 0; i < dev->nr_guest_pages; i++) { + if (dev->guest_pages[i].host_user_addr >= reg_start && + dev->guest_pages[i].host_user_addr < reg_end) + continue; + if (j != i) + dev->guest_pages[j] = dev->guest_pages[i]; + j++; + } + dev->nr_guest_pages = j; +} + #ifdef RTE_LIBRTE_VHOST_DEBUG /* TODO: enable it only in debug mode? */ static void @@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd, * DPDK's virtual address with Qemu, so that Qemu can * retrieve the region offset when handling userfaults. */ + int reg_msg_index = 0; memory = &ctx->msg.payload.memory; for (i = 0; i < memory->nregions; i++) { reg = &dev->mem->regions[i]; - memory->regions[i].userspace_addr = reg->host_user_addr; + if (reg->host_user_addr == 0) + continue; + memory->regions[reg_msg_index].userspace_addr = reg->host_user_addr; + reg_msg_index++; } /* Send the addresses back to qemu */ @@ -1278,6 +1356,8 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd, /* Now userfault register and we can use the memory */ for (i = 0; i < memory->nregions; i++) { reg = &dev->mem->regions[i]; + if (reg->host_user_addr == 0) + continue; if (vhost_user_postcopy_region_register(dev, reg) < 0) return -1; } @@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev, return 0; } +static int +vhost_user_initialize_memory(struct virtio_net **pdev) +{ + struct virtio_net *dev = *pdev; + int numa_node = SOCKET_ID_ANY; + + if (dev->mem != NULL) { + VHOST_CONFIG_LOG(dev->ifname, ERR, + "memory already initialized, free it first"); + return -1; + } + + /* + * If VQ 0 has already been allocated, try to allocate on the same + * NUMA node. It can be reallocated later in numa_realloc(). 
+ */ + if (dev->nr_vring > 0) + numa_node = dev->virtqueue[0]->numa_node; + + dev->nr_guest_pages = 0; + if (dev->guest_pages == NULL) { + dev->max_guest_pages = 8; + dev->guest_pages = rte_zmalloc_socket(NULL, + dev->max_guest_pages * + sizeof(struct guest_page), + RTE_CACHE_LINE_SIZE, + numa_node); + if (dev->guest_pages == NULL) { + VHOST_CONFIG_LOG(dev->ifname, ERR, + "failed to allocate memory for dev->guest_pages"); + return -1; + } + } + + dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct rte_vhost_memory) + + sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS, 0, numa_node); + if (dev->mem == NULL) { + VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate memory for dev->mem"); + rte_free(dev->guest_pages); + dev->guest_pages = NULL; + return -1; + } + + return 0; +} + I think it should be in a dedicated patch, and in the same patch vhost_user_set_mem_table() would make use of it. The idea is to make it straightforward to see that you are doing a refactoring, and to easily check that the code you are extracting out from vhost_user_set_mem_table() into a new function has not been changed in between. vhost_user_initialize_memory() patch placement: Moved from patch 3 to patch 4, grouped with the vhost_user_set_mem_table() refactoring that uses it. This makes the extraction clearer to review as a pure refactor without mixing it with other changes. static int vhost_user_set_mem_table(struct virtio_net **pdev, struct vhu_msg_context *ctx, -- 2.43.0

