On Tue, May 5, 2026 at 7:53 AM <[email protected]> wrote:

> From: Pravin M Bathija <[email protected]>
>
> Here we define support functions which are called from the various
> vhost-user back-end message functions like set memory table, get
> memory slots, add memory region, remove memory region.  These are
> essentially common functions to initialize memory, unmap a set of
> memory regions, perform register copy, align memory addresses,
> dma map/unmap a single memory region and remove guest pages by
> removing all entries belonging to a given memory region.
>
> Signed-off-by: Pravin M Bathija <[email protected]>
> ---
>  lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 136 insertions(+), 10 deletions(-)
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 4bfb13fb98..1f96ecf963 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -171,6 +171,52 @@ get_blk_size(int fd)
>         return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>  }
>
> +static int
> +async_dma_map_region(struct virtio_net *dev, struct rte_vhost_mem_region
> *reg, bool do_map)
> +{
> +       uint32_t i;
> +       int ret;
> +       uint64_t reg_start = reg->host_user_addr;
> +       uint64_t reg_end = reg_start + reg->size;
> +
> +       for (i = 0; i < dev->nr_guest_pages; i++) {
> +               struct guest_page *page = &dev->guest_pages[i];
> +
> +               /* Only process pages belonging to this region */
> +               if (page->host_user_addr < reg_start ||
> +                   page->host_user_addr >= reg_end)
> +                       continue;
> +
> +               if (do_map) {
> +                       ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +                                       page->host_user_addr,
> +                                       page->host_iova,
> +                                       page->size);
> +                       if (ret) {
> +                               if (rte_errno == ENODEV)
> +                                       return 0;
> +
> +                               VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA
> engine map failed");
> +                               return -1;
> +                       }
> +               } else {
> +                       ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +                                       page->host_user_addr,
> +                                       page->host_iova,
> +                                       page->size);
> +                       if (ret) {
> +                               if (rte_errno == EINVAL)
> +                                       return 0;
> +
> +                               VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA
> engine unmap failed");
> +                               return -1;
> +                       }
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  static void
>  async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> @@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool do_map)
>  }
>

I think async_dma_map and async_dma_map_region should be refactored to
avoid code duplication.
What about something like this:

  /*
   * Proposed refactoring: apply the DMA map (do_map == true) or unmap
   * (do_map == false) operation to every populated memory region by
   * delegating to async_dma_map_region(), so that the per-page VFIO
   * map/unmap logic is implemented in a single place.
   */
  static void
  async_dma_map(struct virtio_net *dev, bool do_map)
  {
      uint32_t i;
      struct rte_vhost_mem_region *reg;

      /* Walk every slot of the region table up to VHOST_MEMORY_MAX_NREGIONS. */
      for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
          reg = &dev->mem->regions[i];
          /* Skip slots whose host_user_addr is 0 (presumably unused slots). */
          if (reg->host_user_addr == 0)
              continue;
          /*
           * NOTE(review): async_dma_map_region() returns int, but the result
           * is discarded here because this function is void -- confirm that
           * continuing with the remaining regions after a failure is intended.
           */
          async_dma_map_region(dev, reg, do_map);
     }
  }

Also, duplicating code and stripping the comments is not ideal, as the
comments are important
(e.g. to understand why we can ignore ENODEV and EINVAL).


>  static void
> -free_mem_region(struct virtio_net *dev)
> +free_mem_region(struct rte_vhost_mem_region *reg)
> +{
> +       if (reg != NULL && reg->mmap_addr) {
> +               munmap(reg->mmap_addr, reg->mmap_size);
> +               close(reg->fd);
> +               memset(reg, 0, sizeof(struct rte_vhost_mem_region));
> +       }
> +}
> +
> +static void
> +free_all_mem_regions(struct virtio_net *dev)
>  {
>         uint32_t i;
>         struct rte_vhost_mem_region *reg;
> @@ -236,12 +292,10 @@ free_mem_region(struct virtio_net *dev)
>         if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>                 async_dma_map(dev, false);
>
> -       for (i = 0; i < dev->mem->nregions; i++) {
> +       for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
>                 reg = &dev->mem->regions[i];
> -               if (reg->host_user_addr) {
> -                       munmap(reg->mmap_addr, reg->mmap_size);
> -                       close(reg->fd);
> -               }
> +               if (reg->mmap_addr)
> +                       free_mem_region(reg);
>         }
>  }
>
> @@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
>                 vdpa_dev->ops->dev_cleanup(dev->vid);
>
>         if (dev->mem) {
> -               free_mem_region(dev);
> +               free_all_mem_regions(dev);
>                 rte_free(dev->mem);
>                 dev->mem = NULL;
>         }
> @@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct
> vhost_virtqueue **pvq)
>         vhost_devices[dev->vid] = dev;
>
>         mem_size = sizeof(struct rte_vhost_memory) +
> -               sizeof(struct rte_vhost_mem_region) * dev->mem->nregions;
> +               sizeof(struct rte_vhost_mem_region) *
> VHOST_MEMORY_MAX_NREGIONS;
>         mem = rte_realloc_socket(dev->mem, mem_size, 0, node);
>         if (!mem) {
>                 VHOST_CONFIG_LOG(dev->ifname, ERR,
> @@ -808,8 +862,10 @@ hua_to_alignment(struct rte_vhost_memory *mem, void
> *ptr)
>         uint32_t i;
>         uintptr_t hua = (uintptr_t)ptr;
>
> -       for (i = 0; i < mem->nregions; i++) {
> +       for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
>                 r = &mem->regions[i];
> +               if (r->host_user_addr == 0)
> +                       continue;
>                 if (hua >= r->host_user_addr &&
>                         hua < r->host_user_addr + r->size) {
>                         return get_blk_size(r->fd);
> @@ -1136,6 +1192,24 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
>         return 0;
>  }
>
> +static void
> +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region
> *reg)
> +{
> +       uint64_t reg_start = reg->host_user_addr;
> +       uint64_t reg_end = reg_start + reg->size;
> +       uint32_t i, j = 0;
> +
> +       for (i = 0; i < dev->nr_guest_pages; i++) {
> +               if (dev->guest_pages[i].host_user_addr >= reg_start &&
> +                   dev->guest_pages[i].host_user_addr < reg_end)
> +                       continue;
> +               if (j != i)
> +                       dev->guest_pages[j] = dev->guest_pages[i];
> +               j++;
> +       }
> +       dev->nr_guest_pages = j;
> +}
> +
>  #ifdef RTE_LIBRTE_VHOST_DEBUG
>  /* TODO: enable it only in debug mode? */
>  static void
> @@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net
> *dev, int main_fd,
>          * DPDK's virtual address with Qemu, so that Qemu can
>          * retrieve the region offset when handling userfaults.
>          */
> +       int reg_msg_index = 0;
>         memory = &ctx->msg.payload.memory;
>         for (i = 0; i < memory->nregions; i++) {
>                 reg = &dev->mem->regions[i];
> -               memory->regions[i].userspace_addr = reg->host_user_addr;
> +               if (reg->host_user_addr == 0)
> +                       continue;
> +               memory->regions[reg_msg_index].userspace_addr =
> reg->host_user_addr;
> +               reg_msg_index++;
>         }
>
>         /* Send the addresses back to qemu */
> @@ -1278,6 +1356,8 @@ vhost_user_postcopy_register(struct virtio_net *dev,
> int main_fd,
>         /* Now userfault register and we can use the memory */
>         for (i = 0; i < memory->nregions; i++) {
>                 reg = &dev->mem->regions[i];
> +               if (reg->host_user_addr == 0)
> +                       continue;
>                 if (vhost_user_postcopy_region_register(dev, reg) < 0)
>                         return -1;
>         }
> @@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev,
>         return 0;
>  }
>
> +static int
> +vhost_user_initialize_memory(struct virtio_net **pdev)
> +{
> +       struct virtio_net *dev = *pdev;
> +       int numa_node = SOCKET_ID_ANY;
> +
> +       if (dev->mem != NULL) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                       "memory already initialized, free it first");
> +               return -1;
> +       }
> +
> +       /*
> +        * If VQ 0 has already been allocated, try to allocate on the same
> +        * NUMA node. It can be reallocated later in numa_realloc().
> +        */
> +       if (dev->nr_vring > 0)
> +               numa_node = dev->virtqueue[0]->numa_node;
> +
> +       dev->nr_guest_pages = 0;
> +       if (dev->guest_pages == NULL) {
> +               dev->max_guest_pages = 8;
> +               dev->guest_pages = rte_zmalloc_socket(NULL,
> +                                       dev->max_guest_pages *
> +                                       sizeof(struct guest_page),
> +                                       RTE_CACHE_LINE_SIZE,
> +                                       numa_node);
> +               if (dev->guest_pages == NULL) {
> +                       VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "failed to allocate memory for
> dev->guest_pages");
> +                       return -1;
> +               }
> +       }
> +
> +       dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> +               sizeof(struct rte_vhost_mem_region) *
> VHOST_MEMORY_MAX_NREGIONS, 0, numa_node);
> +       if (dev->mem == NULL) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate
> memory for dev->mem");
> +               rte_free(dev->guest_pages);
> +               dev->guest_pages = NULL;
> +               return -1;
> +       }
> +
> +       return 0;
> +}
> +
>

I think this function should be introduced in a dedicated patch, and that
same patch should make vhost_user_set_mem_table() use it.
The idea is to make it straightforward to see that you are doing a
refactoring, and to easily check that the code you are extracting from
vhost_user_set_mem_table() into a new function has not been changed
in between.


>  static int
>  vhost_user_set_mem_table(struct virtio_net **pdev,
>                         struct vhu_msg_context *ctx,
> --
> 2.43.0
>
>

Reply via email to