> -----Original Message-----
> From: Li, Miao <miao...@intel.com>
> Sent: Friday, May 26, 2023 12:31 AM
> To: dev@dpdk.org
> Cc: sk...@marvell.com; tho...@monjalon.net; david.march...@redhat.com;
> ferruh.yi...@amd.com; Xia, Chenbo <chenbo....@intel.com>; Cao, Yahui
> <yahui....@intel.com>; Burakov, Anatoly <anatoly.bura...@intel.com>
> Subject: [PATCH v3 4/4] bus/pci: add VFIO sparse mmap support
> 
> This patch adds sparse mmap support in PCI bus. Sparse mmap is a
> capability defined in VFIO which allows multiple mmap areas in one
> VFIO region.
> 
> In this patch, the sparse mmap regions are mapped to one continuous
> virtual address region that follows device-specific BAR layout. So,
> driver can still access all mapped sparse mmap regions by using
> 'bar_base_address + bar_offset'.
> 
> Signed-off-by: Miao Li <miao...@intel.com>
> Signed-off-by: Chenbo Xia <chenbo....@intel.com>
> ---
>  drivers/bus/pci/linux/pci_vfio.c | 104 +++++++++++++++++++++++++++----
>  drivers/bus/pci/private.h        |   2 +
>  2 files changed, 94 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci_vfio.c
> b/drivers/bus/pci/linux/pci_vfio.c
> index 24b0795fbd..c411909976 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -673,6 +673,54 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct
> mapped_pci_resource *vfio_res,
>       return 0;
>  }
> 
> +static int
> +pci_vfio_sparse_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource
> *vfio_res,
> +             int bar_index, int additional_flags)
> +{
> +     struct pci_map *bar = &vfio_res->maps[bar_index];
> +     struct vfio_region_sparse_mmap_area *sparse;
> +     void *bar_addr;
> +     uint32_t i;
> +
> +     if (bar->size == 0) {
> +             RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index);
> +             return 0;
> +     }
> +
> +     /* reserve the address using an inaccessible mapping */
> +     bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
> +                     MAP_ANONYMOUS | additional_flags, -1, 0);
> +     if (bar_addr != MAP_FAILED) {
> +             void *map_addr = NULL;
> +             for (i = 0; i < bar->nr_areas; i++) {
> +                     sparse = &bar->areas[i];
> +                     if (sparse->size) {
> +                             void *addr = RTE_PTR_ADD(bar_addr,
> (uintptr_t)sparse->offset);
> +                             map_addr = pci_map_resource(addr, vfio_dev_fd,
> +                                     bar->offset + sparse->offset, 
> sparse->size,
> +                                     RTE_MAP_FORCE_ADDRESS);
> +                             if (map_addr == NULL) {
> +                                     munmap(bar_addr, bar->size);
> +                                     RTE_LOG(ERR, EAL, "Failed to map pci
> BAR%d\n",
> +                                             bar_index);
> +                                     goto err_map;
> +                             }
> +                     }
> +             }
> +     } else {
> +             RTE_LOG(ERR, EAL, "Failed to create inaccessible mapping for
> BAR%d\n",
> +                     bar_index);
> +             goto err_map;
> +     }
> +
> +     bar->addr = bar_addr;
> +     return 0;
> +
> +err_map:
> +     bar->nr_areas = 0;
> +     return -1;
> +}
> +
>  /*
>   * region info may contain capability headers, so we need to keep
> reallocating
>   * the memory until we match allocated memory size with argsz.
> @@ -875,6 +923,8 @@ pci_vfio_map_resource_primary(struct rte_pci_device
> *dev)
> 
>       for (i = 0; i < vfio_res->nb_maps; i++) {
>               void *bar_addr;
> +             struct vfio_info_cap_header *hdr;
> +             struct vfio_region_info_cap_sparse_mmap *sparse;
> 
>               ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
>               if (ret < 0) {
> @@ -920,12 +970,33 @@ pci_vfio_map_resource_primary(struct rte_pci_device
> *dev)
>               maps[i].size = reg->size;
>               maps[i].path = NULL; /* vfio doesn't have per-resource paths
> */
> 
> -             ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
> -             if (ret < 0) {
> -                     RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
> -                                     pci_addr, i, strerror(errno));
> -                     free(reg);
> -                     goto err_vfio_res;
> +             hdr = pci_vfio_info_cap(reg, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
> +
> +             if (hdr != NULL) {
> +                     sparse = container_of(hdr,
> +                             struct vfio_region_info_cap_sparse_mmap, 
> header);
> +                     if (sparse->nr_areas > 0) {
> +                             maps[i].nr_areas = sparse->nr_areas;
> +                             maps[i].areas = sparse->areas;

I just notice that this is wrong as the memory that pointer 'sparse' points to
will be freed at the end. map[i].areas needs to be allocated by rte_zmalloc
and freed correctly. Otherwise it could leads to secondary process segfault
when it tries to access maps[i].areas.

Will fix this in v4.

Thanks,
Chenbo

> +                     }
> +             }
> +
> +             if (maps[i].nr_areas > 0) {
> +                     ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i,
> 0);
> +                     if (ret < 0) {
> +                             RTE_LOG(ERR, EAL, "%s sparse mapping BAR%i
> failed: %s\n",
> +                                             pci_addr, i, strerror(errno));
> +                             free(reg);
> +                             goto err_vfio_res;
> +                     }
> +             } else {
> +                     ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
> +                     if (ret < 0) {
> +                             RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: 
> %s\n",
> +                                             pci_addr, i, strerror(errno));
> +                             free(reg);
> +                             goto err_vfio_res;
> +                     }
>               }
> 
>               dev->mem_resource[i].addr = maps[i].addr;
> @@ -1008,11 +1079,20 @@ pci_vfio_map_resource_secondary(struct
> rte_pci_device *dev)
>       maps = vfio_res->maps;
> 
>       for (i = 0; i < vfio_res->nb_maps; i++) {
> -             ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
> -             if (ret < 0) {
> -                     RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
> -                                     pci_addr, i, strerror(errno));
> -                     goto err_vfio_dev_fd;
> +             if (maps[i].nr_areas > 0) {
> +                     ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i,
> 0);
> +                     if (ret < 0) {
> +                             RTE_LOG(ERR, EAL, "%s sparse mapping BAR%i
> failed: %s\n",
> +                                             pci_addr, i, strerror(errno));
> +                             goto err_vfio_dev_fd;
> +                     }
> +             } else {
> +                     ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
> +                     if (ret < 0) {
> +                             RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: 
> %s\n",
> +                                             pci_addr, i, strerror(errno));
> +                             goto err_vfio_dev_fd;
> +                     }
>               }
> 
>               dev->mem_resource[i].addr = maps[i].addr;
> @@ -1062,7 +1142,7 @@ find_and_unmap_vfio_resource(struct
> mapped_pci_res_list *vfio_res_list,
>               break;
>       }
> 
> -     if  (vfio_res == NULL)
> +     if (vfio_res == NULL)
>               return vfio_res;
> 
>       RTE_LOG(INFO, EAL, "Releasing PCI mapped resource for %s\n",
> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
> index 2d6991ccb7..8b0ce73533 100644
> --- a/drivers/bus/pci/private.h
> +++ b/drivers/bus/pci/private.h
> @@ -121,6 +121,8 @@ struct pci_map {
>       uint64_t offset;
>       uint64_t size;
>       uint64_t phaddr;
> +     uint32_t nr_areas;
> +     struct vfio_region_sparse_mmap_area *areas;
>  };
> 
>  struct pci_msix_table {
> --
> 2.25.1

Reply via email to