Apologies, I lost the sign-off from Santosh Shukla, and the commit message still mentions a file that no longer exists, so I'll submit a v4.
Thanks, Anatoly > -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Anatoly Burakov > Sent: Wednesday, January 27, 2016 2:05 PM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode > > This commit is adding a generic mechanism to support multiple IOMMU > types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special VFIO > mode that doesn't use IOMMU at all), but it's easily extended by adding > necessary definitions into eal_pci_init.h and a DMA mapping function to > eal_pci_vfio_dma.c. > > Since type 1 IOMMU module is no longer necessary to have VFIO, we fix the > module check to check for vfio-pci instead. It's not ideal and triggers VFIO > checks more often (and thus produces more error output, which was the > reason behind the module check in the first place), so we compensate for > that by providing more verbose logging, indicating whether VFIO initialization > has succeeded or failed. > > Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com> > Tested-by: Santosh Shukla <sshukla at mvista.com> > --- > v3 changes: > Merging DMA mapping functions back into eal_pci_vfio.c > Fixing and adding comments > > v2 changes: > Compile fix (hat-tip to Santosh Shukla) > Tested-by is provisional, since only superficial testing was done > > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 205 +++++++++++++++++++++-- > ------ > lib/librte_eal/linuxapp/eal/eal_vfio.h | 5 + > 2 files changed, 157 insertions(+), 53 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index 74f91ba..fdf334b 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -72,11 +72,74 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq) > #define VFIO_DIR "/dev/vfio" > #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" > #define VFIO_GROUP_FMT "/dev/vfio/%u" > +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" > #define 
VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) > > /* per-process VFIO config */ > static struct vfio_config vfio_cfg; > > +/* DMA mapping function prototype. > + * Takes VFIO container fd as a parameter. > + * Returns 0 on success, -1 on error. > + * */ > +typedef int (*vfio_dma_func_t)(int); > + > +struct vfio_iommu_type { > + int type_id; > + const char *name; > + vfio_dma_func_t dma_map_func; > +}; > + > +int vfio_iommu_type1_dma_map(int); > +int vfio_iommu_noiommu_dma_map(int); > + > +/* IOMMU types we support */ > +static const struct vfio_iommu_type iommu_types[] = { > + /* x86 IOMMU, otherwise known as type 1 */ > + { VFIO_TYPE1_IOMMU, "Type 1", > &vfio_iommu_type1_dma_map}, > + /* IOMMU-less mode */ > + { VFIO_NOIOMMU_IOMMU, "No-IOMMU", > &vfio_iommu_noiommu_dma_map}, }; > + > +int > +vfio_iommu_type1_dma_map(int vfio_container_fd) { > + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); > + int i, ret; > + > + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ > + for (i = 0; i < RTE_MAX_MEMSEG; i++) { > + struct vfio_iommu_type1_dma_map dma_map; > + > + if (ms[i].addr == NULL) > + break; > + > + memset(&dma_map, 0, sizeof(dma_map)); > + dma_map.argsz = sizeof(struct > vfio_iommu_type1_dma_map); > + dma_map.vaddr = ms[i].addr_64; > + dma_map.size = ms[i].len; > + dma_map.iova = ms[i].phys_addr; > + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | > VFIO_DMA_MAP_FLAG_WRITE; > + > + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, > &dma_map); > + > + if (ret) { > + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, > " > + "error %i (%s)\n", errno, > strerror(errno)); > + return -1; > + } > + } > + > + return 0; > +} > + > +int > +vfio_iommu_noiommu_dma_map(int __rte_unused vfio_container_fd) { > + /* No-IOMMU mode does not need DMA mapping */ > + return 0; > +} > + > int > pci_vfio_read_config(const struct rte_intr_handle *intr_handle, > void *buf, size_t len, off_t offs) @@ -208,42 +271,58 @@ > pci_vfio_set_bus_master(int dev_fd) > return 
0; > } > > -/* set up DMA mappings */ > -static int > -pci_vfio_setup_dma_maps(int vfio_container_fd) -{ > - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); > - int i, ret; > - > - ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, > - VFIO_TYPE1_IOMMU); > - if (ret) { > - RTE_LOG(ERR, EAL, " cannot set IOMMU type, " > - "error %i (%s)\n", errno, strerror(errno)); > - return -1; > +/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for > +error */ static const struct vfio_iommu_type * > +pci_vfio_set_iommu_type(int vfio_container_fd) { > + unsigned idx; > + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { > + const struct vfio_iommu_type *t = &iommu_types[idx]; > + > + int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, > + t->type_id); > + if (!ret) { > + RTE_LOG(NOTICE, EAL, " using IOMMU type %d > (%s)\n", > + t->type_id, t->name); > + return t; > + } > + /* not an error, there may be more supported IOMMU types > */ > + RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " > + "error %i (%s)\n", t->type_id, t->name, > errno, > + strerror(errno)); > } > + /* if we didn't find a suitable IOMMU type, fail */ > + return NULL; > +} > > - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ > - for (i = 0; i < RTE_MAX_MEMSEG; i++) { > - struct vfio_iommu_type1_dma_map dma_map; > - > - if (ms[i].addr == NULL) > - break; > - > - memset(&dma_map, 0, sizeof(dma_map)); > - dma_map.argsz = sizeof(struct > vfio_iommu_type1_dma_map); > - dma_map.vaddr = ms[i].addr_64; > - dma_map.size = ms[i].len; > - dma_map.iova = ms[i].phys_addr; > - dma_map.flags = VFIO_DMA_MAP_FLAG_READ | > VFIO_DMA_MAP_FLAG_WRITE; > - > - ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, > &dma_map); > +/* check if we have any supported extensions */ static int > +pci_vfio_has_supported_extensions(int vfio_container_fd) { > + int ret; > + unsigned idx, n_extensions = 0; > + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { > + const struct vfio_iommu_type *t = &iommu_types[idx]; > > - if (ret) { > - RTE_LOG(ERR, EAL, " cannot set up DMA remapping, > " > - "error %i (%s)\n", errno, > strerror(errno)); > + ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, > + t->type_id); > + if (ret < 0) { > + RTE_LOG(ERR, EAL, " could not get IOMMU type, " > + "error %i (%s)\n", errno, > + strerror(errno)); > + close(vfio_container_fd); > return -1; > + } else if (ret == 1) { > + /* we found a supported extension */ > + n_extensions++; > } > + RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", > + t->type_id, t->name, > + ret ? 
"supported" : "not supported"); > + } > + > + /* if we didn't find any supported IOMMU types, fail */ > + if (!n_extensions) { > + close(vfio_container_fd); > + return -1; > } > > return 0; > @@ -372,17 +451,10 @@ pci_vfio_get_container_fd(void) > return -1; > } > > - /* check if we support IOMMU type 1 */ > - ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, > VFIO_TYPE1_IOMMU); > - if (ret != 1) { > - if (ret < 0) > - RTE_LOG(ERR, EAL, " could not get IOMMU > type, " > - "error %i (%s)\n", errno, > - strerror(errno)); > - else > - RTE_LOG(ERR, EAL, " unsupported IOMMU > type " > - "detected in VFIO\n"); > - close(vfio_container_fd); > + ret = > pci_vfio_has_supported_extensions(vfio_container_fd); > + if (ret) { > + RTE_LOG(ERR, EAL, " no supported IOMMU " > + "extensions found!\n"); > return -1; > } > > @@ -432,6 +504,7 @@ pci_vfio_get_group_fd(int iommu_group_no) > > /* if primary, try to open the group */ > if (internal_config.process_type == RTE_PROC_PRIMARY) { > + /* try regular group format */ > snprintf(filename, sizeof(filename), > VFIO_GROUP_FMT, iommu_group_no); > vfio_group_fd = open(filename, O_RDWR); @@ -442,7 > +515,20 @@ pci_vfio_get_group_fd(int iommu_group_no) > strerror(errno)); > return -1; > } > - return 0; > + > + /* special case: try no-IOMMU path as well */ > + snprintf(filename, sizeof(filename), > + VFIO_NOIOMMU_GROUP_FMT, > iommu_group_no); > + vfio_group_fd = open(filename, O_RDWR); > + if (vfio_group_fd < 0) { > + if (errno != ENOENT) { > + RTE_LOG(ERR, EAL, "Cannot open %s: > %s\n", filename, > + strerror(errno)); > + return -1; > + } > + return 0; > + } > + /* noiommu group found */ > } > > /* if the fd is valid, create a new group for it */ @@ -660,14 > +746,21 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > } > > /* > - * set up DMA mappings for container > + * pick an IOMMU type and set up DMA mappings for container > * > * needs to be done only once, only when at least one group is > assigned to > * a container and only 
in primary process > */ > if (internal_config.process_type == RTE_PROC_PRIMARY && > vfio_cfg.vfio_container_has_dma == 0) { > - ret = > pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd); > + /* select an IOMMU type which we will be using */ > + const struct vfio_iommu_type *t = > + > pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd); > + if (!t) { > + RTE_LOG(ERR, EAL, " %s failed to select IOMMU > type\n", pci_addr); > + return -1; > + } > + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); > if (ret) { > RTE_LOG(ERR, EAL, " %s DMA remapping failed, " > "error %i (%s)\n", pci_addr, errno, > strerror(errno)); @@ -887,35 +980,41 @@ pci_vfio_enable(void) { > /* initialize group list */ > int i; > - int module_vfio_type1; > + int vfio_available; > > for (i = 0; i < VFIO_MAX_GROUPS; i++) { > vfio_cfg.vfio_groups[i].fd = -1; > vfio_cfg.vfio_groups[i].group_no = -1; > } > > - module_vfio_type1 = rte_eal_check_module("vfio_iommu_type1"); > + /* inform the user that we are probing for VFIO */ > + RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); > + > + /* check if vfio-pci module is loaded */ > + vfio_available = rte_eal_check_module("vfio_pci"); > > /* return error directly */ > - if (module_vfio_type1 == -1) { > + if (vfio_available == -1) { > RTE_LOG(INFO, EAL, "Could not get loaded module > details!\n"); > return -1; > } > > /* return 0 if VFIO modules not loaded */ > - if (module_vfio_type1 == 0) { > - RTE_LOG(INFO, EAL, "VFIO modules not all loaded, " > - "skip VFIO support...\n"); > + if (vfio_available == 0) { > + RTE_LOG(INFO, EAL, "VFIO modules not loaded, " > + "skipping VFIO support...\n"); > return 0; > } > > vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd(); > > /* check if we have VFIO driver enabled */ > - if (vfio_cfg.vfio_container_fd != -1) > + if (vfio_cfg.vfio_container_fd != -1) { > + RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); > vfio_cfg.vfio_enabled = 1; > - else > + } else { > RTE_LOG(NOTICE, EAL, "VFIO support could not be > 
initialized\n"); > + } > > return 0; > } > diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h > b/lib/librte_eal/linuxapp/eal/eal_vfio.h > index 72ec3f6..638ee31 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h > +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h > @@ -52,6 +52,11 @@ > #define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE #endif > > +/* older kernels may not have no-IOMMU mode */ #ifndef > +VFIO_NOIOMMU_IOMMU #define VFIO_NOIOMMU_IOMMU 8 #endif > + > #define VFIO_PRESENT > #endif /* kernel version */ > #endif /* RTE_EAL_VFIO */ > -- > 2.5.0