Get the IOMMU class of the PCI devices on the bus and return the preferred IOVA mapping mode for that bus.
This patch also introduces the RTE_PCI_DRV_IOVA_AS_VA driver flag, which a driver sets when it needs to operate in iova=va mode. Algorithm for IOVA scheme selection on the PCI bus: 0. If no device is bound, return the RTE_IOVA_DC mapping mode; otherwise go to 1). 1. Look for a device that is attached to the vfio kdrv and whose driver has RTE_PCI_DRV_IOVA_AS_VA set in .drv_flags. 2. Look for any device attached to a UIO-class driver. 3. Check whether vfio-noiommu mode is enabled. If 2) and 3) are both false and 1) is true, select RTE_IOVA_VA as the mapping scheme. Otherwise use the default mapping scheme (RTE_IOVA_PA). Signed-off-by: Santosh Shukla <santosh.shu...@caviumnetworks.com> Signed-off-by: Jerin Jacob <jerin.ja...@caviumnetworks.com> Reviewed-by: Maxime Coquelin <maxime.coque...@redhat.com> Acked-by: Hemant Agrawal <hemant.agra...@nxp.com> Reviewed-by: Anatoly Burakov <anatoly.bura...@intel.com> --- v7 --> v8: - Replaced 0/1 with false/true boolean values (suggested by Anatoly). v6 --> v7: - Squashed v6 series patches [01/12] and [05/12], i.e. moved the RTE_PCI_DRV_IOVA_AS_VA flag into this patch (Aaron's comment). lib/librte_eal/common/include/rte_pci.h | 2 + lib/librte_eal/linuxapp/eal/eal_pci.c | 96 +++++++++++++++++++++++++ lib/librte_eal/linuxapp/eal/eal_vfio.c | 19 +++++ lib/librte_eal/linuxapp/eal/eal_vfio.h | 4 ++ lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 + 5 files changed, 122 insertions(+) diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index 0e36de093..a67d77f22 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -202,6 +202,8 @@ struct rte_pci_bus { #define RTE_PCI_DRV_INTR_RMV 0x0010 /** Device driver needs to keep mapped resources if unsupported dev detected */ #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 +/** Device driver supports iova as va */ +#define RTE_PCI_DRV_IOVA_AS_VA 0X0040 /** * A structure describing a PCI mapping. 
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 8951ce742..2971f1d4f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -45,6 +45,7 @@ #include "eal_filesystem.h" #include "eal_private.h" #include "eal_pci_init.h" +#include "eal_vfio.h" /** * @file @@ -487,6 +488,101 @@ rte_pci_scan(void) return -1; } +/* + * Is pci device bound to any kdrv + */ +static inline int +pci_device_is_bound(void) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_UNKNOWN || + dev->kdrv == RTE_KDRV_NONE) { + continue; + } else { + ret = 1; + break; + } + } + return ret; +} + +/* + * Any one of the device bound to uio + */ +static inline int +pci_device_bound_uio(void) +{ + struct rte_pci_device *dev = NULL; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_IGB_UIO || + dev->kdrv == RTE_KDRV_UIO_GENERIC) { + return 1; + } + } + return 0; +} + +/* + * Any one of the device has iova as va + */ +static inline int +pci_device_has_iova_va(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_pci_driver *drv = NULL; + + FOREACH_DRIVER_ON_PCIBUS(drv) { + if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) { + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_VFIO && + rte_pci_match(drv, dev)) + return 1; + } + } + } + return 0; +} + +/* + * Get iommu class of PCI devices on the bus. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void) +{ + bool is_bound; + bool is_vfio_noiommu_enabled = true; + bool has_iova_va; + bool is_bound_uio; + + is_bound = pci_device_is_bound(); + if (!is_bound) + return RTE_IOVA_DC; + + has_iova_va = pci_device_has_iova_va(); + is_bound_uio = pci_device_bound_uio(); +#ifdef VFIO_PRESENT + is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == true ? 
+ true : false; +#endif + + if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled) + return RTE_IOVA_VA; + + if (has_iova_va) { + RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. "); + if (is_vfio_noiommu_enabled) + RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); + if (is_bound_uio) + RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); + } + + return RTE_IOVA_PA; +} + /* Read PCI config space. */ int rte_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset) diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 946df7e31..c8a97b7e7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +int +vfio_noiommu_is_enabled(void) +{ + int fd, ret, cnt __rte_unused; + char c; + + ret = -1; + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + if (fd < 0) + return -1; + + cnt = read(fd, &c, 1); + if (c == 'Y') + ret = 1; + + close(fd); + return ret; +} + #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 5ff63e5d7..26ea8e119 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -150,6 +150,8 @@ struct vfio_config { #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) #define VFIO_GET_REGION_IDX(x) (x >> 40) +#define VFIO_NOIOMMU_MODE \ + "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" /* DMA mapping function prototype. * Takes VFIO container fd as a parameter. 
@@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void); int vfio_mp_sync_setup(void); +int vfio_noiommu_is_enabled(void); + #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 #define SOCKET_CLR_GROUP 0x300 diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 287cc75cd..a8c8ea4f4 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -248,5 +248,6 @@ DPDK_17.11 { global: rte_pci_match; + rte_pci_get_iommu_class; } DPDK_17.08; -- 2.14.1