On 7/14/2017 1:25 PM, santosh wrote:
On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote:
On 7/11/2017 11:46 AM, Santosh Shukla wrote:
Get the iommu class of the PCI devices on the bus and return the preferred
iova mapping mode for that bus.
Algorithm for iova scheme selection for PCI bus:
0. Look for a device that is attached to the vfio kdrv and whose driver has
RTE_PCI_DRV_NEED_IOVA_VA set in .drv_flags.
1. Look for any device attached to UIO class of driver.
2. Check for vfio-noiommu mode enabled.
If 1) and 2) are both false and 0) is true, then select
iova=va as the mapping scheme. Otherwise use the default
mapping scheme (iova_pa).
Signed-off-by: Santosh Shukla <santosh.shu...@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.ja...@caviumnetworks.com>
---
v1 --> v2:
- Removed Linux version check in vfio_noiommu func. Refer [1].
- Extended the autodetection logic for _iommu_class.
Refer [2].
[1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
[2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
lib/librte_eal/linuxapp/eal/eal_pci.c | 66 +++++++++++++++++++++++++
lib/librte_eal/linuxapp/eal/eal_vfio.c | 19 +++++++
lib/librte_eal/linuxapp/eal/eal_vfio.h | 4 ++
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 +
4 files changed, 90 insertions(+)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 7d9e1a99b..573caa000 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -45,6 +45,7 @@
#include "eal_filesystem.h"
#include "eal_private.h"
#include "eal_pci_init.h"
+#include "eal_vfio.h"
/**
* @file
@@ -488,6 +489,71 @@ rte_pci_scan(void)
return -1;
}
+/*
+ * Tell whether at least one device on the PCI bus is bound to a UIO
+ * kernel driver, i.e. its kdrv is RTE_KDRV_IGB_UIO or
+ * RTE_KDRV_UIO_GENERIC.
+ *
+ * Returns 1 as soon as such a device is seen, 0 when there is none.
+ */
+static inline int
+pci_device_bound_uio(void)
+{
+	struct rte_pci_device *pci_dev = NULL;
+
+	FOREACH_DEVICE_ON_PCIBUS(pci_dev) {
+		switch (pci_dev->kdrv) {
+		case RTE_KDRV_IGB_UIO:
+		case RTE_KDRV_UIO_GENERIC:
+			return 1;
+		default:
+			/* Any other kernel driver: keep scanning. */
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Tell whether any PCI driver that sets RTE_PCI_DRV_NEED_IOVA_VA in its
+ * drv_flags matches (per rte_pci_match()) a device on the bus whose kdrv
+ * is RTE_KDRV_VFIO.
+ *
+ * Returns 1 on the first such driver/device pair, 0 when none exists.
+ */
+static inline int
+pci_device_has_iova_va(void)
+{
+	struct rte_pci_driver *pci_drv = NULL;
+	struct rte_pci_device *pci_dev = NULL;
+
+	FOREACH_DRIVER_ON_PCIBUS(pci_drv) {
+		/* Drivers that do not request iova=va are of no interest. */
+		if (pci_drv == NULL ||
+		    (pci_drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) == 0)
+			continue;
+
+		FOREACH_DEVICE_ON_PCIBUS(pci_dev) {
+			/* Only vfio-bound devices count. */
+			if (pci_dev->kdrv != RTE_KDRV_VFIO)
+				continue;
+			if (rte_pci_match(pci_drv, pci_dev))
+				return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Get iommu class of PCI devices on the bus.
+ */
+enum rte_iova_mode
+rte_pci_get_iommu_class(void)
+{
+ bool is_vfio_noiommu_enabled;
+ bool has_iova_va;
+ bool is_bound_uio;
+
+ has_iova_va = pci_device_has_iova_va();
+ is_bound_uio = pci_device_bound_uio();
+ is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0;
+
+ if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
+ return RTE_IOVA_VA;
+
PCI is generally present on all platforms, including dpaa2.
There may not be any device found or available for DPDK usage in such cases.
The PCI bus will still return RTE_IOVA_PA, which will make the system mode
PA.
That's the expected behavior. And the implementation makes sure
that the PCI bus returns the default mode, aka _PA, if no PCI device is found.
Isn't code taking care of same?
I have attached a PCI device to the board. But it is being managed by
kernel only.
EAL: PCI device 0000:01:00.0 on NUMA socket 0
EAL: probe driver: 8086:10d3 net_e1000_em
EAL: Not managed by a supported kernel driver, skipped
So, there are devices in the PCI list. But none of them is probed or
being used by dpdk.
Let me walk through the code:
has_iova_va = 0 (if there is no PCI device then pci_device_has_iova_va() will return 0).
And if (has_iova_va && ...) will fail, therefore rte_pci_get_iommu_class() returns
RTE_IOVA_PA mode,
which is the default mode. Right?
This will create an issue for the 2nd bus, which is a VA bus. The combined
mode will become '3', so the system mode will be PA.
+ if (has_iova_va) {
+ if (is_vfio_noiommu_enabled)
+ RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
+ if (is_bound_uio)
+ RTE_LOG(WARNING, EAL, "Some device attached to UIO\n");
+ }
+
+ return RTE_IOVA_PA;
+}
+
/* Read PCI config space. */
int rte_pci_read_config(const struct rte_pci_device *device,
void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c
b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e31..c8a97b7e7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+/*
+ * Report whether the vfio module's unsafe no-IOMMU mode is switched on.
+ *
+ * Reads the first character of the file named by VFIO_NOIOMMU_MODE.
+ *
+ * Returns 1 when that character is 'Y'. Returns -1 when the file cannot
+ * be opened, when no byte can be read from it, or when the character is
+ * anything other than 'Y'.
+ */
+int
+vfio_noiommu_is_enabled(void)
+{
+	int fd, ret;
+	char c = '\0';
+
+	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	/*
+	 * Only trust 'c' when read() actually delivered one byte; on error
+	 * or EOF it would otherwise be compared while uninitialized.
+	 */
+	ret = (read(fd, &c, 1) == 1 && c == 'Y') ? 1 : -1;
+
+	close(fd);
+	return ret;
+}
+
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h
b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7..26ea8e119 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -150,6 +150,8 @@ struct vfio_config {
#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
#define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
/* DMA mapping function prototype.
* Takes VFIO container fd as a parameter.
@@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
int vfio_mp_sync_setup(void);
+int vfio_noiommu_is_enabled(void);
+
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
#define SOCKET_CLR_GROUP 0x300
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index c91dd44c4..044f89c7c 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -206,6 +206,7 @@ DPDK_17.08 {
rte_bus_find_by_device;
rte_bus_find_by_name;
rte_pci_match;
+ rte_pci_get_iommu_class;
} DPDK_17.05;