On Friday 14 July 2017 01:36 PM, Hemant Agrawal wrote: > On 7/14/2017 1:25 PM, santosh wrote: >> On Friday 14 July 2017 01:09 PM, Hemant Agrawal wrote: >> >>> On 7/11/2017 11:46 AM, Santosh Shukla wrote: >>>> Get iommu class of PCI device on the bus and returns preferred iova >>>> mapping mode for that bus. >>>> >>>> Algorithm for iova scheme selection for PCI bus: >>>> 0. Look for device attached to vfio kdrv and has .drv_flag set >>>> to RTE_PCI_DRV_NEED_IOVA_VA. >>>> 1. Look for any device attached to UIO class of driver. >>>> 2. Check for vfio-noiommu mode enabled. >>>> >>>> If 1) & 2) is false and 0) is true then select >>>> mapping scheme as iova=va. Otherwise use default >>>> mapping scheme (iova_pa). >>>> >>>> Signed-off-by: Santosh Shukla <santosh.shu...@caviumnetworks.com> >>>> Signed-off-by: Jerin Jacob <jerin.ja...@caviumnetworks.com> >>>> --- >>>> v1 --> v2: >>>> - Removed Linux version check in vfio_noiommu func. Refer [1]. >>>> - Extending autodetction logic for _iommu_class. >>>> Refer [2]. 
>>>> >>>> [1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html >>>> [2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html >>>> >>>> lib/librte_eal/linuxapp/eal/eal_pci.c | 66 >>>> +++++++++++++++++++++++++ >>>> lib/librte_eal/linuxapp/eal/eal_vfio.c | 19 +++++++ >>>> lib/librte_eal/linuxapp/eal/eal_vfio.h | 4 ++ >>>> lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 + >>>> 4 files changed, 90 insertions(+) >>>> >>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>> index 7d9e1a99b..573caa000 100644 >>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>> @@ -45,6 +45,7 @@ >>>> #include "eal_filesystem.h" >>>> #include "eal_private.h" >>>> #include "eal_pci_init.h" >>>> +#include "eal_vfio.h" >>>> >>>> /** >>>> * @file >>>> @@ -488,6 +489,71 @@ rte_pci_scan(void) >>>> return -1; >>>> } >>>> >>>> +/* >>>> + * Any one of the device bound to uio >>>> + */ >>>> +static inline int >>>> +pci_device_bound_uio(void) >>>> +{ >>>> + struct rte_pci_device *dev = NULL; >>>> + >>>> + FOREACH_DEVICE_ON_PCIBUS(dev) { >>>> + if (dev->kdrv == RTE_KDRV_IGB_UIO || >>>> + dev->kdrv == RTE_KDRV_UIO_GENERIC) { >>>> + return 1; >>>> + } >>>> + } >>>> + return 0; >>>> +} >>>> + >>>> +/* >>>> + * Any one of the device has iova as va >>>> + */ >>>> +static inline int >>>> +pci_device_has_iova_va(void) >>>> +{ >>>> + struct rte_pci_device *dev = NULL; >>>> + struct rte_pci_driver *drv = NULL; >>>> + >>>> + FOREACH_DRIVER_ON_PCIBUS(drv) { >>>> + if (drv && drv->drv_flags & RTE_PCI_DRV_NEED_IOVA_VA) { >>>> + FOREACH_DEVICE_ON_PCIBUS(dev) { >>>> + if (dev->kdrv == RTE_KDRV_VFIO && >>>> + rte_pci_match(drv, dev)) >>>> + return 1; >>>> + } >>>> + } >>>> + } >>>> + return 0; >>>> +} >>>> + >>>> +/* >>>> + * Get iommu class of PCI devices on the bus. 
>>>> + */ >>>> +enum rte_iova_mode >>>> +rte_pci_get_iommu_class(void) >>>> +{ >>>> + bool is_vfio_noiommu_enabled; >>>> + bool has_iova_va; >>>> + bool is_bound_uio; >>>> + >>>> + has_iova_va = pci_device_has_iova_va(); >>>> + is_bound_uio = pci_device_bound_uio(); >>>> + is_vfio_noiommu_enabled = vfio_noiommu_is_enabled() == 1 ? 1 : 0; >>>> + >>>> + if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled) >>>> + return RTE_IOVA_VA; >>>> + >>> >>> PCI is generally present on all platforms including dpaa2. >>> There may not be any device found or available for dpdk usages in such >>> cases. The PCI bus will still return RTE_IOVA_PA, which will make the >>> system mode as PA. >>> >> That's the expected behavior. And implementation makes sure >> that PCI_bus return default mode aka _PA if no-pci device found. >> >> Isn't code taking care of same? >> > > I have attached a PCI device to the board. But it is being managed by kernel > only. > > EAL: PCI device 0000:01:00.0 on NUMA socket 0 > EAL: probe driver: 8086:10d3 net_e1000_em > EAL: Not managed by a supported kernel driver, skipped > > So, there are devices in the PCI list. But none of them is probed or being > used by dpdk. > > Therefore the _pci_get_iommu_class scan result would be _PA, as no device is bound to dpdk.
>> Let me walk through the code: >> >> has_iova_va = 0 (if no pci device then pci_device_has_iova_va() will return >> 0). >> >> And if (has_iova_va && ...) will fail therefore rte_pci_get_iommu_class() >> returns RTE_IOVA_PA mode. >> which is default mode. Right? >> > This will create issue for the 2nd bus, which is a VA bus. The combined mode > will become '3', so the system mode will be PA. > Yes, if both modes are detected on two different buses, then the policy is to use the default iova mapping mode across the buses (which is _pa). Are you operating in two different modes, like _pa for the PCI bus and _va for the fslmc bus in dpaa2? >>>> + if (has_iova_va) { >>>> + if (is_vfio_noiommu_enabled) >>>> + RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); >>>> + if (is_bound_uio) >>>> + RTE_LOG(WARNING, EAL, "Some device attached to UIO\n"); >>>> + } >>>> + >>>> + return RTE_IOVA_PA; >>>> +} >>>> + >>>> /* Read PCI config space. */ >>>> int rte_pci_read_config(const struct rte_pci_device *device, >>>> void *buf, size_t len, off_t offset) >>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c >>>> b/lib/librte_eal/linuxapp/eal/eal_vfio.c >>>> index 946df7e31..c8a97b7e7 100644 >>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c >>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c >>>> @@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused >>>> vfio_container_fd) >>>> return 0; >>>> } >>>> >>>> +int >>>> +vfio_noiommu_is_enabled(void) >>>> +{ >>>> + int fd, ret, cnt __rte_unused; >>>> + char c; >>>> + >>>> + ret = -1; >>>> + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); >>>> + if (fd < 0) >>>> + return -1; >>>> + >>>> + cnt = read(fd, &c, 1); >>>> + if (c == 'Y') >>>> + ret = 1; >>>> + >>>> + close(fd); >>>> + return ret; >>>> +} >>>> + >>>> #endif >>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h >>>> b/lib/librte_eal/linuxapp/eal/eal_vfio.h >>>> index 5ff63e5d7..26ea8e119 100644 >>>> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h >>>> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h >>>> @@ 
-150,6 +150,8 @@ struct vfio_config { >>>> #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" >>>> #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) >>>> #define VFIO_GET_REGION_IDX(x) (x >> 40) >>>> +#define VFIO_NOIOMMU_MODE \ >>>> + "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" >>>> >>>> /* DMA mapping function prototype. >>>> * Takes VFIO container fd as a parameter. >>>> @@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void); >>>> >>>> int vfio_mp_sync_setup(void); >>>> >>>> +int vfio_noiommu_is_enabled(void); >>>> + >>>> #define SOCKET_REQ_CONTAINER 0x100 >>>> #define SOCKET_REQ_GROUP 0x200 >>>> #define SOCKET_CLR_GROUP 0x300 >>>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map >>>> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map >>>> index c91dd44c4..044f89c7c 100644 >>>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map >>>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map >>>> @@ -206,6 +206,7 @@ DPDK_17.08 { >>>> rte_bus_find_by_device; >>>> rte_bus_find_by_name; >>>> rte_pci_match; >>>> + rte_pci_get_iommu_class; >>>> >>>> } DPDK_17.05; >>>> >>>> >>> >>> >> >> > >