The DMA window size needs to be big enough to span all memory segments' physical addresses. We do not need multiple levels of IOMMU tables, as a single level already spans ~70TB of physical memory with 16MB hugepages.
Signed-off-by: Jonas Pfefferle <[email protected]> --- lib/librte_eal/linuxapp/eal/eal_vfio.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 946df7e..8502216 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -722,6 +722,18 @@ vfio_type1_dma_map(int vfio_container_fd) return 0; } +static uint64_t +roundup_next_pow2(uint64_t n) +{ + uint32_t i; + + n--; + for (i = 1; i < sizeof(n) * CHAR_BIT; i += i) + n |= n >> i; + + return ++n; +} + static int vfio_spapr_dma_map(int vfio_container_fd) { @@ -759,10 +771,12 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* calculate window size based on number of hugepages configured */ - create.window_size = rte_eal_get_physmem_size(); + /* physical pages are sorted descending i.e. ms[0].phys_addr is max */ + /* create DMA window from 0 to max(phys_addr + len) */ + /* sPAPR requires window size to be a power of 2 */ + create.window_size = roundup_next_pow2(ms[0].phys_addr + ms[0].len); create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 2; + create.levels = 1; ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); if (ret) { return -1; } + if (create.start_addr != 0) { + RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); + return -1; + } + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ for (i = 0; i < RTE_MAX_MEMSEG; i++) { struct vfio_iommu_type1_dma_map dma_map; -- 2.7.4

