Previously, the KNI kernel module used the difference between m->buf_addr and m->buf_iova to calculate the userspace virtual address from the physical address. This works for direct mbufs, but does not work for indirect (cloned) mbufs that come from another mempool. Transmitting a cloned mbuf may cause a segmentation fault in userspace.
Now, the userspace KNI library writes the virtual address of each mbuf into the m->userdata field, and the KNI kernel module uses this field to restore the virtual address before putting the mbuf into free_q. This approach works for both direct and indirect mbufs. NOTE TO REVIEWER - DO NOT MERGE The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4 The test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5 I have only modified the kni_net_rx_normal function. If this approach is acceptable, I will modify kni_net_rx_lo_fifo, kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well. Bugzilla ID: 183 Signed-off-by: Junxiao Shi <g...@mail1.yoursunny.com> --- kernel/linux/kni/kni_net.c | 4 ++-- lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++- lib/librte_kni/rte_kni.c | 8 ++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c index 7bd3a9f1e..b34ed2ed4 100644 --- a/kernel/linux/kni/kni_net.c +++ b/kernel/linux/kni/kni_net.c @@ -341,7 +341,7 @@ kni_net_rx_normal(struct kni_dev *kni) kva = pa2kva(kni->pa[i]); len = kva->pkt_len; data_kva = kva2data_kva(kva); - kni->va[i] = pa2va(kni->pa[i], kva); + kni->va[i] = kva->va; skb = netdev_alloc_skb(dev, len); if (!skb) { @@ -367,7 +367,7 @@ kni_net_rx_normal(struct kni_dev *kni) kva = pa2kva(kva->next); data_kva = kva2data_kva(kva); /* Convert physical address to virtual address */ - prev_kva->next = pa2va(prev_kva->next, kva); + prev_kva->next = kva->va; } } diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h index 37d9ee8f0..bc92c0067 100644 --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h @@ -84,7 +84,8 @@ struct rte_kni_mbuf { uint16_t data_len; /**< Amount of data in segment buffer.
*/ /* fields on second cache line */ - char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE))); + void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE))); + /**< Virtual address of this mbuf in userspace (overwrites userdata). */ void *pool; void *next; /**< Physical address of next mbuf in kernel. */ }; diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index 4b51fb4fe..96a6f6af2 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -352,13 +352,15 @@ va2pa(struct rte_mbuf *m) static void * va2pa_all(struct rte_mbuf *mbuf) { - void *phy_mbuf = va2pa(mbuf); + void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf); struct rte_mbuf *next = mbuf->next; while (next) { - mbuf->next = va2pa(next); + mbuf->userdata = mbuf; + mbuf->next = (void*)rte_mempool_virt2iova(next); mbuf = next; next = mbuf->next; } + mbuf->userdata = mbuf; return phy_mbuf; } @@ -609,6 +611,8 @@ kni_allocate_mbufs(struct rte_kni *kni) offsetof(struct rte_kni_mbuf, buf_addr)); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) != offsetof(struct rte_kni_mbuf, next)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) != + offsetof(struct rte_kni_mbuf, va)); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) != offsetof(struct rte_kni_mbuf, data_off)); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != -- 2.17.1