From: Kiran Kumar K <kirankum...@marvell.com>

With the current KNI implementation the kernel module works only in
IOVA=PA mode. This patch adds support for the kernel module to work
in IOVA=VA mode as well.

The idea is to translate the IOVA to a physical address using the
iommu_iova_to_phys API, and then convert that physical address to a
kernel virtual address with phys_to_virt.

With this approach we compared performance against IOVA=PA mode and
observed no difference; the kernel-side processing appears to be the
dominant cost.

Signed-off-by: Kiran Kumar K <kirankum...@marvell.com>
---

V3 Changes:
* Add a new approach to make KNI work in IOVA=VA mode using the
iommu_iova_to_phys API.

 kernel/linux/kni/kni_dev.h                    |  4 +
 kernel/linux/kni/kni_misc.c                   | 57 +++++++++++---
 kernel/linux/kni/kni_net.c                    | 76 +++++++++++++++----
 lib/librte_eal/linux/eal/eal.c                |  9 ---
 .../linux/eal/include/rte_kni_common.h        |  1 +
 lib/librte_kni/rte_kni.c                      |  2 +
 6 files changed, 116 insertions(+), 33 deletions(-)

diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index df46aa70e..9c4944921 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/iommu.h>

 #include <rte_kni_common.h>
 #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -39,6 +40,9 @@ struct kni_dev {
        /* kni list */
        struct list_head list;

+       uint8_t iova_mode;
+       struct iommu_domain *domain;
+
        struct net_device_stats stats;
        int status;
        uint16_t group_id;           /* Group ID of a group of KNI devices */
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 31845e10f..de4f6ce41 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -306,10 +306,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
        struct rte_kni_device_info dev_info;
        struct net_device *net_dev = NULL;
        struct kni_dev *kni, *dev, *n;
+       struct pci_dev *pci = NULL;
+       struct iommu_domain *domain = NULL;
+       phys_addr_t phys_addr;
 #ifdef RTE_KNI_KMOD_ETHTOOL
        struct pci_dev *found_pci = NULL;
        struct net_device *lad_dev = NULL;
-       struct pci_dev *pci = NULL;
 #endif

        pr_info("Creating kni...\n");
@@ -368,15 +370,50 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
        strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);

        /* Translate user space info into kernel space info */
-       kni->tx_q = phys_to_virt(dev_info.tx_phys);
-       kni->rx_q = phys_to_virt(dev_info.rx_phys);
-       kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
-       kni->free_q = phys_to_virt(dev_info.free_phys);
-
-       kni->req_q = phys_to_virt(dev_info.req_phys);
-       kni->resp_q = phys_to_virt(dev_info.resp_phys);
-       kni->sync_va = dev_info.sync_va;
-       kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+
+       if (dev_info.iova_mode) {
+               pci = pci_get_device(dev_info.vendor_id,
+                                    dev_info.device_id, NULL);
+               while (pci) {
+                       if ((pci->bus->number == dev_info.bus) &&
+                           (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+                           (PCI_FUNC(pci->devfn) == dev_info.function)) {
+                               domain = iommu_get_domain_for_dev(&pci->dev);
+                               break;
+                       }
+                       pci = pci_get_device(dev_info.vendor_id,
+                                            dev_info.device_id, pci);
+               }
+               kni->domain = domain;
+               phys_addr = iommu_iova_to_phys(domain, dev_info.tx_phys);
+               kni->tx_q = phys_to_virt(phys_addr);
+               phys_addr = iommu_iova_to_phys(domain, dev_info.rx_phys);
+               kni->rx_q = phys_to_virt(phys_addr);
+               phys_addr = iommu_iova_to_phys(domain, dev_info.alloc_phys);
+               kni->alloc_q = phys_to_virt(phys_addr);
+               phys_addr = iommu_iova_to_phys(domain, dev_info.free_phys);
+               kni->free_q = phys_to_virt(phys_addr);
+               phys_addr = iommu_iova_to_phys(domain, dev_info.req_phys);
+               kni->req_q = phys_to_virt(phys_addr);
+               phys_addr = iommu_iova_to_phys(domain, dev_info.resp_phys);
+               kni->resp_q = phys_to_virt(phys_addr);
+               kni->sync_va = dev_info.sync_va;
+               phys_addr = iommu_iova_to_phys(domain, dev_info.sync_phys);
+               kni->sync_kva = phys_to_virt(phys_addr);
+               kni->iova_mode = 1;
+
+       } else {
+               kni->tx_q = phys_to_virt(dev_info.tx_phys);
+               kni->rx_q = phys_to_virt(dev_info.rx_phys);
+               kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+               kni->free_q = phys_to_virt(dev_info.free_phys);
+
+               kni->req_q = phys_to_virt(dev_info.req_phys);
+               kni->resp_q = phys_to_virt(dev_info.resp_phys);
+               kni->sync_va = dev_info.sync_va;
+               kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+               kni->iova_mode = 0;
+       }

        kni->mbuf_size = dev_info.mbuf_size;

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index be9e6b0b9..4d07ba576 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -35,6 +35,22 @@ static void kni_net_rx_normal(struct kni_dev *kni);
 /* kni rx function pointer, with default to normal rx */
 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;

+/* iova to kernel virtual address */
+static void *
+iova2kva(struct kni_dev *kni, void *pa)
+{
+       return phys_to_virt(iommu_iova_to_phys(kni->domain,
+                               (dma_addr_t)pa));
+}
+
+static void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+       return phys_to_virt((iommu_iova_to_phys(kni->domain,
+                                       (dma_addr_t)m->buf_physaddr) +
+                            m->data_off));
+}
+
 /* physical address to kernel virtual address */
 static void *
 pa2kva(void *pa)
@@ -186,7 +202,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
                        return;

                for (i = 0; i < num_rx; i++) {
-                       kva = pa2kva(kni->pa[i]);
+                       if (likely(kni->iova_mode == 1))
+                               kva = iova2kva(kni, kni->pa[i]);
+                       else
+                               kva = pa2kva(kni->pa[i]);
                        kni->va[i] = pa2va(kni->pa[i], kva);
                }

@@ -263,8 +282,13 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
        if (likely(ret == 1)) {
                void *data_kva;

-               pkt_kva = pa2kva(pkt_pa);
-               data_kva = kva2data_kva(pkt_kva);
+               if (likely(kni->iova_mode == 1)) {
+                       pkt_kva = iova2kva(kni, pkt_pa);
+                       data_kva = iova2data_kva(kni, pkt_kva);
+               } else {
+                       pkt_kva = pa2kva(pkt_pa);
+                       data_kva = kva2data_kva(pkt_kva);
+               }
                pkt_va = pa2va(pkt_pa, pkt_kva);

                len = skb->len;
@@ -335,9 +359,14 @@ kni_net_rx_normal(struct kni_dev *kni)

        /* Transfer received packets to netif */
        for (i = 0; i < num_rx; i++) {
-               kva = pa2kva(kni->pa[i]);
+               if (likely(kni->iova_mode == 1)) {
+                       kva = iova2kva(kni, kni->pa[i]);
+                       data_kva = iova2data_kva(kni, kva);
+               } else {
+                       kva = pa2kva(kni->pa[i]);
+                       data_kva = kva2data_kva(kva);
+               }
                len = kva->pkt_len;
-               data_kva = kva2data_kva(kva);
                kni->va[i] = pa2va(kni->pa[i], kva);

                skb = dev_alloc_skb(len + 2);
@@ -434,13 +463,20 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
                num = ret;
                /* Copy mbufs */
                for (i = 0; i < num; i++) {
-                       kva = pa2kva(kni->pa[i]);
+
+                       if (likely(kni->iova_mode == 1)) {
+                               kva = iova2kva(kni, kni->pa[i]);
+                               data_kva = iova2data_kva(kni, kva);
+                               alloc_kva = iova2kva(kni, kni->alloc_pa[i]);
+                               alloc_data_kva = iova2data_kva(kni, alloc_kva);
+                       } else {
+                               kva = pa2kva(kni->pa[i]);
+                               data_kva = kva2data_kva(kva);
+                               alloc_kva = pa2kva(kni->alloc_pa[i]);
+                               alloc_data_kva = kva2data_kva(alloc_kva);
+                       }
                        len = kva->pkt_len;
-                       data_kva = kva2data_kva(kva);
                        kni->va[i] = pa2va(kni->pa[i], kva);
-
-                       alloc_kva = pa2kva(kni->alloc_pa[i]);
-                       alloc_data_kva = kva2data_kva(alloc_kva);
                        kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);

                        memcpy(alloc_data_kva, data_kva, len);
@@ -507,9 +543,15 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)

        /* Copy mbufs to sk buffer and then call tx interface */
        for (i = 0; i < num; i++) {
-               kva = pa2kva(kni->pa[i]);
+
+               if (likely(kni->iova_mode == 1)) {
+                       kva = iova2kva(kni, kni->pa[i]);
+                       data_kva = iova2data_kva(kni, kva);
+               } else {
+                       kva = pa2kva(kni->pa[i]);
+                       data_kva = kva2data_kva(kva);
+               }
                len = kva->pkt_len;
-               data_kva = kva2data_kva(kva);
                kni->va[i] = pa2va(kni->pa[i], kva);

                skb = dev_alloc_skb(len + 2);
@@ -545,8 +587,14 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
                                if (!kva->next)
                                        break;

-                               kva = pa2kva(va2pa(kva->next, kva));
-                               data_kva = kva2data_kva(kva);
+                               if (likely(kni->iova_mode == 1)) {
+                                       kva = iova2kva(kni,
+                                                      va2pa(kva->next, kva));
+                                       data_kva = iova2data_kva(kni, kva);
+                               } else {
+                                       kva = pa2kva(va2pa(kva->next, kva));
+                                       data_kva = kva2data_kva(kva);
+                               }
                        }
                }

diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index f7ae62d7b..8fac6707d 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -1040,15 +1040,6 @@ rte_eal_init(int argc, char **argv)
                /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
                rte_eal_get_configuration()->iova_mode =
                        rte_bus_get_iommu_class();
-
-               /* Workaround for KNI which requires physical address to work */
-               if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
-                               rte_eal_check_module("rte_kni") == 1) {
-                       rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
-                       RTE_LOG(WARNING, EAL,
-                               "Some devices want IOVA as VA but PA will be used because.. "
-                               "KNI module inserted\n");
-               }
        } else {
                rte_eal_get_configuration()->iova_mode =
                        internal_config.iova_mode;
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 5afa08713..79ee4bc5a 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -128,6 +128,7 @@ struct rte_kni_device_info {
        unsigned mbuf_size;
        unsigned int mtu;
        char mac_addr[6];
+       uint8_t iova_mode;
 };

 #define KNI_DEVICE "kni"
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 946459c79..ec8f23694 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -304,6 +304,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
        kni->group_id = conf->group_id;
        kni->mbuf_size = conf->mbuf_size;

+       dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
        ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
        if (ret < 0)
                goto ioctl_fail;
--
2.17.1

Reply via email to