From: Vamsi Attunuru <vattun...@marvell.com>

The current KNI implementation only operates in IOVA_PA mode.
This patch adds the functionality required to enable KNI in
IOVA_VA mode.

The packet pool's mbuf memory must be physically contiguous
for the KNI kernel module to work in IOVA=VA mode. New KNI
packet pool create/free APIs are introduced to take care of
this memory requirement.

examples/kni/ is updated to use these APIs so that it works in
either IOVA as VA or IOVA as PA mode.

Existing KNI applications can use the ``--legacy-kni`` EAL option
to work with DPDK 19.11 and later versions. When this option is
selected, the IOVA mode is forced to PA so that old KNI applications
work with the latest DPDK without code changes.

Signed-off-by: Vamsi Attunuru <vattun...@marvell.com>
Signed-off-by: Kiran Kumar K <kirankum...@marvell.com>
Suggested-by: Ferruh Yigit <ferruh.yi...@intel.com>
---
 doc/guides/prog_guide/kernel_nic_interface.rst    | 26 +++++++++
 doc/guides/rel_notes/release_19_11.rst            | 13 +++++
 examples/kni/main.c                               |  6 +-
 lib/librte_eal/linux/eal/eal.c                    | 39 +++++++++----
 lib/librte_eal/linux/eal/include/rte_kni_common.h |  1 +
 lib/librte_kni/Makefile                           |  1 +
 lib/librte_kni/meson.build                        |  1 +
 lib/librte_kni/rte_kni.c                          | 67 +++++++++++++++++++++--
 lib/librte_kni/rte_kni.h                          | 48 ++++++++++++++++
 lib/librte_kni/rte_kni_version.map                |  3 +
 10 files changed, 187 insertions(+), 18 deletions(-)

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst
index 2fd58e1..80f731c 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -300,6 +300,32 @@ The sk_buff is then freed and the mbuf sent in the tx_q FIFO.
 The DPDK TX thread dequeues the mbuf and sends it to the PMD via ``rte_eth_tx_burst()``.
 It then puts the mbuf back in the cache.
 
+IOVA = VA: Support
+------------------
+
+KNI can be operated in IOVA_VA scheme when
+
+- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) and
+- the EAL option ``--iova-mode=va`` is passed, or the bus IOVA scheme selected
+  by DPDK is RTE_IOVA_VA.
+
+Packet Pool APIs for IOVA=VA mode
+---------------------------------
+
+The ``rte_kni_pktmbuf_pool_create`` and ``rte_kni_pktmbuf_pool_free`` APIs must
+be used to create and free packet pools for KNI applications running in IOVA=VA
+mode. The packet pool's mbuf memory must be physically contiguous for the KNI
+kernel module to work in IOVA=VA mode; this requirement is taken care of inside
+the KNI packet pool create API.
+
+Command-line option for legacy KNI
+----------------------------------
+
+Existing KNI applications can use the ``--legacy-kni`` EAL command-line option
+to work with DPDK 19.11 and later versions. When this option is selected, the
+IOVA mode is forced to PA so that old KNI applications work with the latest
+DPDK without code changes.
+
 Ethtool
 -------
 
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index ab2c381..e4296a0 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -120,6 +120,19 @@ New Features
   Added EAL option ``--legacy-kni`` to make existing KNI applications work
   with DPDK 19.11 and later.
 
+* **Added IOVA as VA support for KNI.**
+
+  Added IOVA as VA support for KNI. When KNI operates in IOVA = VA mode, the
+  packet pool's mbuf memory must be physically contiguous. This memory
+  requirement is taken care of by the new ``rte_kni_pktmbuf_pool_create`` and
+  ``rte_kni_pktmbuf_pool_free`` routines.
+
+  The ``examples/kni/`` application is updated to use these APIs so that it
+  works in either IOVA as VA or IOVA as PA mode.
+
+  When ``--legacy-kni`` is selected, the IOVA mode is forced to PA so that
+  old KNI applications work with the latest DPDK without code changes.
+
 Removed Items
 -------------
 
diff --git a/examples/kni/main.c b/examples/kni/main.c
index c576fc7..d2f3b46 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -1017,8 +1017,9 @@ main(int argc, char** argv)
                rte_exit(EXIT_FAILURE, "Could not parse input parameters\n");
 
        /* Create the mbuf pool */
-       pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
+       pktmbuf_pool = rte_kni_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
                MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
+
        if (pktmbuf_pool == NULL) {
                rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
                return -1;
@@ -1085,6 +1086,9 @@ main(int argc, char** argv)
                        continue;
                kni_free_kni(port);
        }
+
+       rte_kni_pktmbuf_pool_free(pktmbuf_pool);
+
        for (i = 0; i < RTE_MAX_ETHPORTS; i++)
                if (kni_port_params_array[i]) {
                        rte_free(kni_port_params_array[i]);
diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index f397206..f807044 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -947,6 +947,29 @@ static int rte_eal_vfio_setup(void)
 }
 #endif
 
+/* Adjust the requested IOVA mode for KNI: force PA when legacy KNI is
+ * enabled, or when VA is requested and the compile-time kernel is < 4.8.0.
+ */
+static enum rte_iova_mode
+rte_eal_kni_get_iova_mode(enum rte_iova_mode iova_mode)
+{
+       if (iova_mode == RTE_IOVA_PA)
+               return iova_mode;
+
+       if (internal_config.legacy_kni) {
+               RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because legacy KNI is enabled\n");
+               return RTE_IOVA_PA;
+       }
+
+#if KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE
+       if (iova_mode == RTE_IOVA_VA) {
+               RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module does not support VA\n");
+               return RTE_IOVA_PA;
+       }
+#endif
+       return iova_mode;
+}
+
 static void rte_eal_init_alert(const char *msg)
 {
        fprintf(stderr, "EAL: FATAL: %s\n", msg);
@@ -1110,24 +1133,16 @@ rte_eal_init(int argc, char **argv)
                                RTE_LOG(DEBUG, EAL, "IOMMU is not available, 
selecting IOVA as PA mode.\n");
                        }
                }
-#ifdef RTE_LIBRTE_KNI
-               /* Workaround for KNI which requires physical address to work */
-               if (iova_mode == RTE_IOVA_VA &&
-                               rte_eal_check_module("rte_kni") == 1) {
-                       if (phys_addrs) {
-                               iova_mode = RTE_IOVA_PA;
-                               RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' 
because KNI module is loaded\n");
-                       } else {
-                               RTE_LOG(DEBUG, EAL, "KNI can not work since 
physical addresses are unavailable\n");
-                       }
-               }
-#endif
                rte_eal_get_configuration()->iova_mode = iova_mode;
        } else {
                rte_eal_get_configuration()->iova_mode =
                        internal_config.iova_mode;
        }
 
+       if (rte_eal_check_module("rte_kni") == 1)
+               rte_eal_get_configuration()->iova_mode =
+                               rte_eal_kni_get_iova_mode(rte_eal_iova_mode());
+
        if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
                rte_eal_init_alert("Cannot use IOVA as 'PA' since physical 
addresses are not available");
                rte_errno = EINVAL;
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index b51fe27..1b96cf6 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -123,6 +123,7 @@ struct rte_kni_device_info {
        unsigned mbuf_size;
        unsigned int mtu;
        uint8_t mac_addr[6];
+       uint8_t iova_mode;
 };
 
 #define KNI_DEVICE "kni"
diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
index cbd6599..6405524 100644
--- a/lib/librte_kni/Makefile
+++ b/lib/librte_kni/Makefile
@@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 # library name
 LIB = librte_kni.a
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
 
diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
index 41fa2e3..dd4c8da 100644
--- a/lib/librte_kni/meson.build
+++ b/lib/librte_kni/meson.build
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+allow_experimental_apis = true
 if not is_linux or not dpdk_conf.get('RTE_ARCH_64')
        build = false
        reason = 'only supported on 64-bit linux'
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 0f36485..1e53f05 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -21,6 +21,7 @@
 #include <rte_tailq.h>
 #include <rte_rwlock.h>
 #include <rte_eal_memconfig.h>
+#include <rte_mbuf_pool_ops.h>
 #include <rte_kni_common.h>
 #include "rte_kni_fifo.h"
 
@@ -97,11 +98,6 @@ static volatile int kni_fd = -1;
 int
 rte_kni_init(unsigned int max_kni_ifaces __rte_unused)
 {
-       if (rte_eal_iova_mode() != RTE_IOVA_PA) {
-               RTE_LOG(ERR, KNI, "KNI requires IOVA as PA\n");
-               return -1;
-       }
-
        /* Check FD and open */
        if (kni_fd < 0) {
                kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
@@ -300,6 +296,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
        kni->group_id = conf->group_id;
        kni->mbuf_size = conf->mbuf_size;
 
+       dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
        ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
        if (ret < 0)
                goto ioctl_fail;
@@ -687,6 +685,65 @@ kni_allocate_mbufs(struct rte_kni *kni)
        }
 }
 
+/* Create a pktmbuf pool for KNI; the populate routine depends on the EAL
+ * IOVA mode. Returns the pool, or NULL on failure. */
+struct rte_mempool *
+rte_kni_pktmbuf_pool_create(const char *name, unsigned int n,
+       unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
+       int socket_id)
+{
+       struct rte_pktmbuf_pool_private mbp_priv = {
+               .mbuf_data_room_size = data_room_size,
+               .mbuf_priv_size = priv_size,
+       };
+       struct rte_mempool *mp;
+       unsigned int elt_size;
+       int ret;
+
+       if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
+               RTE_LOG(ERR, KNI, "mbuf priv_size=%u is not aligned\n",
+                       priv_size);
+               rte_errno = EINVAL;
+               return NULL;
+       }
+       elt_size = sizeof(struct rte_mbuf) + (unsigned int)priv_size +
+               (unsigned int)data_room_size;
+
+       mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
+                sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
+       if (mp == NULL)
+               return NULL;
+
+       ret = rte_mempool_set_ops_byname(mp, rte_mbuf_best_mempool_ops(), NULL);
+       if (ret != 0) {
+               RTE_LOG(ERR, KNI, "error setting mempool handler\n");
+               goto fail;
+       }
+       rte_pktmbuf_pool_init(mp, &mbp_priv);
+
+       /* IOVA=VA takes the page-sized-chunk populate path, PA the default. */
+       if (rte_eal_iova_mode() == RTE_IOVA_VA)
+               ret = rte_mempool_populate_from_pg_sz_chunks(mp);
+       else
+               ret = rte_mempool_populate_default(mp);
+       if (ret < 0)
+               goto fail;
+
+       rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
+       return mp;
+
+fail:
+       rte_mempool_free(mp);
+       rte_errno = -ret;
+       return NULL;
+}
+
+void
+rte_kni_pktmbuf_pool_free(struct rte_mempool *mp)
+{
+       rte_mempool_free(mp); /* plain mempool release, nothing KNI-specific */
+}
+
 struct rte_kni *
 rte_kni_get(const char *name)
 {
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index f6b66c3..2cfdc38 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -187,6 +187,54 @@ unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs,
                unsigned num);
 
 /**
+ * Create a kni packet mbuf pool.
+ *
+ * Creates and initializes a packet mbuf pool for KNI applications, calling
+ * the mempool populate routine that matches the current IOVA mode.
+ *
+ * @param name
+ *   The name of the mbuf pool.
+ * @param n
+ *   The number of elements in the mbuf pool. The optimum size (in terms
+ *   of memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param cache_size
+ *   Size of the per-core object cache. See rte_mempool_create() for
+ *   details.
+ * @param priv_size
+ *   Size of application private area between the rte_mbuf structure
+ *   and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
+ * @param data_room_size
+ *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
+ * @param socket_id
+ *   The socket identifier where the memory should be allocated. The
+ *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
+ *   reserved zone.
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - cache size provided is too large, or priv_size is not aligned.
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+__rte_experimental
+struct rte_mempool *rte_kni_pktmbuf_pool_create(const char *name,
+               unsigned int n, unsigned int cache_size, uint16_t priv_size,
+               uint16_t data_room_size, int socket_id);
+
+/**
+ * Free the given packet mempool.
+ *
+ * @param mp
+ *  The mempool pointer.
+ */
+__rte_experimental
+void rte_kni_pktmbuf_pool_free(struct rte_mempool *mp);
+
+/**
  * Get the KNI context of its name.
  *
  * @param name
diff --git a/lib/librte_kni/rte_kni_version.map b/lib/librte_kni/rte_kni_version.map
index c877dc6..5937bff 100644
--- a/lib/librte_kni/rte_kni_version.map
+++ b/lib/librte_kni/rte_kni_version.map
@@ -19,5 +19,8 @@ DPDK_2.0 {
 EXPERIMENTAL {
        global:
 
+       # added in 19.11
        rte_kni_update_link;
+       rte_kni_pktmbuf_pool_create;
+       rte_kni_pktmbuf_pool_free;
 };
-- 
2.8.4

Reply via email to