Right now, vector path is limited to only use on later platform.
This patch adds a devarg use-latest-vec to allow the users to
use the latest vector path that the platform supported. Namely,
using AVX2 vector path on broadwell is possible.

Signed-off-by: Xiaoyun Li <xiaoyun...@intel.com>
---
v5:
 * Simpify the rx set function.
v4:
 * Polish the codes.
v3:
 * Polish the doc and commit log.
v2:
 * Correct the calling of the wrong function last time.
 * Fix seg fault bug.
---
 doc/guides/nics/i40e.rst               |   8 ++
 doc/guides/rel_notes/release_18_11.rst |   4 +
 drivers/net/i40e/i40e_ethdev.c         |  46 +++++++-
 drivers/net/i40e/i40e_ethdev.h         |   3 +
 drivers/net/i40e/i40e_rxtx.c           | 143 +++++++++++++------------
 5 files changed, 136 insertions(+), 68 deletions(-)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 65d87f869..643e6a062 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -163,6 +163,14 @@ Runtime Config Options
   Currently hot-plugging of representor ports is not supported so all required
   representors must be specified on the creation of the PF.
 
+- ``Use latest vector`` (default ``disable``)
+
+  Vector path was limited to use only on later platform. But users may want the
+  latest vector path. For example, VPP users may want to use AVX2 vector path 
on HSW/BDW
+  because it can get better perf. So ``devargs`` parameter ``use-latest-vec`` 
is
+  introduced, for example::
+    -w 84:00.0,use-latest-vec=1
+
 Driver compilation and testing
 ------------------------------
 
diff --git a/doc/guides/rel_notes/release_18_11.rst 
b/doc/guides/rel_notes/release_18_11.rst
index 3ae6b3f58..34af591a2 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -54,6 +54,10 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Added a devarg to use the latest vector path.**
+  A new devarg ``use-latest-vec`` was introduced to allow users to choose
+  the latest vector path that the platform supported. For example, VPP users
+  can use AVX2 vector path on BDW/HSW to get better performance.
 
 API Changes
 -----------
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 85a6a867f..72377d0b6 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -44,6 +44,7 @@
 #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
 #define ETH_I40E_SUPPORT_MULTI_DRIVER  "support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG  "queue-num-per-vf"
+#define ETH_I40E_USE_LATEST_VEC        "use-latest-vec"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 
@@ -408,6 +409,7 @@ static const char *const valid_keys[] = {
        ETH_I40E_FLOATING_VEB_LIST_ARG,
        ETH_I40E_SUPPORT_MULTI_DRIVER,
        ETH_I40E_QUEUE_NUM_PER_VF_ARG,
+       ETH_I40E_USE_LATEST_VEC,
        NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -1201,6 +1203,46 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw,
        return i40e_aq_debug_write_register(hw, reg_addr, reg_val, cmd_details);
 }
 
+static int
+i40e_parse_latest_vec(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       int kvargs_count, use_latest_vec;
+       struct rte_kvargs *kvlist;
+
+       ad->use_latest_vec = false;
+
+       if (!dev->device->devargs)
+               return 0;
+
+       kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+       if (!kvlist)
+               return -EINVAL;
+
+       kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
+       if (!kvargs_count) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+
+       if (kvargs_count > 1)
+               PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+                           "the first one is used !",
+                           ETH_I40E_USE_LATEST_VEC);
+
+       use_latest_vec = atoi((&kvlist->pairs[0])->value);
+
+       rte_kvargs_free(kvlist);
+
+       if (use_latest_vec != 0 && use_latest_vec != 1)
+               PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
+
+       ad->use_latest_vec = (bool)use_latest_vec;
+
+       return 0;
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 {
@@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void 
*init_params __rte_unused)
 
        /* Check if need to support multi-driver */
        i40e_support_multi_driver(dev);
+       i40e_parse_latest_vec(dev);
 
        /* Make sure all is clean before doing PF reset */
        i40e_clear_hw(hw);
@@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
                              ETH_I40E_FLOATING_VEB_ARG "=1"
                              ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
                              ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
-                             ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
+                             ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
+                             ETH_I40E_USE_LATEST_VEC "=1");
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 3fffe5a55..140c92b84 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1078,6 +1078,9 @@ struct i40e_adapter {
        uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
        uint64_t flow_types_mask;
        uint64_t pctypes_mask;
+
+       /* For devargs */
+       bool use_latest_vec;
 };
 
 /**
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 2a28ee348..937dfdf3e 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2909,6 +2909,35 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t 
queue_id,
        qinfo->conf.offloads = txq->offloads;
 }
 
+static eth_rx_burst_t
+i40e_get_latest_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+                                i40e_recv_pkts_vec_avx2;
+#endif
+       return scatter ? i40e_recv_scattered_pkts_vec :
+                        i40e_recv_pkts_vec;
+}
+
+static eth_rx_burst_t
+i40e_get_recommend_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+       /*
+        * since AVX frequency can be different to base frequency, limit
+        * use of AVX2 version to later plaforms, not all those that could
+        * theoretically run it.
+        */
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+                                i40e_recv_pkts_vec_avx2;
+#endif
+       return scatter ? i40e_recv_scattered_pkts_vec :
+                        i40e_recv_pkts_vec;
+}
+
 void __attribute__((cold))
 i40e_set_rx_function(struct rte_eth_dev *dev)
 {
@@ -2940,57 +2969,17 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                }
        }
 
-       if (dev->data->scattered_rx) {
-               /* Set the non-LRO scattered callback: there are Vector and
-                * single allocation versions.
-                */
-               if (ad->rx_vec_allowed) {
-                       PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
-                                           "callback (port=%d).",
-                                    dev->data->port_id);
-
-                       dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
-#ifdef RTE_ARCH_X86
-                       /*
-                        * since AVX frequency can be different to base
-                        * frequency, limit use of AVX2 version to later
-                        * plaforms, not all those that could theoretically
-                        * run it.
-                        */
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                               dev->rx_pkt_burst =
-                                       i40e_recv_scattered_pkts_vec_avx2;
-#endif
-               } else {
-                       PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
-                                          "allocation callback (port=%d).",
-                                    dev->data->port_id);
-                       dev->rx_pkt_burst = i40e_recv_scattered_pkts;
-               }
-       /* If parameters allow we are going to choose between the following
-        * callbacks:
-        *    - Vector
-        *    - Bulk Allocation
-        *    - Single buffer allocation (the simplest one)
-        */
-       } else if (ad->rx_vec_allowed) {
-               PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
-                                   "burst size no less than %d (port=%d).",
-                            RTE_I40E_DESCS_PER_LOOP,
-                            dev->data->port_id);
-
-               dev->rx_pkt_burst = i40e_recv_pkts_vec;
-#ifdef RTE_ARCH_X86
-               /*
-                * since AVX frequency can be different to base
-                * frequency, limit use of AVX2 version to later
-                * plaforms, not all those that could theoretically
-                * run it.
-                */
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                       dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
-#endif
-       } else if (ad->rx_bulk_alloc_allowed) {
+       if (ad->rx_vec_allowed) {
+               /* Vec Rx path */
+               PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
+                               dev->data->port_id);
+               if (ad->use_latest_vec)
+                       dev->rx_pkt_burst =
+                       i40e_get_latest_rx_vec(dev->data->scattered_rx);
+               else
+                       dev->rx_pkt_burst =
+                       i40e_get_recommend_rx_vec(dev->data->scattered_rx);
+       } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                                    "satisfied. Rx Burst Bulk Alloc function "
                                    "will be used on port=%d.",
@@ -2998,12 +2987,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 
                dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
        } else {
-               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
-                                   "satisfied, or Scattered Rx is requested "
-                                   "(port=%d).",
+               /* Simple Rx Path. */
+               PMD_INIT_LOG(DEBUG, "Simple Rx path will be used on port=%d.",
                             dev->data->port_id);
-
-               dev->rx_pkt_burst = i40e_recv_pkts;
+               dev->rx_pkt_burst = dev->data->scattered_rx ?
+                                       i40e_recv_scattered_pkts :
+                                       i40e_recv_pkts;
        }
 
        /* Propagate information about RX function choice through all queues. */
@@ -3049,6 +3038,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, 
struct i40e_tx_queue *txq)
                                txq->queue_id);
 }
 
+static eth_tx_burst_t
+i40e_get_latest_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+               return i40e_xmit_pkts_vec_avx2;
+#endif
+       return i40e_xmit_pkts_vec;
+}
+
+static eth_tx_burst_t
+i40e_get_recommend_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+       /*
+        * since AVX frequency can be different to base frequency, limit
+        * use of AVX2 version to later plaforms, not all those that could
+        * theoretically run it.
+        */
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+               return i40e_xmit_pkts_vec_avx2;
+#endif
+       return i40e_xmit_pkts_vec;
+}
+
 void __attribute__((cold))
 i40e_set_tx_function(struct rte_eth_dev *dev)
 {
@@ -3073,17 +3087,12 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
        if (ad->tx_simple_allowed) {
                if (ad->tx_vec_allowed) {
                        PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
-                       dev->tx_pkt_burst = i40e_xmit_pkts_vec;
-#ifdef RTE_ARCH_X86
-                       /*
-                        * since AVX frequency can be different to base
-                        * frequency, limit use of AVX2 version to later
-                        * plaforms, not all those that could theoretically
-                        * run it.
-                        */
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                               dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
-#endif
+                       if (ad->use_latest_vec)
+                               dev->tx_pkt_burst =
+                                       i40e_get_latest_tx_vec();
+                       else
+                               dev->tx_pkt_burst =
+                                       i40e_get_recommend_tx_vec();
                } else {
                        PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
-- 
2.17.1

Reply via email to