> -----Original Message-----
> From: Li, Xiaoyun
> Sent: Monday, September 10, 2018 6:18 PM
> To: Xing, Beilei <beilei.x...@intel.com>; Zhang, Qi Z <qi.z.zh...@intel.com>
> Cc: dev@dpdk.org; Yang, Zhiyong <zhiyong.y...@intel.com>; Richardson,
> Bruce <bruce.richard...@intel.com>; Hunt, David <david.h...@intel.com>; Li,
> Xiaoyun <xiaoyun...@intel.com>
> Subject: [PATCH v4] net/i40e: add interface to choose latest vector path
> 
> Right now, vector path is limited to only use on later platform.
> This patch adds a devarg use-latest-vec to allow the users to use the latest
> vector path that the platform supported. Namely, using AVX2 vector path on
> broadwell is possible.
> 
> Signed-off-by: Xiaoyun Li <xiaoyun...@intel.com>
> ---
> v4:
>  * Polish the codes.
> v3:
>  * Polish the doc and commit log.
> v2:
>  * Correct the calling of the wrong function last time.
>  * Fix seg fault bug.
> ---
>  doc/guides/nics/i40e.rst               |   8 ++
>  doc/guides/rel_notes/release_18_11.rst |   4 +
>  drivers/net/i40e/i40e_ethdev.c         |  46 ++++++++++-
>  drivers/net/i40e/i40e_ethdev.h         |   3 +
>  drivers/net/i40e/i40e_rxtx.c           | 103 ++++++++++++++++---------
>  5 files changed, 128 insertions(+), 36 deletions(-)
> 
> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index
> 65d87f869..643e6a062 100644
> --- a/doc/guides/nics/i40e.rst
> +++ b/doc/guides/nics/i40e.rst
> @@ -163,6 +163,14 @@ Runtime Config Options
>    Currently hot-plugging of representor ports is not supported so all 
> required
>    representors must be specified on the creation of the PF.
> 
> +- ``Use latest vector`` (default ``disable``)
> +
> +  Vector path was limited to use only on later platform. But users may
> + want the  latest vector path. For example, VPP users may want to use
> + AVX2 vector path on HSW/BDW  because it can get better perf. So
> + ``devargs`` parameter ``use-latest-vec`` is  introduced, for example::
> +    -w 84:00.0,use-latest-vec=1
> +
>  Driver compilation and testing
>  ------------------------------
> 
> diff --git a/doc/guides/rel_notes/release_18_11.rst
> b/doc/guides/rel_notes/release_18_11.rst
> index 3ae6b3f58..34af591a2 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -54,6 +54,10 @@ New Features
>       Also, make sure to start the actual text at the margin.
>       =========================================================
> 
> +* **Added a devarg to use the latest vector path.**
> +  A new devarg ``use-latest-vec`` was introduced to allow users to
> +choose
> +  the latest vector path that the platform supported. For example, VPP
> +users
> +  can use AVX2 vector path on BDW/HSW to get better performance.
> 
>  API Changes
>  -----------
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 85a6a867f..72377d0b6 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -44,6 +44,7 @@
>  #define ETH_I40E_FLOATING_VEB_LIST_ARG       "floating_veb_list"
>  #define ETH_I40E_SUPPORT_MULTI_DRIVER        "support-multi-driver"
>  #define ETH_I40E_QUEUE_NUM_PER_VF_ARG        "queue-num-per-vf"
> +#define ETH_I40E_USE_LATEST_VEC      "use-latest-vec"
> 
>  #define I40E_CLEAR_PXE_WAIT_MS     200
> 
> @@ -408,6 +409,7 @@ static const char *const valid_keys[] = {
>       ETH_I40E_FLOATING_VEB_LIST_ARG,
>       ETH_I40E_SUPPORT_MULTI_DRIVER,
>       ETH_I40E_QUEUE_NUM_PER_VF_ARG,
> +     ETH_I40E_USE_LATEST_VEC,
>       NULL};
> 
>  static const struct rte_pci_id pci_id_i40e_map[] = { @@ -1201,6 +1203,46 @@
> i40e_aq_debug_write_global_register(struct i40e_hw *hw,
>       return i40e_aq_debug_write_register(hw, reg_addr, reg_val,
> cmd_details);  }
> 
> +static int
> +i40e_parse_latest_vec(struct rte_eth_dev *dev) {
> +     struct i40e_adapter *ad =
> +             I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +     int kvargs_count, use_latest_vec;
> +     struct rte_kvargs *kvlist;
> +
> +     ad->use_latest_vec = false;
> +
> +     if (!dev->device->devargs)
> +             return 0;
> +
> +     kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
> +     if (!kvlist)
> +             return -EINVAL;
> +
> +     kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
> +     if (!kvargs_count) {
> +             rte_kvargs_free(kvlist);
> +             return 0;
> +     }
> +
> +     if (kvargs_count > 1)
> +             PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and
> only "
> +                         "the first one is used !",
> +                         ETH_I40E_USE_LATEST_VEC);
> +
> +     use_latest_vec = atoi((&kvlist->pairs[0])->value);
> +
> +     rte_kvargs_free(kvlist);
> +
> +     if (use_latest_vec != 0 && use_latest_vec != 1)
> +             PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
> +
> +     ad->use_latest_vec = (bool)use_latest_vec;
> +
> +     return 0;
> +}
> +
>  static int
>  eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
> { @@ -1263,6 +1305,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void
> *init_params __rte_unused)
> 
>       /* Check if need to support multi-driver */
>       i40e_support_multi_driver(dev);
> +     i40e_parse_latest_vec(dev);
> 
>       /* Make sure all is clean before doing PF reset */
>       i40e_clear_hw(hw);
> @@ -12527,4 +12570,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
>                             ETH_I40E_FLOATING_VEB_ARG "=1"
>                             ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
>                             ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
> -                           ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
> +                           ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
> +                           ETH_I40E_USE_LATEST_VEC "=1");
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 3fffe5a55..140c92b84 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -1078,6 +1078,9 @@ struct i40e_adapter {
>       uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
>       uint64_t flow_types_mask;
>       uint64_t pctypes_mask;
> +
> +     /* For devargs */
> +     bool use_latest_vec;
>  };
> 
>  /**
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> 2a28ee348..e9fa7ed90 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -2909,6 +2909,34 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
>       qinfo->conf.offloads = txq->offloads;
>  }
> 
> +static eth_rx_burst_t
> +i40e_get_latest_rx_vec(bool scatter)
> +{
> +#ifdef RTE_ARCH_X86
> +     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +             return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> +                              i40e_recv_pkts_vec_avx2;
> +#endif
> +     return scatter ? i40e_recv_scattered_pkts_vec :
> +                      i40e_recv_pkts_vec;
> +}
> +
> +static eth_rx_burst_t
> +i40e_get_recommend_rx_vec(bool scatter) { #ifdef RTE_ARCH_X86
> +     /*
> +      * since AVX frequency can be different to base frequency, limit
> +      * use of AVX2 version to later plaforms, not all those that could
> +      * theoretically run it.
> +      */
> +     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +             return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
> +                              i40e_recv_pkts_vec_avx2;
> +#endif
> +     return scatter ? i40e_recv_scattered_pkts_vec :
> +                      i40e_recv_pkts_vec;
> +}
>  void __attribute__((cold))
>  i40e_set_rx_function(struct rte_eth_dev *dev)  { @@ -2948,19 +2976,12
> @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>                       PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
>                                           "callback (port=%d).",
>                                    dev->data->port_id);
> -
> -                     dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
> -#ifdef RTE_ARCH_X86
> -                     /*
> -                      * since AVX frequency can be different to base
> -                      * frequency, limit use of AVX2 version to later
> -                      * plaforms, not all those that could theoretically
> -                      * run it.
> -                      */
> -                     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +                     if (ad->use_latest_vec)
>                               dev->rx_pkt_burst =
> -                                     i40e_recv_scattered_pkts_vec_avx2;
> -#endif
> +                                     i40e_get_latest_rx_vec(true);
> +                     else
> +                             dev->rx_pkt_burst =
> +                                     i40e_get_recommend_rx_vec(true);
>               } else {
>                       PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
>                                          "allocation callback (port=%d).", @@ 
> -2978,18
> +2999,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>                                   "burst size no less than %d (port=%d).",
>                            RTE_I40E_DESCS_PER_LOOP,
>                            dev->data->port_id);
> -
> -             dev->rx_pkt_burst = i40e_recv_pkts_vec;
> -#ifdef RTE_ARCH_X86
> -             /*
> -              * since AVX frequency can be different to base
> -              * frequency, limit use of AVX2 version to later
> -              * plaforms, not all those that could theoretically
> -              * run it.
> -              */
> -             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> -                     dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
> -#endif
> +             if (ad->use_latest_vec)
> +                     dev->rx_pkt_burst = i40e_get_latest_rx_vec(false);
> +             else
> +                     dev->rx_pkt_burst = i40e_get_recommend_rx_vec(false);


How about simplifying the code as below?

/* default */
dev->rx_pkt_burst = dev->data->scattered_rx ?
                i40e_recv_scattered_pkts : i40e_recv_pkts;

if (ad->rx_vec_allowed) {
        /* overwrite by vec path */
        if (ad->use_latest_vec)
                dev->rx_pkt_burst =
                        i40e_get_latest_rx_vec(dev->data->scattered_rx);
        else
                dev->rx_pkt_burst =
                        i40e_get_recommend_rx_vec(dev->data->scattered_rx);
} else if (ad->rx_bulk_alloc_allowed) {
        /* or overwrite by bulk alloc */
        dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
}


>       } else if (ad->rx_bulk_alloc_allowed) {
>               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
>                                   "satisfied. Rx Burst Bulk Alloc function "
> @@ -3049,6 +3062,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev,
> struct i40e_tx_queue *txq)
>                               txq->queue_id);
>  }
> 
> +static eth_tx_burst_t
> +i40e_get_latest_tx_vec(void)
> +{
> +#ifdef RTE_ARCH_X86
> +     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +             return i40e_xmit_pkts_vec_avx2;
> +#endif
> +     return i40e_xmit_pkts_vec;
> +}
> +
> +static eth_tx_burst_t
> +i40e_get_recommend_tx_vec(void)
> +{
> +#ifdef RTE_ARCH_X86
> +     /*
> +      * since AVX frequency can be different to base frequency, limit
> +      * use of AVX2 version to later plaforms, not all those that could
> +      * theoretically run it.
> +      */
> +     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +             return i40e_xmit_pkts_vec_avx2;
> +#endif
> +     return i40e_xmit_pkts_vec;
> +}
> +
>  void __attribute__((cold))
>  i40e_set_tx_function(struct rte_eth_dev *dev)  { @@ -3073,17 +3111,12
> @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>       if (ad->tx_simple_allowed) {
>               if (ad->tx_vec_allowed) {
>                       PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
> -                     dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> -#ifdef RTE_ARCH_X86
> -                     /*
> -                      * since AVX frequency can be different to base
> -                      * frequency, limit use of AVX2 version to later
> -                      * plaforms, not all those that could theoretically
> -                      * run it.
> -                      */
> -                     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> -                             dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
> -#endif
> +                     if (ad->use_latest_vec)
> +                             dev->tx_pkt_burst =
> +                                     i40e_get_latest_tx_vec();
> +                     else
> +                             dev->tx_pkt_burst =
> +                                     i40e_get_recommend_tx_vec();
>               } else {
>                       PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
>                       dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> --
> 2.17.1

Reply via email to