Some CPUs don't support AVX512; enable AVX2 on them to get better per-core performance.
The single queue model processes all packets in order, while the split
queue model separates packet data and metadata into different queues
for parallel processing and improved performance.

Signed-off-by: Shaiq Wani <shaiq.w...@intel.com>
---
 doc/guides/nics/cpfl.rst           |  3 ++-
 drivers/net/intel/cpfl/cpfl_rxtx.c | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst
index 154201e745..5d267ef667 100644
--- a/doc/guides/nics/cpfl.rst
+++ b/doc/guides/nics/cpfl.rst
@@ -177,7 +177,8 @@ The paths are chosen based on 2 conditions:
 
   On the x86 platform, the driver checks if the CPU supports AVX512.
   If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``
-  is set to 512, AVX512 paths will be chosen.
+  is set to 512, AVX512 paths will be chosen. Otherwise, if ``--force-max-simd-bitwidth``
+  is set to 256, AVX2 paths will be chosen. (256 is the default bitwidth if no value is provided.)
 
 - ``Offload features``
 
diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c
index 47351ca102..4f1fce20ae 100644
--- a/drivers/net/intel/cpfl/cpfl_rxtx.c
+++ b/drivers/net/intel/cpfl/cpfl_rxtx.c
@@ -1426,6 +1426,10 @@ cpfl_set_rx_function(struct rte_eth_dev *dev)
 	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 		vport->rx_vec_allowed = true;
 
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 &&
+		    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+			vport->rx_use_avx2 = true;
+
 		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
 #ifdef CC_AVX512_SUPPORT
 			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
@@ -1479,6 +1483,13 @@ cpfl_set_rx_function(struct rte_eth_dev *dev)
 				return;
 			}
 #endif /* CC_AVX512_SUPPORT */
+		if (vport->rx_use_avx2) {
+			PMD_DRV_LOG(NOTICE,
+				    "Using Single AVX2 Vector Rx (port %d).",
+				    dev->data->port_id);
+			dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx2;
+			return;
+		}
 	}
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(NOTICE,
@@ -1528,6 +1539,11 @@ cpfl_set_tx_function(struct rte_eth_dev *dev)
 	if (cpfl_tx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
 	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
 		vport->tx_vec_allowed = true;
+
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 &&
+		    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+			vport->tx_use_avx2 = true;
+
 		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
 #ifdef CC_AVX512_SUPPORT
 		{
@@ -1587,6 +1603,14 @@ cpfl_set_tx_function(struct rte_eth_dev *dev)
 			return;
 		}
 #endif /* CC_AVX512_SUPPORT */
+		if (vport->tx_use_avx2) {
+			PMD_DRV_LOG(NOTICE,
+				    "Using Single AVX2 Vector Tx (port %d).",
+				    dev->data->port_id);
+			dev->tx_pkt_burst = idpf_dp_singleq_xmit_pkts_avx2;
+			dev->tx_pkt_prepare = idpf_dp_prep_pkts;
+			return;
+		}
 	}
 	PMD_DRV_LOG(NOTICE,
 		    "Using Single Scalar Tx (port %d).",
-- 
2.34.1
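
For context (not part of the patch): below is a minimal standalone sketch
mirroring the same path-selection checks, handy for verifying what a given
CPU and ``--force-max-simd-bitwidth`` setting will select. It only mirrors
the two conditions the driver tests (the real driver checks further flags),
and it assumes a standard DPDK install (build with
pkg-config --cflags --libs libdpdk); the printed messages are illustrative.

/*
 * Standalone sketch (not part of the patch) mirroring the driver's
 * data-path selection checks. Assumes a standard DPDK installation.
 */
#include <stdio.h>

#include <rte_eal.h>
#include <rte_cpuflags.h>
#include <rte_vect.h>

int
main(int argc, char **argv)
{
	if (rte_eal_init(argc, argv) < 0) {
		fprintf(stderr, "EAL init failed\n");
		return -1;
	}

	/*
	 * AVX512 paths win when the CPU flag is present and the effective
	 * SIMD bitwidth (capped by --force-max-simd-bitwidth) allows
	 * 512-bit registers.
	 */
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
		printf("AVX512 paths would be chosen\n");
	/* Otherwise AVX2 paths apply at >= 256 bits (the default cap). */
	else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 &&
		 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
		printf("AVX2 paths would be chosen\n");
	else
		printf("SSE/scalar paths would be chosen\n");

	rte_eal_cleanup();
	return 0;
}

Running this with --force-max-simd-bitwidth=256 on an AVX512-capable machine
should report the AVX2 path, matching the documentation change above.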