Hi Bruce,

> 
> The Tx function selection code in the driver only used the older txq
> flags values to check whether the scalar or vector functions should be
> used. This caused performance regressions with testpmd io-fwd as the
> scalar path rather than the vector one was being used in the default
> case. Fix this by changing the code to take account of new offloads and
> deleting the defines used for the old ones.
> 
> Fixes: 7497d3e2f777 ("net/i40e: convert to new Tx offloads API")
> 
> Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx.c | 45 
> +++++++++++++++++++++++---------------------
>  1 file changed, 24 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index ec1ce54ca..c523af575 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -40,9 +40,6 @@
>  /* Base address of the HW descriptor ring should be 128B aligned. */
>  #define I40E_RING_BASE_ALIGN 128
> 
> -#define I40E_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
> -                                     ETH_TXQ_FLAGS_NOOFFLOADS)
> -
>  #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
> 
>  #ifdef RTE_LIBRTE_IEEE1588
> @@ -70,6 +67,12 @@
>  #define I40E_TX_OFFLOAD_NOTSUP_MASK \
>               (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
> 
> +static const uint64_t i40e_simple_ol_mask = (DEV_TX_OFFLOAD_MULTI_SEGS |
> +             DEV_TX_OFFLOAD_VLAN_INSERT |
> +             DEV_TX_OFFLOAD_SCTP_CKSUM |
> +             DEV_TX_OFFLOAD_UDP_CKSUM |
> +             DEV_TX_OFFLOAD_TCP_CKSUM);
> +

Seems incomplete.
From i40e_ethdev.c, the full-featured Tx path supports:
dev_info->tx_offload_capa =
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_QINQ_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM |
                DEV_TX_OFFLOAD_UDP_CKSUM |
                DEV_TX_OFFLOAD_TCP_CKSUM |
                DEV_TX_OFFLOAD_SCTP_CKSUM |
                DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
                DEV_TX_OFFLOAD_TCP_TSO |
                DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
                DEV_TX_OFFLOAD_GRE_TNL_TSO |
                DEV_TX_OFFLOAD_IPIP_TNL_TSO |
                DEV_TX_OFFLOAD_GENEVE_TNL_TSO;

So we probably need the same here plus multiseg.
BTW, it is really strange that we don't have multiseg in tx_offload_capa.
Should be present I think.
It might be worth creating a new define for it, or just using
dev_info->tx_offload_capa directly.
Konstantin



>  static inline void
>  i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
>  {
> @@ -2108,11 +2111,9 @@ i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev 
> *dev,
>                                dev->data->nb_tx_queues)) {
>               /**
>                * If it is the first queue to setup,
> -              * set all flags to default and call
> +              * set all flags and call
>                * i40e_set_tx_function.
>                */
> -             ad->tx_simple_allowed = true;
> -             ad->tx_vec_allowed = true;
>               i40e_set_tx_function_flag(dev, txq);
>               i40e_set_tx_function(dev);
>               return 0;
> @@ -2128,9 +2129,8 @@ i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev *dev,
>       }
>       /* check simple tx conflict */
>       if (ad->tx_simple_allowed) {
> -             if (((txq->txq_flags & I40E_SIMPLE_FLAGS) !=
> -                  I40E_SIMPLE_FLAGS) ||
> -                 txq->tx_rs_thresh < RTE_PMD_I40E_TX_MAX_BURST) {
> +             if ((txq->offloads & i40e_simple_ol_mask) != 0 ||
> +                             txq->tx_rs_thresh < RTE_PMD_I40E_TX_MAX_BURST) {
>                       PMD_DRV_LOG(ERR, "No-simple tx is required.");
>                       return -EINVAL;
>               }
> @@ -3080,18 +3080,21 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, 
> struct i40e_tx_queue *txq)
>               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> 
>       /* Use a simple Tx queue (no offloads, no multi segs) if possible */
> -     if (((txq->txq_flags & I40E_SIMPLE_FLAGS) == I40E_SIMPLE_FLAGS)
> -                     && (txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST)) {
> -             if (txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ) {
> -                     PMD_INIT_LOG(DEBUG, "Vector tx"
> -                                  " can be enabled on this txq.");
> -
> -             } else {
> -                     ad->tx_vec_allowed = false;
> -             }
> -     } else {
> -             ad->tx_simple_allowed = false;
> -     }
> +     ad->tx_simple_allowed = ((txq->offloads & i40e_simple_ol_mask) == 0 &&
> +                     txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST);
> +     ad->tx_vec_allowed = (ad->tx_simple_allowed &&
> +                     txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ);
> +
> +     if (ad->tx_vec_allowed)
> +             PMD_INIT_LOG(DEBUG, "Vector Tx can be enabled on Tx queue %u.",
> +                             txq->queue_id);
> +     else if (ad->tx_simple_allowed)
> +             PMD_INIT_LOG(DEBUG, "Simple Tx can be enabled on Tx queue %u.",
> +                             txq->queue_id);
> +     else
> +             PMD_INIT_LOG(DEBUG,
> +                             "Neither simple nor vector Tx enabled on Tx 
> queue %u\n",
> +                             txq->queue_id);
>  }
> 
>  void __attribute__((cold))
> --
> 2.14.3

Reply via email to