Hi,

> -----Original Message-----
> From: Alexander Kozyrev <akozy...@mellanox.com>
> Sent: Monday, March 16, 2020 5:35 PM
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasl...@mellanox.com>; Matan Azrad
> <ma...@mellanox.com>; Slava Ovsiienko <viachesl...@mellanox.com>;
> sta...@dpdk.org
> Subject: [PATCH] net/mlx5: reduce txq completion index memory loads
> 
> The mlx5_tx_handle_completion() function uses a non-optimal check to
> decide whether a doorbell is needed. Advancing a copy of the txq
> consumer index and comparing this copy with the initial value causes
> unnecessary memory loads and hurts performance. It is better to
> use a simple boolean variable for this purpose. That allows us
> to eliminate all the excessive memory operations on the txq consumer
> index and restores the performance of the tx completions.
> 
> Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozy...@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 5ac63da..f3bf763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
>  {
>       unsigned int count = MLX5_TX_COMP_MAX_CQE;
>       volatile struct mlx5_cqe *last_cqe = NULL;
> -     uint16_t ci = txq->cq_ci;
> +     bool ring_doorbell = false;
>       int ret;
> 
>       static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative
> value");
> @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
>       do {
>               volatile struct mlx5_cqe *cqe;
> 
> -             cqe = &txq->cqes[ci & txq->cqe_m];
> -             ret = check_cqe(cqe, txq->cqe_s, ci);
> +             cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
> +             ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
>               if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
>                       if (likely(ret != MLX5_CQE_STATUS_ERR)) {
>                               /* No new CQEs in completion queue. */
> @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
>                        * here, before we might perform SQ reset.
>                        */
>                       rte_wmb();
> -                     txq->cq_ci = ci;
>                       ret = mlx5_tx_error_cqe_handle
>                               (txq, (volatile struct mlx5_err_cqe *)cqe);
>                       if (unlikely(ret < 0)) {
> @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
>                        * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
>                        * The send queue is supposed to be empty.
>                        */
> -                     ++ci;
> -                     txq->cq_pi = ci;
> +                     ring_doorbell = true;
> +                     ++txq->cq_ci;
> +                     txq->cq_pi = txq->cq_ci;
>                       last_cqe = NULL;
>                       continue;
>               }
>               /* Normal transmit completion. */
> -             MLX5_ASSERT(ci != txq->cq_pi);
> -             MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
> +             MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
> +             MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16)
> ==
>                           cqe->wqe_counter);
> -             ++ci;
> +             ring_doorbell = true;
> +             ++txq->cq_ci;
>               last_cqe = cqe;
>               /*
>                * We have to restrict the amount of processed CQEs
> @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
>               if (likely(--count == 0))
>                       break;
>       } while (true);
> -     if (likely(ci != txq->cq_ci)) {
> -             /*
> -              * Update completion queue consuming index
> -              * and ring doorbell to notify hardware.
> -              */
> +     if (likely(ring_doorbell)) {
> +             /* Ring doorbell to notify hardware. */
>               rte_compiler_barrier();
> -             txq->cq_ci = ci;
> -             *txq->cq_db = rte_cpu_to_be_32(ci);
> +             *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
>               mlx5_tx_comp_flush(txq, last_cqe, olx);
>       }
>  }
> --
> 1.8.3.1


Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

Reply via email to