Hi, > -----Original Message----- > From: Alexander Kozyrev <akozy...@mellanox.com> > Sent: Monday, March 16, 2020 5:35 PM > To: dev@dpdk.org > Cc: Raslan Darawsheh <rasl...@mellanox.com>; Matan Azrad > <ma...@mellanox.com>; Slava Ovsiienko <viachesl...@mellanox.com>; > sta...@dpdk.org > Subject: [PATCH] net/mlx5: reduce txq completion index memory loads > > There is a non-optimal check of whether a doorbell is needed in the > mlx5_tx_handle_completion() function. Advancing a copy of the txq > consumer index and checking this copy against the initial value causes > unnecessary memory loads and hurts the performance. It is better to > have a simple small boolean variable for this purpose. That allows > us to eliminate all the excessive memory operations with the txq consumer > index and restore the performance of the tx completions. > > Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine") > Cc: sta...@dpdk.org > > Signed-off-by: Alexander Kozyrev <akozy...@mellanox.com> > Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > --- > drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++---------------- > 1 file changed, 13 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c > index 5ac63da..f3bf763 100644 > --- a/drivers/net/mlx5/mlx5_rxtx.c > +++ b/drivers/net/mlx5/mlx5_rxtx.c > @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code { > { > unsigned int count = MLX5_TX_COMP_MAX_CQE; > volatile struct mlx5_cqe *last_cqe = NULL; > - uint16_t ci = txq->cq_ci; > + bool ring_doorbell = false; > int ret; > > static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative > value"); > @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code { > do { > volatile struct mlx5_cqe *cqe; > > - cqe = &txq->cqes[ci & txq->cqe_m]; > - ret = check_cqe(cqe, txq->cqe_s, ci); > + cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; > + ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); > if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { > if (likely(ret != MLX5_CQE_STATUS_ERR)) { > /* 
No new CQEs in completion queue. */ > @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code { > * here, before we might perform SQ reset. > */ > rte_wmb(); > - txq->cq_ci = ci; > ret = mlx5_tx_error_cqe_handle > (txq, (volatile struct mlx5_err_cqe *)cqe); > if (unlikely(ret < 0)) { > @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code { > * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. > * The send queue is supposed to be empty. > */ > - ++ci; > - txq->cq_pi = ci; > + ring_doorbell = true; > + ++txq->cq_ci; > + txq->cq_pi = txq->cq_ci; > last_cqe = NULL; > continue; > } > /* Normal transmit completion. */ > - MLX5_ASSERT(ci != txq->cq_pi); > - MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) == > + MLX5_ASSERT(txq->cq_ci != txq->cq_pi); > + MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) > == > cqe->wqe_counter); > - ++ci; > + ring_doorbell = true; > + ++txq->cq_ci; > last_cqe = cqe; > /* > * We have to restrict the amount of processed CQEs > @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code { > if (likely(--count == 0)) > break; > } while (true); > - if (likely(ci != txq->cq_ci)) { > - /* > - * Update completion queue consuming index > - * and ring doorbell to notify hardware. > - */ > + if (likely(ring_doorbell)) { > + /* Ring doorbell to notify hardware. */ > rte_compiler_barrier(); > - txq->cq_ci = ci; > - *txq->cq_db = rte_cpu_to_be_32(ci); > + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); > mlx5_tx_comp_flush(txq, last_cqe, olx); > } > } > -- > 1.8.3.1
Patch applied to next-net-mlx. Kindest regards, Raslan Darawsheh