The mlx5_tx_handle_completion() function uses a non-optimal check to decide whether the doorbell needs to be rung. Advancing a local copy of the txq consumer index and then comparing this copy against the initial value causes unnecessary memory loads and hurts performance. It is better to use a simple, small boolean variable for this purpose. That eliminates all the excessive memory operations on the txq consumer index and restores the performance of Tx completion handling.
Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine") Cc: sta...@dpdk.org Signed-off-by: Alexander Kozyrev <akozy...@mellanox.com> Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> --- drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 5ac63da..f3bf763 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code { { unsigned int count = MLX5_TX_COMP_MAX_CQE; volatile struct mlx5_cqe *last_cqe = NULL; - uint16_t ci = txq->cq_ci; + bool ring_doorbell = false; int ret; static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code { do { volatile struct mlx5_cqe *cqe; - cqe = &txq->cqes[ci & txq->cqe_m]; - ret = check_cqe(cqe, txq->cqe_s, ci); + cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; + ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. */ @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code { * here, before we might perform SQ reset. */ rte_wmb(); - txq->cq_ci = ci; ret = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); if (unlikely(ret < 0)) { @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code { * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. * The send queue is supposed to be empty. */ - ++ci; - txq->cq_pi = ci; + ring_doorbell = true; + ++txq->cq_ci; + txq->cq_pi = txq->cq_ci; last_cqe = NULL; continue; } /* Normal transmit completion. 
*/ - MLX5_ASSERT(ci != txq->cq_pi); - MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) == + MLX5_ASSERT(txq->cq_ci != txq->cq_pi); + MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == cqe->wqe_counter); - ++ci; + ring_doorbell = true; + ++txq->cq_ci; last_cqe = cqe; /* * We have to restrict the amount of processed CQEs @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code { if (likely(--count == 0)) break; } while (true); - if (likely(ci != txq->cq_ci)) { - /* - * Update completion queue consuming index - * and ring doorbell to notify hardware. - */ + if (likely(ring_doorbell)) { + /* Ring doorbell to notify hardware. */ rte_compiler_barrier(); - txq->cq_ci = ci; - *txq->cq_db = rte_cpu_to_be_32(ci); + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); mlx5_tx_comp_flush(txq, last_cqe, olx); } } -- 1.8.3.1