Only three error CQE syndromes are considered critical and warrant a queue restart; all other syndromes can be safely ignored. Non-critical syndromes are already ignored for Rx queues. Skip non-critical error CQEs for Tx queues as well.
Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error") Cc: sta...@dpdk.org Signed-off-by: Alexander Kozyrev <akozy...@nvidia.com> --- drivers/common/mlx5/mlx5_prm.h | 17 +++++++++++++++++ drivers/net/mlx5/mlx5_rx.c | 4 +--- drivers/net/mlx5/mlx5_tx.c | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 210158350d..e4034699d8 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -5614,4 +5614,21 @@ mlx5_ts_format_conv(uint32_t ts_format) MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; } +/** + * Check if an error CQE syndrome is critical. + * + * @param syndrome + * Error CQE syndrome to check. + * + * @return + * Positive value if critical, 0 otherwise. + */ +static inline uint32_t +mlx5_critical_syndrome(uint8_t syndrome) +{ + return (syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || + syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR); +} + #endif /* RTE_PMD_MLX5_PRM_H_ */ diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index 5e58eb8bc9..a562daa7c3 100644 --- a/drivers/net/mlx5/mlx5_rx.c +++ b/drivers/net/mlx5/mlx5_rx.c @@ -479,9 +479,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, for (i = 0; i < (int)err_n; i++) { u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask]; if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) { - if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || - u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || - u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR) + if (mlx5_critical_syndrome(u.err_cqe->syndrome)) critical_syndrome = true; break; } diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c index 2f48bbc82e..2c53feeb9c 100644 --- a/drivers/net/mlx5/mlx5_tx.c +++ b/drivers/net/mlx5/mlx5_tx.c @@ -85,7 +85,7 @@ static int mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, volatile struct 
mlx5_error_cqe *err_cqe) { - if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { + if (mlx5_critical_syndrome(err_cqe->syndrome)) { const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); @@ -217,7 +217,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, } /* * We are going to fetch all entries with - * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. + * non-critical error syndromes. * The send queue is supposed to be empty. */ ring_doorbell = true; -- 2.18.2