For non-fatal syndromes like LOCAL_LENGTH_ERR, the Rx queue reset shouldn't be triggered. The Rx queue can continue with the next packets without any recovery. Only three syndromes warrant a Rx queue reset: LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR. Do not initiate a Rx queue reset in any other case. Skip all non-critical error CQEs and continue with packet processing.
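In other words, an error CQE only triggers recovery when its syndrome is one of the three critical ones; every other error CQE is counted as dropped and reception moves on. Below is a minimal standalone sketch of that classification (illustration only, not part of the diff; the helper name is made up, and the syndrome values are assumed to mirror the mlx5 PRM definitions used by the driver):

#include <stdbool.h>
#include <stdint.h>

/* Assumed to mirror the mlx5 PRM CQE error syndrome values. */
#define CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01
#define CQE_SYNDROME_LOCAL_QP_OP_ERR  0x02
#define CQE_SYNDROME_LOCAL_PROT_ERR   0x04
#define CQE_SYNDROME_WR_FLUSH_ERR     0x05

/*
 * Hypothetical helper: returns true only for the syndromes that require
 * a Rx queue reset. Any other error CQE is skipped and the Rx queue
 * continues with the next packet.
 */
static inline bool
rx_syndrome_is_critical(uint8_t syndrome)
{
	switch (syndrome) {
	case CQE_SYNDROME_LOCAL_QP_OP_ERR:
	case CQE_SYNDROME_LOCAL_PROT_ERR:
	case CQE_SYNDROME_WR_FLUSH_ERR:
		return true;
	default:
		/* e.g. CQE_SYNDROME_LOCAL_LENGTH_ERR: drop only, no reset. */
		return false;
	}
}
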
Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
Cc: sta...@dpdk.org

Signed-off-by: Alexander Kozyrev <akozy...@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c       | 123 ++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rx.h       |   5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   3 +-
 3 files changed, 102 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 7612d15f01..99a08ef5f1 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -39,7 +39,8 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq);
 
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -408,10 +409,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
+#define MLX5_ERROR_CQE_MASK 0x40000000
 /* Must be negative. */
-#define MLX5_ERROR_CQE_RET (-1)
+#define MLX5_REGULAR_ERROR_CQE_RET (-5)
+#define MLX5_CRITICAL_ERROR_CQE_RET (-4)
 /* Must not be negative. */
 #define MLX5_RECOVERY_ERROR_RET 0
+#define MLX5_RECOVERY_IGNORE_RET 1
+#define MLX5_RECOVERY_COMPLETED_RET 2
 
 /**
  * Handle a Rx error.
@@ -429,10 +434,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  *   Number of CQEs to check for an error.
  *
  * @return
- *   MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
+ *   MLX5_RECOVERY_ERROR_RET in case of recovery error,
+ *   MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
+ *   MLX5_RECOVERY_COMPLETED_RET in case of recovery is completed,
+ *   otherwise the CQE status after ignored error syndrome or queue reset.
  */
 int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+		   uint16_t err_n, uint16_t *skip_cnt)
 {
 	const uint16_t cqe_n = 1 << rxq->cqe_n;
 	const uint16_t cqe_mask = cqe_n - 1;
@@ -447,14 +456,35 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
 	};
 	struct mlx5_mp_arg_queue_state_modify sm;
+	bool critical_syndrome = false;
 	int ret, i;
 
 	switch (rxq->err_state) {
+	case MLX5_RXQ_ERR_STATE_IGNORE:
+		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
+		if (ret != MLX5_CQE_STATUS_ERR) {
+			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return ret;
+		}
+		/* Fall-through */
 	case MLX5_RXQ_ERR_STATE_NO_ERROR:
 		for (i = 0; i < (int)err_n; i++) {
 			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
-			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
+				if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+					critical_syndrome = true;
 				break;
+			}
+		}
+		if (!critical_syndrome) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
+				*skip_cnt = 0;
+				if (i == err_n)
+					rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
+			}
+			return MLX5_RECOVERY_IGNORE_RET;
 		}
 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
 		/* Fall-through */
@@ -546,6 +576,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 			}
 			mlx5_rxq_initialize(rxq);
 			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return MLX5_RECOVERY_COMPLETED_RET;
 		}
 		return ret;
 	default:
@@ -565,19 +596,24 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
  * @param[out] mcqe
  *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
  *   written.
- *
+ * @param[out] skip_cnt
+ *   Number of packets skipped due to recoverable errors.
+ * @param mprq
+ *   Indication if it is called from MPRQ.
  * @return
- *   0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
- *   otherwise the packet size in regular RxQ, and striding byte
- *   count format in mprq case.
+ *   0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ *   MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset,
+ *   otherwise the packet size in regular RxQ,
+ *   and striding byte count format in mprq case.
  */
 static inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq)
 {
 	struct rxq_zip *zip = &rxq->zip;
 	uint16_t cqe_n = cqe_cnt + 1;
-	int len;
+	int len = 0, ret = 0;
 	uint16_t idx, end;
 
 	do {
@@ -626,7 +662,6 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 * compressed.
 		 */
 		} else {
-			int ret;
 			int8_t op_own;
 			uint32_t cq_ci;
 
@@ -634,10 +669,12 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
 					     rxq->err_state)) {
-					ret = mlx5_rx_err_handle(rxq, 0, 1);
-					if (ret == MLX5_CQE_STATUS_HW_OWN ||
-					    ret == MLX5_RECOVERY_ERROR_RET)
-						return MLX5_ERROR_CQE_RET;
+					ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
+					if (ret == MLX5_CQE_STATUS_HW_OWN)
+						return MLX5_ERROR_CQE_MASK;
+					if (ret == MLX5_RECOVERY_ERROR_RET ||
+					    ret == MLX5_RECOVERY_COMPLETED_RET)
+						return MLX5_CRITICAL_ERROR_CQE_RET;
 				} else {
 					return 0;
 				}
@@ -690,8 +727,15 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			}
 		}
 		if (unlikely(rxq->err_state)) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
+			    ret == MLX5_CQE_STATUS_SW_OWN) {
+				rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+				return len & MLX5_ERROR_CQE_MASK;
+			}
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
 			++rxq->stats.idropped;
+			(*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+				MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
 		} else {
 			return len;
 		}
@@ -843,6 +887,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	int len = 0; /* keep its value across iterations. */
 
 	while (pkts_n) {
+		uint16_t skip_cnt;
 		unsigned int idx = rq_ci & wqe_cnt;
 		volatile struct mlx5_wqe_data_seg *wqe =
 			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
@@ -881,11 +926,24 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 		if (!pkt) {
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
-			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
-			if (len <= 0) {
-				rte_mbuf_raw_free(rep);
-				if (unlikely(len == MLX5_ERROR_CQE_RET))
+			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false);
+			if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+				if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
+					rte_mbuf_raw_free(rep);
 					rq_ci = rxq->rq_ci << sges_n;
+					break;
+				}
+				rq_ci >>= sges_n;
+				rq_ci += skip_cnt;
+				rq_ci <<= sges_n;
+				idx = rq_ci & wqe_cnt;
+				wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
+				seg = (*rxq->elts)[idx];
+				cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+				len = len & ~MLX5_ERROR_CQE_MASK;
+			}
+			if (len == 0) {
+				rte_mbuf_raw_free(rep);
 				break;
 			}
 			pkt = seg;
@@ -1095,6 +1153,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint16_t strd_cnt;
 		uint16_t strd_idx;
 		uint32_t byte_cnt;
+		uint16_t skip_cnt;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		enum mlx5_rqx_code rxq_code;
 
@@ -1107,14 +1166,26 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 		}
 		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
-		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
+		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true);
+		if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
+			if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
+				rq_ci = rxq->rq_ci;
+				consumed_strd = rxq->consumed_strd;
+				break;
+			}
+			consumed_strd += skip_cnt;
+			while (consumed_strd >= strd_n) {
+				/* Replace WQE if the buffer is still in use. */
+				mprq_buf_replace(rxq, rq_ci & wq_mask);
+				/* Advance to the next WQE. */
+				consumed_strd -= strd_n;
+				++rq_ci;
+				buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
+			}
+			cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+		}
 		if (ret == 0)
 			break;
-		if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
-			rq_ci = rxq->rq_ci;
-			consumed_strd = rxq->consumed_strd;
-			break;
-		}
 		byte_cnt = ret;
 		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
 		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4ba53ebc48..6b42e27c89 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -62,6 +62,7 @@ enum mlx5_rxq_err_state {
 	MLX5_RXQ_ERR_STATE_NO_ERROR = 0,
 	MLX5_RXQ_ERR_STATE_NEED_RESET,
 	MLX5_RXQ_ERR_STATE_NEED_READY,
+	MLX5_RXQ_ERR_STATE_IGNORE,
 };
 
 enum mlx5_rqx_code {
@@ -286,8 +287,8 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
 
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
-				      uint8_t vec, uint16_t err_n);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+				      uint16_t err_n, uint16_t *skip_cnt);
 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
 			    uint16_t pkts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index c6be2be763..667475a93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -51,6 +51,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
+	uint16_t skip_cnt;
 	unsigned int i;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint32_t err_bytes = 0;
@@ -74,7 +75,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 	rxq->stats.ipackets -= (pkts_n - n);
 	rxq->stats.ibytes -= err_bytes;
 #endif
-	mlx5_rx_err_handle(rxq, 1, pkts_n);
+	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
 	return n;
 }
 
-- 
2.18.2