For non-fatal syndromes like LOCAL_LENGTH_ERR, the Rx queue reset
should not be triggered: the Rx queue can continue with the next
packets without any recovery. Only three syndromes warrant an Rx
queue reset: LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
Do not initiate an Rx queue reset in any other case.
Skip all non-critical error CQEs and continue with packet processing.
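As a minimal sketch of the intended classification (illustration only,
not part of the patch: the helper name below is hypothetical, and the
patch performs this check inline in mlx5_rx_err_handle(); it assumes
<stdbool.h> and the MLX5_CQE_SYNDROME_* defines already used by the
driver):

    /* Hypothetical helper: return true when the error CQE syndrome is
     * critical and the Rx queue must be reset, false when the CQE can
     * be skipped and Rx processing continues.
     */
    static inline bool
    mlx5_rx_err_syndrome_is_critical(uint8_t syndrome)
    {
            switch (syndrome) {
            case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
            case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
            case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
                    return true;  /* Reset the Rx queue. */
            default:
                    return false; /* e.g. LOCAL_LENGTH_ERR: drop and continue. */
            }
    }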

Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
Cc: sta...@dpdk.org

Signed-off-by: Alexander Kozyrev <akozy...@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c       | 123 ++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rx.h       |   5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   3 +-
 3 files changed, 102 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 7612d15f01..99a08ef5f1 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -39,7 +39,8 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
+                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+                uint16_t *skip_cnt, bool mprq);
 
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -408,10 +409,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
        *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
+#define MLX5_ERROR_CQE_MASK 0x40000000
 /* Must be negative. */
-#define MLX5_ERROR_CQE_RET (-1)
+#define MLX5_REGULAR_ERROR_CQE_RET (-5)
+#define MLX5_CRITICAL_ERROR_CQE_RET (-4)
 /* Must not be negative. */
 #define MLX5_RECOVERY_ERROR_RET 0
+#define MLX5_RECOVERY_IGNORE_RET 1
+#define MLX5_RECOVERY_COMPLETED_RET 2
 
 /**
  * Handle a Rx error.
@@ -429,10 +434,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  *   Number of CQEs to check for an error.
  *
  * @return
- *   MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
+ *   MLX5_RECOVERY_ERROR_RET in case of recovery error,
+ *   MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
+ *   MLX5_RECOVERY_COMPLETED_RET in case of recovery is completed,
+ *   otherwise the CQE status after ignored error syndrome or queue reset.
  */
 int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+                  uint16_t err_n, uint16_t *skip_cnt)
 {
        const uint16_t cqe_n = 1 << rxq->cqe_n;
        const uint16_t cqe_mask = cqe_n - 1;
@@ -447,14 +456,35 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
                .cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
        };
        struct mlx5_mp_arg_queue_state_modify sm;
+       bool critical_syndrome = false;
        int ret, i;
 
        switch (rxq->err_state) {
+       case MLX5_RXQ_ERR_STATE_IGNORE:
+               ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
+               if (ret != MLX5_CQE_STATUS_ERR) {
+                       rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+                       return ret;
+               }
+               /* Fall-through */
        case MLX5_RXQ_ERR_STATE_NO_ERROR:
                for (i = 0; i < (int)err_n; i++) {
                        u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
-                       if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+                       if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
+                               if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+                                   u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+                                   u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+                                       critical_syndrome = true;
                                break;
+                       }
+               }
+               if (!critical_syndrome) {
+                       if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
+                               *skip_cnt = 0;
+                               if (i == err_n)
+                                       rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
+                       }
+                       return MLX5_RECOVERY_IGNORE_RET;
                }
                rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
                /* Fall-through */
@@ -546,6 +576,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
                        }
                        mlx5_rxq_initialize(rxq);
                        rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+                       return MLX5_RECOVERY_COMPLETED_RET;
                }
                return ret;
        default:
@@ -565,19 +596,24 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
  * @param[out] mcqe
  *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
  *   written.
- *
+ * @param[out] skip_cnt
+ *   Number of packets skipped due to recoverable errors.
+ * @param mprq
+ *   Indication if it is called from MPRQ.
  * @return
- *   0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
- *   otherwise the packet size in regular RxQ, and striding byte
- *   count format in mprq case.
+ *   0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ *   MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset,
+ *   otherwise the packet size in regular RxQ,
+ *   and striding byte count format in mprq case.
  */
 static inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
+                uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+                uint16_t *skip_cnt, bool mprq)
 {
        struct rxq_zip *zip = &rxq->zip;
        uint16_t cqe_n = cqe_cnt + 1;
-       int len;
+       int len = 0, ret = 0;
        uint16_t idx, end;
 
        do {
@@ -626,7 +662,6 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
                 * compressed.
                 */
                } else {
-                       int ret;
                        int8_t op_own;
                        uint32_t cq_ci;
 
@@ -634,10 +669,12 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
                        if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
                                if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
                                             rxq->err_state)) {
-                                       ret = mlx5_rx_err_handle(rxq, 0, 1);
-                                       if (ret == MLX5_CQE_STATUS_HW_OWN ||
-                                           ret == MLX5_RECOVERY_ERROR_RET)
-                                               return MLX5_ERROR_CQE_RET;
+                                       ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
+                                       if (ret == MLX5_CQE_STATUS_HW_OWN)
+                                               return MLX5_ERROR_CQE_MASK;
+                                       if (ret == MLX5_RECOVERY_ERROR_RET ||
+                                               ret == MLX5_RECOVERY_COMPLETED_RET)
+                                               return MLX5_CRITICAL_ERROR_CQE_RET;
                                } else {
                                        return 0;
                                }
@@ -690,8 +727,15 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
                        }
                }
                if (unlikely(rxq->err_state)) {
+                       if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
+                           ret == MLX5_CQE_STATUS_SW_OWN) {
+                               rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+                               return len & MLX5_ERROR_CQE_MASK;
+                       }
                        cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
                        ++rxq->stats.idropped;
+                       (*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+                               MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
                } else {
                        return len;
                }
@@ -843,6 +887,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        int len = 0; /* keep its value across iterations. */
 
        while (pkts_n) {
+               uint16_t skip_cnt;
                unsigned int idx = rq_ci & wqe_cnt;
                volatile struct mlx5_wqe_data_seg *wqe =
                        &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
@@ -881,11 +926,24 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                }
                if (!pkt) {
                        cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
-                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
-                       if (len <= 0) {
-                               rte_mbuf_raw_free(rep);
-                               if (unlikely(len == MLX5_ERROR_CQE_RET))
+                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false);
+                       if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+                               if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
+                                       rte_mbuf_raw_free(rep);
                                        rq_ci = rxq->rq_ci << sges_n;
+                                       break;
+                               }
+                               rq_ci >>= sges_n;
+                               rq_ci += skip_cnt;
+                               rq_ci <<= sges_n;
+                               idx = rq_ci & wqe_cnt;
+                               wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
+                               seg = (*rxq->elts)[idx];
+                               cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+                               len = len & ~MLX5_ERROR_CQE_MASK;
+                       }
+                       if (len == 0) {
+                               rte_mbuf_raw_free(rep);
                                break;
                        }
                        pkt = seg;
@@ -1095,6 +1153,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint16_t strd_cnt;
                uint16_t strd_idx;
                uint32_t byte_cnt;
+               uint16_t skip_cnt;
                volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                enum mlx5_rqx_code rxq_code;
 
@@ -1107,14 +1166,26 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
                }
                cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
-               ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
+               ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true);
+               if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
+                       if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
+                               rq_ci = rxq->rq_ci;
+                               consumed_strd = rxq->consumed_strd;
+                               break;
+                       }
+                       consumed_strd += skip_cnt;
+                       while (consumed_strd >= strd_n) {
+                               /* Replace WQE if the buffer is still in use. */
+                               mprq_buf_replace(rxq, rq_ci & wq_mask);
+                               /* Advance to the next WQE. */
+                               consumed_strd -= strd_n;
+                               ++rq_ci;
+                               buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
+                       }
+                       cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+               }
                if (ret == 0)
                        break;
-               if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
-                       rq_ci = rxq->rq_ci;
-                       consumed_strd = rxq->consumed_strd;
-                       break;
-               }
                byte_cnt = ret;
                len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
                MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4ba53ebc48..6b42e27c89 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -62,6 +62,7 @@ enum mlx5_rxq_err_state {
        MLX5_RXQ_ERR_STATE_NO_ERROR = 0,
        MLX5_RXQ_ERR_STATE_NEED_RESET,
        MLX5_RXQ_ERR_STATE_NEED_READY,
+       MLX5_RXQ_ERR_STATE_IGNORE,
 };
 
 enum mlx5_rqx_code {
@@ -286,8 +287,8 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
 
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
-                                     uint8_t vec, uint16_t err_n);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+                                     uint16_t err_n, uint16_t *skip_cnt);
 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
                            uint16_t pkts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index c6be2be763..667475a93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -51,6 +51,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
                         uint16_t pkts_n)
 {
        uint16_t n = 0;
+       uint16_t skip_cnt;
        unsigned int i;
 #ifdef MLX5_PMD_SOFT_COUNTERS
        uint32_t err_bytes = 0;
@@ -74,7 +75,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
        rxq->stats.ipackets -= (pkts_n - n);
        rxq->stats.ibytes -= err_bytes;
 #endif
-       mlx5_rx_err_handle(rxq, 1, pkts_n);
+       mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
        return n;
 }
 
-- 
2.18.2