When a packet is received with an error, the error is reported in
the CQE structure and the PMD analyzes the error syndrome, taking
one of two actions: reset the entire queue for critical errors, or
just ignore the packet.

The non-vectorized rx_burst did not ignore packets with non-critical
errors: when the packet length exceeded the mbuf data buffer length,
it consumed the next element in the WQE ring, so the CQE and WQE
consume indices lost synchronization.

Fixes: aa67ed308458 ("net/mlx5: ignore non-critical syndromes for Rx queue")
Cc: sta...@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viachesl...@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)
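
A minimal standalone sketch of the index bookkeeping the fix restores,
for illustration only: on a non-critical error the packet is dropped
and the CQE and WQE consume indices advance together, while a critical
error requests a queue reset. The toy_rxq, toy_poll_one and ERR_*
names below are hypothetical placeholders, not the mlx5 PMD code.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified completion outcomes (placeholders, not the PMD constants). */
#define ERR_NONE     0  /* good completion */
#define ERR_IGNORE   1  /* non-critical error: drop the packet, keep the queue */
#define ERR_CRITICAL 2  /* critical error: the whole queue must be reset */

/* Stand-in for the few mlx5_rxq_data fields the sketch needs. */
struct toy_rxq {
	uint32_t cq_ci;   /* CQE consume index */
	uint32_t rq_ci;   /* WQE ring consume index, in element units */
	uint32_t sges_n;  /* log2 of elements (segments) per packet */
};

/* Handle one completion; returns 1 if a packet was produced, 0 otherwise. */
static int
toy_poll_one(struct toy_rxq *rxq, int outcome, uint16_t skip_cnt)
{
	if (outcome == ERR_CRITICAL) {
		/* The real PMD resets the queue here; the sketch only reports it. */
		printf("critical error: queue reset requested\n");
		return 0;
	}
	if (outcome == ERR_IGNORE) {
		/*
		 * Drop the packet but advance both consume indices together,
		 * so the CQE and WQE rings do not drift apart.
		 */
		rxq->cq_ci += 1;
		rxq->rq_ci += (uint32_t)skip_cnt << rxq->sges_n;
		return 0;
	}
	/* Good completion: consume one CQE and one (multi-segment) WQE. */
	rxq->cq_ci += 1;
	rxq->rq_ci += 1u << rxq->sges_n;
	return 1;
}

int
main(void)
{
	struct toy_rxq rxq = { .cq_ci = 0, .rq_ci = 0, .sges_n = 0 };

	toy_poll_one(&rxq, ERR_NONE, 0);    /* regular packet */
	toy_poll_one(&rxq, ERR_IGNORE, 1);  /* oversized packet, dropped */
	toy_poll_one(&rxq, ERR_NONE, 0);    /* next regular packet */
	printf("cq_ci=%" PRIu32 " rq_ci=%" PRIu32 "\n", rxq.cq_ci, rxq.rq_ci);
	return 0;
}

Both indices end at 3 here, i.e. the rings stay in step even though
one packet was dropped.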

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 5bf1a679b2..cc087348a4 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -613,7 +613,8 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
  * @param mprq
  *   Indication if it is called from MPRQ.
  * @return
- *   0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ *   0 in case of empty CQE,
+ *   MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
  *   MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset,
  *   otherwise the packet size in regular RxQ,
  *   and striding byte count format in mprq case.
@@ -697,6 +698,11 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
                                        if (ret == MLX5_RECOVERY_ERROR_RET ||
                                                ret == 
MLX5_RECOVERY_COMPLETED_RET)
                                                return 
MLX5_CRITICAL_ERROR_CQE_RET;
+                                       if (!mprq && ret == 
MLX5_RECOVERY_IGNORE_RET) {
+                                               *skip_cnt = 1;
+                                               ++rxq->cq_ci;
+                                               return MLX5_ERROR_CQE_MASK;
+                                       }
                                } else {
                                        return 0;
                                }
@@ -971,19 +977,18 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
                        len = mlx5_rx_poll_len(rxq, cqe, cqe_n, cqe_mask, &mcqe, &skip_cnt, false);
                        if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+                               /* We drop packets with non-critical errors */
+                               rte_mbuf_raw_free(rep);
                                if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
-                                       rte_mbuf_raw_free(rep);
                                        rq_ci = rxq->rq_ci << sges_n;
                                        break;
                                }
+                               /* Skip the specified number of error CQE packets */
                                rq_ci >>= sges_n;
                                rq_ci += skip_cnt;
                                rq_ci <<= sges_n;
-                               idx = rq_ci & wqe_mask;
-                               wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
-                               seg = (*rxq->elts)[idx];
-                               cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
-                               len = len & ~MLX5_ERROR_CQE_MASK;
+                               MLX5_ASSERT(!pkt);
+                               continue;
                        }
                        if (len == 0) {
                                rte_mbuf_raw_free(rep);
-- 
2.18.1
