The rxq_data resources are shared between shared Rx queues with the
same group and queue ID.
The cq_ci:24 bit-field of rxq_data was not kept apart from the other
fields: it was packed into the same 32-bit word as other bit-fields,
such as dynf_meta and delay_drop.

  32bit:  xxxx xxxI IIII IIII IIII IIII IIII IIIx
                  ^ .... .... .... .... ...^
                  |          cq_ci         |

The issue is that while the control thread updates the dynf_meta:1 or
delay_drop:1 value during port start, a data thread may update cq_ci
at the same time. Because these fields share the same bytes, the two
threads race on them: the cq_ci value may be overwritten, and an
abnormal value may then be written to the HW CQ DB.

This patch separates cq_ci from the configuration data fields, and
adds a check so that delay_drop and dynf_meta are not updated if the
shared Rx queue is already started.

Fixes: 02a6195cbe ("net/mlx5: support enhanced CQE compression in Rx burst")
Cc: sta...@dpdk.org

Signed-off-by: Jiawei Wang <jiaw...@nvidia.com>
Acked-by: Bing Zhao <bi...@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com>
---
 drivers/net/mlx5/mlx5_devx.c |  3 ++-
 drivers/net/mlx5/mlx5_flow.c | 24 +++++++++++++-----------
 drivers/net/mlx5/mlx5_rx.h   |  4 ++--
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 7db271acb4..8ebe784000 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -684,7 +684,8 @@ mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
                DRV_LOG(ERR, "Failed to create CQ.");
                goto error;
        }
-       rxq_data->delay_drop = priv->config.std_delay_drop;
+       if (!rxq_data->shared || !rxq_ctrl->started)
+               rxq_data->delay_drop = priv->config.std_delay_drop;
        /* Create RQ using DevX API. */
        ret = mlx5_rxq_create_devx_rq_resources(rxq);
        if (ret) {
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 5b3f2b9119..72fb3a55ba 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1853,18 +1853,20 @@ mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
                if (rxq == NULL || rxq->ctrl == NULL)
                        continue;
                data = &rxq->ctrl->rxq;
-               if (!rte_flow_dynf_metadata_avail()) {
-                       data->dynf_meta = 0;
-                       data->flow_meta_mask = 0;
-                       data->flow_meta_offset = -1;
-                       data->flow_meta_port_mask = 0;
-               } else {
-                       data->dynf_meta = 1;
-                       data->flow_meta_mask = rte_flow_dynf_metadata_mask;
-                       data->flow_meta_offset = rte_flow_dynf_metadata_offs;
-                       data->flow_meta_port_mask = priv->sh->dv_meta_mask;
+               if (!data->shared || !rxq->ctrl->started) {
+                       if (!rte_flow_dynf_metadata_avail()) {
+                               data->dynf_meta = 0;
+                               data->flow_meta_mask = 0;
+                               data->flow_meta_offset = -1;
+                               data->flow_meta_port_mask = 0;
+                       } else {
+                               data->dynf_meta = 1;
+                               data->flow_meta_mask = rte_flow_dynf_metadata_mask;
+                               data->flow_meta_offset = rte_flow_dynf_metadata_offs;
+                               data->flow_meta_port_mask = priv->sh->dv_meta_mask;
+                       }
+                       data->mark_flag = mark_flag;
                }
-               data->mark_flag = mark_flag;
        }
 }
 
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 1485556d89..7d144921ab 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -101,14 +101,14 @@ struct __rte_cache_aligned mlx5_rxq_data {
        unsigned int shared:1; /* Shared RXQ. */
        unsigned int delay_drop:1; /* Enable delay drop. */
        unsigned int cqe_comp_layout:1; /* CQE Compression Layout*/
-       unsigned int cq_ci:24;
+       uint16_t port_id;
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
-       uint16_t port_id;
        uint32_t elts_ci;
        uint32_t rq_ci;
        uint16_t consumed_strd; /* Number of consumed strides in WQE. */
        uint32_t rq_pi;
+       uint32_t cq_ci:24;
        uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */
        uint32_t byte_mask;
        union {
-- 
2.18.1

Reply via email to