The HW objects of the Rx queue is created/destroyed in the device start\stop stage while the ethdev configurations for the Rx queue starts from the rx_queue_setup stage. The PMD should save all the last configurations it got from the ethdev and to apply them to the device in the dev_start operation.
Wrongly, last code added to mitigate the reference counters didn't take into account the above rule and combined the configurations and HW objects to be created\destroyed together. This causes to memory leak and other memory issues. Make sure the HW object is released in stop operation when there is no any reference to it while the configurations stay saved. Fixes: 24e4b650badc ("net/mlx5: mitigate Rx queue reference counters") Signed-off-by: Matan Azrad <ma...@nvidia.com> Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com> --- drivers/net/mlx5/mlx5_rxq.c | 23 +++++++++++++---------- drivers/net/mlx5/mlx5_rxtx.h | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index f1d8373..e1783ba 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -447,7 +447,8 @@ return -rte_errno; } rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); - return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1); + return (__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED) == 1); + } /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ @@ -1541,7 +1542,7 @@ struct mlx5_rxq_ctrl * tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq; #endif tmpl->rxq.idx = idx; - rte_atomic32_inc(&tmpl->refcnt); + __atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED); LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); return tmpl; error: @@ -1588,7 +1589,7 @@ struct mlx5_rxq_ctrl * tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; tmpl->hairpin_conf = *hairpin_conf; tmpl->rxq.idx = idx; - rte_atomic32_inc(&tmpl->refcnt); + __atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED); LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); return tmpl; } @@ -1613,7 +1614,7 @@ struct mlx5_rxq_ctrl * if (rxq_data) { rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); - rte_atomic32_inc(&rxq_ctrl->refcnt); + __atomic_add_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED); } return rxq_ctrl; } @@ -1638,7 +1639,7 @@ struct mlx5_rxq_ctrl * if (!(*priv->rxqs)[idx]) return 0; rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); - if (!rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) + if (__atomic_sub_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED) > 1) return 1; if (rxq_ctrl->obj) { priv->obj_ops.rxq_obj_release(rxq_ctrl->obj); @@ -1646,13 +1647,15 @@ struct mlx5_rxq_ctrl * mlx5_free(rxq_ctrl->obj); rxq_ctrl->obj = NULL; } - if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) { - mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); + if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) rxq_free_elts(rxq_ctrl); + if (!__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)) { + if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) + mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); + LIST_REMOVE(rxq_ctrl, next); + mlx5_free(rxq_ctrl); + (*priv->rxqs)[idx] = NULL; } - LIST_REMOVE(rxq_ctrl, next); - mlx5_free(rxq_ctrl); - (*priv->rxqs)[idx] = NULL; return 0; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 674296e..c3734e3 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -165,7 +165,7 @@ enum mlx5_rxq_type { struct mlx5_rxq_ctrl { struct mlx5_rxq_data rxq; /* Data path structure. */ LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ - rte_atomic32_t refcnt; /* Reference counter. */ + uint32_t refcnt; /* Reference counter. */ struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */ struct mlx5_priv *priv; /* Back pointer to private data. */ enum mlx5_rxq_type type; /* Rxq type. */ -- 1.8.3.1