MLX5 PMD uses reference counting to manage RX queue resources.
After port stop, shared RSS actions kept their references to RX queues,
preventing the release of queue resources. As a result, the internal PMD
mempool for such queues was exhausted after a number of port restarts.
Diagnostic message from rte_eth_dev_start():

    Rx queue allocation failed: Cannot allocate memory

Release the RX queue references held by indirect actions on port stop
(detach) and take them again on port start (attach). This allows RX queue
resources to be released while keeping indirect RSS across the port restart.
Replace queue IDs in HW with the drop queue ID on detach and restore
the actual queue IDs on attach.

When the port is stopped, create indirect RSS in the detached state.
As a result, MLX5 PMD is able to keep all its indirect actions
across port restart. Advertise this capability by setting
RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP in the device info.

Fixes: 4b61b8774be9 ("ethdev: introduce indirect flow action")
Cc: bi...@nvidia.com
Cc: sta...@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozl...@nvidia.com>
Acked-by: Matan Azrad <ma...@nvidia.com>
---
 drivers/net/mlx5/mlx5_ethdev.c  |   1 +
 drivers/net/mlx5/mlx5_flow.c    | 194 ++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow.h    |   2 +
 drivers/net/mlx5/mlx5_rx.h      |   4 +
 drivers/net/mlx5/mlx5_rxq.c     |  99 ++++++++++++++--
 drivers/net/mlx5/mlx5_trigger.c |  10 ++
 6 files changed, 276 insertions(+), 34 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 82e2284d98..419fec3e4e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -321,6 +321,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
                                 info->rx_queue_offload_capa);
        info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
+       info->dev_capa = RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP;
        info->if_index = mlx5_ifindex(dev);
        info->reta_size = priv->reta_idx_n ?
                priv->reta_idx_n : config->ind_table_max_size;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2768244c2e..df1e927534 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1583,6 +1583,58 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
        return 0;
 }
 
+/**
+ * Validate queue numbers for device RSS.
+ *
+ * @param[in] dev
+ *   Configured device.
+ * @param[in] queues
+ *   Array of queue numbers.
+ * @param[in] queues_n
+ *   Size of the @p queues array.
+ * @param[out] error
+ *   On error, filled with a textual error description.
+ * @param[out] queue_idx
+ *   On error, filled with the index of the offending queue in @p queues array.
+ *
+ * @return
+ *   0 on success, a negative errno code on error.
+ */
+static int
+mlx5_validate_rss_queues(const struct rte_eth_dev *dev,
+                        const uint16_t *queues, uint32_t queues_n,
+                        const char **error, uint32_t *queue_idx)
+{
+       const struct mlx5_priv *priv = dev->data->dev_private;
+       enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
+       uint32_t i;
+
+       for (i = 0; i != queues_n; ++i) {
+               struct mlx5_rxq_ctrl *rxq_ctrl;
+
+               if (queues[i] >= priv->rxqs_n) {
+                       *error = "queue index out of range";
+                       *queue_idx = i;
+                       return -EINVAL;
+               }
+               if (!(*priv->rxqs)[queues[i]]) {
+                       *error =  "queue is not configured";
+                       *queue_idx = i;
+                       return -EINVAL;
+               }
+               rxq_ctrl = container_of((*priv->rxqs)[queues[i]],
+                                       struct mlx5_rxq_ctrl, rxq);
+               if (i == 0)
+                       rxq_type = rxq_ctrl->type;
+               if (rxq_type != rxq_ctrl->type) {
+                       *error = "combining hairpin and regular RSS queues is not supported";
+                       *queue_idx = i;
+                       return -ENOTSUP;
+               }
+       }
+       return 0;
+}
+
 /*
  * Validate the rss action.
  *
@@ -1603,8 +1655,9 @@ mlx5_validate_action_rss(struct rte_eth_dev *dev,
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        const struct rte_flow_action_rss *rss = action->conf;
-       enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
-       unsigned int i;
+       int ret;
+       const char *message;
+       uint32_t queue_idx;
 
        if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
            rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
@@ -1668,27 +1721,12 @@ mlx5_validate_action_rss(struct rte_eth_dev *dev,
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          NULL, "No queues configured");
-       for (i = 0; i != rss->queue_num; ++i) {
-               struct mlx5_rxq_ctrl *rxq_ctrl;
-
-               if (rss->queue[i] >= priv->rxqs_n)
-                       return rte_flow_error_set
-                               (error, EINVAL,
-                                RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                &rss->queue[i], "queue index out of range");
-               if (!(*priv->rxqs)[rss->queue[i]])
-                       return rte_flow_error_set
-                               (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                &rss->queue[i], "queue is not configured");
-               rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
-                                       struct mlx5_rxq_ctrl, rxq);
-               if (i == 0)
-                       rxq_type = rxq_ctrl->type;
-               if (rxq_type != rxq_ctrl->type)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                &rss->queue[i],
-                                "combining hairpin and regular RSS queues is not supported");
+       ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
+                                      &message, &queue_idx);
+       if (ret != 0) {
+               return rte_flow_error_set(error, -ret,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->queue[queue_idx], message);
        }
        return 0;
 }
@@ -8570,6 +8608,116 @@ mlx5_action_handle_flush(struct rte_eth_dev *dev)
        return ret;
 }
 
+/**
+ * Validate existing indirect actions against current device configuration
+ * and attach them to device resources.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_action_handle_attach(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_indexed_pool *ipool =
+                       priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
+       struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
+       int ret = 0;
+       uint32_t idx;
+
+       ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+               struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+               const char *message;
+               uint32_t queue_idx;
+
+               ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
+                                              ind_tbl->queues_n,
+                                              &message, &queue_idx);
+               if (ret != 0) {
+                       DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
+                               dev->data->port_id, ind_tbl->queues[queue_idx],
+                               message);
+                       break;
+               }
+       }
+       if (ret != 0)
+               return ret;
+       ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+               struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+               ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
+               if (ret != 0) {
+                       DRV_LOG(ERR, "Port %u could not attach "
+                               "indirection table obj %p",
+                               dev->data->port_id, (void *)ind_tbl);
+                       goto error;
+               }
+       }
+       return 0;
+error:
+       shared_rss_last = shared_rss;
+       ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+               struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+               if (shared_rss == shared_rss_last)
+                       break;
+               if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
+                       DRV_LOG(CRIT, "Port %u could not detach "
+                               "indirection table obj %p on rollback",
+                               dev->data->port_id, (void *)ind_tbl);
+       }
+       return ret;
+}
+
+/**
+ * Detach indirect actions of the device from its resources.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_action_handle_detach(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_indexed_pool *ipool =
+                       priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
+       struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
+       int ret = 0;
+       uint32_t idx;
+
+       ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+               struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+               ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
+               if (ret != 0) {
+                       DRV_LOG(ERR, "Port %u could not detach "
+                               "indirection table obj %p",
+                               dev->data->port_id, (void *)ind_tbl);
+                       goto error;
+               }
+       }
+       return 0;
+error:
+       shared_rss_last = shared_rss;
+       ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+               struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+               if (shared_rss == shared_rss_last)
+                       break;
+               if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
+                       DRV_LOG(CRIT, "Port %u could not attach "
+                               "indirection table obj %p on rollback",
+                               dev->data->port_id, (void *)ind_tbl);
+       }
+       return ret;
+}
+
 #ifndef HAVE_MLX5DV_DR
 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
 #else
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f94125f26..6bc7946cc3 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1574,6 +1574,8 @@ void mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
                struct mlx5_flow_meter_policy *mtr_policy);
 int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev);
 int mlx5_flow_discover_dr_action_support(struct rte_eth_dev *dev);
+int mlx5_action_handle_attach(struct rte_eth_dev *dev);
+int mlx5_action_handle_detach(struct rte_eth_dev *dev);
 int mlx5_action_handle_flush(struct rte_eth_dev *dev);
 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id);
 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh);
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index a90cb497d1..6d010059f1 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -222,6 +222,10 @@ int mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
                              struct mlx5_ind_table_obj *ind_tbl,
                              uint16_t *queues, const uint32_t queues_n,
                              bool standalone);
+int mlx5_ind_table_obj_attach(struct rte_eth_dev *dev,
+                             struct mlx5_ind_table_obj *ind_tbl);
+int mlx5_ind_table_obj_detach(struct rte_eth_dev *dev,
+                             struct mlx5_ind_table_obj *ind_tbl);
 struct mlx5_list_entry *mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx);
 int mlx5_hrxq_match_cb(void *tool_ctx, struct mlx5_list_entry *entry,
                       void *cb_ctx);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 60673d014d..47124f6e81 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -2028,6 +2028,26 @@ mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
        return ind_tbl;
 }
 
+static int
+mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused,
+                                   struct mlx5_ind_table_obj *ind_tbl)
+{
+       uint32_t refcnt;
+
+       refcnt = __atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED);
+       if (refcnt <= 1)
+               return 0;
+       /*
+        * Modification of indirection tables having more than 1
+        * reference is unsupported.
+        */
+       DRV_LOG(DEBUG,
+               "Port %u cannot modify indirection table %p (refcnt %u > 1).",
+               dev->data->port_id, (void *)ind_tbl, refcnt);
+       rte_errno = EINVAL;
+       return -rte_errno;
+}
+
 /**
  * Modify an indirection table.
  *
@@ -2060,18 +2080,8 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
 
        MLX5_ASSERT(standalone);
        RTE_SET_USED(standalone);
-       if (__atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED) > 1) {
-               /*
-                * Modification of indirection ntables having more than 1
-                * reference unsupported. Intended for standalone indirection
-                * tables only.
-                */
-               DRV_LOG(DEBUG,
-                       "Port %u cannot modify indirection table (refcnt> 1).",
-                       dev->data->port_id);
-               rte_errno = EINVAL;
+       if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0)
                return -rte_errno;
-       }
        for (i = 0; i != queues_n; ++i) {
                if (!mlx5_rxq_get(dev, queues[i])) {
                        ret = -rte_errno;
@@ -2097,6 +2107,73 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
        return ret;
 }
 
+/**
+ * Attach an indirection table to its queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param ind_tbl
+ *   Indirection table to attach.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ind_table_obj_attach(struct rte_eth_dev *dev,
+                         struct mlx5_ind_table_obj *ind_tbl)
+{
+       unsigned int i;
+       int ret;
+
+       ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues,
+                                       ind_tbl->queues_n, true);
+       if (ret != 0) {
+               DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
+                       dev->data->port_id, (void *)ind_tbl);
+               return ret;
+       }
+       for (i = 0; i < ind_tbl->queues_n; i++)
+               mlx5_rxq_get(dev, ind_tbl->queues[i]);
+       return 0;
+}
+
+/**
+ * Detach an indirection table from its queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param ind_tbl
+ *   Indirection table to detach.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ind_table_obj_detach(struct rte_eth_dev *dev,
+                         struct mlx5_ind_table_obj *ind_tbl)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ?
+                              log2above(ind_tbl->queues_n) :
+                              log2above(priv->config.ind_table_max_size);
+       unsigned int i;
+       int ret;
+
+       ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl);
+       if (ret != 0)
+               return ret;
+       MLX5_ASSERT(priv->obj_ops.ind_table_modify);
+       ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl);
+       if (ret != 0) {
+               DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
+                       dev->data->port_id, (void *)ind_tbl);
+               return ret;
+       }
+       for (i = 0; i < ind_tbl->queues_n; i++)
+               mlx5_rxq_release(dev, ind_tbl->queues[i]);
+       return ret;
+}
+
 int
 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry,
                   void *cb_ctx)
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3cbf5816a1..6295c6b3e9 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -14,6 +14,7 @@
 #include <mlx5_malloc.h>
 
 #include "mlx5.h"
+#include "mlx5_flow.h"
 #include "mlx5_mr.h"
 #include "mlx5_rx.h"
 #include "mlx5_tx.h"
@@ -1161,6 +1162,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
        mlx5_rxq_timestamp_set(dev);
        /* Set a mask and offset of scheduling on timestamp into Tx queues. */
        mlx5_txq_dynf_timestamp_set(dev);
+       /* Attach indirection table objects detached on port stop. */
+       ret = mlx5_action_handle_attach(dev);
+       if (ret) {
+               DRV_LOG(ERR,
+                       "port %u failed to attach indirect actions: %s",
+                       dev->data->port_id, rte_strerror(rte_errno));
+               goto error;
+       }
        /*
         * In non-cached mode, it only needs to start the default mreg copy
         * action and no flow created by application exists anymore.
@@ -1238,6 +1247,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
        /* All RX queue flags will be cleared in the flush interface. */
        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
        mlx5_flow_meter_rxq_flush(dev);
+       mlx5_action_handle_detach(dev);
        mlx5_rx_intr_vec_disable(dev);
        priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
        priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
-- 
2.25.1

Reply via email to