Split the device close tasks into parallel jobs distributed among the
configuration threads, performed after the virt-queues are stopped.
This accelerates the live migration (LM) process and
reduces its time by 50%.

Signed-off-by: Li Zhang <l...@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa.c         | 56 +++++++++++++++++++++++++--
 drivers/vdpa/mlx5/mlx5_vdpa.h         |  8 ++++
 drivers/vdpa/mlx5/mlx5_vdpa_cthread.c | 20 +++++++++-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c   | 14 +++++++
 4 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index e3b32fa087..d000854c08 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -245,7 +245,7 @@ mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
        return kern_mtu == vhost_mtu ? 0 : -1;
 }
 
-static void
+void
 mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
 {
        /* Clean pre-created resource in dev removal only. */
@@ -254,6 +254,26 @@ mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
        mlx5_vdpa_mem_dereg(priv);
 }
 
+static bool
+mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv)
+{
+       uint32_t timeout = 0;
+
+       /* Check and wait all close tasks done. */
+       while (__atomic_load_n(&priv->dev_close_progress,
+               __ATOMIC_RELAXED) != 0 && timeout < 1000) {
+               rte_delay_us_sleep(10000);
+               timeout++;
+       }
+       if (priv->dev_close_progress) {
+               DRV_LOG(ERR,
+               "Failed to wait close device tasks done vid %d.",
+               priv->vid);
+               return true;
+       }
+       return false;
+}
+
 static int
 mlx5_vdpa_dev_close(int vid)
 {
@@ -271,6 +291,27 @@ mlx5_vdpa_dev_close(int vid)
                ret |= mlx5_vdpa_lm_log(priv);
                priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
        }
+       if (priv->use_c_thread) {
+               if (priv->last_c_thrd_idx >=
+                       (conf_thread_mng.max_thrds - 1))
+                       priv->last_c_thrd_idx = 0;
+               else
+                       priv->last_c_thrd_idx++;
+               __atomic_store_n(&priv->dev_close_progress,
+                       1, __ATOMIC_RELAXED);
+               if (mlx5_vdpa_task_add(priv,
+                       priv->last_c_thrd_idx,
+                       MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
+                       NULL, NULL, NULL, 1)) {
+                       DRV_LOG(ERR,
+                       "Fail to add dev close task. ");
+                       goto single_thrd;
+               }
+               priv->state = MLX5_VDPA_STATE_PROBED;
+               DRV_LOG(INFO, "vDPA device %d was closed.", vid);
+               return ret;
+       }
+single_thrd:
        pthread_mutex_lock(&priv->steer_update_lock);
        mlx5_vdpa_steer_unset(priv);
        pthread_mutex_unlock(&priv->steer_update_lock);
@@ -278,10 +319,12 @@ mlx5_vdpa_dev_close(int vid)
        mlx5_vdpa_drain_cq(priv);
        if (priv->lm_mr.addr)
                mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
-       priv->state = MLX5_VDPA_STATE_PROBED;
        if (!priv->connected)
                mlx5_vdpa_dev_cache_clean(priv);
        priv->vid = 0;
+       __atomic_store_n(&priv->dev_close_progress, 0,
+               __ATOMIC_RELAXED);
+       priv->state = MLX5_VDPA_STATE_PROBED;
        DRV_LOG(INFO, "vDPA device %d was closed.", vid);
        return ret;
 }
@@ -302,6 +345,8 @@ mlx5_vdpa_dev_config(int vid)
                DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
                return -1;
        }
+       if (mlx5_vdpa_wait_dev_close_tasks_done(priv))
+               return -1;
        priv->vid = vid;
        priv->connected = true;
        if (mlx5_vdpa_mtu_set(priv))
@@ -444,8 +489,11 @@ mlx5_vdpa_dev_cleanup(int vid)
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
-       if (priv->state == MLX5_VDPA_STATE_PROBED)
+       if (priv->state == MLX5_VDPA_STATE_PROBED) {
+               if (priv->use_c_thread)
+                       mlx5_vdpa_wait_dev_close_tasks_done(priv);
                mlx5_vdpa_dev_cache_clean(priv);
+       }
        priv->connected = false;
        return 0;
 }
@@ -839,6 +887,8 @@ mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
 {
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
                mlx5_vdpa_dev_close(priv->vid);
+       if (priv->use_c_thread)
+               mlx5_vdpa_wait_dev_close_tasks_done(priv);
        mlx5_vdpa_release_dev_resources(priv);
        if (priv->vdev)
                rte_vdpa_unregister_device(priv->vdev);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index e08931719f..b6392b9d66 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -84,6 +84,7 @@ enum mlx5_vdpa_task_type {
        MLX5_VDPA_TASK_REG_MR = 1,
        MLX5_VDPA_TASK_SETUP_VIRTQ,
        MLX5_VDPA_TASK_STOP_VIRTQ,
+       MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
 };
 
 /* Generic task information and size must be multiple of 4B. */
@@ -206,6 +207,7 @@ struct mlx5_vdpa_priv {
        uint64_t features; /* Negotiated features. */
        uint16_t log_max_rqt_size;
        uint16_t last_c_thrd_idx;
+       uint16_t dev_close_progress;
        uint16_t num_mrs; /* Number of memory regions. */
        struct mlx5_vdpa_steer steer;
        struct mlx5dv_var *var;
@@ -578,4 +580,10 @@ mlx5_vdpa_c_thread_wait_bulk_tasks_done(uint32_t 
*remaining_cnt,
                uint32_t *err_cnt, uint32_t sleep_time);
 int
 mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index, bool reg_kick);
+void
+mlx5_vdpa_vq_destroy(struct mlx5_vdpa_virtq *virtq);
+void
+mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv);
+void
+mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv);
 #endif /* RTE_PMD_MLX5_VDPA_H_ */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
index 98369f0887..bb2279440b 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
@@ -63,7 +63,8 @@ mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv,
                task[i].type = task_type;
                task[i].remaining_cnt = remaining_cnt;
                task[i].err_cnt = err_cnt;
-               task[i].idx = data[i];
+               if (data)
+                       task[i].idx = data[i];
        }
        if (!mlx5_vdpa_c_thrd_ring_enqueue_bulk(rng, (void **)&task, num, NULL))
                return -1;
@@ -187,6 +188,23 @@ mlx5_vdpa_c_thread_handle(void *arg)
                            MLX5_VDPA_USED_RING_LEN(virtq->vq_size));
                        pthread_mutex_unlock(&virtq->virtq_lock);
                        break;
+               case MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT:
+                       mlx5_vdpa_virtq_unreg_intr_handle_all(priv);
+                       pthread_mutex_lock(&priv->steer_update_lock);
+                       mlx5_vdpa_steer_unset(priv);
+                       pthread_mutex_unlock(&priv->steer_update_lock);
+                       mlx5_vdpa_virtqs_release(priv);
+                       mlx5_vdpa_drain_cq(priv);
+                       if (priv->lm_mr.addr)
+                               mlx5_os_wrapped_mkey_destroy(
+                                       &priv->lm_mr);
+                       if (!priv->connected)
+                               mlx5_vdpa_dev_cache_clean(priv);
+                       priv->vid = 0;
+                       __atomic_store_n(
+                               &priv->dev_close_progress, 0,
+                               __ATOMIC_RELAXED);
+                       break;
                default:
                        DRV_LOG(ERR, "Invalid vdpa task type %d.",
                        task.type);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index db05220e76..a08c854b14 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -102,6 +102,20 @@ mlx5_vdpa_virtq_unregister_intr_handle(struct 
mlx5_vdpa_virtq *virtq)
        virtq->intr_handle = NULL;
 }
 
+void
+mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv)
+{
+       uint32_t i;
+       struct mlx5_vdpa_virtq *virtq;
+
+       for (i = 0; i < priv->nr_virtqs; i++) {
+               virtq = &priv->virtqs[i];
+               pthread_mutex_lock(&virtq->virtq_lock);
+               mlx5_vdpa_virtq_unregister_intr_handle(virtq);
+               pthread_mutex_unlock(&virtq->virtq_lock);
+       }
+}
+
 /* Release cached VQ resources. */
 void
 mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv)
-- 
2.31.1

Reply via email to