Split the pre-creation of the virt-queue (virtq) resources
between the configuration threads.
Also re-prepare the pre-created virt-queue resources
after virtq destruction.
This accelerates the live migration (LM) process and reduces
its time by 30%.

Signed-off-by: Li Zhang <l...@nvidia.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa.c         | 115 ++++++++++++++++++++------
 drivers/vdpa/mlx5/mlx5_vdpa.h         |  12 ++-
 drivers/vdpa/mlx5/mlx5_vdpa_cthread.c |  15 +++-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c   | 111 +++++++++++++++++++++----
 4 files changed, 208 insertions(+), 45 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index f006a9cd3f..c5d82872c7 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -275,23 +275,18 @@ mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv)
 }
 
 static int
-mlx5_vdpa_dev_close(int vid)
+_internal_mlx5_vdpa_dev_close(struct mlx5_vdpa_priv *priv,
+               bool release_resource)
 {
-       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
-       struct mlx5_vdpa_priv *priv =
-               mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        int ret = 0;
+       int vid = priv->vid;
 
-       if (priv == NULL) {
-               DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
-               return -1;
-       }
        mlx5_vdpa_cqe_event_unset(priv);
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
                ret |= mlx5_vdpa_lm_log(priv);
                priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
        }
-       if (priv->use_c_thread) {
+       if (priv->use_c_thread && !release_resource) {
                if (priv->last_c_thrd_idx >=
                        (conf_thread_mng.max_thrds - 1))
                        priv->last_c_thrd_idx = 0;
@@ -315,7 +310,7 @@ mlx5_vdpa_dev_close(int vid)
        pthread_mutex_lock(&priv->steer_update_lock);
        mlx5_vdpa_steer_unset(priv);
        pthread_mutex_unlock(&priv->steer_update_lock);
-       mlx5_vdpa_virtqs_release(priv);
+       mlx5_vdpa_virtqs_release(priv, release_resource);
        mlx5_vdpa_drain_cq(priv);
        if (priv->lm_mr.addr)
                mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
@@ -329,6 +324,24 @@ mlx5_vdpa_dev_close(int vid)
        return ret;
 }
 
+static int
+mlx5_vdpa_dev_close(int vid)
+{
+       struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+       struct mlx5_vdpa_priv *priv;
+
+       if (!vdev) {
+               DRV_LOG(ERR, "Invalid vDPA device.");
+               return -1;
+       }
+       priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+       if (priv == NULL) {
+               DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+               return -1;
+       }
+       return _internal_mlx5_vdpa_dev_close(priv, false);
+}
+
 static int
 mlx5_vdpa_dev_config(int vid)
 {
@@ -624,11 +637,33 @@ mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
                priv->queue_size);
 }
 
+void
+mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv)
+{
+       uint32_t max_queues, index;
+       struct mlx5_vdpa_virtq *virtq;
+
+       if (!priv->queues || !priv->queue_size)
+               return;
+       max_queues = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
+               (priv->queues * 2) : (priv->caps.max_num_virtio_queues);
+       if (mlx5_vdpa_is_modify_virtq_supported(priv))
+               mlx5_vdpa_steer_unset(priv);
+       for (index = 0; index < max_queues; ++index) {
+               virtq = &priv->virtqs[index];
+               if (virtq->virtq) {
+                       pthread_mutex_lock(&virtq->virtq_lock);
+                       mlx5_vdpa_virtq_unset(virtq);
+                       pthread_mutex_unlock(&virtq->virtq_lock);
+               }
+       }
+}
+
 static int
 mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
 {
-       uint32_t max_queues;
-       uint32_t index;
+       uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0;
+       uint32_t max_queues, index, thrd_idx, data[1];
        struct mlx5_vdpa_virtq *virtq;
 
        for (index = 0; index < priv->caps.max_num_virtio_queues;
@@ -640,25 +675,53 @@ mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
                return 0;
        max_queues = (priv->queues < priv->caps.max_num_virtio_queues) ?
                (priv->queues * 2) : (priv->caps.max_num_virtio_queues);
-       for (index = 0; index < max_queues; ++index)
-               if (mlx5_vdpa_virtq_single_resource_prepare(priv,
-                       index))
+       if (priv->use_c_thread) {
+               uint32_t main_task_idx[max_queues];
+
+               for (index = 0; index < max_queues; ++index) {
+                       thrd_idx = index % (conf_thread_mng.max_thrds + 1);
+                       if (!thrd_idx) {
+                               main_task_idx[task_num] = index;
+                               task_num++;
+                               continue;
+                       }
+                       thrd_idx = priv->last_c_thrd_idx + 1;
+                       if (thrd_idx >= conf_thread_mng.max_thrds)
+                               thrd_idx = 0;
+                       priv->last_c_thrd_idx = thrd_idx;
+                       data[0] = index;
+                       if (mlx5_vdpa_task_add(priv, thrd_idx,
+                               MLX5_VDPA_TASK_PREPARE_VIRTQ,
+                               &remaining_cnt, &err_cnt,
+                               (void **)&data, 1)) {
+                               DRV_LOG(ERR, "Fail to add "
+                               "task prepare virtq (%d).", index);
+                               main_task_idx[task_num] = index;
+                               task_num++;
+                       }
+               }
+               for (index = 0; index < task_num; ++index)
+                       if (mlx5_vdpa_virtq_single_resource_prepare(priv,
+                               main_task_idx[index]))
+                               goto error;
+               if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
+                       &err_cnt, 2000)) {
+                       DRV_LOG(ERR,
+                       "Failed to wait virt-queue prepare tasks ready.");
                        goto error;
+               }
+       } else {
+               for (index = 0; index < max_queues; ++index)
+                       if (mlx5_vdpa_virtq_single_resource_prepare(priv,
+                               index))
+                               goto error;
+       }
        if (mlx5_vdpa_is_modify_virtq_supported(priv))
                if (mlx5_vdpa_steer_update(priv, true))
                        goto error;
        return 0;
 error:
-       for (index = 0; index < max_queues; ++index) {
-               virtq = &priv->virtqs[index];
-               if (virtq->virtq) {
-                       pthread_mutex_lock(&virtq->virtq_lock);
-                       mlx5_vdpa_virtq_unset(virtq);
-                       pthread_mutex_unlock(&virtq->virtq_lock);
-               }
-       }
-       if (mlx5_vdpa_is_modify_virtq_supported(priv))
-               mlx5_vdpa_steer_unset(priv);
+       mlx5_vdpa_prepare_virtq_destroy(priv);
        return -1;
 }
 
@@ -860,7 +923,7 @@ static void
 mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
 {
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
-               mlx5_vdpa_dev_close(priv->vid);
+               _internal_mlx5_vdpa_dev_close(priv, true);
        if (priv->use_c_thread)
                mlx5_vdpa_wait_dev_close_tasks_done(priv);
        mlx5_vdpa_release_dev_resources(priv);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index f353db62ac..dc4dfba5ed 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -85,6 +85,7 @@ enum mlx5_vdpa_task_type {
        MLX5_VDPA_TASK_SETUP_VIRTQ,
        MLX5_VDPA_TASK_STOP_VIRTQ,
        MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
+       MLX5_VDPA_TASK_PREPARE_VIRTQ,
 };
 
 /* Generic task information and size must be multiple of 4B. */
@@ -128,6 +129,9 @@ struct mlx5_vdpa_virtq {
        uint32_t configured:1;
        uint32_t enable:1;
        uint32_t stopped:1;
+       uint32_t rx_csum:1;
+       uint32_t virtio_version_1_0:1;
+       uint32_t event_mode:3;
        uint32_t version;
        pthread_mutex_t virtq_lock;
        struct mlx5_vdpa_priv *priv;
@@ -355,8 +359,12 @@ void mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv);
  *
  * @param[in] priv
  *   The vdpa driver private structure.
+ * @param[in] release_resource
+ *   Whether to release the virtq resources without re-preparing them.
  */
-void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
+void
+mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv,
+               bool release_resource);
 
 /**
  * Cleanup cached resources of all virtqs.
@@ -595,4 +603,6 @@ int
 mlx5_vdpa_qps2rst2rts(struct mlx5_vdpa_event_qp *eqp);
 void
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq);
+void
+mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv);
 #endif /* RTE_PMD_MLX5_VDPA_H_ */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
index bb2279440b..6e6624e5a3 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
@@ -153,6 +153,7 @@ mlx5_vdpa_c_thread_handle(void *arg)
                                __atomic_fetch_add(
                                        task.err_cnt, 1, __ATOMIC_RELAXED);
                        }
+                       virtq->enable = 1;
                        pthread_mutex_unlock(&virtq->virtq_lock);
                        break;
                case MLX5_VDPA_TASK_STOP_VIRTQ:
@@ -193,7 +194,7 @@ mlx5_vdpa_c_thread_handle(void *arg)
                        pthread_mutex_lock(&priv->steer_update_lock);
                        mlx5_vdpa_steer_unset(priv);
                        pthread_mutex_unlock(&priv->steer_update_lock);
-                       mlx5_vdpa_virtqs_release(priv);
+                       mlx5_vdpa_virtqs_release(priv, false);
                        mlx5_vdpa_drain_cq(priv);
                        if (priv->lm_mr.addr)
                                mlx5_os_wrapped_mkey_destroy(
@@ -205,6 +206,18 @@ mlx5_vdpa_c_thread_handle(void *arg)
                                &priv->dev_close_progress, 0,
                                __ATOMIC_RELAXED);
                        break;
+               case MLX5_VDPA_TASK_PREPARE_VIRTQ:
+                       ret = mlx5_vdpa_virtq_single_resource_prepare(
+                                       priv, task.idx);
+                       if (ret) {
+                               DRV_LOG(ERR,
+                               "Failed to prepare virtq %d.",
+                               task.idx);
+                               __atomic_fetch_add(
+                               task.err_cnt, 1,
+                               __ATOMIC_RELAXED);
+                       }
+                       break;
                default:
                        DRV_LOG(ERR, "Invalid vdpa task type %d.",
                        task.type);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 20ce382487..d4dd73f861 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -116,18 +116,29 @@ mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv)
        }
 }
 
+static void
+mlx5_vdpa_vq_destroy(struct mlx5_vdpa_virtq *virtq)
+{
+       /* Clean pre-created resource in dev removal only */
+       claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
+       virtq->index = 0;
+       virtq->virtq = NULL;
+       virtq->configured = 0;
+}
+
 /* Release cached VQ resources. */
 void
 mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv)
 {
        unsigned int i, j;
 
+       mlx5_vdpa_steer_unset(priv);
        for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
                struct mlx5_vdpa_virtq *virtq = &priv->virtqs[i];
 
-               if (virtq->index != i)
-                       continue;
                pthread_mutex_lock(&virtq->virtq_lock);
+               if (virtq->virtq)
+                       mlx5_vdpa_vq_destroy(virtq);
                for (j = 0; j < RTE_DIM(virtq->umems); ++j) {
                        if (virtq->umems[j].obj) {
                                claim_zero(mlx5_glue->devx_umem_dereg
@@ -157,29 +168,37 @@ mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
                if (ret)
                        DRV_LOG(WARNING, "Failed to stop virtq %d.",
                                virtq->index);
-               claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
-               virtq->index = 0;
-               virtq->virtq = NULL;
-               virtq->configured = 0;
        }
+       mlx5_vdpa_vq_destroy(virtq);
        virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED;
 }
 
 void
-mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
+mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv,
+       bool release_resource)
 {
        struct mlx5_vdpa_virtq *virtq;
-       int i;
-
-       for (i = 0; i < priv->nr_virtqs; i++) {
+       uint32_t i, max_virtq, valid_vq_num;
+
+       valid_vq_num = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
+               (priv->queues * 2) : priv->caps.max_num_virtio_queues;
+       max_virtq = (release_resource &&
+               (valid_vq_num) > priv->nr_virtqs) ?
+               (valid_vq_num) : priv->nr_virtqs;
+       for (i = 0; i < max_virtq; i++) {
                virtq = &priv->virtqs[i];
                pthread_mutex_lock(&virtq->virtq_lock);
                mlx5_vdpa_virtq_unset(virtq);
-               if (i < (priv->queues * 2))
+               virtq->enable = 0;
+               if (!release_resource && i < valid_vq_num)
                        mlx5_vdpa_virtq_single_resource_prepare(
                                        priv, i);
                pthread_mutex_unlock(&virtq->virtq_lock);
        }
+       if (!release_resource && priv->queues &&
+               mlx5_vdpa_is_modify_virtq_supported(priv))
+               if (mlx5_vdpa_steer_update(priv, true))
+                       mlx5_vdpa_steer_unset(priv);
        priv->features = 0;
        priv->nr_virtqs = 0;
 }
@@ -455,6 +474,9 @@ mlx5_vdpa_virtq_single_resource_prepare(struct mlx5_vdpa_priv *priv,
                virtq->priv = priv;
                if (!virtq->virtq)
                        return true;
+               virtq->rx_csum = attr.rx_csum;
+               virtq->virtio_version_1_0 = attr.virtio_version_1_0;
+               virtq->event_mode = attr.event_mode;
        }
        return false;
 }
@@ -538,6 +560,9 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index, bool reg_kick)
                goto error;
        }
        claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
+       virtq->rx_csum = attr.rx_csum;
+       virtq->virtio_version_1_0 = attr.virtio_version_1_0;
+       virtq->event_mode = attr.event_mode;
        virtq->configured = 1;
        rte_spinlock_lock(&priv->db_lock);
        rte_write32(virtq->index, priv->virtq_db_addr);
@@ -629,6 +654,31 @@ mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
        return 0;
 }
 
+static bool
+mlx5_vdpa_is_pre_created_vq_mismatch(struct mlx5_vdpa_priv *priv,
+               struct mlx5_vdpa_virtq *virtq)
+{
+       struct rte_vhost_vring vq;
+       uint32_t event_mode;
+
+       if (virtq->rx_csum !=
+               !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)))
+               return true;
+       if (virtq->virtio_version_1_0 !=
+               !!(priv->features & (1ULL << VIRTIO_F_VERSION_1)))
+               return true;
+       if (rte_vhost_get_vhost_vring(priv->vid, virtq->index, &vq))
+               return true;
+       if (vq.size != virtq->vq_size)
+               return true;
+       event_mode = vq.callfd != -1 || !(priv->caps.event_mode &
+               (1 << MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
+               MLX5_VIRTQ_EVENT_MODE_QP : MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
+       if (virtq->event_mode != event_mode)
+               return true;
+       return false;
+}
+
 int
 mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
 {
@@ -664,6 +714,15 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
                        virtq = &priv->virtqs[i];
                        if (!virtq->enable)
                                continue;
+                       if (priv->queues && virtq->virtq) {
+                               if (mlx5_vdpa_is_pre_created_vq_mismatch(priv, virtq)) {
+                                       mlx5_vdpa_prepare_virtq_destroy(priv);
+                                       i = 0;
+                                       virtq = &priv->virtqs[i];
+                                       if (!virtq->enable)
+                                               continue;
+                               }
+                       }
                        thrd_idx = i % (conf_thread_mng.max_thrds + 1);
                        if (!thrd_idx) {
                                main_task_idx[task_num] = i;
@@ -693,6 +752,7 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
                                pthread_mutex_unlock(&virtq->virtq_lock);
                                goto error;
                        }
+                       virtq->enable = 1;
                        pthread_mutex_unlock(&virtq->virtq_lock);
                }
                if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
@@ -724,20 +784,32 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
        } else {
                for (i = 0; i < nr_vring; i++) {
                        virtq = &priv->virtqs[i];
+                       if (!virtq->enable)
+                               continue;
+                       if (priv->queues && virtq->virtq) {
+                               if (mlx5_vdpa_is_pre_created_vq_mismatch(priv,
+                                       virtq)) {
+                                       mlx5_vdpa_prepare_virtq_destroy(
+                                       priv);
+                                       i = 0;
+                                       virtq = &priv->virtqs[i];
+                                       if (!virtq->enable)
+                                               continue;
+                               }
+                       }
                        pthread_mutex_lock(&virtq->virtq_lock);
-                       if (virtq->enable) {
-                               if (mlx5_vdpa_virtq_setup(priv, i, true)) {
-                                       pthread_mutex_unlock(
+                       if (mlx5_vdpa_virtq_setup(priv, i, true)) {
+                               pthread_mutex_unlock(
                                                &virtq->virtq_lock);
-                                       goto error;
-                               }
+                               goto error;
                        }
+                       virtq->enable = 1;
                        pthread_mutex_unlock(&virtq->virtq_lock);
                }
        }
        return 0;
 error:
-       mlx5_vdpa_virtqs_release(priv);
+       mlx5_vdpa_virtqs_release(priv, true);
        return -1;
 }
 
@@ -795,6 +867,11 @@ mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
                                        "for virtq %d.", index);
                }
                mlx5_vdpa_virtq_unset(virtq);
+       } else {
+               if (virtq->virtq &&
+                       mlx5_vdpa_is_pre_created_vq_mismatch(priv, virtq))
+                       DRV_LOG(WARNING,
+                       "Configuration mismatch dummy virtq %d.", index);
        }
        if (enable) {
                ret = mlx5_vdpa_virtq_setup(priv, index, true);
-- 
2.31.1

Reply via email to