Split the virt-queue resource creation between the configuration threads. The pre-created virt-queue resources also need to be restored after virtq destruction. This accelerates the live migration (LM) process and reduces its time by 30%.
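To illustrate the split, the sketch below mirrors the index-distribution loop added to mlx5_vdpa_virtq_resource_prepare(): every (max_thrds + 1)-th virtq index is kept on the main thread, which prepares those queues itself, while the remaining indices are handed round-robin to the configuration threads. This is a standalone sketch only, not driver code; MAX_THRDS, MAX_QUEUES and the printf() reporting are hypothetical stand-ins for conf_thread_mng.max_thrds, the number of virtqs to prepare, and mlx5_vdpa_task_add().

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_THRDS  3 /* stand-in for conf_thread_mng.max_thrds */
    #define MAX_QUEUES 8 /* stand-in for the number of virtqs to prepare */

    int
    main(void)
    {
        uint32_t main_task_idx[MAX_QUEUES];
        uint32_t task_num = 0, last_thrd_idx = 0;
        uint32_t index, thrd_idx;

        for (index = 0; index < MAX_QUEUES; ++index) {
            /* Every (MAX_THRDS + 1)-th index stays on the main
             * thread, so it prepares queues in parallel with the
             * configuration threads instead of only waiting.
             */
            if (index % (MAX_THRDS + 1) == 0) {
                main_task_idx[task_num++] = index;
                continue;
            }
            /* Remaining indices go round-robin to the workers;
             * here the driver queues a MLX5_VDPA_TASK_PREPARE_VIRTQ
             * task instead of printing.
             */
            thrd_idx = last_thrd_idx + 1;
            if (thrd_idx >= MAX_THRDS)
                thrd_idx = 0;
            last_thrd_idx = thrd_idx;
            printf("virtq %u -> config thread %u\n", index, thrd_idx);
        }
        for (index = 0; index < task_num; ++index)
            printf("virtq %u -> main thread\n", main_task_idx[index]);
        return 0;
    }

With all (max_thrds + 1) contexts preparing queues concurrently, the serial per-queue preparation time is what shrinks, which is where the reported LM time reduction comes from.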
Signed-off-by: Li Zhang <l...@nvidia.com>
Acked-by: Matan Azrad <ma...@nvidia.com>
---
 doc/guides/rel_notes/release_22_07.rst |   1 +
 drivers/vdpa/mlx5/mlx5_vdpa.c          | 115 +++++++++++++++++++------
 drivers/vdpa/mlx5/mlx5_vdpa.h          |  12 ++-
 drivers/vdpa/mlx5/mlx5_vdpa_cthread.c  |  15 +++-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c    | 111 ++++++++++++++++++++----
 5 files changed, 209 insertions(+), 45 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index 2056cd9ee7..e1a9796e5c 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -178,6 +178,7 @@ New Features
   * **Updated Nvidia mlx5 vDPA driver.**
 
   * Added new devargs ``queue_size`` and ``queues`` to allow prior creation of virtq resources.
+  * Added new devarg ``max_conf_threads`` to define the number of management threads used to parallelize configuration.
 
 Removed Items
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index f006a9cd3f..c5d82872c7 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -275,23 +275,18 @@ mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv)
 }
 
 static int
-mlx5_vdpa_dev_close(int vid)
+_internal_mlx5_vdpa_dev_close(struct mlx5_vdpa_priv *priv,
+		bool release_resource)
 {
-	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
-	struct mlx5_vdpa_priv *priv =
-		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
 	int ret = 0;
+	int vid = priv->vid;
 
-	if (priv == NULL) {
-		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
-		return -1;
-	}
 	mlx5_vdpa_cqe_event_unset(priv);
 	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
 		ret |= mlx5_vdpa_lm_log(priv);
 		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
 	}
-	if (priv->use_c_thread) {
+	if (priv->use_c_thread && !release_resource) {
 		if (priv->last_c_thrd_idx >=
 			(conf_thread_mng.max_thrds - 1))
 			priv->last_c_thrd_idx = 0;
@@ -315,7 +310,7 @@ mlx5_vdpa_dev_close(int vid)
 	pthread_mutex_lock(&priv->steer_update_lock);
 	mlx5_vdpa_steer_unset(priv);
 	pthread_mutex_unlock(&priv->steer_update_lock);
-	mlx5_vdpa_virtqs_release(priv);
+	mlx5_vdpa_virtqs_release(priv, release_resource);
 	mlx5_vdpa_drain_cq(priv);
 	if (priv->lm_mr.addr)
 		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
@@ -329,6 +324,24 @@ mlx5_vdpa_dev_close(int vid)
 	return ret;
 }
 
+static int
+mlx5_vdpa_dev_close(int vid)
+{
+	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
+	struct mlx5_vdpa_priv *priv;
+
+	if (!vdev) {
+		DRV_LOG(ERR, "Invalid vDPA device.");
+		return -1;
+	}
+	priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
+		return -1;
+	}
+	return _internal_mlx5_vdpa_dev_close(priv, false);
+}
+
 static int
 mlx5_vdpa_dev_config(int vid)
 {
@@ -624,11 +637,33 @@ mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
 			priv->queue_size);
 }
 
+void
+mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv)
+{
+	uint32_t max_queues, index;
+	struct mlx5_vdpa_virtq *virtq;
+
+	if (!priv->queues || !priv->queue_size)
+		return;
+	max_queues = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
+		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
+	if (mlx5_vdpa_is_modify_virtq_supported(priv))
+		mlx5_vdpa_steer_unset(priv);
+	for (index = 0; index < max_queues; ++index) {
+		virtq = &priv->virtqs[index];
+		if (virtq->virtq) {
+			pthread_mutex_lock(&virtq->virtq_lock);
+			mlx5_vdpa_virtq_unset(virtq);
+			pthread_mutex_unlock(&virtq->virtq_lock);
+		}
+	}
+}
+
 static int
 mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
 {
-	uint32_t max_queues;
-	uint32_t index;
+	uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0;
+	uint32_t max_queues, index, thrd_idx, data[1];
 	struct mlx5_vdpa_virtq *virtq;
 
 	for (index = 0; index < priv->caps.max_num_virtio_queues;
@@ -640,25 +675,53 @@ mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
 		return 0;
 	max_queues = (priv->queues < priv->caps.max_num_virtio_queues) ?
 		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
-	for (index = 0; index < max_queues; ++index)
-		if (mlx5_vdpa_virtq_single_resource_prepare(priv,
-			index))
+	if (priv->use_c_thread) {
+		uint32_t main_task_idx[max_queues];
+
+		for (index = 0; index < max_queues; ++index) {
+			thrd_idx = index % (conf_thread_mng.max_thrds + 1);
+			if (!thrd_idx) {
+				main_task_idx[task_num] = index;
+				task_num++;
+				continue;
+			}
+			thrd_idx = priv->last_c_thrd_idx + 1;
+			if (thrd_idx >= conf_thread_mng.max_thrds)
+				thrd_idx = 0;
+			priv->last_c_thrd_idx = thrd_idx;
+			data[0] = index;
+			if (mlx5_vdpa_task_add(priv, thrd_idx,
+				MLX5_VDPA_TASK_PREPARE_VIRTQ,
+				&remaining_cnt, &err_cnt,
+				(void **)&data, 1)) {
+				DRV_LOG(ERR, "Failed to add "
+				"task prepare virtq (%d).", index);
+				main_task_idx[task_num] = index;
+				task_num++;
+			}
+		}
+		for (index = 0; index < task_num; ++index)
+			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
+				main_task_idx[index]))
+				goto error;
+		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
+			&err_cnt, 2000)) {
+			DRV_LOG(ERR,
+			"Failed to wait for virt-queue prepare tasks to complete.");
 			goto error;
+		}
+	} else {
+		for (index = 0; index < max_queues; ++index)
+			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
+				index))
+				goto error;
+	}
 	if (mlx5_vdpa_is_modify_virtq_supported(priv))
 		if (mlx5_vdpa_steer_update(priv, true))
 			goto error;
 	return 0;
 error:
-	for (index = 0; index < max_queues; ++index) {
-		virtq = &priv->virtqs[index];
-		if (virtq->virtq) {
-			pthread_mutex_lock(&virtq->virtq_lock);
-			mlx5_vdpa_virtq_unset(virtq);
-			pthread_mutex_unlock(&virtq->virtq_lock);
-		}
-	}
-	if (mlx5_vdpa_is_modify_virtq_supported(priv))
-		mlx5_vdpa_steer_unset(priv);
+	mlx5_vdpa_prepare_virtq_destroy(priv);
 	return -1;
 }
 
@@ -860,7 +923,7 @@ static void
 mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
 {
 	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
-		mlx5_vdpa_dev_close(priv->vid);
+		_internal_mlx5_vdpa_dev_close(priv, true);
 	if (priv->use_c_thread)
 		mlx5_vdpa_wait_dev_close_tasks_done(priv);
 	mlx5_vdpa_release_dev_resources(priv);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index f353db62ac..dc4dfba5ed 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -85,6 +85,7 @@ enum mlx5_vdpa_task_type {
 	MLX5_VDPA_TASK_SETUP_VIRTQ,
 	MLX5_VDPA_TASK_STOP_VIRTQ,
 	MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
+	MLX5_VDPA_TASK_PREPARE_VIRTQ,
 };
 
 /* Generic task information and size must be multiple of 4B.
 */
@@ -128,6 +129,9 @@ struct mlx5_vdpa_virtq {
 	uint32_t configured:1;
 	uint32_t enable:1;
 	uint32_t stopped:1;
+	uint32_t rx_csum:1;
+	uint32_t virtio_version_1_0:1;
+	uint32_t event_mode:3;
 	uint32_t version;
 	pthread_mutex_t virtq_lock;
 	struct mlx5_vdpa_priv *priv;
@@ -355,8 +359,12 @@ void mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv);
  *
  * @param[in] priv
  *   The vdpa driver private structure.
+ * @param[in] release_resource
+ *   If true, release the virtq resources without re-preparing them.
  */
-void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
+void
+mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv,
+		bool release_resource);
 
 /**
  * Cleanup cached resources of all virtqs.
@@ -595,4 +603,6 @@ int
 mlx5_vdpa_qps2rst2rts(struct mlx5_vdpa_event_qp *eqp);
 void
 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq);
+void
+mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv);
 #endif /* RTE_PMD_MLX5_VDPA_H_ */
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
index bb2279440b..6e6624e5a3 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c
@@ -153,6 +153,7 @@ mlx5_vdpa_c_thread_handle(void *arg)
 				__atomic_fetch_add(
 					task.err_cnt, 1, __ATOMIC_RELAXED);
 			}
+			virtq->enable = 1;
 			pthread_mutex_unlock(&virtq->virtq_lock);
 			break;
 		case MLX5_VDPA_TASK_STOP_VIRTQ:
@@ -193,7 +194,7 @@ mlx5_vdpa_c_thread_handle(void *arg)
 			pthread_mutex_lock(&priv->steer_update_lock);
 			mlx5_vdpa_steer_unset(priv);
 			pthread_mutex_unlock(&priv->steer_update_lock);
-			mlx5_vdpa_virtqs_release(priv);
+			mlx5_vdpa_virtqs_release(priv, false);
 			mlx5_vdpa_drain_cq(priv);
 			if (priv->lm_mr.addr)
 				mlx5_os_wrapped_mkey_destroy(
@@ -205,6 +206,18 @@ mlx5_vdpa_c_thread_handle(void *arg)
 				&priv->dev_close_progress, 0,
 				__ATOMIC_RELAXED);
 			break;
+		case MLX5_VDPA_TASK_PREPARE_VIRTQ:
+			ret = mlx5_vdpa_virtq_single_resource_prepare(
+					priv, task.idx);
+			if (ret) {
+				DRV_LOG(ERR,
+				"Failed to prepare virtq %d.",
+				task.idx);
+				__atomic_fetch_add(
+				task.err_cnt, 1,
+				__ATOMIC_RELAXED);
+			}
+			break;
 		default:
 			DRV_LOG(ERR, "Invalid vdpa task type %d.",
 				task.type);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 58466b3c0b..06a5c26947 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -116,18 +116,29 @@ mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv)
 	}
 }
 
+static void
+mlx5_vdpa_vq_destroy(struct mlx5_vdpa_virtq *virtq)
+{
+	/* Clean pre-created resource in dev removal only. */
+	claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
+	virtq->index = 0;
+	virtq->virtq = NULL;
+	virtq->configured = 0;
+}
+
 /* Release cached VQ resources.
 */
 void
 mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv)
 {
 	unsigned int i, j;
 
+	mlx5_vdpa_steer_unset(priv);
 	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
 		struct mlx5_vdpa_virtq *virtq = &priv->virtqs[i];
 
-		if (virtq->index != i)
-			continue;
 		pthread_mutex_lock(&virtq->virtq_lock);
+		if (virtq->virtq)
+			mlx5_vdpa_vq_destroy(virtq);
 		for (j = 0; j < RTE_DIM(virtq->umems); ++j) {
 			if (virtq->umems[j].obj) {
 				claim_zero(mlx5_glue->devx_umem_dereg
@@ -157,29 +168,37 @@ mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
 		if (ret)
 			DRV_LOG(WARNING, "Failed to stop virtq %d.",
 				virtq->index);
-		claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
-		virtq->index = 0;
-		virtq->virtq = NULL;
-		virtq->configured = 0;
 	}
+	mlx5_vdpa_vq_destroy(virtq);
 	virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED;
 }
 
 void
-mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
+mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv,
+		bool release_resource)
 {
 	struct mlx5_vdpa_virtq *virtq;
-	int i;
-
-	for (i = 0; i < priv->nr_virtqs; i++) {
+	uint32_t i, max_virtq, valid_vq_num;
+
+	valid_vq_num = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
+		(priv->queues * 2) : priv->caps.max_num_virtio_queues;
+	max_virtq = (release_resource &&
+		(valid_vq_num) > priv->nr_virtqs) ?
+		(valid_vq_num) : priv->nr_virtqs;
+	for (i = 0; i < max_virtq; i++) {
 		virtq = &priv->virtqs[i];
 		pthread_mutex_lock(&virtq->virtq_lock);
 		mlx5_vdpa_virtq_unset(virtq);
-		if (i < (priv->queues * 2))
+		virtq->enable = 0;
+		if (!release_resource && i < valid_vq_num)
 			mlx5_vdpa_virtq_single_resource_prepare(
 					priv, i);
 		pthread_mutex_unlock(&virtq->virtq_lock);
 	}
+	if (!release_resource && priv->queues &&
+		mlx5_vdpa_is_modify_virtq_supported(priv))
+		if (mlx5_vdpa_steer_update(priv, true))
+			mlx5_vdpa_steer_unset(priv);
 	priv->features = 0;
 	priv->nr_virtqs = 0;
 }
@@ -455,6 +474,9 @@ mlx5_vdpa_virtq_single_resource_prepare(struct mlx5_vdpa_priv *priv,
 		virtq->priv = priv;
 		if (!virtq->virtq)
 			return true;
+		virtq->rx_csum = attr.rx_csum;
+		virtq->virtio_version_1_0 = attr.virtio_version_1_0;
+		virtq->event_mode = attr.event_mode;
 	}
 	return false;
 }
@@ -538,6 +560,9 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index, bool reg_kick)
 		goto error;
 	}
 	claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
+	virtq->rx_csum = attr.rx_csum;
+	virtq->virtio_version_1_0 = attr.virtio_version_1_0;
+	virtq->event_mode = attr.event_mode;
 	virtq->configured = 1;
 	rte_spinlock_lock(&priv->db_lock);
 	rte_write32(virtq->index, priv->virtq_db_addr);
@@ -629,6 +654,31 @@ mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
 	return 0;
 }
 
+static bool
+mlx5_vdpa_is_pre_created_vq_mismatch(struct mlx5_vdpa_priv *priv,
+		struct mlx5_vdpa_virtq *virtq)
+{
+	struct rte_vhost_vring vq;
+	uint32_t event_mode;
+
+	if (virtq->rx_csum !=
+		!!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)))
+		return true;
+	if (virtq->virtio_version_1_0 !=
+		!!(priv->features & (1ULL << VIRTIO_F_VERSION_1)))
+		return true;
+	if (rte_vhost_get_vhost_vring(priv->vid, virtq->index, &vq))
+		return true;
+	if (vq.size != virtq->vq_size)
+		return true;
+	event_mode = vq.callfd != -1 || !(priv->caps.event_mode &
+		(1 << MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
+		MLX5_VIRTQ_EVENT_MODE_QP : MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
+	if (virtq->event_mode != event_mode)
+		return true;
+	return false;
+}
+
 int
 mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
 {
@@ -664,6 +714,15 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
 		virtq = &priv->virtqs[i];
 		if (!virtq->enable)
 			continue;
+		if (priv->queues && virtq->virtq) {
+			if (mlx5_vdpa_is_pre_created_vq_mismatch(priv, virtq)) {
+				mlx5_vdpa_prepare_virtq_destroy(priv);
+				i = 0;
+				virtq = &priv->virtqs[i];
+				if (!virtq->enable)
+					continue;
+			}
+		}
 		thrd_idx = i % (conf_thread_mng.max_thrds + 1);
 		if (!thrd_idx) {
 			main_task_idx[task_num] = i;
@@ -693,6 +752,7 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
 				pthread_mutex_unlock(&virtq->virtq_lock);
 				goto error;
 			}
+			virtq->enable = 1;
 			pthread_mutex_unlock(&virtq->virtq_lock);
 		}
 		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
@@ -724,20 +784,32 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
 	} else {
 		for (i = 0; i < nr_vring; i++) {
 			virtq = &priv->virtqs[i];
+			if (!virtq->enable)
+				continue;
+			if (priv->queues && virtq->virtq) {
+				if (mlx5_vdpa_is_pre_created_vq_mismatch(priv,
+					virtq)) {
+					mlx5_vdpa_prepare_virtq_destroy(
+					priv);
+					i = 0;
+					virtq = &priv->virtqs[i];
+					if (!virtq->enable)
+						continue;
+				}
+			}
 			pthread_mutex_lock(&virtq->virtq_lock);
-			if (virtq->enable) {
-				if (mlx5_vdpa_virtq_setup(priv, i, true)) {
-					pthread_mutex_unlock(
+			if (mlx5_vdpa_virtq_setup(priv, i, true)) {
+				pthread_mutex_unlock(
 						&virtq->virtq_lock);
-					goto error;
-				}
+				goto error;
 			}
+			virtq->enable = 1;
 			pthread_mutex_unlock(&virtq->virtq_lock);
 		}
 	}
 	return 0;
 error:
-	mlx5_vdpa_virtqs_release(priv);
+	mlx5_vdpa_virtqs_release(priv, true);
 	return -1;
 }
@@ -795,6 +867,11 @@ mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
 					"for virtq %d.", index);
 		}
 		mlx5_vdpa_virtq_unset(virtq);
+	} else {
+		if (virtq->virtq &&
+			mlx5_vdpa_is_pre_created_vq_mismatch(priv, virtq))
+			DRV_LOG(WARNING,
+			"Configuration mismatch for dummy virtq %d.", index);
 	}
 	if (enable) {
 		ret = mlx5_vdpa_virtq_setup(priv, index, true);
-- 
2.31.1