After the virtqueues are stopped, split the device close tasks for the virtqs between the configuration threads. This accelerates the live migration (LM) process and reduces its time by 50%.
Signed-off-by: Li Zhang <l...@nvidia.com> --- drivers/vdpa/mlx5/mlx5_vdpa.c | 56 +++++++++++++++++++++++++-- drivers/vdpa/mlx5/mlx5_vdpa.h | 8 ++++ drivers/vdpa/mlx5/mlx5_vdpa_cthread.c | 20 +++++++++- drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 14 +++++++ 4 files changed, 94 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c index e3b32fa087..d000854c08 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c @@ -245,7 +245,7 @@ mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv) return kern_mtu == vhost_mtu ? 0 : -1; } -static void +void mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv) { /* Clean pre-created resource in dev removal only. */ @@ -254,6 +254,26 @@ mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv) mlx5_vdpa_mem_dereg(priv); } +static bool +mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv) +{ + uint32_t timeout = 0; + + /* Check and wait all close tasks done. */ + while (__atomic_load_n(&priv->dev_close_progress, + __ATOMIC_RELAXED) != 0 && timeout < 1000) { + rte_delay_us_sleep(10000); + timeout++; + } + if (priv->dev_close_progress) { + DRV_LOG(ERR, + "Failed to wait close device tasks done vid %d.", + priv->vid); + return true; + } + return false; +} + static int mlx5_vdpa_dev_close(int vid) { @@ -271,6 +291,27 @@ mlx5_vdpa_dev_close(int vid) ret |= mlx5_vdpa_lm_log(priv); priv->state = MLX5_VDPA_STATE_IN_PROGRESS; } + if (priv->use_c_thread) { + if (priv->last_c_thrd_idx >= + (conf_thread_mng.max_thrds - 1)) + priv->last_c_thrd_idx = 0; + else + priv->last_c_thrd_idx++; + __atomic_store_n(&priv->dev_close_progress, + 1, __ATOMIC_RELAXED); + if (mlx5_vdpa_task_add(priv, + priv->last_c_thrd_idx, + MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT, + NULL, NULL, NULL, 1)) { + DRV_LOG(ERR, + "Fail to add dev close task. 
"); + goto single_thrd; + } + priv->state = MLX5_VDPA_STATE_PROBED; + DRV_LOG(INFO, "vDPA device %d was closed.", vid); + return ret; + } +single_thrd: pthread_mutex_lock(&priv->steer_update_lock); mlx5_vdpa_steer_unset(priv); pthread_mutex_unlock(&priv->steer_update_lock); @@ -278,10 +319,12 @@ mlx5_vdpa_dev_close(int vid) mlx5_vdpa_drain_cq(priv); if (priv->lm_mr.addr) mlx5_os_wrapped_mkey_destroy(&priv->lm_mr); - priv->state = MLX5_VDPA_STATE_PROBED; if (!priv->connected) mlx5_vdpa_dev_cache_clean(priv); priv->vid = 0; + __atomic_store_n(&priv->dev_close_progress, 0, + __ATOMIC_RELAXED); + priv->state = MLX5_VDPA_STATE_PROBED; DRV_LOG(INFO, "vDPA device %d was closed.", vid); return ret; } @@ -302,6 +345,8 @@ mlx5_vdpa_dev_config(int vid) DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid); return -1; } + if (mlx5_vdpa_wait_dev_close_tasks_done(priv)) + return -1; priv->vid = vid; priv->connected = true; if (mlx5_vdpa_mtu_set(priv)) @@ -444,8 +489,11 @@ mlx5_vdpa_dev_cleanup(int vid) DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); return -1; } - if (priv->state == MLX5_VDPA_STATE_PROBED) + if (priv->state == MLX5_VDPA_STATE_PROBED) { + if (priv->use_c_thread) + mlx5_vdpa_wait_dev_close_tasks_done(priv); mlx5_vdpa_dev_cache_clean(priv); + } priv->connected = false; return 0; } @@ -839,6 +887,8 @@ mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv) { if (priv->state == MLX5_VDPA_STATE_CONFIGURED) mlx5_vdpa_dev_close(priv->vid); + if (priv->use_c_thread) + mlx5_vdpa_wait_dev_close_tasks_done(priv); mlx5_vdpa_release_dev_resources(priv); if (priv->vdev) rte_vdpa_unregister_device(priv->vdev); diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h index e08931719f..b6392b9d66 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h @@ -84,6 +84,7 @@ enum mlx5_vdpa_task_type { MLX5_VDPA_TASK_REG_MR = 1, MLX5_VDPA_TASK_SETUP_VIRTQ, MLX5_VDPA_TASK_STOP_VIRTQ, + MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT, }; /* Generic task 
information and size must be multiple of 4B. */ @@ -206,6 +207,7 @@ struct mlx5_vdpa_priv { uint64_t features; /* Negotiated features. */ uint16_t log_max_rqt_size; uint16_t last_c_thrd_idx; + uint16_t dev_close_progress; uint16_t num_mrs; /* Number of memory regions. */ struct mlx5_vdpa_steer steer; struct mlx5dv_var *var; @@ -578,4 +580,10 @@ mlx5_vdpa_c_thread_wait_bulk_tasks_done(uint32_t *remaining_cnt, uint32_t *err_cnt, uint32_t sleep_time); int mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index, bool reg_kick); +void +mlx5_vdpa_vq_destroy(struct mlx5_vdpa_virtq *virtq); +void +mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv); +void +mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv); #endif /* RTE_PMD_MLX5_VDPA_H_ */ diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c index 98369f0887..bb2279440b 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c @@ -63,7 +63,8 @@ mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv, task[i].type = task_type; task[i].remaining_cnt = remaining_cnt; task[i].err_cnt = err_cnt; - task[i].idx = data[i]; + if (data) + task[i].idx = data[i]; } if (!mlx5_vdpa_c_thrd_ring_enqueue_bulk(rng, (void **)&task, num, NULL)) return -1; @@ -187,6 +188,23 @@ mlx5_vdpa_c_thread_handle(void *arg) MLX5_VDPA_USED_RING_LEN(virtq->vq_size)); pthread_mutex_unlock(&virtq->virtq_lock); break; + case MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT: + mlx5_vdpa_virtq_unreg_intr_handle_all(priv); + pthread_mutex_lock(&priv->steer_update_lock); + mlx5_vdpa_steer_unset(priv); + pthread_mutex_unlock(&priv->steer_update_lock); + mlx5_vdpa_virtqs_release(priv); + mlx5_vdpa_drain_cq(priv); + if (priv->lm_mr.addr) + mlx5_os_wrapped_mkey_destroy( + &priv->lm_mr); + if (!priv->connected) + mlx5_vdpa_dev_cache_clean(priv); + priv->vid = 0; + __atomic_store_n( + &priv->dev_close_progress, 0, + __ATOMIC_RELAXED); + break; default: DRV_LOG(ERR, "Invalid vdpa task type %d.", 
task.type); diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c index db05220e76..a08c854b14 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c @@ -102,6 +102,20 @@ mlx5_vdpa_virtq_unregister_intr_handle(struct mlx5_vdpa_virtq *virtq) virtq->intr_handle = NULL; } +void +mlx5_vdpa_virtq_unreg_intr_handle_all(struct mlx5_vdpa_priv *priv) +{ + uint32_t i; + struct mlx5_vdpa_virtq *virtq; + + for (i = 0; i < priv->nr_virtqs; i++) { + virtq = &priv->virtqs[i]; + pthread_mutex_lock(&virtq->virtq_lock); + mlx5_vdpa_virtq_unregister_intr_handle(virtq); + pthread_mutex_unlock(&virtq->virtq_lock); + } +} + /* Release cached VQ resources. */ void mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv) -- 2.31.1