Split the virtqs' LM log work among the configuration threads. This accelerates the LM process and reduces its time by 20%.
Signed-off-by: Li Zhang <l...@nvidia.com> --- drivers/vdpa/mlx5/mlx5_vdpa.h | 3 + drivers/vdpa/mlx5/mlx5_vdpa_cthread.c | 34 +++++++++++ drivers/vdpa/mlx5/mlx5_vdpa_lm.c | 85 +++++++++++++++++++++------ 3 files changed, 105 insertions(+), 17 deletions(-) diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h index 35221f5ddc..e08931719f 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h @@ -72,6 +72,8 @@ enum { MLX5_VDPA_NOTIFIER_STATE_ERR }; +#define MLX5_VDPA_USED_RING_LEN(size) \ + ((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3) #define MLX5_VDPA_MAX_C_THRD 256 #define MLX5_VDPA_MAX_TASKS_PER_THRD 4096 #define MLX5_VDPA_TASKS_PER_DEV 64 @@ -81,6 +83,7 @@ enum { enum mlx5_vdpa_task_type { MLX5_VDPA_TASK_REG_MR = 1, MLX5_VDPA_TASK_SETUP_VIRTQ, + MLX5_VDPA_TASK_STOP_VIRTQ, }; /* Generic task information and size must be multiple of 4B. */ diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c index 1389d369ae..98369f0887 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c @@ -104,6 +104,7 @@ mlx5_vdpa_c_thread_handle(void *arg) struct mlx5_vdpa_priv *priv; struct mlx5_vdpa_task task; struct rte_ring *rng; + uint64_t features; uint32_t thrd_idx; uint32_t task_num; int ret; @@ -153,6 +154,39 @@ mlx5_vdpa_c_thread_handle(void *arg) } pthread_mutex_unlock(&virtq->virtq_lock); break; + case MLX5_VDPA_TASK_STOP_VIRTQ: + virtq = &priv->virtqs[task.idx]; + pthread_mutex_lock(&virtq->virtq_lock); + ret = mlx5_vdpa_virtq_stop(priv, + task.idx); + if (ret) { + DRV_LOG(ERR, + "Failed to stop virtq %d.", + task.idx); + __atomic_fetch_add( + task.err_cnt, 1, + __ATOMIC_RELAXED); + pthread_mutex_unlock(&virtq->virtq_lock); + break; + } + ret = rte_vhost_get_negotiated_features( + priv->vid, &features); + if (ret) { + DRV_LOG(ERR, + "Failed to get negotiated features virtq %d.", + task.idx); + __atomic_fetch_add( + task.err_cnt, 1, + 
__ATOMIC_RELAXED); + pthread_mutex_unlock(&virtq->virtq_lock); + break; + } + if (RTE_VHOST_NEED_LOG(features)) + rte_vhost_log_used_vring( + priv->vid, task.idx, 0, + MLX5_VDPA_USED_RING_LEN(virtq->vq_size)); + pthread_mutex_unlock(&virtq->virtq_lock); + break; default: DRV_LOG(ERR, "Invalid vdpa task type %d.", task.type); diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_lm.c b/drivers/vdpa/mlx5/mlx5_vdpa_lm.c index efebf364d0..c2e78218ca 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_lm.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_lm.c @@ -89,39 +89,90 @@ mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base, return -1; } -#define MLX5_VDPA_USED_RING_LEN(size) \ - ((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3) - int mlx5_vdpa_lm_log(struct mlx5_vdpa_priv *priv) { + uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0; + uint32_t i, thrd_idx, data[1]; struct mlx5_vdpa_virtq *virtq; uint64_t features; - int ret = rte_vhost_get_negotiated_features(priv->vid, &features); - int i; + int ret; + ret = rte_vhost_get_negotiated_features(priv->vid, &features); if (ret) { DRV_LOG(ERR, "Failed to get negotiated features."); return -1; } - if (!RTE_VHOST_NEED_LOG(features)) - return 0; - for (i = 0; i < priv->nr_virtqs; ++i) { - virtq = &priv->virtqs[i]; - if (!priv->virtqs[i].virtq) { - DRV_LOG(DEBUG, "virtq %d is invalid for LM log.", i); - } else { + if (priv->use_c_thread && priv->nr_virtqs) { + uint32_t main_task_idx[priv->nr_virtqs]; + + for (i = 0; i < priv->nr_virtqs; i++) { + virtq = &priv->virtqs[i]; + if (!virtq->configured) + continue; + thrd_idx = i % (conf_thread_mng.max_thrds + 1); + if (!thrd_idx) { + main_task_idx[task_num] = i; + task_num++; + continue; + } + thrd_idx = priv->last_c_thrd_idx + 1; + if (thrd_idx >= conf_thread_mng.max_thrds) + thrd_idx = 0; + priv->last_c_thrd_idx = thrd_idx; + data[0] = i; + if (mlx5_vdpa_task_add(priv, thrd_idx, + MLX5_VDPA_TASK_STOP_VIRTQ, + &remaining_cnt, &err_cnt, + (void **)&data, 1)) { + DRV_LOG(ERR, 
"Fail to add " "task stop virtq (%d).", i); main_task_idx[task_num] = i; task_num++; } } for (i = 0; i < task_num; i++) { virtq = &priv->virtqs[main_task_idx[i]]; pthread_mutex_lock(&virtq->virtq_lock); ret = mlx5_vdpa_virtq_stop(priv, main_task_idx[i]); if (ret) { pthread_mutex_unlock(&virtq->virtq_lock); DRV_LOG(ERR, "Failed to stop virtq %d.", main_task_idx[i]); return -1; } if (RTE_VHOST_NEED_LOG(features)) rte_vhost_log_used_vring(priv->vid, main_task_idx[i], 0, MLX5_VDPA_USED_RING_LEN(virtq->vq_size)); pthread_mutex_unlock(&virtq->virtq_lock); } if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt, &err_cnt, 2000)) { DRV_LOG(ERR, "Failed to wait virt-queue stop tasks ready."); return -1; } } else { for (i = 0; i < priv->nr_virtqs; i++) { virtq = &priv->virtqs[i]; pthread_mutex_lock(&virtq->virtq_lock); if (!virtq->configured) { pthread_mutex_unlock(&virtq->virtq_lock); continue; } ret = mlx5_vdpa_virtq_stop(priv, i); if (ret) { - DRV_LOG(ERR, "Failed to stop virtq %d for LM " "log.", i); + pthread_mutex_unlock(&virtq->virtq_lock); + DRV_LOG(ERR, + "Failed to stop virtq %d for LM log.", i); return -1; } + if (RTE_VHOST_NEED_LOG(features)) + rte_vhost_log_used_vring(priv->vid, i, 0, + MLX5_VDPA_USED_RING_LEN(virtq->vq_size)); + pthread_mutex_unlock(&virtq->virtq_lock); } - rte_vhost_log_used_vring(priv->vid, i, 0, - MLX5_VDPA_USED_RING_LEN(priv->virtqs[i].vq_size)); } return 0; } -- 2.31.1