The LM process includes a lot of objects creations and destructions in the source and the destination servers. As much as LM time increases, the packet drop of the VM increases. To improve LM time need to parallel the configurations for mlx5 FW. Add internal multi-thread management in the driver for it.
A new devarg defines the number of threads and their CPU. The management is shared between all the devices of the driver. Since the event_core also affects the datapath events thread, reduce the priority of the datapath event thread to allow fast configuration of the devices doing the LM. Signed-off-by: Li Zhang <l...@nvidia.com> --- doc/guides/vdpadevs/mlx5.rst | 11 +++ drivers/vdpa/mlx5/meson.build | 1 + drivers/vdpa/mlx5/mlx5_vdpa.c | 41 ++++++++ drivers/vdpa/mlx5/mlx5_vdpa.h | 36 +++++++ drivers/vdpa/mlx5/mlx5_vdpa_cthread.c | 129 ++++++++++++++++++++++++++ drivers/vdpa/mlx5/mlx5_vdpa_event.c | 2 +- drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 8 +- 7 files changed, 223 insertions(+), 5 deletions(-) create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_cthread.c diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst index 0ad77bf535..b75a01688d 100644 --- a/doc/guides/vdpadevs/mlx5.rst +++ b/doc/guides/vdpadevs/mlx5.rst @@ -78,6 +78,17 @@ for an additional list of options shared with other mlx5 drivers. CPU core number to set polling thread affinity to, default to control plane cpu. +- ``max_conf_threads`` parameter [int] + + Allow the driver to use internal threads to obtain fast configuration. + All the threads will be open on the same core of the event completion queue scheduling thread. + + - 0, default, don't use internal threads for configuration. + + - 1 - 256, number of internal threads in addition to the caller thread (8 is suggested). + This value, if not 0, should be the same for all the devices; + the first prob will take it with the event_core for all the multi-thread configurations in the driver. + - ``hw_latency_mode`` parameter [int] The completion queue moderation mode: diff --git a/drivers/vdpa/mlx5/meson.build b/drivers/vdpa/mlx5/meson.build index 0fa82ad257..9d8dbb1a82 100644 --- a/drivers/vdpa/mlx5/meson.build +++ b/drivers/vdpa/mlx5/meson.build @@ -15,6 +15,7 @@ sources = files( 'mlx5_vdpa_virtq.c', 'mlx5_vdpa_steer.c', 'mlx5_vdpa_lm.c', + 'mlx5_vdpa_cthread.c', ) cflags_options = [ '-std=c11', diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c index e5a11f72fd..a9d023ed08 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c @@ -50,6 +50,8 @@ TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list = TAILQ_HEAD_INITIALIZER(priv_list); static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER; +struct mlx5_vdpa_conf_thread_mng conf_thread_mng; + static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv); static struct mlx5_vdpa_priv * @@ -493,6 +495,29 @@ mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque) DRV_LOG(WARNING, "Invalid event_core %s.", val); else priv->event_core = tmp; + } else if (strcmp(key, "max_conf_threads") == 0) { + if (tmp) { + priv->use_c_thread = true; + if (!conf_thread_mng.initializer_priv) { + conf_thread_mng.initializer_priv = priv; + if (tmp > MLX5_VDPA_MAX_C_THRD) { + DRV_LOG(WARNING, + "Invalid max_conf_threads %s " + "and set max_conf_threads to %d", + val, MLX5_VDPA_MAX_C_THRD); + tmp = MLX5_VDPA_MAX_C_THRD; + } + conf_thread_mng.max_thrds = tmp; + } else if (tmp != conf_thread_mng.max_thrds) { + DRV_LOG(WARNING, + "max_conf_threads is PMD argument and not per device, " + "only the first device configuration set it, current value is %d " + "and will not be changed to %d.", + conf_thread_mng.max_thrds, (int)tmp); + } + } else { + priv->use_c_thread = false; + } } else if (strcmp(key, "hw_latency_mode") == 0) { priv->hw_latency_mode = (uint32_t)tmp; } else if (strcmp(key, "hw_max_latency_us") == 0) { @@ -521,6 +546,9 @@ mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist, "hw_max_latency_us", "hw_max_pending_comp", "no_traffic_time", + "queue_size", + "queues", + "max_conf_threads", NULL, }; @@ -725,6 +753,13 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev, pthread_mutex_init(&priv->steer_update_lock, NULL); priv->cdev = cdev; mlx5_vdpa_config_get(mkvlist, priv); + if (priv->use_c_thread) { + if (conf_thread_mng.initializer_priv == priv) + if (mlx5_vdpa_mult_threads_create(priv->event_core)) + goto error; + __atomic_fetch_add(&conf_thread_mng.refcnt, 1, + __ATOMIC_RELAXED); + } if (mlx5_vdpa_create_dev_resources(priv)) goto error; priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops); @@ -739,6 +774,8 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev, pthread_mutex_unlock(&priv_list_lock); return 0; error: + if (conf_thread_mng.initializer_priv == priv) + mlx5_vdpa_mult_threads_destroy(false); if (priv) mlx5_vdpa_dev_release(priv); return -rte_errno; @@ -806,6 +843,10 @@ mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv) mlx5_vdpa_release_dev_resources(priv); if (priv->vdev) rte_vdpa_unregister_device(priv->vdev); + if (priv->use_c_thread) + if (__atomic_fetch_sub(&conf_thread_mng.refcnt, + 1, __ATOMIC_RELAXED) == 1) + mlx5_vdpa_mult_threads_destroy(true); rte_free(priv); } diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h index 3fd5eefc5e..4e7c2557b7 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h @@ -73,6 +73,22 @@ enum { MLX5_VDPA_NOTIFIER_STATE_ERR }; +#define MLX5_VDPA_MAX_C_THRD 256 + +/* Generic mlx5_vdpa_c_thread information. */ +struct mlx5_vdpa_c_thread { + pthread_t tid; +}; + +struct mlx5_vdpa_conf_thread_mng { + void *initializer_priv; + uint32_t refcnt; + uint32_t max_thrds; + pthread_mutex_t cthrd_lock; + struct mlx5_vdpa_c_thread cthrd[MLX5_VDPA_MAX_C_THRD]; +}; +extern struct mlx5_vdpa_conf_thread_mng conf_thread_mng; + struct mlx5_vdpa_virtq { SLIST_ENTRY(mlx5_vdpa_virtq) next; uint8_t enable; @@ -126,6 +142,7 @@ enum mlx5_dev_state { struct mlx5_vdpa_priv { TAILQ_ENTRY(mlx5_vdpa_priv) next; bool connected; + bool use_c_thread; enum mlx5_dev_state state; rte_spinlock_t db_lock; pthread_mutex_t steer_update_lock; @@ -496,4 +513,23 @@ mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv); bool mlx5_vdpa_is_modify_virtq_supported(struct mlx5_vdpa_priv *priv); + +/** + * Create configuration multi-threads resource + * + * @param[in] cpu_core + * CPU core number to set configuration threads affinity to. + * + * @return + * 0 on success, a negative value otherwise. + */ +int +mlx5_vdpa_mult_threads_create(int cpu_core); + +/** + * Destroy configuration multi-threads resource + * + */ +void +mlx5_vdpa_mult_threads_destroy(bool need_unlock); #endif /* RTE_PMD_MLX5_VDPA_H_ */ diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c new file mode 100644 index 0000000000..ba7d8b63b3 --- /dev/null +++ b/drivers/vdpa/mlx5/mlx5_vdpa_cthread.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022 NVIDIA Corporation & Affiliates + */ +#include <string.h> +#include <unistd.h> +#include <sys/eventfd.h> + +#include <rte_malloc.h> +#include <rte_errno.h> +#include <rte_io.h> +#include <rte_alarm.h> +#include <rte_tailq.h> +#include <rte_ring_elem.h> + +#include <mlx5_common.h> + +#include "mlx5_vdpa_utils.h" +#include "mlx5_vdpa.h" + +static void * +mlx5_vdpa_c_thread_handle(void *arg) +{ + /* To be added later. */ + return arg; +} + +static void +mlx5_vdpa_c_thread_destroy(uint32_t thrd_idx, bool need_unlock) +{ + if (conf_thread_mng.cthrd[thrd_idx].tid) { + pthread_cancel(conf_thread_mng.cthrd[thrd_idx].tid); + pthread_join(conf_thread_mng.cthrd[thrd_idx].tid, NULL); + conf_thread_mng.cthrd[thrd_idx].tid = 0; + if (need_unlock) + pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL); + } +} + +static int +mlx5_vdpa_c_thread_create(int cpu_core) +{ + const struct sched_param sp = { + .sched_priority = sched_get_priority_max(SCHED_RR), + }; + rte_cpuset_t cpuset; + pthread_attr_t attr; + uint32_t thrd_idx; + char name[32]; + int ret; + + pthread_mutex_lock(&conf_thread_mng.cthrd_lock); + pthread_attr_init(&attr); + ret = pthread_attr_setschedpolicy(&attr, SCHED_RR); + if (ret) { + DRV_LOG(ERR, "Failed to set thread sched policy = RR."); + goto c_thread_err; + } + ret = pthread_attr_setschedparam(&attr, &sp); + if (ret) { + DRV_LOG(ERR, "Failed to set thread priority."); + goto c_thread_err; + } + for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds; + thrd_idx++) { + ret = pthread_create(&conf_thread_mng.cthrd[thrd_idx].tid, + &attr, mlx5_vdpa_c_thread_handle, + (void *)&conf_thread_mng); + if (ret) { + DRV_LOG(ERR, "Failed to create vdpa multi-threads %d.", + thrd_idx); + goto c_thread_err; + } + CPU_ZERO(&cpuset); + if (cpu_core != -1) + CPU_SET(cpu_core, &cpuset); + else + cpuset = rte_lcore_cpuset(rte_get_main_lcore()); + ret = pthread_setaffinity_np( + conf_thread_mng.cthrd[thrd_idx].tid, + sizeof(cpuset), &cpuset); + if (ret) { + DRV_LOG(ERR, "Failed to set thread affinity for " + "vdpa multi-threads %d.", thrd_idx); + goto c_thread_err; + } + snprintf(name, sizeof(name), "vDPA-mthread-%d", thrd_idx); + ret = pthread_setname_np( + conf_thread_mng.cthrd[thrd_idx].tid, name); + if (ret) + DRV_LOG(ERR, "Failed to set vdpa multi-threads name %s.", + name); + else + DRV_LOG(DEBUG, "Thread name: %s.", name); + } + pthread_mutex_unlock(&conf_thread_mng.cthrd_lock); + return 0; +c_thread_err: + for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds; + thrd_idx++) + mlx5_vdpa_c_thread_destroy(thrd_idx, false); + pthread_mutex_unlock(&conf_thread_mng.cthrd_lock); + return -1; +} + +int +mlx5_vdpa_mult_threads_create(int cpu_core) +{ + pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL); + if (mlx5_vdpa_c_thread_create(cpu_core)) { + DRV_LOG(ERR, "Cannot create vDPA configuration threads."); + mlx5_vdpa_mult_threads_destroy(false); + return -1; + } + return 0; +} + +void +mlx5_vdpa_mult_threads_destroy(bool need_unlock) +{ + uint32_t thrd_idx; + + if (!conf_thread_mng.initializer_priv) + return; + for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds; + thrd_idx++) + mlx5_vdpa_c_thread_destroy(thrd_idx, need_unlock); + pthread_mutex_destroy(&conf_thread_mng.cthrd_lock); + memset(&conf_thread_mng, 0, sizeof(struct mlx5_vdpa_conf_thread_mng)); +} diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/drivers/vdpa/mlx5/mlx5_vdpa_event.c index 2b0f5936d1..b45fbac146 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c @@ -507,7 +507,7 @@ mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv) pthread_attr_t attr; char name[16]; const struct sched_param sp = { - .sched_priority = sched_get_priority_max(SCHED_RR), + .sched_priority = sched_get_priority_max(SCHED_RR) - 1, }; if (!priv->eventc) diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c index 138b7bdbc5..599809b09b 100644 --- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c @@ -43,7 +43,7 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg) errno == EWOULDBLOCK || errno == EAGAIN) continue; - DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s", + DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s.", virtq->index, strerror(errno)); } break; @@ -57,7 +57,7 @@ mlx5_vdpa_virtq_kick_handler(void *cb_arg) rte_spinlock_unlock(&priv->db_lock); pthread_mutex_unlock(&virtq->virtq_lock); if (priv->state != MLX5_VDPA_STATE_CONFIGURED && !virtq->enable) { - DRV_LOG(ERR, "device %d queue %d down, skip kick handling", + DRV_LOG(ERR, "device %d queue %d down, skip kick handling.", priv->vid, virtq->index); return; } @@ -218,7 +218,7 @@ mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index) return -1; } if (attr.state == MLX5_VIRTQ_STATE_ERROR) - DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu", + DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu.", priv->vid, index, attr.error_type); return 0; } @@ -380,7 +380,7 @@ mlx5_vdpa_virtq_sub_objs_prepare(struct mlx5_vdpa_priv *priv, if (ret) { last_avail_idx = 0; last_used_idx = 0; - DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0"); + DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0."); } else { DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for " "virtq %d.", priv->vid, last_avail_idx, -- 2.31.1