On Mon, Apr 21, 2025 at 10:44:08AM +0800, Cindy Lu wrote:
> This patch reintroduces kthread mode support in vhost.
> It also introduces struct vhost_worker_ops to abstract
> worker create/stop/wakeup operations.
> 
> * Bring back the original vhost_worker() implementation,
>   renamed to vhost_run_work_kthread_list().
> 
> * Add cgroup support for the kthread
> 
> * Introduce struct vhost_worker_ops:
>   - Encapsulates create/stop/wakeup callbacks.
>   - vhost_worker_create() selects the proper ops according to
>     inherit_owner.
> 
> This partially reverts and builds upon:
> commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
> commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")
> 
> Signed-off-by: Cindy Lu <l...@redhat.com>
> ---
>  drivers/vhost/vhost.c | 188 ++++++++++++++++++++++++++++++++++++++----
>  drivers/vhost/vhost.h |  12 +++
>  2 files changed, 182 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 250dc43f1786..be97028a8baf 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,6 +22,7 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/kthread.h>
> +#include <linux/cgroup.h>
>  #include <linux/module.h>
>  #include <linux/sort.h>
>  #include <linux/sched/mm.h>
> @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
>                * test_and_set_bit() implies a memory barrier.
>                */
>               llist_add(&work->node, &worker->work_list);
> -             vhost_task_wake(worker->vtsk);
> +             worker->ops->wakeup(worker);
>       }
>  }
>  
> @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>       __vhost_vq_meta_reset(vq);
>  }
>  
> +static int vhost_run_work_kthread_list(void *data)
> +{
> +     struct vhost_worker *worker = data;
> +     struct vhost_work *work, *work_next;
> +     struct vhost_dev *dev = worker->dev;
> +     struct llist_node *node;
> +
> +     kthread_use_mm(dev->mm);
> +
> +     for (;;) {
> +             /* mb paired w/ kthread_stop */
> +             set_current_state(TASK_INTERRUPTIBLE);
> +
> +             if (kthread_should_stop()) {
> +                     __set_current_state(TASK_RUNNING);
> +                     break;
> +             }
> +             node = llist_del_all(&worker->work_list);
> +             if (!node)
> +                     schedule();
> +
> +             node = llist_reverse_order(node);
> +             /* make sure flag is seen after deletion */
> +             smp_wmb();
> +             llist_for_each_entry_safe(work, work_next, node, node) {
> +                     clear_bit(VHOST_WORK_QUEUED, &work->flags);
> +                     __set_current_state(TASK_RUNNING);
> +                     kcov_remote_start_common(worker->kcov_handle);
> +                     work->fn(work);
> +                     kcov_remote_stop();
> +                     cond_resched();
> +             }
> +     }
> +     kthread_unuse_mm(dev->mm);
> +
> +     return 0;
> +}
> +
>  static bool vhost_run_work_list(void *data)
>  {
>       struct vhost_worker *worker = data;
> @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>  
> +struct vhost_attach_cgroups_struct {
> +     struct vhost_work work;
> +     struct task_struct *owner;
> +     int ret;
> +};
> +
> +static void vhost_attach_cgroups_work(struct vhost_work *work)
> +{
> +     struct vhost_attach_cgroups_struct *s;
> +
> +     s = container_of(work, struct vhost_attach_cgroups_struct, work);
> +     s->ret = cgroup_attach_task_all(s->owner, current);
> +}
> +
> +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
> +{
> +     struct vhost_attach_cgroups_struct attach;
> +     int saved_cnt;
> +
> +     attach.owner = current;
> +
> +     vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> +     vhost_worker_queue(worker, &attach.work);
> +
> +     mutex_lock(&worker->mutex);
> +
> +     /*
> +      * Bypass attachment_cnt check in __vhost_worker_flush:
> +      * Temporarily change it to INT_MAX to bypass the check
> +      */
> +     saved_cnt = worker->attachment_cnt;
> +     worker->attachment_cnt = INT_MAX;
> +     __vhost_worker_flush(worker);
> +     worker->attachment_cnt = saved_cnt;


You mean this one?
        if (!worker->attachment_cnt || worker->killed)
                return;


Just introduce a variant of __vhost_worker_flush that
skips this check.

E.g.

Rename __vhost_worker_flush -> _vhost_worker_flush at the
existing call sites, keep the check in _vhost_worker_flush,
and move the rest of the body into an unchecked
__vhost_worker_flush that the kthread setup path can call
directly:

static void _vhost_worker_flush(struct vhost_worker *worker)
{
        if (!worker->attachment_cnt || worker->killed)
                return;

        __vhost_worker_flush(worker);
}
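
Then vhost_attach_task_to_cgroups does not need the INT_MAX
hack at all - it can flush through the unchecked variant
unconditionally. Untested sketch, assuming the split above:

static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
{
        struct vhost_attach_cgroups_struct attach;

        attach.owner = current;

        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
        vhost_worker_queue(worker, &attach.work);

        mutex_lock(&worker->mutex);
        /*
         * The worker was only just created and is not yet visible
         * to userspace, so attachment_cnt is still 0: flush through
         * the unchecked __vhost_worker_flush.
         */
        __vhost_worker_flush(worker);
        mutex_unlock(&worker->mutex);

        return attach.ret;
}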

> +
> +     mutex_unlock(&worker->mutex);
> +
> +     return attach.ret;
> +}
> +
>  /* Caller should have device mutex */
>  bool vhost_dev_has_owner(struct vhost_dev *dev)
>  {
> @@ -627,7 +706,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
>  
>       WARN_ON(!llist_empty(&worker->work_list));
>       xa_erase(&dev->worker_xa, worker->id);
> -     vhost_task_stop(worker->vtsk);
> +     worker->ops->stop(worker);
>       kfree(worker);
>  }
>  
> @@ -650,42 +729,115 @@ static void vhost_workers_free(struct vhost_dev *dev)
>       xa_destroy(&dev->worker_xa);
>  }
>  
> +static void vhost_task_wakeup(struct vhost_worker *worker)
> +{
> +     vhost_task_wake(worker->vtsk);
> +}
> +
> +static void vhost_kthread_wakeup(struct vhost_worker *worker)
> +{
> +     wake_up_process(worker->kthread_task);
> +}
> +
> +static void vhost_task_do_stop(struct vhost_worker *worker)
> +{
> +     vhost_task_stop(worker->vtsk);
> +}
> +
> +static void vhost_kthread_do_stop(struct vhost_worker *worker)
> +{
> +     kthread_stop(worker->kthread_task);
> +}
> +
> +static int vhost_task_worker_create(struct vhost_worker *worker,
> +                                 struct vhost_dev *dev, const char *name)
> +{
> +     struct vhost_task *vtsk;
> +     u32 id;
> +     int ret;
> +
> +     vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> +                              worker, name);
> +     if (IS_ERR(vtsk))
> +             return PTR_ERR(vtsk);
> +
> +     worker->vtsk = vtsk;
> +     vhost_task_start(vtsk);
> +     ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> +     if (ret < 0) {
> +             vhost_task_do_stop(worker);
> +             return ret;
> +     }
> +     worker->id = id;
> +     return 0;
> +}
> +
> +static int vhost_kthread_worker_create(struct vhost_worker *worker,
> +                                    struct vhost_dev *dev, const char *name)
> +{
> +     struct task_struct *task;
> +     u32 id;
> +     int ret;
> +
> +     task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
> +     if (IS_ERR(task))
> +             return PTR_ERR(task);
> +
> +     worker->kthread_task = task;
> +     wake_up_process(task);
> +     ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> +     if (ret < 0)
> +             goto stop_worker;
> +
> +     ret = vhost_attach_task_to_cgroups(worker);
> +     if (ret)
> +             goto stop_worker;
> +
> +     worker->id = id;
> +     return 0;
> +
> +stop_worker:
> +     vhost_kthread_do_stop(worker);
> +     return ret;
> +}
> +
> +static const struct vhost_worker_ops kthread_ops = {
> +     .create = vhost_kthread_worker_create,
> +     .stop = vhost_kthread_do_stop,
> +     .wakeup = vhost_kthread_wakeup,
> +};
> +
> +static const struct vhost_worker_ops vhost_task_ops = {
> +     .create = vhost_task_worker_create,
> +     .stop = vhost_task_do_stop,
> +     .wakeup = vhost_task_wakeup,
> +};
> +
>  static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>  {
>       struct vhost_worker *worker;
> -     struct vhost_task *vtsk;
>       char name[TASK_COMM_LEN];
>       int ret;
> -     u32 id;
> +     const struct vhost_worker_ops *ops =
> +             dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
>  
>       worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
>       if (!worker)
>               return NULL;
>  
>       worker->dev = dev;
> +     worker->ops = ops;
>       snprintf(name, sizeof(name), "vhost-%d", current->pid);
>  
> -     vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> -                              worker, name);
> -     if (IS_ERR(vtsk))
> -             goto free_worker;
> -
>       mutex_init(&worker->mutex);
>       init_llist_head(&worker->work_list);
>       worker->kcov_handle = kcov_common_handle();
> -     worker->vtsk = vtsk;
> -
> -     vhost_task_start(vtsk);
> -
> -     ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> +     ret = ops->create(worker, dev, name);
>       if (ret < 0)
> -             goto stop_worker;
> -     worker->id = id;
> +             goto free_worker;
>  
>       return worker;
>  
> -stop_worker:
> -     vhost_task_stop(vtsk);
>  free_worker:
>       kfree(worker);
>       return NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 19bb94922a0e..af4b2f7d3b91 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -26,7 +26,18 @@ struct vhost_work {
>       unsigned long           flags;
>  };
>  
> +struct vhost_worker;
> +struct vhost_dev;
> +
> +struct vhost_worker_ops {
> +     int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
> +                   const char *name);
> +     void (*stop)(struct vhost_worker *worker);
> +     void (*wakeup)(struct vhost_worker *worker);
> +};
> +
>  struct vhost_worker {
> +     struct task_struct *kthread_task;
>       struct vhost_task       *vtsk;
>       struct vhost_dev        *dev;
>       /* Used to serialize device wide flushing with worker swapping. */
> @@ -36,6 +47,7 @@ struct vhost_worker {
>       u32                     id;
>       int                     attachment_cnt;
>       bool                    killed;
> +     const struct vhost_worker_ops *ops;
>  };
>  
>  /* Poll a file (eventfd or socket) */
> -- 
> 2.45.0