This patch reintroduces kthread mode support in vhost. It also
introduces struct vhost_worker_ops to abstract the worker
create/stop/wakeup operations.

* Bring back the original vhost_worker() implementation,
  renamed to vhost_run_work_kthread_list().

* Add cgroup support for the kthread: the worker kthread is attached
  to the cgroups of the task that creates it.

* Introduce struct vhost_worker_ops:
  - Encapsulates the create / stop / wakeup callbacks.
  - vhost_worker_create() selects the proper ops according to
    inherit_owner: the vhost_task ops when it is set, the kthread
    ops otherwise.

This partially reverts or improves upon:
commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")

Signed-off-by: Cindy Lu <l...@redhat.com>
---
 drivers/vhost/vhost.c | 188 ++++++++++++++++++++++++++++++++++++++----
 drivers/vhost/vhost.h |  12 +++
 2 files changed, 182 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 250dc43f1786..be97028a8baf 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
+#include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>
@@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
                 * test_and_set_bit() implies a memory barrier.
                 */
                llist_add(&work->node, &worker->work_list);
-               vhost_task_wake(worker->vtsk);
+               worker->ops->wakeup(worker);
        }
 }
 
@@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        __vhost_vq_meta_reset(vq);
 }
 
+static int vhost_run_work_kthread_list(void *data)
+{
+       struct vhost_worker *worker = data;
+       struct vhost_work *work, *work_next;
+       struct vhost_dev *dev = worker->dev;
+       struct llist_node *node;
+
+       kthread_use_mm(dev->mm);
+
+       for (;;) {
+               /* mb paired w/ kthread_stop */
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       break;
+               }
+               node = llist_del_all(&worker->work_list);
+               if (!node)
+                       schedule();
+
+               node = llist_reverse_order(node);
+               /* make sure flag is seen after deletion */
+               smp_wmb();
+               llist_for_each_entry_safe(work, work_next, node, node) {
+                       clear_bit(VHOST_WORK_QUEUED, &work->flags);
+                       __set_current_state(TASK_RUNNING);
+                       kcov_remote_start_common(worker->kcov_handle);
+                       work->fn(work);
+                       kcov_remote_stop();
+                       cond_resched();
+               }
+       }
+       kthread_unuse_mm(dev->mm);
+
+       return 0;
+}
+
 static bool vhost_run_work_list(void *data)
 {
        struct vhost_worker *worker = data;
@@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
 
+struct vhost_attach_cgroups_struct {
+       struct vhost_work work;
+       struct task_struct *owner;
+       int ret;
+};
+
+static void vhost_attach_cgroups_work(struct vhost_work *work)
+{
+       struct vhost_attach_cgroups_struct *s;
+
+       s = container_of(work, struct vhost_attach_cgroups_struct, work);
+       s->ret = cgroup_attach_task_all(s->owner, current);
+}
+
+static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
+{
+       struct vhost_attach_cgroups_struct attach;
+       int saved_cnt;
+
+       attach.owner = current;
+
+       vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+       vhost_worker_queue(worker, &attach.work);
+
+       mutex_lock(&worker->mutex);
+
+       /*
+        * Bypass attachment_cnt check in __vhost_worker_flush:
+        * Temporarily change it to INT_MAX to bypass the check
+        */
+       saved_cnt = worker->attachment_cnt;
+       worker->attachment_cnt = INT_MAX;
+       __vhost_worker_flush(worker);
+       worker->attachment_cnt = saved_cnt;
+
+       mutex_unlock(&worker->mutex);
+
+       return attach.ret;
+}
+
 /* Caller should have device mutex */
 bool vhost_dev_has_owner(struct vhost_dev *dev)
 {
@@ -627,7 +706,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
 
        WARN_ON(!llist_empty(&worker->work_list));
        xa_erase(&dev->worker_xa, worker->id);
-       vhost_task_stop(worker->vtsk);
+       worker->ops->stop(worker);
        kfree(worker);
 }
 
@@ -650,42 +729,115 @@ static void vhost_workers_free(struct vhost_dev *dev)
        xa_destroy(&dev->worker_xa);
 }
 
+static void vhost_task_wakeup(struct vhost_worker *worker)
+{
+       return vhost_task_wake(worker->vtsk);
+}
+
+static void vhost_kthread_wakeup(struct vhost_worker *worker)
+{
+       wake_up_process(worker->kthread_task);
+}
+
+static void vhost_task_do_stop(struct vhost_worker *worker)
+{
+       return vhost_task_stop(worker->vtsk);
+}
+
+static void vhost_kthread_do_stop(struct vhost_worker *worker)
+{
+       kthread_stop(worker->kthread_task);
+}
+
+static int vhost_task_worker_create(struct vhost_worker *worker,
+                                   struct vhost_dev *dev, const char *name)
+{
+       struct vhost_task *vtsk;
+       u32 id;
+       int ret;
+
+       vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
+                                worker, name);
+       if (IS_ERR(vtsk))
+               return PTR_ERR(vtsk);
+
+       worker->vtsk = vtsk;
+       vhost_task_start(vtsk);
+       ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+       if (ret < 0) {
+               vhost_task_do_stop(worker);
+               return ret;
+       }
+       worker->id = id;
+       return 0;
+}
+
+static int vhost_kthread_worker_create(struct vhost_worker *worker,
+                                      struct vhost_dev *dev, const char *name)
+{
+       struct task_struct *task;
+       u32 id;
+       int ret;
+
+       task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+
+       worker->kthread_task = task;
+       wake_up_process(task);
+       ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+       if (ret < 0)
+               goto stop_worker;
+
+       ret = vhost_attach_task_to_cgroups(worker);
+       if (ret)
+               goto stop_worker;
+
+       worker->id = id;
+       return 0;
+
+stop_worker:
+       vhost_kthread_do_stop(worker);
+       return ret;
+}
+
+static const struct vhost_worker_ops kthread_ops = {
+       .create = vhost_kthread_worker_create,
+       .stop = vhost_kthread_do_stop,
+       .wakeup = vhost_kthread_wakeup,
+};
+
+static const struct vhost_worker_ops vhost_task_ops = {
+       .create = vhost_task_worker_create,
+       .stop = vhost_task_do_stop,
+       .wakeup = vhost_task_wakeup,
+};
+
 static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 {
        struct vhost_worker *worker;
-       struct vhost_task *vtsk;
        char name[TASK_COMM_LEN];
        int ret;
-       u32 id;
+       const struct vhost_worker_ops *ops =
+               dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
 
        worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
        if (!worker)
                return NULL;
 
        worker->dev = dev;
+       worker->ops = ops;
        snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-       vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
-                                worker, name);
-       if (IS_ERR(vtsk))
-               goto free_worker;
-
        mutex_init(&worker->mutex);
        init_llist_head(&worker->work_list);
        worker->kcov_handle = kcov_common_handle();
-       worker->vtsk = vtsk;
-
-       vhost_task_start(vtsk);
-
-       ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+       ret = ops->create(worker, dev, name);
        if (ret < 0)
-               goto stop_worker;
-       worker->id = id;
+               goto free_worker;
 
        return worker;
 
-stop_worker:
-       vhost_task_stop(vtsk);
 free_worker:
        kfree(worker);
        return NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 19bb94922a0e..af4b2f7d3b91 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -26,7 +26,18 @@ struct vhost_work {
        unsigned long           flags;
 };
 
+struct vhost_worker;
+struct vhost_dev;
+
+struct vhost_worker_ops {
+       int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
+                     const char *name);
+       void (*stop)(struct vhost_worker *worker);
+       void (*wakeup)(struct vhost_worker *worker);
+};
+
 struct vhost_worker {
+       struct task_struct *kthread_task;
        struct vhost_task       *vtsk;
        struct vhost_dev        *dev;
        /* Used to serialize device wide flushing with worker swapping. */
@@ -36,6 +47,7 @@ struct vhost_worker {
        u32                     id;
        int                     attachment_cnt;
        bool                    killed;
+       const struct vhost_worker_ops *ops;
 };
 
 /* Poll a file (eventfd or socket) */
-- 
2.45.0


Reply via email to