[PATCHv3 0/2]

2025-02-27 Thread Keith Busch
From: Keith Busch 

changes from v2:

  Fixed up the logical error in vhost on the new failure criteria

Keith Busch (1):
  vhost: return task creation error instead of NULL

Sean Christopherson (1):
  kvm: retry nx_huge_page_recovery_thread creation

 arch/x86/kvm/mmu/mmu.c| 12 +---
 drivers/vhost/vhost.c |  2 +-
 include/linux/call_once.h | 16 +++-
 kernel/vhost_task.c   |  4 ++--
 4 files changed, 19 insertions(+), 15 deletions(-)

-- 
2.43.5




[PATCHv3 2/2] kvm: retry nx_huge_page_recovery_thread creation

2025-02-27 Thread Keith Busch
From: Sean Christopherson 

A VMM may send a signal to its threads while they've entered KVM_RUN. If
that thread happens to be trying to make the huge page recovery vhost
task, then it fails with -ERESTARTNOINTR. We need to retry if that
happens, so call_once needs to be retryable. Make call_once complete
only if what it called was successful.

[implemented the kvm user side]
Signed-off-by: Keith Busch 
---
 arch/x86/kvm/mmu/mmu.c| 10 --
 include/linux/call_once.h | 16 +++-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 18ca1ea6dc240..8160870398b90 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
return true;
 }
 
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
 {
struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7472,12 +7472,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
  kvm, "kvm-nx-lpage-recovery");
 
if (IS_ERR(nx_thread))
-   return;
+   return PTR_ERR(nx_thread);
 
vhost_task_start(nx_thread);
 
/* Make the task visible only once it is fully started. */
WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+   return 0;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
@@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
if (nx_hugepage_mitigation_hard_disabled)
return 0;
 
-   call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
-   if (!kvm->arch.nx_huge_page_recovery_thread)
-   return -ENOMEM;
-   return 0;
+   return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
index 6261aa0b3fb00..ddcfd91493eaa 100644
--- a/include/linux/call_once.h
+++ b/include/linux/call_once.h
@@ -26,20 +26,26 @@ do {						\
	__once_init((once), #once, &__key);				\
 } while (0)
 
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+static inline int call_once(struct once *once, int (*cb)(struct once *))
 {
+   int r;
+
 /* Pairs with atomic_set_release() below.  */
 if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
-return;
+   return 0;
 
 guard(mutex)(&once->lock);
 WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
 if (atomic_read(&once->state) != ONCE_NOT_STARTED)
-return;
+return -EINVAL;
 
 atomic_set(&once->state, ONCE_RUNNING);
-cb(once);
-atomic_set_release(&once->state, ONCE_COMPLETED);
+   r = cb(once);
+   if (r)
+   atomic_set(&once->state, ONCE_NOT_STARTED);
+   else
+   atomic_set_release(&once->state, ONCE_COMPLETED);
+   return r;
 }
 
 #endif /* _LINUX_CALL_ONCE_H */
-- 
2.43.5




[PATCHv3 1/2] vhost: return task creation error instead of NULL

2025-02-27 Thread Keith Busch
From: Keith Busch 

Lets callers distinguish why the vhost task creation failed. No one
currently cares why it failed, so no real runtime change from this
patch, but that will not be the case for long.

Signed-off-by: Keith Busch 
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 drivers/vhost/vhost.c  | 2 +-
 kernel/vhost_task.c| 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d4ac4a1f8b81b..18ca1ea6dc240 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7471,7 +7471,7 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
  kvm_nx_huge_page_recovery_worker_kill,
  kvm, "kvm-nx-lpage-recovery");
 
-   if (!nx_thread)
+   if (IS_ERR(nx_thread))
return;
 
vhost_task_start(nx_thread);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9ac25d08f473e..63612faeab727 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 
vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
 worker, name);
-   if (!vtsk)
+   if (IS_ERR(vtsk))
goto free_worker;
 
mutex_init(&worker->mutex);
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 8800f5acc0071..2ef2e1b800916 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
 
vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
if (!vtsk)
-   return NULL;
+   return ERR_PTR(-ENOMEM);
init_completion(&vtsk->exited);
mutex_init(&vtsk->exit_mutex);
vtsk->data = arg;
@@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args);
if (IS_ERR(tsk)) {
kfree(vtsk);
-   return NULL;
+   return ERR_PTR(PTR_ERR(tsk));
}
 
vtsk->task = tsk;
-- 
2.43.5