kvm: retry nx_huge_page_recovery_thread creation
author     Keith Busch <kbusch@kernel.org>
           Thu, 27 Feb 2025 23:06:31 +0000 (15:06 -0800)
committer  Paolo Bonzini <pbonzini@redhat.com>
           Sat, 1 Mar 2025 07:54:18 +0000 (02:54 -0500)
A VMM may send a non-fatal signal to its threads, including vCPU tasks,
at any time, and thus may signal vCPU tasks during KVM_RUN.  If a vCPU
task receives the signal while it's trying to spawn the huge page recovery
vhost task, then KVM_RUN will fail due to copy_process() returning
-ERESTARTNOINTR.

Rework call_once() to mark the call complete if and only if the called
function succeeds, and plumb the function's true error code back to the
call_once() invoker.  This provides userspace with the correct, non-fatal
error code so that the VMM doesn't terminate the VM on -ENOMEM, and allows
a subsequent KVM_RUN to succeed by virtue of retrying creation of the NX huge
page task.
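
[Editor's illustration, not part of the commit: the retry matters because
VMMs conventionally treat an interrupted KVM_RUN as retryable and only give
up on "real" errors such as ENOMEM.  A minimal sketch of such a vCPU loop
follows; run_vcpu() and vcpu_fd are hypothetical names.]

/*
 * Editor's sketch, not part of the commit: a minimal VMM vCPU loop that
 * retries KVM_RUN after a non-fatal signal.  run_vcpu() and vcpu_fd are
 * hypothetical names used only for illustration.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int run_vcpu(int vcpu_fd)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) == 0)
			return 0;	/* VM exit; the caller inspects the shared kvm_run page */
		if (errno == EINTR)
			continue;	/* signaled mid-run: harmless, just retry */
		perror("KVM_RUN");	/* e.g. the spurious ENOMEM this patch removes */
		return -errno;
	}
}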

Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
[implemented the kvm user side]
Signed-off-by: Keith Busch <kbusch@kernel.org>
Message-ID: <20250227230631.303431-3-kbusch@meta.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu/mmu.c
include/linux/call_once.h

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 18ca1ea6dc240f5e6ccf92dca63e6293611607d6..8160870398b9045998a18a92260657ae8bd2f700 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
        return true;
 }
 
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
 {
        struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
        struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7472,12 +7472,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
                                      kvm, "kvm-nx-lpage-recovery");
 
        if (IS_ERR(nx_thread))
-               return;
+               return PTR_ERR(nx_thread);
 
        vhost_task_start(nx_thread);
 
        /* Make the task visible only once it is fully started. */
        WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+       return 0;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
@@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
        if (nx_hugepage_mitigation_hard_disabled)
                return 0;
 
-       call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
-       if (!kvm->arch.nx_huge_page_recovery_thread)
-               return -ENOMEM;
-       return 0;
+       return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
index 6261aa0b3fb00dac0265a13b4ed25df2ae1dfbee..13cd6469e7e56edfd445376ae8963b31c9864965 100644
--- a/include/linux/call_once.h
+++ b/include/linux/call_once.h
@@ -26,20 +26,41 @@ do {                                                                        \
        __once_init((once), #once, &__key);                             \
 } while (0)
 
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+/*
+ * call_once - Ensure a function has been called exactly once
+ *
+ * @once: Tracking struct
+ * @cb: Function to be called
+ *
+ * If @once has never completed successfully before, call @cb and, if
+ * it returns a zero or positive value, mark @once as completed.  Return
+ * the value returned by @cb.
+ *
+ * If @once has completed successfully before, return 0.
+ *
+ * The call to @cb is implicitly surrounded by a mutex, though for
+ * efficiency the function avoids taking it after the first call.
+ */
+static inline int call_once(struct once *once, int (*cb)(struct once *))
 {
-        /* Pairs with atomic_set_release() below.  */
-        if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
-                return;
-
-        guard(mutex)(&once->lock);
-        WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
-        if (atomic_read(&once->state) != ONCE_NOT_STARTED)
-                return;
-
-        atomic_set(&once->state, ONCE_RUNNING);
-        cb(once);
-        atomic_set_release(&once->state, ONCE_COMPLETED);
+       int r, state;
+
+       /* Pairs with atomic_set_release() below.  */
+       if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+               return 0;
+
+       guard(mutex)(&once->lock);
+       state = atomic_read(&once->state);
+       if (unlikely(state != ONCE_NOT_STARTED))
+               return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0;
+
+       atomic_set(&once->state, ONCE_RUNNING);
+       r = cb(once);
+       if (r < 0)
+               atomic_set(&once->state, ONCE_NOT_STARTED);
+       else
+               atomic_set_release(&once->state, ONCE_COMPLETED);
+       return r;
 }
 
 #endif /* _LINUX_CALL_ONCE_H */
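
[To show the new contract in isolation, here is a hedged userspace analogue
of the reworked call_once(): an editor's sketch using C11 atomics and a
pthread mutex in place of the kernel primitives, not the kernel code itself.
The property it demonstrates: completion is recorded only when the callback
returns a non-negative value, so a failed attempt rolls the state back and
the next caller retries.]

/*
 * Editor's sketch: userspace analogue of the reworked call_once().
 * C11 stdatomic and pthreads stand in for the kernel's atomics and mutex.
 */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

enum { ONCE_NOT_STARTED, ONCE_RUNNING, ONCE_COMPLETED };

struct once {
	atomic_int state;
	pthread_mutex_t lock;
};
#define ONCE_INIT { ONCE_NOT_STARTED, PTHREAD_MUTEX_INITIALIZER }

static int call_once(struct once *once, int (*cb)(struct once *))
{
	int r, state;

	/* Fast path: acquire-load pairs with the release-store below. */
	if (atomic_load_explicit(&once->state, memory_order_acquire) ==
	    ONCE_COMPLETED)
		return 0;

	pthread_mutex_lock(&once->lock);
	state = atomic_load_explicit(&once->state, memory_order_relaxed);
	if (state != ONCE_NOT_STARTED) {
		/* Someone else already completed (or corrupted) the state. */
		r = (state == ONCE_COMPLETED) ? 0 : -EINVAL;
		goto out;
	}

	atomic_store_explicit(&once->state, ONCE_RUNNING, memory_order_relaxed);
	r = cb(once);
	if (r < 0)
		/* Failure: roll back so a later caller retries @cb. */
		atomic_store_explicit(&once->state, ONCE_NOT_STARTED,
				      memory_order_relaxed);
	else
		atomic_store_explicit(&once->state, ONCE_COMPLETED,
				      memory_order_release);
out:
	pthread_mutex_unlock(&once->lock);
	return r;
}

[With this shape, a first call whose callback fails with, say,
-ERESTARTNOINTR leaves @once untouched, so the next call_once() invokes the
callback again; that is exactly what lets a subsequent KVM_RUN re-attempt
creation of the NX huge page recovery task.]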