www.infradead.org Git - linux.git/commitdiff
Revert "KVM: async_pf: avoid recursive flushing of work items"
author: Sean Christopherson <seanjc@google.com>
Tue, 23 Apr 2024 19:16:49 +0000 (12:16 -0700)
committer: Sean Christopherson <seanjc@google.com>
Mon, 3 Jun 2024 15:55:55 +0000 (08:55 -0700)
Now that KVM does NOT gift async #PF workers a "struct kvm" reference,
don't bother skipping "done" workers when flushing/canceling queued
workers, as the deadlock that was being fudged around can no longer occur.
When workers, i.e. async_pf_execute(), were gifted a reference, it was
possible for a worker to put the last reference and trigger VM destruction,
i.e. trigger flushing of a workqueue from a worker in said workqueue.

Note, there is no actual lock, the deadlock was that a worker would be
stuck waiting for itself (the workqueue code simulates a lock/unlock via
lock_map_{acquire,release}()).

Skipping "done" workers isn't problematic per se, but using work->vcpu as
a "done" flag is confusing, e.g. it's not clear that async_pf.lock is
acquired to protect work->vcpu, NOT the processing of async_pf.queue
(which is protected by vcpu->mutex).

This reverts commit 22583f0d9c85e60c9860bc8a0ebff59fe08be6d7.

Suggested-by: Xu Yilun <yilun.xu@linux.intel.com>
Link: https://lore.kernel.org/r/20240423191649.2885257-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
virt/kvm/async_pf.c

index 99a63bad0306c5699c8282b175f203bdc7bfba0a..0ee4816b079acb005b02e2c64470b50b020a854e 100644 (file)
@@ -80,7 +80,6 @@ static void async_pf_execute(struct work_struct *work)
        spin_lock(&vcpu->async_pf.lock);
        first = list_empty(&vcpu->async_pf.done);
        list_add_tail(&apf->link, &vcpu->async_pf.done);
-       apf->vcpu = NULL;
        spin_unlock(&vcpu->async_pf.lock);
 
        /*
@@ -120,8 +119,6 @@ static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 {
-       spin_lock(&vcpu->async_pf.lock);
-
        /* cancel outstanding work queue item */
        while (!list_empty(&vcpu->async_pf.queue)) {
                struct kvm_async_pf *work =
@@ -129,23 +126,15 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                                         typeof(*work), queue);
                list_del(&work->queue);
 
-               /*
-                * We know it's present in vcpu->async_pf.done, do
-                * nothing here.
-                */
-               if (!work->vcpu)
-                       continue;
-
-               spin_unlock(&vcpu->async_pf.lock);
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
                flush_work(&work->work);
 #else
                if (cancel_work_sync(&work->work))
                        kmem_cache_free(async_pf_cache, work);
 #endif
-               spin_lock(&vcpu->async_pf.lock);
        }
 
+       spin_lock(&vcpu->async_pf.lock);
        while (!list_empty(&vcpu->async_pf.done)) {
                struct kvm_async_pf *work =
                        list_first_entry(&vcpu->async_pf.done,