KVM: pfncache: Wait for pending invalidations instead of spinning
author David Woodhouse <dwmw@amazon.co.uk>
Wed, 21 Aug 2024 20:11:21 +0000 (21:11 +0100)
committer David Woodhouse <dwmw@amazon.co.uk>
Wed, 21 Aug 2024 20:11:21 +0000 (21:11 +0100)
The busy loop in hva_to_pfn_retry() is worse than a normal page fault
retry loop: it isn't just that the page might be faulted out again
before it is actually accessed; it actively spins while it waits for
the pending invalidation to complete.

Introduce a wait queue to be woken when kvm->mn_active_invalidate_count
reaches zero, and wait on it if there is any pending invalidation which
affects the GPC being refreshed.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
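
For context, a minimal sketch of the open-coded wait-queue pattern that the
new gpc_wait_for_invalidations() relies on. The names here (example_wq,
example_lock, pending, example_wait(), example_complete()) are illustrative
stand-ins, not the actual KVM symbols:

#include <linux/types.h>
#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq); /* stands in for kvm->gpc_invalidate_wq */
static DEFINE_SPINLOCK(example_lock);       /* stands in for kvm->mn_invalidate_lock */
static bool pending;                        /* "an invalidation overlapping this GPC is in flight" */

/* Waiter side: sleep until 'pending' clears, re-checking it under the lock. */
static bool example_wait(void)
{
        bool waited = false;

        spin_lock(&example_lock);
        if (pending) {
                DEFINE_WAIT(wait);

                waited = true;
                for (;;) {
                        /*
                         * Set the task state before the final check of the
                         * condition, so a wake_up() arriving between the
                         * check and schedule() cannot be missed.
                         */
                        prepare_to_wait(&example_wq, &wait, TASK_UNINTERRUPTIBLE);

                        if (!pending)
                                break;

                        /* Drop the spinlock only around the actual sleep. */
                        spin_unlock(&example_lock);
                        schedule();
                        spin_lock(&example_lock);
                }
                finish_wait(&example_wq, &wait);
        }
        spin_unlock(&example_lock);
        return waited;
}

/* Waker side: clear the condition under the same lock, then wake sleepers. */
static void example_complete(void)
{
        spin_lock(&example_lock);
        pending = false;
        spin_unlock(&example_lock);
        wake_up(&example_wq);
}

In the patch below, the condition is gpc_invalidations_pending() checked under
kvm->mn_invalidate_lock, and the waker side is the wake_up() added to
kvm_mmu_notifier_invalidate_range_end() when kvm->mn_active_invalidate_count
reaches zero.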
include/linux/kvm_host.h
virt/kvm/kvm_main.c
virt/kvm/pfncache.c

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1bfe2e8d52cdc6155b37c0c9f16b5d11dae777b2..a0739c343da5deacecaf843d946edcfcf5fbea58 100644 (file)
@@ -772,6 +772,7 @@ struct kvm {
        struct list_head gpc_list;
        u64 mmu_gpc_invalidate_range_start;
        u64 mmu_gpc_invalidate_range_end;
+       wait_queue_head_t gpc_invalidate_wq;
 
        /*
         * created_vcpus is protected by kvm->lock, and is incremented
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84eb1ebb6f47dffc21971b2c3ad1c048b52fe266..e04eb700448b960bb14c9ed5cfa43cd6776159f4 100644 (file)
@@ -871,8 +871,10 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
         * There can only be one waiter, since the wait happens under
         * slots_lock.
         */
-       if (wake)
+       if (wake) {
+               wake_up(&kvm->gpc_invalidate_wq);
                rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
+       }
 }
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
@@ -1182,6 +1184,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 
        INIT_LIST_HEAD(&kvm->gpc_list);
        spin_lock_init(&kvm->gpc_lock);
+       init_waitqueue_head(&kvm->gpc_invalidate_wq);
        kvm->mmu_gpc_invalidate_range_start = KVM_HVA_ERR_BAD;
        kvm->mmu_gpc_invalidate_range_end = KVM_HVA_ERR_BAD;
 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index eeb9bf43c04a1bc050481b2f39db022deb68427b..fa494eb3d92402eeee6da445bfbee0ef20fb133c 100644 (file)
@@ -135,13 +135,38 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
         * No need for locking on GPC here because these fields are protected
         * by gpc->refresh_lock.
         */
-       guard(spinlock)(&gpc->kvm->mn_invalidate_lock);
-
        return unlikely(gpc->kvm->mn_active_invalidate_count) &&
                (gpc->kvm->mmu_gpc_invalidate_range_start <= gpc->uhva) &&
                (gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
 }
 
+static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+{
+       bool waited = false;
+
+       spin_lock(&gpc->kvm->mn_invalidate_lock);
+       if (gpc_invalidations_pending(gpc)) {
+               DEFINE_WAIT(wait);
+
+               waited = true;
+               for (;;) {
+                       prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
+                                       TASK_UNINTERRUPTIBLE);
+
+                       if (!gpc_invalidations_pending(gpc))
+                               break;
+
+                       spin_unlock(&gpc->kvm->mn_invalidate_lock);
+                       schedule();
+                       spin_lock(&gpc->kvm->mn_invalidate_lock);
+               }
+               finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
+       }
+       spin_unlock(&gpc->kvm->mn_invalidate_lock);
+       return waited;
+}
+
+
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
        /* Note, the new page offset may be different than the old! */
@@ -191,8 +216,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                 * to have been changed already, so hva_to_pfn() won't return
                 * a stale mapping in that case anyway.
                 */
-               while (gpc_invalidations_pending(gpc)) {
-                       cond_resched();
+               if (gpc_wait_for_invalidations(gpc)) {
                        write_lock_irq(&gpc->lock);
                        continue;
                }