www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: Avoid livelock in pfncache hva_to_pfn_retry()
author    David Woodhouse <dwmw@amazon.co.uk>
          Fri, 2 Aug 2024 22:09:58 +0000 (23:09 +0100)
committer David Woodhouse <dwmw@amazon.co.uk>
          Fri, 2 Aug 2024 22:24:54 +0000 (23:24 +0100)
In some circumstances, the gpc_map() call inside the mmu_notifier retry loop
can cause the loop to livelock: mapping the page can itself trigger an
invalidation, so every pass round the loop sees a retry condition and the
refresh never completes.

Shift the gpc_map() call outside the mmu_notifier_retry_cache() loop by using
a new gpc->pfn_valid flag to signal *partial* setup. The flag is consulted
only by the invalidation callback, so the GPC can be invalidated before we've
even finished mapping it. The restructured loop is sketched below.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
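
For orientation, the retry loop after this change looks roughly as follows.
This is a condensed sketch paraphrased from the hunks below, not the verbatim
kernel code: the pfn lookup, the khva reuse/unmap bookkeeping and the error
paths are elided, and only the locking and flag handling that break the
livelock are shown.

        do {
                mmu_seq = gpc->kvm->mmu_invalidate_seq;
                smp_rmb();

                /* Fail concurrent check()s while this refresh is in flight. */
                gpc->pfn_valid = false;
                gpc->valid = false;
                write_unlock_irq(&gpc->lock);

                /* ... release the pfn/mapping left over from a failed pass ... */
                /* ... resolve gpc->uhva to new_pfn; this may sleep ... */

                write_lock_irq(&gpc->lock);
                if (mmu_notifier_retry_cache(gpc->kvm, mmu_seq))
                        continue;               /* raced before mapping: retry */

                gpc->pfn_valid = true;          /* notifier may clear this from now on */

                /*
                 * Map with the lock dropped. If an invalidation hits this
                 * GPC's uhva while gpc_map() runs, the notifier clears
                 * pfn_valid; unrelated invalidations that gpc_map() itself
                 * triggers no longer force a retry, which is what previously
                 * caused the livelock.
                 */
                write_unlock_irq(&gpc->lock);
                /* ... new_khva = gpc_map(new_pfn), or reuse the old mapping ... */

                write_lock_irq(&gpc->lock);
        } while (!gpc->pfn_valid);              /* cleared => invalidated while mapping */

        gpc->valid = true;
        gpc->pfn = new_pfn;

The notifier side (gfn_to_pfn_cache_invalidate_start() below) keys off
pfn_valid rather than valid, so a partially set-up GPC can be shot down at any
point in this sequence.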
include/linux/kvm_types.h
virt/kvm/pfncache.c

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 827ecc0b7e10a8ebfb2f27fbbc43f96127abf247..5e37421de9e55c583b5bc92fb9d32c0e59b52cf4 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -69,6 +69,7 @@ struct gfn_to_pfn_cache {
        void *khva;
        kvm_pfn_t pfn;
        bool active;
+       bool pfn_valid;
        bool valid;
 };
 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index f0039efb9e1e34315450bcb589429a8cecb37a73..581891fec2fefc998932c84132d1b756b84f68d5 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -32,7 +32,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
                read_lock_irq(&gpc->lock);
 
                /* Only a single page so no need to care about length */
-               if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+               if (gpc->pfn_valid && !is_error_noslot_pfn(gpc->pfn) &&
                    gpc->uhva >= start && gpc->uhva < end) {
                        read_unlock_irq(&gpc->lock);
 
@@ -45,9 +45,11 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
                         */
 
                        write_lock_irq(&gpc->lock);
-                       if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
-                           gpc->uhva >= start && gpc->uhva < end)
+                       if (gpc->pfn_valid && !is_error_noslot_pfn(gpc->pfn) &&
+                           gpc->uhva >= start && gpc->uhva < end) {
+                               gpc->pfn_valid = false;
                                gpc->valid = false;
+                       }
                        write_unlock_irq(&gpc->lock);
                        continue;
                }
@@ -164,23 +166,24 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
        lockdep_assert_held_write(&gpc->lock);
 
-       /*
-        * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
-        * assets have already been updated and so a concurrent check() from a
-        * different task may not fail the gpa/uhva/generation checks.
-        */
-       gpc->valid = false;
-
        do {
                mmu_seq = gpc->kvm->mmu_invalidate_seq;
                smp_rmb();
 
+               /*
+                * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
+                * assets have already been updated and so a concurrent check() from a
+                * different task may not fail the gpa/uhva/generation checks.
+                */
+               gpc->pfn_valid = false;
+               gpc->valid = false;
+
                write_unlock_irq(&gpc->lock);
 
                /*
                 * If the previous iteration "failed" due to an mmu_notifier
                 * event, release the pfn and unmap the kernel virtual address
-                * from the previous attempt.  Unmapping might sleep, so this
+                * from the previous attempt. Unmapping might sleep, so this
                 * needs to be done after dropping the lock.  Opportunistically
                 * check for resched while the lock isn't held.
                 */
@@ -189,8 +192,10 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                         * Keep the mapping if the previous iteration reused
                         * the existing mapping and didn't create a new one.
                         */
-                       if (new_khva != old_khva)
+                       if (new_khva != old_khva) {
                                gpc_unmap(new_pfn, new_khva);
+                               new_khva = NULL;
+                       }
 
                        kvm_release_pfn_clean(new_pfn);
 
@@ -202,11 +207,29 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                if (is_error_noslot_pfn(new_pfn))
                        goto out_error;
 
+               write_lock_irq(&gpc->lock);
+
                /*
-                * Obtain a new kernel mapping if KVM itself will access the
-                * pfn.  Note, kmap() and memremap() can both sleep, so this
-                * too must be done outside of gpc->lock!
+                * Other tasks must wait for _this_ refresh to complete before
+                * attempting to refresh.
                 */
+               WARN_ON_ONCE(gpc->pfn_valid);
+               WARN_ON_ONCE(gpc->valid);
+
+               if (mmu_notifier_retry_cache(gpc->kvm, mmu_seq))
+                       continue;
+
+               gpc->pfn_valid = true;
+
+               /*
+                * Obtain a new kernel mapping of the PFN. Drop the lock first,
+                * since gpc_map() may sleep and may itself trigger MMU
+                * notifications. The pfn_valid flag exists so that the
+                * notifier can invalidate this GPC even before it is fully
+                * mapped and marked valid.
+                */
+               write_unlock_irq(&gpc->lock);
+
                if (new_pfn == gpc->pfn)
                        new_khva = old_khva;
                else
@@ -218,13 +241,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                }
 
                write_lock_irq(&gpc->lock);
-
-               /*
-                * Other tasks must wait for _this_ refresh to complete before
-                * attempting to refresh.
-                */
-               WARN_ON_ONCE(gpc->valid);
-       } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
+       } while (!gpc->pfn_valid);
 
        gpc->valid = true;
        gpc->pfn = new_pfn;
@@ -338,7 +355,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
         * valid, leave it as is.
         */
        if (ret) {
-               gpc->valid = false;
+               gpc->valid = gpc->pfn_valid = false;
                gpc->pfn = KVM_PFN_ERR_FAULT;
                gpc->khva = NULL;
        }
@@ -383,7 +400,7 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
        gpc->pfn = KVM_PFN_ERR_FAULT;
        gpc->gpa = INVALID_GPA;
        gpc->uhva = KVM_HVA_ERR_BAD;
-       gpc->active = gpc->valid = false;
+       gpc->active = gpc->valid = gpc->pfn_valid = false;
 }
 
 static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
@@ -453,6 +470,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
                write_lock_irq(&gpc->lock);
                gpc->active = false;
                gpc->valid = false;
+               gpc->pfn_valid = false;
 
                /*
                 * Leave the GPA => uHVA cache intact, it's protected by the