KVM: pfncache: Move invalidation to invalidate_range_end hook
author     David Woodhouse <dwmw@amazon.co.uk>
           Wed, 21 Aug 2024 20:18:14 +0000 (21:18 +0100)
committer  David Woodhouse <dwmw@amazon.co.uk>
           Wed, 21 Aug 2024 20:18:14 +0000 (21:18 +0100)
By performing the invalidation from the 'end' hook, the process is a lot
cleaner and simpler because it is guaranteed that ->needs_invalidation
will be cleared after hva_to_pfn() has been called, so the only thing
that hva_to_pfn_retry() needs to do is *wait* for there to be no pending
invalidations.

Another false positive in the range-based checks is thus removed as well.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
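
The resulting refresh flow can be pictured roughly as below. This is an
illustrative sketch rather than the patched virt/kvm/pfncache.c code: error
handling, kernel-mapping and old-PFN management are omitted, and the point at
which ->needs_invalidation is armed is an assumption made here for brevity.

        /*
         * Condensed illustration of the ordering that makes the scheme work;
         * not the actual hva_to_pfn_retry() implementation.
         */
        static kvm_pfn_t hva_to_pfn_retry_sketch(struct gfn_to_pfn_cache *gpc)
        {
                kvm_pfn_t new_pfn;

                do {
                        /* Arm the flag; a racing invalidation will clear it. */
                        gpc->needs_invalidation = true;

                        write_unlock_irq(&gpc->lock);

                        /* May race with an invalidation of this uHVA. */
                        new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL,
                                             true, NULL);

                        /*
                         * Wait until no invalidation covering gpc->uhva is
                         * still in flight, so the PFN just looked up cannot
                         * be going stale without ->needs_invalidation having
                         * been cleared.
                         */
                        gpc_wait_for_invalidations(gpc);

                        write_lock_irq(&gpc->lock);

                        /*
                         * If an invalidation raced with the lookup, it
                         * cleared ->needs_invalidation from the
                         * invalidate_range_end() hook; drop the result and
                         * try again.
                         */
                } while (!gpc->needs_invalidation);

                gpc->valid = true;
                return new_pfn;
        }

The key ordering is that gpc_wait_for_invalidations() runs after hva_to_pfn()
and before ->needs_invalidation is re-checked under the lock, so any
invalidation that could make the new PFN stale has either already cleared the
flag or has fully completed.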
virt/kvm/kvm_main.c
virt/kvm/kvm_mm.h
virt/kvm/pfncache.c

index e04eb700448b960bb14c9ed5cfa43cd6776159f4..cca870f1a78c9788923dadf96a968482c500b5d6 100644 (file)
@@ -795,18 +795,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
        }
        spin_unlock(&kvm->mn_invalidate_lock);
 
-       /*
-        * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
-        * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
-        * each cache's lock.  There are relatively few caches in existence at
-        * any given time, and the caches themselves can check for hva overlap,
-        * i.e. don't need to rely on memslot overlap checks for performance.
-        * Because this runs without holding mmu_lock, the pfn caches must use
-        * mn_active_invalidate_count (see above) instead of
-        * mmu_invalidate_in_progress.
-        */
-       gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
-
        /*
         * If one or more memslots were found and thus zapped, notify arch code
         * that guest memory has been reclaimed.  This needs to be done *after*
@@ -860,6 +848,14 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
        __kvm_handle_hva_range(kvm, &hva_range);
 
+       /*
+        * It's safe to invalidate the gfn_to_pfn_caches from this 'end'
+        * hook, because hva_to_pfn_retry() will wait until no active
+        * invalidations could be affecting the corresponding uHVA
+        * before allowing a newly mapped GPC to become valid.
+        */
+       gfn_to_pfn_cache_invalidate(kvm, range->start, range->end);
+
        /* Pairs with the increment in range_start(). */
        spin_lock(&kvm->mn_invalidate_lock);
        if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
index 715f19669d01f72912af9b7393ccd01f65c0527e..34e4e67f09f81267e2976c7f9fab5339e59aa50a 100644 (file)
@@ -24,13 +24,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
                     bool *async, bool write_fault, bool *writable);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-                                      unsigned long start,
-                                      unsigned long end);
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+                                unsigned long start,
+                                unsigned long end);
 #else
-static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-                                                    unsigned long start,
-                                                    unsigned long end)
+static inline void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+                                              unsigned long start,
+                                              unsigned long end)
 {
 }
 #endif /* HAVE_KVM_PFNCACHE */
index fa494eb3d92402eeee6da445bfbee0ef20fb133c..3f48df8cd6e549b098e04a7dcde5a2441c86b4e0 100644 (file)
 #include "kvm_mm.h"
 
 /*
- * MMU notifier 'invalidate_range_start' hook.
+ * MMU notifier 'invalidate_range_end' hook. The hva_to_pfn_retry() function
+ * below may look up a PFN just before it is zapped, and may be mapping it
+ * concurrently with the actual invalidation (with the GPC lock dropped). By
+ * using a separate 'needs_invalidation' flag, the concurrent invalidation
+ * can handle this case, causing hva_to_pfn_retry() to drop its result and
+ * retry correctly.
  */
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
-                                      unsigned long end)
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm, unsigned long start,
+                                unsigned long end)
 {
        struct gfn_to_pfn_cache *gpc;
 
@@ -140,15 +145,12 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
                (gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
 }
 
-static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
 {
-       bool waited = false;
-
        spin_lock(&gpc->kvm->mn_invalidate_lock);
        if (gpc_invalidations_pending(gpc)) {
                DEFINE_WAIT(wait);
 
-               waited = true;
                for (;;) {
                        prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
@@ -163,10 +165,8 @@ static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
                finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
        }
        spin_unlock(&gpc->kvm->mn_invalidate_lock);
-       return waited;
 }
 
-
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
        /* Note, the new page offset may be different than the old! */
@@ -199,28 +199,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
                write_unlock_irq(&gpc->lock);
 
-               /*
-                * Invalidation occurs from the invalidate_range_start() hook,
-                * which could already have happened before __kvm_gpc_refresh()
-                * (or the previous turn around this loop) took gpc->lock().
-                * If so, and if the corresponding invalidate_range_end() hook
-                * hasn't happened yet, hva_to_pfn() could return a mapping
-                * which is about to be stale and which should not be used. So
-                * check if there are any currently-running invalidations which
-                * affect the uHVA of this GPC, and retry if there are. Any
-                * invalidation which starts after gpc->needs_invalidation is
-                * set is fine, because it will clear that flag and trigger a
-                * retry. And any invalidation which *completes* by having its
-                * invalidate_range_end() hook called immediately prior to this
-                * check is also fine, because the page tables are guaranteed
-                * to have been changted already, so hva_to_pfn() won't return
-                * a stale mapping in that case anyway.
-                */
-               if (gpc_wait_for_invalidations(gpc)) {
-                       write_lock_irq(&gpc->lock);
-                       continue;
-               }
-
                /*
                 * If the previous iteration "failed" due to an mmu_notifier
                 * event, release the pfn and unmap the kernel virtual address
@@ -261,6 +239,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                        goto out_error;
                }
 
+               /*
+                * Avoid populating a GPC with a user address which is already
+                * being invalidated, if the invalidate_range_start() notifier
+                * has already been called. The actual invalidation happens in
+                * the invalidate_range_end() callback, so wait until there are
+                * no active invalidations (for the relevant HVA).
+                */
+               gpc_wait_for_invalidations(gpc);
 
                write_lock_irq(&gpc->lock);
 
@@ -269,6 +255,13 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                 * attempting to refresh.
                 */
                WARN_ON_ONCE(gpc->valid);
+
+               /*
+                * Since gfn_to_pfn_cache_invalidate() is called from the
+                * kvm_mmu_notifier_invalidate_range_end() callback, it can
+                * invalidate the GPC the moment after hva_to_pfn() returned
+                * a valid PFN. If that happens, retry.
+                */
        } while (!gpc->needs_invalidation);
 
        gpc->valid = true;
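
For the other side of the handshake, the invalidation walk driven from
kvm_mmu_notifier_invalidate_range_end() can be sketched roughly as follows.
Again an illustration under assumptions, not the patch itself: the list and
lock names (kvm->gpc_list, kvm->gpc_lock) follow the existing pfncache code,
while the page-alignment details of the uHVA check and the wake-up of waiters
on gpc_invalidate_wq (done elsewhere in the range_end path) are not shown.

        /*
         * Rough shape of gfn_to_pfn_cache_invalidate() as called from the
         * invalidate_range_end() hook; illustration only.
         */
        void gfn_to_pfn_cache_invalidate_sketch(struct kvm *kvm,
                                                unsigned long start,
                                                unsigned long end)
        {
                struct gfn_to_pfn_cache *gpc;

                spin_lock(&kvm->gpc_lock);
                list_for_each_entry(gpc, &kvm->gpc_list, list) {
                        write_lock_irq(&gpc->lock);

                        /* Only caches whose uHVA lies in [start, end) are affected. */
                        if (gpc->uhva >= start && gpc->uhva < end) {
                                /*
                                 * Clearing ->needs_invalidation tells a
                                 * concurrent hva_to_pfn_retry() that the PFN
                                 * it has just looked up is already stale and
                                 * must be dropped.
                                 */
                                gpc->needs_invalidation = false;
                                gpc->valid = false;
                        }
                        write_unlock_irq(&gpc->lock);
                }
                spin_unlock(&kvm->gpc_lock);
        }

Because this now runs from the 'end' hook, any invalidation that races with
hva_to_pfn() is guaranteed to clear ->needs_invalidation before it completes,
which is what lets hva_to_pfn_retry() get away with simply waiting.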