KVM: x86/mmu: Zap defunct roots via asynchronous worker

author Paolo Bonzini <pbonzini@redhat.com>

Wed, 2 Mar 2022 14:10:29 +0000 (09:10 -0500)

committer Paolo Bonzini <pbonzini@redhat.com>

Wed, 2 Mar 2022 19:53:54 +0000 (14:53 -0500)
author Paolo Bonzini <pbonzini@redhat.com>
Wed, 2 Mar 2022 14:10:29 +0000 (09:10 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 2 Mar 2022 19:53:54 +0000 (14:53 -0500)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h

index be063b6c91b7d517c2b0f7241c6eb59be5db1ffb..1bff453f7cbe28ca83a1b962436e5d732bdb625d 100644 (file)
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -65,7 +65,13 @@ struct kvm_mmu_page {
                 struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
                 tdp_ptep_t ptep;
         };
-       DECLARE_BITMAP(unsync_child_bitmap, 512);
+       union {
+               DECLARE_BITMAP(unsync_child_bitmap, 512);
+               struct {
+                       struct work_struct tdp_mmu_async_work;
+                       void *tdp_mmu_async_data;
+               };
+       };
  
         struct list_head lpage_disallowed_link;
  #ifdef CONFIG_X86_32
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c

index 8acdd106c7205f9f129aa23fe502e03ca0680c90..d4997cf0d7e38844e668abed2000414280880513 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -81,6 +81,45 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
  static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
                              bool shared);
  
+static void tdp_mmu_zap_root_work(struct work_struct *work)
+{
+       struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
+                                                tdp_mmu_async_work);
+       struct kvm *kvm = root->tdp_mmu_async_data;
+
+       read_lock(&kvm->mmu_lock);
+
+       /*
+        * A TLB flush is not necessary as KVM performs a local TLB flush when
+        * allocating a new root (see kvm_mmu_load()), and when migrating a
+        * vCPU to a different pCPU.  Note, the local TLB flush on reuse also
+        * invalidates any paging-structure-cache entries, i.e. TLB entries for
+        * intermediate paging structures, that may be zapped, as such entries
+        * are associated with the ASID on both VMX and SVM.
+        */
+       tdp_mmu_zap_root(kvm, root, true);
+
+       /*
+        * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
+        * avoiding an infinite loop.  By design, the root is reachable while
+        * it's being asynchronously zapped, thus a different task can put its
+        * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
+        * asynchronously zapped root is unavoidable.
+        */
+       kvm_tdp_mmu_put_root(kvm, root, true);
+
+       read_unlock(&kvm->mmu_lock);
+
+       kvm_put_kvm(kvm);
+}
+
+static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+       root->tdp_mmu_async_data = kvm;
+       INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
+       schedule_work(&root->tdp_mmu_async_work);
+}
+
  static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page)
  {
         union kvm_mmu_page_role role = page->role;
@@ -106,8 +145,9 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
          * cannot acquire a reference to it because kvm_tdp_mmu_get_root()
          * rejects it.  This remains true for the rest of the execution
          * of this function, because readers visit valid roots only
-        * (except for kvm_tdp_mmu_zap_invalidated_roots(), which however
-        * does not acquire any reference itself).
+        * (except for kvm_tdp_mmu_zap_invalidated_roots() and
+        * tdp_mmu_zap_root_work(), which however rely on a reference
+        * being acquired before they're called).
          *
          * Even though there are flows that need to visit all roots for
          * correctness, they all take mmu_lock for write, so they cannot yet
@@ -127,13 +167,24 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
          */
         if (!kvm_tdp_root_mark_invalid(root)) {
                 refcount_set(&root->tdp_mmu_root_count, 1);
-               tdp_mmu_zap_root(kvm, root, shared);
  
                 /*
-                * Give back the reference that was added back above.  We now
+                * If the struct kvm is alive, we might as well zap the root
+                * in a worker.  The worker takes ownership of the reference we
+                * just added to root as well as this reference to kvm.
+                */
+               if (kvm_get_kvm_safe(kvm)) {
+                       tdp_mmu_schedule_zap_root(kvm, root);
+                       return;
+               }
+
+               /*
+                * The struct kvm is being destroyed; zap synchronously and
+                * immediately give back the reference added above.  We now
                  * know that the root is invalid, so go ahead and free it if
                  * no one has taken a reference in the meanwhile.
                  */
+               tdp_mmu_zap_root(kvm, root, shared);
                 if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
                         return;
         }
@@ -931,7 +982,11 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
  
         /*
          * Zap all roots, including invalid roots, as all SPTEs must be dropped
-        * before returning to the caller.
+        * before returning to the caller.  Zap directly even if the root is
+        * also being zapped by a worker.  Walking zapped top-level SPTEs isn't
+        * all that expensive and mmu_lock is already held, which means the
+        * worker has yielded, i.e. flushing the work instead of zapping here
+        * isn't guaranteed to be any faster.
          *
          * A TLB flush is unnecessary, KVM zaps everything if and only if the VM
          * is being destroyed or the userspace VMM has exited.  In both cases,
author	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 2 Mar 2022 14:10:29 +0000 (09:10 -0500)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 2 Mar 2022 19:53:54 +0000 (14:53 -0500)
arch/x86/kvm/mmu/mmu_internal.h		patch \| blob \| history
arch/x86/kvm/mmu/tdp_mmu.c		patch \| blob \| history