static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
			     bool shared);
+static void tdp_mmu_zap_root_work(struct work_struct *work)
+{
+	struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
+						 tdp_mmu_async_work);
+	struct kvm *kvm = root->tdp_mmu_async_data;
+
+	read_lock(&kvm->mmu_lock);
+
+	/*
+	 * A TLB flush is not necessary as KVM performs a local TLB flush when
+	 * allocating a new root (see kvm_mmu_load()), and when migrating a
+	 * vCPU to a different pCPU. Note, the local TLB flush on reuse also
+	 * invalidates any paging-structure-cache entries, i.e. TLB entries for
+	 * intermediate paging structures, that may be zapped, as such entries
+	 * are associated with the ASID on both VMX and SVM.
+	 */
+	tdp_mmu_zap_root(kvm, root, true);
+
+	/*
+	 * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
+	 * avoiding an infinite loop. By design, the root is reachable while
+	 * it's being asynchronously zapped, thus a different task can put its
+	 * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
+	 * asynchronously zapped root is unavoidable.
+	 */
+	kvm_tdp_mmu_put_root(kvm, root, true);
+
+	read_unlock(&kvm->mmu_lock);
+
+	kvm_put_kvm(kvm);
+}
+
+static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+	root->tdp_mmu_async_data = kvm;
+	INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
+	schedule_work(&root->tdp_mmu_async_work);
+}
+
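The two helpers above rely on a work_struct and a back-pointer to the VM being embedded in each root's struct kvm_mmu_page; those fields are not part of this hunk. A minimal sketch of what the corresponding struct addition is assumed to look like (the field names are taken from the code above, the placement and comments are assumptions):

struct kvm_mmu_page {
	/* ... existing fields ... */

	/* Deferred zap of this root, queued by tdp_mmu_schedule_zap_root(). */
	struct work_struct tdp_mmu_async_work;
	/* The owning struct kvm, stashed for tdp_mmu_zap_root_work(). */
	void *tdp_mmu_async_data;
};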
static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page)
{
	union kvm_mmu_page_role role = page->role;
	 * cannot acquire a reference to it because kvm_tdp_mmu_get_root()
	 * rejects it. This remains true for the rest of the execution
	 * of this function, because readers visit valid roots only
-	 * (except for kvm_tdp_mmu_zap_invalidated_roots(), which however
-	 * does not acquire any reference itself).
+	 * (except for kvm_tdp_mmu_zap_invalidated_roots() and
+	 * tdp_mmu_zap_root_work(), which however rely on a reference
+	 * being acquired before they're called).
	 *
	 * Even though there are flows that need to visit all roots for
	 * correctness, they all take mmu_lock for write, so they cannot yet
	 */
	if (!kvm_tdp_root_mark_invalid(root)) {
		refcount_set(&root->tdp_mmu_root_count, 1);
-		tdp_mmu_zap_root(kvm, root, shared);
		/*
-		 * Give back the reference that was added back above. We now
+		 * If the struct kvm is alive, we might as well zap the root
+		 * in a worker. The worker takes ownership of the reference we
+		 * just added to the root, as well as this reference to kvm.
+		 */
+		if (kvm_get_kvm_safe(kvm)) {
+			tdp_mmu_schedule_zap_root(kvm, root);
+			return;
+		}
+
+		/*
+		 * The struct kvm is being destroyed, so zap synchronously and
+		 * immediately give back the reference that was added above. We now
		 * know that the root is invalid, so go ahead and free it if
		 * no one has taken a reference in the meanwhile.
		 */
+		tdp_mmu_zap_root(kvm, root, shared);
		if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
			return;
	}
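The kvm_get_kvm_safe() check above is what chooses between the worker and the synchronous fallback: it is assumed to be a refcount_inc_not_zero()-style helper that fails only once the VM's user count has already dropped to zero, i.e. once destruction is underway. Below is a minimal, self-contained sketch of the same pin-or-run-inline pattern using a hypothetical widget object rather than KVM's structures (widget, widget_zap, and friends are made up for illustration; the real code additionally drops the VM reference via kvm_put_kvm() in the worker):

#include <linux/refcount.h>
#include <linux/workqueue.h>

struct widget {
	refcount_t users;
	/* Assumed to be set up with INIT_WORK(&w->async_work, widget_zap_work). */
	struct work_struct async_work;
};

static void widget_zap(struct widget *w)
{
	/* ... tear down the widget's contents ... */
}

static void widget_zap_work(struct work_struct *work)
{
	struct widget *w = container_of(work, struct widget, async_work);

	widget_zap(w);
	refcount_dec(&w->users);	/* drop the reference pinned below */
}

static void widget_zap_or_schedule(struct widget *w)
{
	if (refcount_inc_not_zero(&w->users)) {
		/* The worker owns the reference taken above. */
		schedule_work(&w->async_work);
		return;
	}

	/* Could not pin: teardown is in progress, zap inline. */
	widget_zap(w);
}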
	/*
	 * Zap all roots, including invalid roots, as all SPTEs must be dropped
-	 * before returning to the caller.
+	 * before returning to the caller. Zap directly even if the root is
+	 * also being zapped by a worker. Walking zapped top-level SPTEs isn't
+	 * all that expensive and mmu_lock is already held, which means the
+	 * worker has yielded, i.e. flushing the work instead of zapping here
+	 * isn't guaranteed to be any faster.
	 *
	 * A TLB flush is unnecessary, KVM zaps everything if and only if the VM
	 * is being destroyed or the userspace VMM has exited. In both cases,