*/
spin_lock(&kvm->mn_invalidate_lock);
kvm->mn_active_invalidate_count++;
+ if (likely(kvm->mn_active_invalidate_count == 1)) {
+ kvm->mmu_gpc_invalidate_range_start = range->start;
+ kvm->mmu_gpc_invalidate_range_end = range->end;
+ } else {
+ /*
+ * Fully tracking multiple concurrent ranges has diminishing
+ * returns. Keep things simple and just find the minimal range
+ * which includes the current and new ranges. As there won't be
+ * enough information to subtract a range after its invalidate
+ * completes, any ranges invalidated concurrently will
+ * accumulate and persist until all outstanding invalidates
+ * complete.
+ */
+ kvm->mmu_gpc_invalidate_range_start =
+ min(kvm->mmu_gpc_invalidate_range_start, range->start);
+ kvm->mmu_gpc_invalidate_range_end =
+ max(kvm->mmu_gpc_invalidate_range_end, range->end);
+ }
spin_unlock(&kvm->mn_invalidate_lock);
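Since only a single window is tracked, concurrent invalidations are folded together with min()/max(), and the resulting union persists until mn_active_invalidate_count drops back to zero. A minimal stand-alone sketch of that accumulation, in plain userspace C with invented names (not part of the patch):

#include <stdio.h>

#define HVA_BAD		(~0UL)

static unsigned long range_start = HVA_BAD, range_end = HVA_BAD;
static int active;

static void invalidate_start(unsigned long start, unsigned long end)
{
	if (++active == 1) {
		range_start = start;
		range_end = end;
	} else {
		/* Widen the single tracked window to cover both ranges. */
		range_start = start < range_start ? start : range_start;
		range_end = end > range_end ? end : range_end;
	}
}

static void invalidate_end(void)
{
	/* Nothing is subtracted; the union persists until active hits zero. */
	--active;
}

int main(void)
{
	invalidate_start(0x1000, 0x3000);
	invalidate_start(0x8000, 0x9000);	/* concurrent with the first */
	invalidate_end();			/* first invalidation finishes */

	/* Still reports [0x1000, 0x9000) while one invalidation remains. */
	printf("[%#lx, %#lx) active=%d\n", range_start, range_end, active);
	return 0;
}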
/*
INIT_LIST_HEAD(&kvm->gpc_list);
spin_lock_init(&kvm->gpc_lock);
+ kvm->mmu_gpc_invalidate_range_start = KVM_HVA_ERR_BAD;
+ kvm->mmu_gpc_invalidate_range_end = KVM_HVA_ERR_BAD;
INIT_LIST_HEAD(&kvm->devices);
kvm->max_vcpus = KVM_MAX_VCPUS;
#endif
}
-static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
+static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
{
/*
- * mn_active_invalidate_count acts for all intents and purposes
- * like mmu_invalidate_in_progress here; but the latter cannot
- * be used here because the invalidation of caches in the
- * mmu_notifier event occurs _before_ mmu_invalidate_in_progress
- * is elevated.
- *
- * Note, it does not matter that mn_active_invalidate_count
- * is not protected by gpc->lock. It is guaranteed to
- * be elevated before the mmu_notifier acquires gpc->lock, and
- * isn't dropped until after mmu_invalidate_seq is updated.
+ * The GPC fields used here (gpc->uhva in particular) need no extra
+ * locking: they are protected by gpc->refresh_lock, which the caller
+ * holds. The kvm-wide invalidation count and range are protected by
+ * mn_invalidate_lock, taken below.
*/
- if (kvm->mn_active_invalidate_count)
- return true;
+ guard(spinlock)(&gpc->kvm->mn_invalidate_lock);
- /*
- * Ensure mn_active_invalidate_count is read before
- * mmu_invalidate_seq. This pairs with the smp_wmb() in
- * mmu_notifier_invalidate_range_end() to guarantee either the
- * old (non-zero) value of mn_active_invalidate_count or the
- * new (incremented) value of mmu_invalidate_seq is observed.
- */
- smp_rmb();
- return kvm->mmu_invalidate_seq != mmu_seq;
+ return unlikely(gpc->kvm->mn_active_invalidate_count) &&
+ (gpc->kvm->mmu_gpc_invalidate_range_start <= gpc->uhva) &&
+ (gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
}
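Two details above are easy to miss: guard(spinlock) (from <linux/cleanup.h>) releases mn_invalidate_lock automatically when the function returns, so the whole predicate is evaluated under the lock, and the range check is half-open, so an address equal to the range end is not considered covered. A hypothetical userspace analogue of the same pattern, using a pthread mutex and a cleanup attribute in place of the kernel's guard (all names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t inv_lock = PTHREAD_MUTEX_INITIALIZER;
static int active_invalidations;
static unsigned long inv_start, inv_end;	/* half-open [start, end) */

static void unlock_cleanup(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

/*
 * Analogue of gpc_invalidations_pending(): true if an in-flight invalidation
 * covers 'uhva'. The mutex is dropped automatically when 'guard' goes out of
 * scope, i.e. after the return expression has been evaluated.
 */
static bool invalidations_pending(unsigned long uhva)
{
	pthread_mutex_t *guard __attribute__((cleanup(unlock_cleanup))) = &inv_lock;

	pthread_mutex_lock(guard);

	return active_invalidations && inv_start <= uhva && inv_end > uhva;
}

int main(void)
{
	active_invalidations = 1;
	inv_start = 0x1000;
	inv_end = 0x2000;

	printf("%d\n", invalidations_pending(0x1fff));	/* 1: last covered byte */
	printf("%d\n", invalidations_pending(0x2000));	/* 0: end is exclusive */
	return 0;
}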
static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
void *new_khva = NULL;
- unsigned long mmu_seq;
lockdep_assert_held(&gpc->refresh_lock);
gpc->valid = false;
do {
- mmu_seq = gpc->kvm->mmu_invalidate_seq;
- smp_rmb();
-
/*
* The translation made by hva_to_pfn() below could be made
* invalid as soon as it's mapped. But the uhva is already
write_unlock_irq(&gpc->lock);
+ /*
+ * Invalidation occurs from the invalidate_range_start() hook,
+ * which could already have happened before __kvm_gpc_refresh()
+ * (or the previous turn around this loop) took gpc->lock.
+ * If so, and if the corresponding invalidate_range_end() hook
+ * hasn't happened yet, hva_to_pfn() could return a mapping
+ * which is about to be stale and which should not be used. So
+ * check if there are any currently-running invalidations which
+ * affect the uHVA of this GPC, and retry if there are. Any
+ * invalidation which starts after gpc->needs_invalidation is
+ * set is fine, because it will clear that flag and trigger a
+ * retry. And any invalidation which *completes* by having its
+ * invalidate_range_end() hook called immediately prior to this
+ * check is also fine, because the page tables are guaranteed
+ * to have been changed already, so hva_to_pfn() won't return
+ * a stale mapping in that case anyway.
+ */
+ while (gpc_invalidations_pending(gpc))
+ cond_resched();
+
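The retry logic in the comment above hinges on gpc->needs_invalidation: it is (re)armed each time around the loop before gpc->lock is dropped (in the part of this hunk not shown here), any invalidate_range_start() that races with the mapping clears it, and the outer do/while then goes around again. A single-threaded sketch of that handshake, with invented names and the invalidator called inline to stand in for the racing notifier:

#include <stdbool.h>
#include <stdio.h>

static bool needs_invalidation;

/* Stand-in for invalidate_range_start() knocking the refresh off course. */
static void simulated_invalidate(void)
{
	needs_invalidation = false;
}

int main(void)
{
	int attempts = 0;

	do {
		/* Re-arm the flag before "dropping the lock" and translating. */
		needs_invalidation = true;
		attempts++;

		/* hva_to_pfn() would run here; simulate a race on pass one. */
		if (attempts == 1)
			simulated_invalidate();

	} while (!needs_invalidation);

	printf("mapping established after %d attempt(s)\n", attempts);
	return 0;
}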
/*
* If the previous iteration "failed" due to an mmu_notifier
* event, release the pfn and unmap the kernel virtual address
goto out_error;
}
+
write_lock_irq(&gpc->lock);
/*
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
- } while (!gpc->needs_invalidation ||
- mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
+ } while (!gpc->needs_invalidation);
gpc->valid = true;
gpc->pfn = new_pfn;