From: David Woodhouse
Date: Fri, 2 Aug 2024 22:09:58 +0000 (+0100)
Subject: KVM: Avoid livelock in pfncache hva_to_pfn_retry()
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fpfncache;p=users%2Fdwmw2%2Flinux.git

KVM: Avoid livelock in pfncache hva_to_pfn_retry()

In some circumstances, the gpc_map() in the mmu_notifier retry loop can
cause it to livelock because it invalidates itself every time round the
loop.

Shift the gpc_map() outside the mmu_notifier_retry_cache() loop by using
a new gpc->pfn_valid flag to signal *partial* setup. It's used only by
the invalidation callback, to invalidate the GPC before we've even
finished mapping it.

Signed-off-by: David Woodhouse
---

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 827ecc0b7e10a..5e37421de9e55 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -69,6 +69,7 @@ struct gfn_to_pfn_cache {
 	void *khva;
 	kvm_pfn_t pfn;
 	bool active;
+	bool pfn_valid;
 	bool valid;
 };
 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index f0039efb9e1e3..581891fec2fef 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -32,7 +32,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 		read_lock_irq(&gpc->lock);
 
 		/* Only a single page so no need to care about length */
-		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+		if (gpc->pfn_valid && !is_error_noslot_pfn(gpc->pfn) &&
 		    gpc->uhva >= start && gpc->uhva < end) {
 			read_unlock_irq(&gpc->lock);
 
@@ -45,9 +45,11 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 			 */
 
 			write_lock_irq(&gpc->lock);
-			if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
-			    gpc->uhva >= start && gpc->uhva < end)
+			if (gpc->pfn_valid && !is_error_noslot_pfn(gpc->pfn) &&
+			    gpc->uhva >= start && gpc->uhva < end) {
+				gpc->pfn_valid = false;
 				gpc->valid = false;
+			}
 			write_unlock_irq(&gpc->lock);
 			continue;
 		}
@@ -164,23 +166,24 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
 	lockdep_assert_held_write(&gpc->lock);
 
-	/*
-	 * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
-	 * assets have already been updated and so a concurrent check() from a
-	 * different task may not fail the gpa/uhva/generation checks.
-	 */
-	gpc->valid = false;
-
 	do {
 		mmu_seq = gpc->kvm->mmu_invalidate_seq;
 		smp_rmb();
 
+		/*
+		 * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
+		 * assets have already been updated and so a concurrent check() from a
+		 * different task may not fail the gpa/uhva/generation checks.
+		 */
+		gpc->pfn_valid = false;
+		gpc->valid = false;
+
 		write_unlock_irq(&gpc->lock);
 
 		/*
 		 * If the previous iteration "failed" due to an mmu_notifier
 		 * event, release the pfn and unmap the kernel virtual address
-		 * from the previous attempt.  Unmapping might sleep, so this
+		 * from the previous attempt. Unmapping might sleep, so this
 		 * needs to be done after dropping the lock.  Opportunistically
 		 * check for resched while the lock isn't held.
 		 */
@@ -189,8 +192,10 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 			 * Keep the mapping if the previous iteration reused
 			 * the existing mapping and didn't create a new one.
 			 */
-			if (new_khva != old_khva)
+			if (new_khva != old_khva) {
 				gpc_unmap(new_pfn, new_khva);
+				new_khva = NULL;
+			}
 
 			kvm_release_pfn_clean(new_pfn);
 
@@ -202,11 +207,29 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		if (is_error_noslot_pfn(new_pfn))
 			goto out_error;
 
+		write_lock_irq(&gpc->lock);
+
 		/*
-		 * Obtain a new kernel mapping if KVM itself will access the
-		 * pfn.  Note, kmap() and memremap() can both sleep, so this
-		 * too must be done outside of gpc->lock!
+		 * Other tasks must wait for _this_ refresh to complete before
+		 * attempting to refresh.
 		 */
+		WARN_ON_ONCE(gpc->pfn_valid);
+		WARN_ON_ONCE(gpc->valid);
+
+		if (mmu_notifier_retry_cache(gpc->kvm, mmu_seq))
+			continue;
+
+		gpc->pfn_valid = true;
+
+		/*
+		 * Obtain a new kernel mapping of the PFN. Drop the lock first
+		 * to avoid gpc_map() from triggering MMU notifications as it
+		 * does so. The pfn_valid is used only during validation, to
+		 * allow the notifier to invalidate this GPC even before it is
+		 * fully mapped and valid.
+		 */
+		write_unlock_irq(&gpc->lock);
+
 		if (new_pfn == gpc->pfn)
 			new_khva = old_khva;
 		else
@@ -218,13 +241,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		}
 
 		write_lock_irq(&gpc->lock);
-
-		/*
-		 * Other tasks must wait for _this_ refresh to complete before
-		 * attempting to refresh.
-		 */
-		WARN_ON_ONCE(gpc->valid);
-	} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
+	} while (!gpc->pfn_valid);
 
 	gpc->valid = true;
 	gpc->pfn = new_pfn;
@@ -338,7 +355,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
 	 * valid, leave it as is.
 	 */
 	if (ret) {
-		gpc->valid = false;
+		gpc->valid = gpc->pfn_valid = false;
 		gpc->pfn = KVM_PFN_ERR_FAULT;
 		gpc->khva = NULL;
 	}
@@ -383,7 +400,7 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
 	gpc->pfn = KVM_PFN_ERR_FAULT;
 	gpc->gpa = INVALID_GPA;
 	gpc->uhva = KVM_HVA_ERR_BAD;
-	gpc->active = gpc->valid = false;
+	gpc->active = gpc->valid = gpc->pfn_valid = false;
 }
 
 static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
@@ -453,6 +470,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 		write_lock_irq(&gpc->lock);
 		gpc->active = false;
 		gpc->valid = false;
+		gpc->pfn_valid = false;
 
 		/*
 		 * Leave the GPA => uHVA cache intact, it's protected by the
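
For reference, below is a condensed sketch of the resulting hva_to_pfn_retry()
flow with this patch applied, pieced together from the hunks above. Locals,
error handling and the pfn acquisition/release details are elided, so it is
illustrative rather than the verbatim source.

static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
	lockdep_assert_held_write(&gpc->lock);

	do {
		mmu_seq = gpc->kvm->mmu_invalidate_seq;
		smp_rmb();

		/* Mark both flags invalid before dropping gpc->lock. */
		gpc->pfn_valid = false;
		gpc->valid = false;

		write_unlock_irq(&gpc->lock);

		/* ... release the previous pfn/khva, call hva_to_pfn() ... */

		write_lock_irq(&gpc->lock);

		/* A racing invalidation means the new pfn may already be stale. */
		if (mmu_notifier_retry_cache(gpc->kvm, mmu_seq))
			continue;

		/*
		 * Partial setup: from here the invalidation callback may clear
		 * pfn_valid, which makes the loop condition below retry.
		 */
		gpc->pfn_valid = true;

		/*
		 * Drop the lock so any MMU notifications triggered by gpc_map()
		 * do not run under gpc->lock.
		 */
		write_unlock_irq(&gpc->lock);

		/* ... gpc_map(new_pfn), or reuse the existing mapping ... */

		write_lock_irq(&gpc->lock);
	} while (!gpc->pfn_valid);

	gpc->valid = true;
	/* ... publish gpc->pfn and gpc->khva ... */
}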