]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
KVM: arm64: Split kvm_pgtable_stage2_destroy()
authorRaghavendra Rao Ananta <rananta@google.com>
Wed, 20 Aug 2025 16:22:41 +0000 (16:22 +0000)
committerOliver Upton <oliver.upton@linux.dev>
Thu, 21 Aug 2025 23:27:03 +0000 (16:27 -0700)
Split kvm_pgtable_stage2_destroy() into two:
  - kvm_pgtable_stage2_destroy_range(), that performs the
    page-table walk and free the entries over a range of addresses.
  - kvm_pgtable_stage2_destroy_pgd(), that frees the PGD.

This refactoring enables subsequent patches to free large page-tables
in chunks, calling cond_resched() between each chunk, to yield the
CPU as necessary.

Existing callers of kvm_pgtable_stage2_destroy(), that probably cannot
take advantage of this (such as nVMHE), will continue to function as is.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Suggested-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250820162242.2624752-2-rananta@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/kvm_pkvm.h
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pkvm.c

index 2888b5d037573621c4e126a42f5f21ff9e30b9bd..1246216616b518ec2d35992c12a48a5ae4449568 100644 (file)
@@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
        return pteref;
 }
 
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+       return pteref;
+}
+
 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
        /*
@@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
        return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
 }
 
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+       return rcu_dereference_raw(pteref);
+}
+
 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
        if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -551,6 +561,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
  */
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
+/**
+ * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
+ * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr:      Intermediate physical address at which to place the mapping.
+ * @size:      Size of the mapping.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+                                       u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
+ * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * It is assumed that the rest of the page-table is freed before this operation.
+ */
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+
 /**
  * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
  * @mm_ops:    Memory management callbacks.
index ea58282f59bb4fe8841064bfafffe9af3c17b667..35f9d94780048c606c3717cb7a3295f9a695f2e9 100644 (file)
@@ -179,7 +179,9 @@ struct pkvm_mapping {
 
 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                             struct kvm_pgtable_mm_ops *mm_ops);
-void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+                                       u64 addr, u64 size);
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                            enum kvm_pgtable_prot prot, void *mc,
                            enum kvm_pgtable_walk_flags flags);
index c351b4abd5dbfbcbe738ba0f9efc41a1c99ecc05..c36f282a175dfc336a83ee913726de90194ad5cb 100644 (file)
@@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
        return 0;
 }
 
-void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+                                      u64 addr, u64 size)
 {
-       size_t pgd_sz;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_free_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF |
                          KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
+       WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
+}
+
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
+{
+       size_t pgd_sz;
+
        pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
-       pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
+
+       /*
+        * Since the pgtable is unlinked at this point, and not shared with
+        * other walkers, safely deference pgd with kvm_dereference_pteref_raw()
+        */
+       pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
        pgt->pgd = NULL;
 }
 
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+       kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
+       kvm_pgtable_stage2_destroy_pgd(pgt);
+}
+
 void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
 {
        kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
index 9a45daf817bfd2f5fe6d479720be19b43857f61c..6330a02c8418667aa9fcb8fd78df0e52311da7d9 100644 (file)
@@ -904,6 +904,14 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
        return 0;
 }
 
+static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
+{
+       unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
+
+       KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, 0, BIT(ia_bits));
+       KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
+}
+
 /**
  * kvm_init_stage2_mmu - Initialise a S2 MMU structure
  * @kvm:       The pointer to the KVM structure
@@ -980,7 +988,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
        return 0;
 
 out_destroy_pgtable:
-       KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
+       kvm_stage2_destroy(pgt);
 out_free_pgtable:
        kfree(pgt);
        return err;
@@ -1077,7 +1085,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
        write_unlock(&kvm->mmu_lock);
 
        if (pgt) {
-               KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
+               kvm_stage2_destroy(pgt);
                kfree(pgt);
        }
 }
index fcd70bfe44fb8cce247900c77721a202b46563d8..61827cf6fea4aafa926f71f5f15e50655535a8b5 100644 (file)
@@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
        return 0;
 }
 
-void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+                                       u64 addr, u64 size)
 {
-       __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
+       __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+}
+
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
+{
+       /* Expected to be called after all pKVM mappings have been released. */
+       WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
 }
 
 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,