]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: arm64: Plumb the pKVM MMU in KVM
authorQuentin Perret <qperret@google.com>
Wed, 18 Dec 2024 19:40:59 +0000 (19:40 +0000)
committerMarc Zyngier <maz@kernel.org>
Fri, 20 Dec 2024 09:44:00 +0000 (09:44 +0000)
Introduce the KVM_PGT_CALL() helper macro to allow switching from the
traditional pgtable code to the pKVM version easily in mmu.c. The cost
of this 'indirection' is expected to be very minimal due to
is_protected_kvm_enabled() being backed by a static key.

With this, everything is in place to allow the delegation of
non-protected guest stage-2 page-tables to pKVM, so let's stop using the
host's kvm_s2_mmu from EL2 and enjoy the ride.

Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20241218194059.3670226-19-qperret@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/mmu.c

index 66d93e320ec8eec222e4fc43b99d414ef0a0eac1..d116ab4230e81bd504865b6fa444b8f4640a5f10 100644 (file)
@@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
        return &kvm->arch.mmu != mmu;
 }
 
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+       if (is_protected_kvm_enabled())
+               write_lock(&kvm->mmu_lock);
+       else
+               read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+       if (is_protected_kvm_enabled())
+               write_unlock(&kvm->mmu_lock);
+       else
+               read_unlock(&kvm->mmu_lock);
+}
+
 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
 void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
 #else
index 55cc62b2f4691f06b379af94caafda0369d2afd4..9bcbc7b8ed38ec24bde333da45956156a75a6a2d 100644 (file)
@@ -502,7 +502,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-       kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+       if (!is_protected_kvm_enabled())
+               kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+       else
+               free_hyp_memcache(&vcpu->arch.pkvm_memcache);
        kvm_timer_vcpu_terminate(vcpu);
        kvm_pmu_vcpu_destroy(vcpu);
        kvm_vgic_vcpu_destroy(vcpu);
@@ -574,6 +577,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        struct kvm_s2_mmu *mmu;
        int *last_ran;
 
+       if (is_protected_kvm_enabled())
+               goto nommu;
+
        if (vcpu_has_nv(vcpu))
                kvm_vcpu_load_hw_mmu(vcpu);
 
@@ -594,6 +600,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                *last_ran = vcpu->vcpu_idx;
        }
 
+nommu:
        vcpu->cpu = cpu;
 
        kvm_vgic_load(vcpu);
index 130f5f23bcb589ca94766dc5c13d44bc03df7d07..258d572eed62f97e44166ae659f5860f2b7e0a69 100644 (file)
@@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
        /* Limit guest vector length to the maximum supported by the host.  */
        hyp_vcpu->vcpu.arch.sve_max_vl  = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
 
-       hyp_vcpu->vcpu.arch.hw_mmu      = host_vcpu->arch.hw_mmu;
-
        hyp_vcpu->vcpu.arch.mdcr_el2    = host_vcpu->arch.mdcr_el2;
        hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
        hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
index 641e4fec16593cb12b9db3b15e04b1deea98c87e..9403524c11c614e1c8ac39ee61dc445411ae988b 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
 #include <asm/kvm_ras.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
@@ -31,6 +32,8 @@ static phys_addr_t __ro_after_init hyp_idmap_vector;
 
 static unsigned long __ro_after_init io_map_base;
 
+#define KVM_PGT_FN(fn)         (!is_protected_kvm_enabled() ? fn : p ## fn)
+
 static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
                                           phys_addr_t size)
 {
@@ -147,7 +150,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
                        return -EINVAL;
 
                next = __stage2_range_addr_end(addr, end, chunk_size);
-               ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
+               ret = KVM_PGT_FN(kvm_pgtable_stage2_split)(pgt, addr, next - addr, cache);
                if (ret)
                        break;
        } while (addr = next, addr != end);
@@ -168,15 +171,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
  */
 int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-       kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+       if (is_protected_kvm_enabled())
+               kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+       else
+               kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
        return 0;
 }
 
 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
                                      gfn_t gfn, u64 nr_pages)
 {
-       kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
-                               gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+       u64 size = nr_pages << PAGE_SHIFT;
+       u64 addr = gfn << PAGE_SHIFT;
+
+       if (is_protected_kvm_enabled())
+               kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+       else
+               kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size);
        return 0;
 }
 
@@ -225,7 +236,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
        void *pgtable = page_to_virt(page);
        s8 level = page_private(page);
 
-       kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
+       KVM_PGT_FN(kvm_pgtable_stage2_free_unlinked)(&kvm_s2_mm_ops, pgtable, level);
 }
 
 static void stage2_free_unlinked_table(void *addr, s8 level)
@@ -324,7 +335,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
 
        lockdep_assert_held_write(&kvm->mmu_lock);
        WARN_ON(size & ~PAGE_MASK);
-       WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
+       WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap),
                                   may_block));
 }
 
@@ -336,7 +347,7 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
 
 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
 {
-       stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush);
+       stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_flush));
 }
 
 static void stage2_flush_memslot(struct kvm *kvm,
@@ -942,10 +953,14 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
                return -ENOMEM;
 
        mmu->arch = &kvm->arch;
-       err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
+       err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops);
        if (err)
                goto out_free_pgtable;
 
+       mmu->pgt = pgt;
+       if (is_protected_kvm_enabled())
+               return 0;
+
        mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
        if (!mmu->last_vcpu_ran) {
                err = -ENOMEM;
@@ -959,7 +974,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
        mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
        mmu->split_page_cache.gfp_zero = __GFP_ZERO;
 
-       mmu->pgt = pgt;
        mmu->pgd_phys = __pa(pgt->pgd);
 
        if (kvm_is_nested_s2_mmu(kvm, mmu))
@@ -968,7 +982,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
        return 0;
 
 out_destroy_pgtable:
-       kvm_pgtable_stage2_destroy(pgt);
+       KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
 out_free_pgtable:
        kfree(pgt);
        return err;
@@ -1065,7 +1079,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
        write_unlock(&kvm->mmu_lock);
 
        if (pgt) {
-               kvm_pgtable_stage2_destroy(pgt);
+               KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
                kfree(pgt);
        }
 }
@@ -1082,9 +1096,11 @@ static void *hyp_mc_alloc_fn(void *unused)
 
 void free_hyp_memcache(struct kvm_hyp_memcache *mc)
 {
-       if (is_protected_kvm_enabled())
-               __free_hyp_memcache(mc, hyp_mc_free_fn,
-                                   kvm_host_va, NULL);
+       if (!is_protected_kvm_enabled())
+               return;
+
+       kfree(mc->mapping);
+       __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL);
 }
 
 int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
@@ -1092,6 +1108,12 @@ int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
        if (!is_protected_kvm_enabled())
                return 0;
 
+       if (!mc->mapping) {
+               mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT);
+               if (!mc->mapping)
+                       return -ENOMEM;
+       }
+
        return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
                                    kvm_host_pa, NULL);
 }
@@ -1130,8 +1152,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
                        break;
 
                write_lock(&kvm->mmu_lock);
-               ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
-                                            &cache, 0);
+               ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, addr, PAGE_SIZE,
+                                pa, prot, &cache, 0);
                write_unlock(&kvm->mmu_lock);
                if (ret)
                        break;
@@ -1151,7 +1173,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
  */
 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
 {
-       stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
+       stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_wrprotect));
 }
 
 /**
@@ -1442,9 +1464,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        unsigned long mmu_seq;
        phys_addr_t ipa = fault_ipa;
        struct kvm *kvm = vcpu->kvm;
-       struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        short vma_shift;
+       void *memcache;
        gfn_t gfn;
        kvm_pfn_t pfn;
        bool logging_active = memslot_is_logging(memslot);
@@ -1472,8 +1494,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * and a write fault needs to collapse a block entry into a table.
         */
        if (!fault_is_perm || (logging_active && write_fault)) {
-               ret = kvm_mmu_topup_memory_cache(memcache,
-                                                kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+               int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
+
+               if (!is_protected_kvm_enabled()) {
+                       memcache = &vcpu->arch.mmu_page_cache;
+                       ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
+               } else {
+                       memcache = &vcpu->arch.pkvm_memcache;
+                       ret = topup_hyp_memcache(memcache, min_pages);
+               }
                if (ret)
                        return ret;
        }
@@ -1494,7 +1523,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * logging_active is guaranteed to never be true for VM_PFNMAP
         * memslots.
         */
-       if (logging_active) {
+       if (logging_active || is_protected_kvm_enabled()) {
                force_pte = true;
                vma_shift = PAGE_SHIFT;
        } else {
@@ -1634,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                prot |= kvm_encode_nested_level(nested);
        }
 
-       read_lock(&kvm->mmu_lock);
+       kvm_fault_lock(kvm);
        pgt = vcpu->arch.hw_mmu->pgt;
        if (mmu_invalidate_retry(kvm, mmu_seq)) {
                ret = -EAGAIN;
@@ -1696,16 +1725,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 * PTE, which will be preserved.
                 */
                prot &= ~KVM_NV_GUEST_MAP_SZ;
-               ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot, flags);
+               ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags);
        } else {
-               ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
+               ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
                                             __pfn_to_phys(pfn), prot,
                                             memcache, flags);
        }
 
 out_unlock:
        kvm_release_faultin_page(kvm, page, !!ret, writable);
-       read_unlock(&kvm->mmu_lock);
+       kvm_fault_unlock(kvm);
 
        /* Mark the page dirty only if the fault is handled successfully */
        if (writable && !ret)
@@ -1724,7 +1753,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
        read_lock(&vcpu->kvm->mmu_lock);
        mmu = vcpu->arch.hw_mmu;
-       kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa, flags);
+       KVM_PGT_FN(kvm_pgtable_stage2_mkyoung)(mmu->pgt, fault_ipa, flags);
        read_unlock(&vcpu->kvm->mmu_lock);
 }
 
@@ -1764,7 +1793,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                }
 
                /* Falls between the IPA range and the PARange? */
-               if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+               if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
                        fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
 
                        if (is_iabt)
@@ -1930,7 +1959,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
        if (!kvm->arch.mmu.pgt)
                return false;
 
-       return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+       return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
                                                   range->start << PAGE_SHIFT,
                                                   size, true);
        /*
@@ -1946,7 +1975,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
        if (!kvm->arch.mmu.pgt)
                return false;
 
-       return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+       return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
                                                   range->start << PAGE_SHIFT,
                                                   size, false);
 }