From: Marc Zyngier Date: Mon, 5 Dec 2022 14:30:49 +0000 (+0000) Subject: Merge branch kvm-arm64/pkvm-vcpu-state into kvmarm-master/next X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=cfa72993d13302fe958a1a58234a1a8efa72a5b8;p=users%2Fhch%2Fblock.git Merge branch kvm-arm64/pkvm-vcpu-state into kvmarm-master/next * kvm-arm64/pkvm-vcpu-state: (25 commits) : . : Large drop of pKVM patches from Will Deacon and co, adding : a private vm/vcpu state at EL2, managed independently from : the EL1 state. From the cover letter: : : "This is version six of the pKVM EL2 state series, extending the pKVM : hypervisor code so that it can dynamically instantiate and manage VM : data structures without the host being able to access them directly. : These structures consist of a hyp VM, a set of hyp vCPUs and the stage-2 : page-table for the MMU. The pages used to hold the hypervisor structures : are returned to the host when the VM is destroyed." : . KVM: arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run() KVM: arm64: Don't unnecessarily map host kernel sections at EL2 KVM: arm64: Explicitly map 'kvm_vgic_global_state' at EL2 KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits' at EL2 KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from the host KVM: arm64: Return guest memory from EL2 via dedicated teardown memcache KVM: arm64: Instantiate guest stage-2 page-tables at EL2 KVM: arm64: Consolidate stage-2 initialisation into a single function KVM: arm64: Add generic hyp_memcache helpers KVM: arm64: Provide I-cache invalidation by virtual address at EL2 KVM: arm64: Initialise hypervisor copies of host symbols unconditionally KVM: arm64: Add per-cpu fixmap infrastructure at EL2 KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1 KVM: arm64: Add infrastructure to create and track pKVM instances at EL2 KVM: arm64: Rename 'host_kvm' to 'host_mmu' KVM: arm64: Add hyp_spinlock_t static initializer KVM: arm64: Include asm/kvm_mmu.h in nvhe/mem_protect.h KVM: arm64: Add helpers to pin memory shared with the hypervisor at EL2 KVM: arm64: Prevent the donation of no-map pages KVM: arm64: Implement do_donate() helper for donating memory ... Signed-off-by: Marc Zyngier --- cfa72993d13302fe958a1a58234a1a8efa72a5b8 diff --cc arch/arm64/kvm/arm.c index b4123fd15789,1d4b8122d010..1d1a3b93a3de --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@@ -2043,9 -2071,11 +2060,11 @@@ static int init_hyp_mode(void } /* Prepare the CPU initialization parameters */ - cpu_prepare_hyp_mode(cpu); + cpu_prepare_hyp_mode(cpu, hyp_va_bits); } + kvm_hyp_init_symbols(); + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); diff --cc arch/arm64/kvm/hyp/nvhe/mem_protect.c index 55326b1bd247,94cd48f7850e..552653fa18be --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@@ -79,11 -91,6 +91,11 @@@ static void host_s2_put_page(void *addr hyp_put_page(&host_s2_pool, addr); } +static void host_s2_free_removed_table(void *addr, u32 level) +{ - kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level); ++ kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@@ -95,10 -102,9 +107,10 @@@ if (ret) return ret; - host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { + host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, + .free_removed_table = host_s2_free_removed_table, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, @@@ -256,8 -397,8 +403,8 @@@ static bool range_is_memory(u64 start, static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { - return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, + return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start, - prot, &host_s2_pool); + prot, &host_s2_pool, 0); } /* @@@ -423,15 -565,18 +571,15 @@@ struct check_walk_data enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); }; -static int __check_page_state_visitor(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct check_walk_data *d = arg; - kvm_pte_t pte = *ptep; + struct check_walk_data *d = ctx->arg; - if (kvm_pte_valid(ctx->old) && !addr_is_memory(kvm_pte_to_phys(ctx->old))) - if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte))) ++ if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old))) return -EINVAL; - return d->get_page_state(pte) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --cc arch/arm64/kvm/hyp/nvhe/mm.c index 96193cb31a39,c80b2c007619..318298eb3d6b --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@@ -189,6 -219,103 +219,102 @@@ int hyp_map_vectors(void return 0; } + void *hyp_fixmap_map(phys_addr_t phys) + { + struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); + kvm_pte_t pte, *ptep = slot->ptep; + + pte = *ptep; + pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID); + pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID; + WRITE_ONCE(*ptep, pte); + dsb(ishst); + + return (void *)slot->addr; + } + + static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) + { + kvm_pte_t *ptep = slot->ptep; + u64 addr = slot->addr; + + WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); + + /* + * Irritatingly, the architecture requires that we use inner-shareable + * broadcast TLB invalidation here in case another CPU speculates + * through our fixmap and decides to create an "amalagamation of the + * values held in the TLB" due to the apparent lack of a + * break-before-make sequence. + * + * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 + */ + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + dsb(ish); + isb(); + } + + void hyp_fixmap_unmap(void) + { + fixmap_clear_slot(this_cpu_ptr(&fixmap_slots)); + } + -static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) ++static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx, ++ enum kvm_pgtable_walk_flags visit) + { - struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg); ++ struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg); + - if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1) ++ if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + - slot->addr = addr; - slot->ptep = ptep; ++ slot->addr = ctx->addr; ++ slot->ptep = ctx->ptep; + + /* + * Clear the PTE, but keep the page-table page refcount elevated to + * prevent it from ever being freed. This lets us manipulate the PTEs + * by hand safely without ever needing to allocate memory. + */ + fixmap_clear_slot(slot); + + return 0; + } + + static int create_fixmap_slot(u64 addr, u64 cpu) + { + struct kvm_pgtable_walker walker = { + .cb = __create_fixmap_slot_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = (void *)cpu, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); + } + + int hyp_create_pcpu_fixmap(void) + { + unsigned long addr, i; + int ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr); + if (ret) + return ret; + + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) + return ret; + + ret = create_fixmap_slot(addr, i); + if (ret) + return ret; + } + + return 0; + } + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; diff --cc arch/arm64/kvm/hyp/nvhe/setup.c index 1068338d77f3,5cdf3fb09bb4..110f04627785 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@@ -186,30 -189,23 +189,20 @@@ static void hpool_put_page(void *addr hyp_put_page(&hpool, addr); } - static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx, - enum kvm_pgtable_walk_flags visit) -static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) ++static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, ++ enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ptep; phys_addr_t phys; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; - /* - * Fix-up the refcount for the page-table pages as the early allocator - * was unable to access the hyp_vmemmap and so the buddy allocator has - * initialised the refcount to '1'. - */ - mm_ops->get_page(ctx->ptep); - if (visit != KVM_PGTABLE_WALK_LEAF) - return 0; - - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; - phys = kvm_pte_to_phys(pte); + phys = kvm_pte_to_phys(ctx->old); if (!addr_is_memory(phys)) return -EINVAL; @@@ -217,10 -213,10 +210,10 @@@ * Adjust the host stage-2 mappings to match the ownership attributes * configured in the hypervisor stage-1. */ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); switch (state) { case PKVM_PAGE_OWNED: - return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); break; @@@ -234,11 -230,30 +227,25 @@@ return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); } - static int finalize_host_mappings(void) -static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) ++static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx, ++ enum kvm_pgtable_walk_flags visit) + { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; - + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ - if (kvm_pte_valid(pte)) - mm_ops->get_page(ptep); ++ if (kvm_pte_valid(ctx->old)) ++ ctx->mm_ops->get_page(ctx->ptep); + + return 0; + } + + static int fix_host_ownership(void) { struct kvm_pgtable_walker walker = { - .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .cb = fix_host_ownership_walker, + .flags = KVM_PGTABLE_WALK_LEAF, }; int i, ret; diff --cc arch/arm64/kvm/hyp/pgtable.c index 58dbe0ab567f,2bcb2d5903ba..b11cf2c618a6 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@@ -1217,18 -1188,29 +1205,27 @@@ int __kvm_pgtable_stage2_init(struct kv return 0; } + size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) + { + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; + } + -static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!stage2_pte_is_counted(pte)) + if (!stage2_pte_is_counted(ctx->old)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; }