KVM: arm64: Introduce the EL1 pKVM MMU
author    Quentin Perret <qperret@google.com>
          Wed, 18 Dec 2024 19:40:58 +0000 (19:40 +0000)
committer Marc Zyngier <maz@kernel.org>
          Fri, 20 Dec 2024 09:44:00 +0000 (09:44 +0000)
Introduce a set of helper functions that allow manipulating the pKVM
guest stage-2 page-tables from EL1 using pKVM's HVC interface.

Each helper has an exact one-to-one correspondence with the traditional
kvm_pgtable_stage2_*() functions from pgtable.c, with a strictly
matching prototype. This will ease the plumbing later on in mmu.c.
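
For illustration only, the matching prototypes mean a caller in mmu.c could
eventually dispatch between the two backends along these lines (a minimal
sketch; the wrapper name below is made up, and the real plumbing is done in a
later patch):

	/*
	 * Hypothetical wrapper, not part of this patch: pick the pKVM or the
	 * regular stage-2 implementation at the call site. Both calls share
	 * the exact same prototype.
	 */
	static int stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			      enum kvm_pgtable_prot prot, void *mc,
			      enum kvm_pgtable_walk_flags flags)
	{
		if (is_protected_kvm_enabled())
			return pkvm_pgtable_stage2_map(pgt, addr, size, phys, prot, mc, flags);

		return kvm_pgtable_stage2_map(pgt, addr, size, phys, prot, mc, flags);
	}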

These helpers track the gfn->pfn mappings in a simple rb-tree indexed
by IPA, in lieu of a page-table. The rb-tree is kept in sync with pKVM's
state and is protected by the mmu_lock, like a traditional stage-2
page-table.
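
As a minimal sketch of what that tracking amounts to (illustrative only,
based on the struct pkvm_mapping layout introduced below; this helper is not
part of the patch):

	/*
	 * Illustrative gfn lookup against the rb-tree, mirroring what a
	 * stage-2 table walk would do. The tree must be walked with the
	 * mmu_lock held, like a real stage-2 page-table.
	 */
	static struct pkvm_mapping *pkvm_find_mapping(struct kvm_pgtable *pgt, u64 gfn)
	{
		struct rb_node *node = pgt->pkvm_mappings.rb_node;

		lockdep_assert_held(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

		while (node) {
			struct pkvm_mapping *m = rb_entry(node, struct pkvm_mapping, node);

			if (gfn < m->gfn)
				node = node->rb_left;
			else if (gfn > m->gfn)
				node = node->rb_right;
			else
				return m;	/* m->pfn backs this gfn */
		}

		return NULL;	/* gfn not mapped */
	}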

Signed-off-by: Quentin Perret <qperret@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20241218194059.3670226-18-qperret@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/kvm_pkvm.h
arch/arm64/kvm/pkvm.c

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1246f1d01dbf604439c64a0a47afa094b1bc725f..f23f4ea9ec8bac5590ff6e314b402f71da7b693a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
 struct kvm_hyp_memcache {
        phys_addr_t head;
        unsigned long nr_pages;
+       struct pkvm_mapping *mapping; /* only used from EL1 */
 };
 
 static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 04418b5e3004e63d8bff99636e60c8c1a60085c6..6b9d274052c7a777729da58934c5da684dad1a65 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -412,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
  *                     be used instead of block mappings.
  */
 struct kvm_pgtable {
-       u32                                     ia_bits;
-       s8                                      start_level;
-       kvm_pteref_t                            pgd;
-       struct kvm_pgtable_mm_ops               *mm_ops;
-
-       /* Stage-2 only */
-       struct kvm_s2_mmu                       *mmu;
-       enum kvm_pgtable_stage2_flags           flags;
-       kvm_pgtable_force_pte_cb_t              force_pte_cb;
+       union {
+               struct rb_root                                  pkvm_mappings;
+               struct {
+                       u32                                     ia_bits;
+                       s8                                      start_level;
+                       kvm_pteref_t                            pgd;
+                       struct kvm_pgtable_mm_ops               *mm_ops;
+
+                       /* Stage-2 only */
+                       enum kvm_pgtable_stage2_flags           flags;
+                       kvm_pgtable_force_pte_cb_t              force_pte_cb;
+               };
+       };
+       struct kvm_s2_mmu                                       *mmu;
 };
 
 /**
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index cd56acd9a842ccfaaa486cb4fef22e3777b90172..65f988b6fe0dcabba8c9afbef4acfea98b0262de 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -137,4 +137,30 @@ static inline size_t pkvm_host_sve_state_size(void)
                        SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
 }
 
+struct pkvm_mapping {
+       struct rb_node node;
+       u64 gfn;
+       u64 pfn;
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+                            struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+                           enum kvm_pgtable_prot prot, void *mc,
+                           enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+                                   enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+                                enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                             struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+                                              enum kvm_pgtable_prot prot, void *mc,
+                                              bool force_pte);
 #endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 85117ea8f351553fcdb18c7c1147ad2043f6545f..930b677eb9b0a76e3c3f211abecdf5f3e59ae82c 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/kmemleak.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
 #include <linux/sort.h>
@@ -268,3 +269,203 @@ static int __init finalize_pkvm(void)
        return ret;
 }
 device_initcall_sync(finalize_pkvm);
+
+static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+{
+       struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
+       struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
+
+       if (a->gfn < b->gfn)
+               return -1;
+       if (a->gfn > b->gfn)
+               return 1;
+       return 0;
+}
+
+static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+{
+       struct rb_node *node = root->rb_node, *prev = NULL;
+       struct pkvm_mapping *mapping;
+
+       while (node) {
+               mapping = rb_entry(node, struct pkvm_mapping, node);
+               if (mapping->gfn == gfn)
+                       return node;
+               prev = node;
+               node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
+       }
+
+       return prev;
+}
+
+/*
+ * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
+ * of __map inline.
+ */
+#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)                           \
+       for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,           \
+                                                            ((__start) >> PAGE_SHIFT));        \
+            __tmp && ({                                                                        \
+                               __map = rb_entry(__tmp, struct pkvm_mapping, node);             \
+                               __tmp = rb_next(__tmp);                                         \
+                               true;                                                           \
+                      });                                                                      \
+           )                                                                                   \
+               if (__map->gfn < ((__start) >> PAGE_SHIFT))                                     \
+                       continue;                                                               \
+               else if (__map->gfn >= ((__end) >> PAGE_SHIFT))                                 \
+                       break;                                                                  \
+               else
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+                            struct kvm_pgtable_mm_ops *mm_ops)
+{
+       pgt->pkvm_mappings      = RB_ROOT;
+       pgt->mmu                = mmu;
+
+       return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       pkvm_handle_t handle = kvm->arch.pkvm.handle;
+       struct pkvm_mapping *mapping;
+       struct rb_node *node;
+
+       if (!handle)
+               return;
+
+       node = rb_first(&pgt->pkvm_mappings);
+       while (node) {
+               mapping = rb_entry(node, struct pkvm_mapping, node);
+               kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+               node = rb_next(node);
+               rb_erase(&mapping->node, &pgt->pkvm_mappings);
+               kfree(mapping);
+       }
+}
+
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                          u64 phys, enum kvm_pgtable_prot prot,
+                          void *mc, enum kvm_pgtable_walk_flags flags)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       struct pkvm_mapping *mapping = NULL;
+       struct kvm_hyp_memcache *cache = mc;
+       u64 gfn = addr >> PAGE_SHIFT;
+       u64 pfn = phys >> PAGE_SHIFT;
+       int ret;
+
+       if (size != PAGE_SIZE)
+               return -EINVAL;
+
+       lockdep_assert_held_write(&kvm->mmu_lock);
+       ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
+       if (ret) {
+               /* Is the gfn already mapped due to a racing vCPU? */
+               if (ret == -EPERM)
+                       return -EAGAIN;
+       }
+
+       swap(mapping, cache->mapping);
+       mapping->gfn = gfn;
+       mapping->pfn = pfn;
+       WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+
+       return ret;
+}
+
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       pkvm_handle_t handle = kvm->arch.pkvm.handle;
+       struct pkvm_mapping *mapping;
+       int ret = 0;
+
+       lockdep_assert_held_write(&kvm->mmu_lock);
+       for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+               ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+               if (WARN_ON(ret))
+                       break;
+               rb_erase(&mapping->node, &pgt->pkvm_mappings);
+               kfree(mapping);
+       }
+
+       return ret;
+}
+
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       pkvm_handle_t handle = kvm->arch.pkvm.handle;
+       struct pkvm_mapping *mapping;
+       int ret = 0;
+
+       lockdep_assert_held(&kvm->mmu_lock);
+       for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+               ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+               if (WARN_ON(ret))
+                       break;
+       }
+
+       return ret;
+}
+
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       struct pkvm_mapping *mapping;
+
+       lockdep_assert_held(&kvm->mmu_lock);
+       for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+               __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+
+       return 0;
+}
+
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
+{
+       struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+       pkvm_handle_t handle = kvm->arch.pkvm.handle;
+       struct pkvm_mapping *mapping;
+       bool young = false;
+
+       lockdep_assert_held(&kvm->mmu_lock);
+       for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+               young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
+                                          mkold);
+
+       return young;
+}
+
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+                                   enum kvm_pgtable_walk_flags flags)
+{
+       return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
+}
+
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+                                enum kvm_pgtable_walk_flags flags)
+{
+       WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
+}
+
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+{
+       WARN_ON_ONCE(1);
+}
+
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+                                       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
+{
+       WARN_ON_ONCE(1);
+       return NULL;
+}
+
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                             struct kvm_mmu_memory_cache *mc)
+{
+       WARN_ON_ONCE(1);
+       return -EINVAL;
+}
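
One point worth spelling out: pkvm_pgtable_stage2_map() never allocates under
the mmu_lock; it consumes a struct pkvm_mapping that the caller is expected to
have parked in the memcache beforehand, via swap(mapping, cache->mapping). The
sketch below is purely an assumption about what a caller could look like (the
actual plumbing lands in a later patch, and the function name is hypothetical):

	/*
	 * Hypothetical caller, for illustration only: pre-allocate the mapping
	 * outside the lock, let pkvm_pgtable_stage2_map() consume it via
	 * swap(mapping, cache->mapping), and free it afterwards if it was not
	 * consumed (kfree(NULL) is a no-op).
	 */
	static int pkvm_map_one_page(struct kvm *kvm, u64 ipa, u64 phys,
				     enum kvm_pgtable_prot prot)
	{
		struct kvm_hyp_memcache cache = {};
		int ret;

		cache.mapping = kzalloc(sizeof(*cache.mapping), GFP_KERNEL_ACCOUNT);
		if (!cache.mapping)
			return -ENOMEM;

		write_lock(&kvm->mmu_lock);
		ret = pkvm_pgtable_stage2_map(kvm->arch.mmu.pgt, ipa, PAGE_SIZE, phys,
					      prot, &cache, 0);
		write_unlock(&kvm->mmu_lock);

		kfree(cache.mapping);
		cache.mapping = NULL;

		return ret;
	}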