KVM: arm64: Protect stage-2 traversal with RCU
Author:     Oliver Upton <oliver.upton@linux.dev>
AuthorDate: Mon, 7 Nov 2022 21:56:38 +0000 (21:56 +0000)
Commit:     Marc Zyngier <maz@kernel.org>
CommitDate: Thu, 10 Nov 2022 14:43:46 +0000 (14:43 +0000)
Use RCU to safely walk the stage-2 page tables in parallel. Acquire and
release the RCU read lock when traversing the page tables. Defer the
freeing of table memory to an RCU callback. Indirect the calls into RCU
and provide stubs for hypervisor code, as RCU is not available in such a
context.

The RCU protection doesn't amount to much at the moment, as readers are
already protected by the read-write lock (all walkers that free table
memory take the write lock). Nonetheless, a subsequent change will
further relax the locking requirements around the stage-2 MMU, thereby
depending on RCU.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-9-oliver.upton@linux.dev
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c
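
At its core, the read side of this change is the classic RCU pattern:
bracket the traversal in a read-side critical section and fetch table
pointers through an RCU-aware dereference, so that a concurrently freed
table stays valid until every pre-existing reader has finished. The
sketch below restates that shape with bare RCU primitives; the typedef
and helper names are illustrative stand-ins, not the KVM code (which
follows in the hunks below).

  #include <linux/rcupdate.h>

  typedef unsigned long pte_sketch_t;           /* stand-in for kvm_pte_t */
  typedef pte_sketch_t __rcu *pteref_sketch_t;  /* stand-in for kvm_pteref_t */

  static pte_sketch_t *deref_table(pteref_sketch_t pteref, bool shared)
  {
          /*
           * Shared walkers must hold the RCU read lock; exclusive
           * walkers may rely on the MMU write lock instead, which is
           * what the !shared condition expresses to lockdep.
           */
          return rcu_dereference_check(pteref, !shared);
  }

  static void walk_sketch(pteref_sketch_t root)
  {
          pte_sketch_t *table;

          rcu_read_lock();                /* kvm_pgtable_walk_begin() */
          table = deref_table(root, true);
          /* ... visit entries; tables cannot be freed under us ... */
          rcu_read_unlock();              /* kvm_pgtable_walk_end() */
  }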

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index e70cf57b719eca31f3d346e06d9daecb98109559..7634b6964779aedd35e4a1f808c9e14687fc30b3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
 typedef kvm_pte_t *kvm_pteref_t;
 
 static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
@@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
        return pteref;
 }
 
+static inline void kvm_pgtable_walk_begin(void) {}
+static inline void kvm_pgtable_walk_end(void) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+       return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
+{
+       return rcu_dereference_check(pteref, !shared);
+}
+
+static inline void kvm_pgtable_walk_begin(void)
+{
+       rcu_read_lock();
+}
+
+static inline void kvm_pgtable_walk_end(void)
+{
+       rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+       return rcu_read_lock_held();
+}
+
+#endif
+
 #define KVM_PTE_VALID                  BIT(0)
 
 #define KVM_PTE_ADDR_MASK              GENMASK(47, PAGE_SHIFT)
@@ -202,11 +243,14 @@ struct kvm_pgtable {
  *                                     children.
  * @KVM_PGTABLE_WALK_TABLE_POST:       Visit table entries after their
  *                                     children.
+ * @KVM_PGTABLE_WALK_SHARED:           Indicates the page-tables may be shared
+ *                                     with other software walkers.
  */
 enum kvm_pgtable_walk_flags {
        KVM_PGTABLE_WALK_LEAF                   = BIT(0),
        KVM_PGTABLE_WALK_TABLE_PRE              = BIT(1),
        KVM_PGTABLE_WALK_TABLE_POST             = BIT(2),
+       KVM_PGTABLE_WALK_SHARED                 = BIT(3),
 };
 
 struct kvm_pgtable_visit_ctx {
@@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx {
 typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
                                        enum kvm_pgtable_walk_flags visit);
 
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+       return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
+
 /**
  * struct kvm_pgtable_walker - Hook into a page-table walk.
  * @cb:                Callback function to invoke during the walk.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 7c9782347570b7bc8f48292ce99ca1d0afb82f49..d8d963521d4eb28a0a4a9dc7913731c4b5fe1473 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
                                  enum kvm_pgtable_walk_flags visit)
 {
        struct kvm_pgtable_walker *walker = data->walker;
+
+       /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
+       WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
        return walker->cb(ctx, visit);
 }
 
@@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
                .end    = PAGE_ALIGN(walk_data.addr + size),
                .walker = walker,
        };
+       int r;
+
+       kvm_pgtable_walk_begin();
+       r = _kvm_pgtable_walk(pgt, &walk_data);
+       kvm_pgtable_walk_end();
 
-       return _kvm_pgtable_walk(pgt, &walk_data);
+       return r;
 }
 
 struct leaf_walk_data {
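
For context, a caller opts into the new semantics by setting
KVM_PGTABLE_WALK_SHARED in its walker flags; kvm_pgtable_walk() then
takes the RCU read lock around the traversal, and the WARN_ON_ONCE
above fires if a shared walker's callback ever runs outside of it. A
hypothetical caller might look like the following sketch (assuming the
walker struct carries .cb and .flags members, as in the header above):

  static int sketch_visitor(const struct kvm_pgtable_visit_ctx *ctx,
                            enum kvm_pgtable_walk_flags visit)
  {
          /*
           * Runs inside the RCU read-side critical section taken by
           * kvm_pgtable_walk(), so the tables it visits stay valid
           * for the duration of the callback.
           */
          return 0;
  }

  static int walk_shared_sketch(struct kvm_pgtable *pgt, u64 addr, u64 size)
  {
          struct kvm_pgtable_walker walker = {
                  .cb    = sketch_visitor,
                  .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_SHARED,
          };

          return kvm_pgtable_walk(pgt, addr, size, &walker);
  }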
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 73ae908eb5d93501fb72b138d7e9e34d58bc1413..52e042399ba5d254096682bbf12c152e5d5ee24a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
 
+static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
+{
+       struct page *page = container_of(head, struct page, rcu_head);
+       void *pgtable = page_to_virt(page);
+       u32 level = page_private(page);
+
+       kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
+}
+
 static void stage2_free_removed_table(void *addr, u32 level)
 {
-       kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
+       struct page *page = virt_to_page(addr);
+
+       set_page_private(page, (unsigned long)level);
+       call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
 }
 
 static void kvm_host_get_page(void *addr)
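
The free side pairs with this: rather than freeing a disconnected table
page immediately, mmu.c now stashes the table level in the
otherwise-unused page_private field and hands the page's embedded
rcu_head to call_rcu(), so teardown runs only after every reader that
could still see the table has left its read-side critical section.
Reusing struct page fields means the deferral needs no extra allocation
on the free path. The same pattern in isolation, with a placeholder
teardown function:

  #include <linux/mm.h>
  #include <linux/rcupdate.h>

  static void free_table_memory(void *pgtable, u32 level);  /* placeholder */

  static void table_free_rcu_cb(struct rcu_head *head)
  {
          /*
           * Recover the page from its embedded rcu_head, then the
           * virtual address and the level stashed before the grace
           * period began.
           */
          struct page *page = container_of(head, struct page, rcu_head);
          void *pgtable = page_to_virt(page);
          u32 level = page_private(page);

          free_table_memory(pgtable, level);
  }

  static void defer_table_free(void *addr, u32 level)
  {
          struct page *page = virt_to_page(addr);

          set_page_private(page, (unsigned long)level);
          /* Runs table_free_rcu_cb() after a full RCU grace period. */
          call_rcu(&page->rcu_head, table_free_rcu_cb);
  }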