www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: arm64: Move host page ownership tracking to the hyp vmemmap
author: Quentin Perret <qperret@google.com>
Wed, 18 Dec 2024 19:40:45 +0000 (19:40 +0000)
committer: Marc Zyngier <maz@kernel.org>
Fri, 20 Dec 2024 09:43:59 +0000 (09:43 +0000)
We currently store part of the page-tracking state in PTE software bits
for the host, guests and the hypervisor. This is sub-optimal when e.g.
sharing pages as this forces us to break block mappings purely to support
this software tracking. This causes an unnecessarily fragmented stage-2
page-table for the host in particular when it shares pages with Secure,
which can lead to measurable regressions. Moreover, having this state
stored in the page-table forces us to do multiple costly walks on the
page transition path, hence causing overhead.

In order to work around these problems, move the host-side page-tracking
logic from SW bits in its stage-2 PTEs to the hypervisor's vmemmap.

Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20241218194059.3670226-5-qperret@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/kvm/hyp/include/nvhe/memory.h
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/setup.c

index 8f2b42bcc8e115f1071e192689dcf3845c14b1f8..2a5eabf4b753faf7248f14cfd248bfdcf392144b 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 
 /*
- * SW bits 0-1 are reserved to track the memory ownership state of each page:
+ * Bits 0-1 are reserved to track the memory ownership state of each page:
  *   00: The page is owned exclusively by the page-table owner.
  *   01: The page is owned by the page-table owner, but is shared
  *       with another entity.
@@ -43,7 +43,9 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
 struct hyp_page {
        u16 refcount;
        u8 order;
-       u8 reserved;
+
+       /* Host (non-meta) state. Guarded by the host stage-2 lock. */
+       enum pkvm_page_state host_state : 8;
 };
 
 extern u64 __hyp_vmemmap;
@@ -63,7 +65,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
 
 #define hyp_phys_to_pfn(phys)  ((phys) >> PAGE_SHIFT)
 #define hyp_pfn_to_phys(pfn)   ((phys_addr_t)((pfn) << PAGE_SHIFT))
-#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
+
+static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
+{
+       BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u32));
+       return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
+}
+
 #define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
 #define hyp_virt_to_pfn(virt)  hyp_phys_to_pfn(__hyp_pa(virt))
 
index caba3e4bd09e8405a4138a8d61c59375187550e0..12bb5445fe47e3cd2d20eb3bd1e6458a9e01eb24 100644 (file)
@@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc)
 
        memset(addr, 0, PAGE_SIZE);
        p = hyp_virt_to_page(addr);
-       memset(p, 0, sizeof(*p));
        p->refcount = 1;
+       p->order = 0;
 
        return addr;
 }
@@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
 
 void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
 {
+       struct hyp_page *page;
        void *addr;
 
        /* Dump all pgtable pages in the hyp_pool */
@@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
        /* Drain the hyp_pool into the memcache */
        addr = hyp_alloc_pages(&vm->pool, 0);
        while (addr) {
-               memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+               page = hyp_virt_to_page(addr);
+               page->refcount = 0;
+               page->order = 0;
                push_hyp_memcache(mc, addr, hyp_virt_to_phys);
                WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
                addr = hyp_alloc_pages(&vm->pool, 0);
@@ -382,19 +385,28 @@ bool addr_is_memory(phys_addr_t phys)
        return !!find_mem_range(phys, &range);
 }
 
-static bool addr_is_allowed_memory(phys_addr_t phys)
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+       return range->start <= addr && addr < range->end;
+}
+
+static int check_range_allowed_memory(u64 start, u64 end)
 {
        struct memblock_region *reg;
        struct kvm_mem_range range;
 
-       reg = find_mem_range(phys, &range);
+       /*
+        * Callers can't check the state of a range that overlaps memory and
+        * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
+        */
+       reg = find_mem_range(start, &range);
+       if (!is_in_mem_range(end - 1, &range))
+               return -EINVAL;
 
-       return reg && !(reg->flags & MEMBLOCK_NOMAP);
-}
+       if (!reg || reg->flags & MEMBLOCK_NOMAP)
+               return -EPERM;
 
-static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
-{
-       return range->start <= addr && addr < range->end;
+       return 0;
 }
 
 static bool range_is_memory(u64 start, u64 end)
@@ -454,8 +466,10 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
        if (kvm_pte_valid(pte))
                return -EAGAIN;
 
-       if (pte)
+       if (pte) {
+               WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
                return -EPERM;
+       }
 
        do {
                u64 granule = kvm_granule_size(level);
@@ -477,10 +491,33 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
        return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
 }
 
+static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
+{
+       phys_addr_t end = addr + size;
+
+       for (; addr < end; addr += PAGE_SIZE)
+               hyp_phys_to_page(addr)->host_state = state;
+}
+
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
 {
-       return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
-                              addr, size, &host_s2_pool, owner_id);
+       int ret;
+
+       if (!addr_is_memory(addr))
+               return -EPERM;
+
+       ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+                             addr, size, &host_s2_pool, owner_id);
+       if (ret)
+               return ret;
+
+       /* Don't forget to update the vmemmap tracking for the host */
+       if (owner_id == PKVM_ID_HOST)
+               __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
+       else
+               __host_update_page_state(addr, size, PKVM_NOPAGE);
+
+       return 0;
 }
 
 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
@@ -604,35 +641,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
        return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
-{
-       if (!addr_is_allowed_memory(addr))
-               return PKVM_NOPAGE;
-
-       if (!kvm_pte_valid(pte) && pte)
-               return PKVM_NOPAGE;
-
-       return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
-}
-
 static int __host_check_page_state_range(u64 addr, u64 size,
                                         enum pkvm_page_state state)
 {
-       struct check_walk_data d = {
-               .desired        = state,
-               .get_page_state = host_get_page_state,
-       };
+       u64 end = addr + size;
+       int ret;
+
+       ret = check_range_allowed_memory(addr, end);
+       if (ret)
+               return ret;
 
        hyp_assert_lock_held(&host_mmu.lock);
-       return check_page_state_range(&host_mmu.pgt, addr, size, &d);
+       for (; addr < end; addr += PAGE_SIZE) {
+               if (hyp_phys_to_page(addr)->host_state != state)
+                       return -EPERM;
+       }
+
+       return 0;
 }
 
 static int __host_set_page_state_range(u64 addr, u64 size,
                                       enum pkvm_page_state state)
 {
-       enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+       if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
+               int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
 
-       return host_stage2_idmap_locked(addr, size, prot);
+               if (ret)
+                       return ret;
+       }
+
+       __host_update_page_state(addr, size, state);
+
+       return 0;
 }
 
 static int host_request_owned_transition(u64 *completer_addr,
index cbdd18cd3f9843f5e2e14fab7bbc50a5e7ed1930..7e04d1c2a03ddd07b4568640a6eba75fd56de94a 100644 (file)
@@ -180,7 +180,6 @@ static void hpool_put_page(void *addr)
 static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
                                     enum kvm_pgtable_walk_flags visit)
 {
-       enum kvm_pgtable_prot prot;
        enum pkvm_page_state state;
        phys_addr_t phys;
 
@@ -203,16 +202,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
        case PKVM_PAGE_OWNED:
                return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
        case PKVM_PAGE_SHARED_OWNED:
-               prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+               hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
                break;
        case PKVM_PAGE_SHARED_BORROWED:
-               prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+               hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
                break;
        default:
                return -EINVAL;
        }
 
-       return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+       return 0;
 }
 
 static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,