if (ret == H_TOO_HARD)
                        return RESUME_HOST;
                break;
+
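+       /*
+        * Nested-virtualization hcalls: H_SET_PARTITION_TABLE is only
+        * valid once nesting has been enabled for this guest.
+        */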
+       case H_SET_PARTITION_TABLE:
+               ret = H_FUNCTION;
+               if (vcpu->kvm->arch.nested_enable)
+                       ret = kvmhv_set_partition_table(vcpu);
+               break;
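+       /* Not yet supported: fail these nested-HV hcalls for now */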
+       case H_ENTER_NESTED:
+               ret = H_FUNCTION;
+               break;
+       case H_TLB_INVALIDATE:
+               ret = H_FUNCTION;
+               break;
+
        default:
                return RESUME_HOST;
        }
                        __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
                dw1 = PATB_GR | kvm->arch.process_table;
        }
-
-       mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
+       kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
 }
 
 /*
 /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
+       if (kvm->arch.nested_enable) {
+               kvm->arch.nested_enable = false;
+               kvmhv_release_all_nested(kvm);
+       }
        kvmppc_free_radix(kvm);
        kvmppc_update_lpcr(kvm, LPCR_VPM1,
                           LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
 
        kvmppc_alloc_host_rm_ops();
 
+       kvmhv_vm_nested_init(kvm);
+
        /*
         * Since we don't flush the TLB when tearing down a VM,
         * and this lpid might have previously been used,
 
        /* Perform global invalidation and return lpid to the pool */
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
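+               /* Free any nested guests before tearing down our own lpid */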
+               if (kvm->arch.nested_enable)
+                       kvmhv_release_all_nested(kvm);
                kvm->arch.process_table = 0;
-               kvmppc_setup_partition_table(kvm);
+               kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
        }
        kvmppc_free_lpid(kvm->arch.lpid);
 
        if (r < 0)
                return -ENODEV;
 
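+       /*
+        * When running as a nested hypervisor, register our partition
+        * table with the parent hypervisor (a no-op on bare metal).
+        */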
+       r = kvmhv_nested_init();
+       if (r)
+               return r;
+
        r = kvm_init_subcore_bitmap();
        if (r)
                return r;
        if (kvmppc_radix_possible())
                kvmppc_radix_exit();
        kvmppc_hv_ops = NULL;
+       kvmhv_nested_exit();
 }
 
 module_init(kvmppc_book3s_init_hv);
 
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2018
+ * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+ *        Paul Mackerras <paulus@ozlabs.org>
+ *
+ * Description: KVM functions specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
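+/*
+ * Our copy of the partition table, registered with the parent
+ * hypervisor when we run as a nested hypervisor on pseries.
+ */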
+static struct patb_entry *pseries_partition_tb;
+
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+
+long kvmhv_nested_init(void)
+{
+       long ptb_order;
+       unsigned long ptcr;
+       long rc;
+
+       if (!kvmhv_on_pseries())
+               return 0;
+       if (!radix_enabled())
+               return -ENODEV;
+
+       /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
+       ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
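+       /* The minimum partition table size is 4kB, i.e. 256 16-byte entries */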
+       if (ptb_order < 8)
+               ptb_order = 8;
+       pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
+                                      GFP_KERNEL);
+       if (!pseries_partition_tb) {
+               pr_err("kvm-hv: failed to allocate nested partition table\n");
+               return -ENOMEM;
+       }
+
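+       /*
+        * The PTCR value is the real address of the table with the size
+        * field, log2(size in bytes) - 12, in the low bits; with 16-byte
+        * entries that works out to ptb_order - 8.
+        */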
+       ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
+       rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
+       if (rc != H_SUCCESS) {
+               pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
+                      rc);
+               kfree(pseries_partition_tb);
+               pseries_partition_tb = NULL;
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+void kvmhv_nested_exit(void)
+{
+       /*
+        * N.B. the kvmhv_on_pseries() test is there because it enables
+        * the compiler to remove the call to plpar_hcall_norets()
+        * when CONFIG_PPC_PSERIES=n.
+        */
+       if (kvmhv_on_pseries() && pseries_partition_tb) {
+               plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
+               kfree(pseries_partition_tb);
+               pseries_partition_tb = NULL;
+       }
+}
+
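+/*
+ * Set a partition table entry: directly when we are the hypervisor,
+ * or in the table registered with our parent when we are running
+ * as a nested hypervisor.
+ */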
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
+{
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               mmu_partition_table_set_entry(lpid, dw0, dw1);
+       } else {
+               pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+               pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+       }
+}
+
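+/*
+ * Point the shadow lpid's partition table entry at the shadow page
+ * table for this nested guest and at its cached process table.
+ */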
+static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
+{
+       unsigned long dw0;
+
+       dw0 = PATB_HR | radix__get_tree_size() |
+               __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
+       kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
+}
+
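+/* Per-VM initialisation: no nested guests to begin with */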
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+       kvm->arch.max_nested_lpid = -1;
+}
+
+/*
+ * Handle the H_SET_PARTITION_TABLE hcall.
+ * r4 = guest real address of partition table + log_2(size) - 12
+ * (formatted as for the PTCR).
+ */
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+       unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
+       int srcu_idx;
+       long ret = H_SUCCESS;
+
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       /*
+        * Limit the partition table to 4096 entries (because that's what
+        * hardware supports), and check the base address.
+        */
+       if ((ptcr & PRTS_MASK) > 12 - 8 ||
+           !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
+               ret = H_PARAMETER;
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       if (ret == H_SUCCESS)
+               kvm->arch.l1_ptcr = ptcr;
+       return ret;
+}
+
+/*
+ * Reload the partition table entry for a guest.
+ * Caller must hold gp->tlb_lock.
+ */
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
+{
+       int ret;
+       struct patb_entry ptbl_entry;
+       unsigned long ptbl_addr;
+       struct kvm *kvm = gp->l1_host;
+
+       ret = -EFAULT;
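+       /* Read this guest's entry from L1's partition table, if it is in range */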
+       ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
+       if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+               ret = kvm_read_guest(kvm, ptbl_addr,
+                                    &ptbl_entry, sizeof(ptbl_entry));
+       if (ret) {
+               gp->l1_gr_to_hr = 0;
+               gp->process_table = 0;
+       } else {
+               gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
+               gp->process_table = be64_to_cpu(ptbl_entry.patb1);
+       }
+       kvmhv_set_nested_ptbl(gp);
+}
+
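+/*
+ * Allocate state for a new nested guest: a shadow page table and a
+ * real (shadow) lpid to run it under.
+ */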
+struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+{
+       struct kvm_nested_guest *gp;
+       long shadow_lpid;
+
+       gp = kzalloc(sizeof(*gp), GFP_KERNEL);
+       if (!gp)
+               return NULL;
+       gp->l1_host = kvm;
+       gp->l1_lpid = lpid;
+       mutex_init(&gp->tlb_lock);
+       gp->shadow_pgtable = pgd_alloc(kvm->mm);
+       if (!gp->shadow_pgtable)
+               goto out_free;
+       shadow_lpid = kvmppc_alloc_lpid();
+       if (shadow_lpid < 0)
+               goto out_free2;
+       gp->shadow_lpid = shadow_lpid;
+
+       return gp;
+
+ out_free2:
+       pgd_free(kvm->mm, gp->shadow_pgtable);
+ out_free:
+       kfree(gp);
+       return NULL;
+}
+
+/*
+ * Free up any resources allocated for a nested guest.
+ */
+static void kvmhv_release_nested(struct kvm_nested_guest *gp)
+{
+       kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
+       kvmppc_free_lpid(gp->shadow_lpid);
+       if (gp->shadow_pgtable)
+               pgd_free(gp->l1_host->mm, gp->shadow_pgtable);
+       kfree(gp);
+}
+
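+/*
+ * Remove the nested guest from the l1 guest's table and drop that
+ * reference; release it entirely if no other references remain.
+ */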
+static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
+{
+       struct kvm *kvm = gp->l1_host;
+       int lpid = gp->l1_lpid;
+       long ref;
+
+       spin_lock(&kvm->mmu_lock);
+       if (gp == kvm->arch.nested_guests[lpid]) {
+               kvm->arch.nested_guests[lpid] = NULL;
+               if (lpid == kvm->arch.max_nested_lpid) {
+                       while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
+                               ;
+                       kvm->arch.max_nested_lpid = lpid;
+               }
+               --gp->refcnt;
+       }
+       ref = gp->refcnt;
+       spin_unlock(&kvm->mmu_lock);
+       if (ref == 0)
+               kvmhv_release_nested(gp);
+}
+
+/*
+ * Free up all nested resources allocated for this guest.
+ * This is called with no vcpus of the guest running, when
+ * switching the guest to HPT mode or when destroying the
+ * guest.
+ */
+void kvmhv_release_all_nested(struct kvm *kvm)
+{
+       int i;
+       struct kvm_nested_guest *gp;
+       struct kvm_nested_guest *freelist = NULL;
+
+       spin_lock(&kvm->mmu_lock);
+       for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+               gp = kvm->arch.nested_guests[i];
+               if (!gp)
+                       continue;
+               kvm->arch.nested_guests[i] = NULL;
+               if (--gp->refcnt == 0) {
+                       gp->next = freelist;
+                       freelist = gp;
+               }
+       }
+       kvm->arch.max_nested_lpid = -1;
+       spin_unlock(&kvm->mmu_lock);
+       while ((gp = freelist) != NULL) {
+               freelist = gp->next;
+               kvmhv_release_nested(gp);
+       }
+}
+
+/* caller must hold gp->tlb_lock */
+void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+{
+       kvmhv_update_ptbl_cache(gp);
+       if (gp->l1_gr_to_hr == 0)
+               kvmhv_remove_nested(gp);
+}
+
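+/*
+ * Look up, and optionally create, the nested guest for an L1 lpid.
+ * Returns with an added reference, which the caller must drop with
+ * kvmhv_put_nested() when done.
+ */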
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+                                         bool create)
+{
+       struct kvm_nested_guest *gp, *newgp;
+
+       if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
+           l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+               return NULL;
+
+       spin_lock(&kvm->mmu_lock);
+       gp = kvm->arch.nested_guests[l1_lpid];
+       if (gp)
+               ++gp->refcnt;
+       spin_unlock(&kvm->mmu_lock);
+
+       if (gp || !create)
+               return gp;
+
+       newgp = kvmhv_alloc_nested(kvm, l1_lpid);
+       if (!newgp)
+               return NULL;
+       spin_lock(&kvm->mmu_lock);
+       if (kvm->arch.nested_guests[l1_lpid]) {
+               /* someone else beat us to it */
+               gp = kvm->arch.nested_guests[l1_lpid];
+       } else {
+               kvm->arch.nested_guests[l1_lpid] = newgp;
+               ++newgp->refcnt;
+               gp = newgp;
+               newgp = NULL;
+               if (l1_lpid > kvm->arch.max_nested_lpid)
+                       kvm->arch.max_nested_lpid = l1_lpid;
+       }
+       ++gp->refcnt;
+       spin_unlock(&kvm->mmu_lock);
+
+       if (newgp)
+               kvmhv_release_nested(newgp);
+
+       return gp;
+}
+
+void kvmhv_put_nested(struct kvm_nested_guest *gp)
+{
+       struct kvm *kvm = gp->l1_host;
+       long ref;
+
+       spin_lock(&kvm->mmu_lock);
+       ref = --gp->refcnt;
+       spin_unlock(&kvm->mmu_lock);
+       if (ref == 0)
+               kvmhv_release_nested(gp);
+}