KVM: x86/xen: handle PV spinlocks slowpath
Author:     Boris Ostrovsky <boris.ostrovsky@oracle.com>
AuthorDate: Wed, 16 Jan 2019 21:22:30 +0000 (16:22 -0500)
Commit:     David Woodhouse <dwmw@amazon.co.uk>
CommitDate: Tue, 8 Feb 2022 10:06:50 +0000 (10:06 +0000)
Add support for SCHEDOP_poll hypercall.

This implementation is optimized for polling a single event channel,
which is what Linux does. Polling multiple channels is not especially
efficient (and has not been tested).

The PV spinlock slow path uses this hypercall, and explicitly crashes if
it is not supported.
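
For reference, a sketch of the guest side being intercepted here, roughly
following what xen_poll_irq_timeout() does for the PV spinlock slow path
(the HYPERVISOR_sched_op() wrapper and the 'port' variable are placeholders
for the guest's own plumbing):

    struct sched_poll poll = {
            .nr_ports = 1,          /* single channel: the optimized case */
            .timeout  = 0,          /* 0 == wait until the event arrives */
    };

    set_xen_guest_handle(poll.ports, &port);

    /* The spinlock slow path crashes on failure, so the hypercall
     * must work once KVM_XEN_HVM_CONFIG_EVTCHN_SEND is enabled. */
    if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
            BUG();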

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

index 398bb1a6c82e3220d0e1434ea496324dcba34d87..fa0a95a22cc4f65cb95e9cb20da9fd1da0100b31 100644 (file)
@@ -611,6 +611,8 @@ struct kvm_vcpu_xen {
        u64 runstate_entry_time;
        u64 runstate_times[4];
        unsigned long evtchn_pending_sel;
+       int poll_evtchn;
+       struct timer_list poll_timer;
 };
 
 struct kvm_vcpu_arch {
@@ -1025,6 +1027,7 @@ struct kvm_xen {
        u8 upcall_vector;
        struct gfn_to_pfn_cache shinfo_cache;
        struct idr evtchn_ports;
+       unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 };
 
 enum kvm_irqchip_mode {
index 2da4ff7c5e6f8244054cb187c3be42f475a2f478..532d26cb78d3d097b9717be3c33c8bf079b3c2af 100644 (file)
@@ -11138,6 +11138,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        vcpu->arch.pending_external_vector = -1;
        vcpu->arch.preempted_in_kernel = false;
 
+       kvm_xen_init_vcpu(vcpu);
+
 #if IS_ENABLED(CONFIG_HYPERV)
        vcpu->arch.hv_root_tdp = INVALID_PAGE;
 #endif
index d0a7a67c41f303588539294f78ca8027964ba493..0c38225d78709550103655ce9164c4b583626472 100644 (file)
@@ -9,6 +9,7 @@
 #include "x86.h"
 #include "xen.h"
 #include "hyperv.h"
+#include "lapic.h"
 
 #include <linux/eventfd.h>
 #include <linux/kvm_host.h>
@@ -798,9 +799,133 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
        return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
 }
 
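+/*
+ * Check whether any of the given event channel ports is already pending
+ * in the shared_info page. If the gfn_to_pfn_cache cannot be validated,
+ * report "pending" so that the caller does not halt the vCPU on stale
+ * information.
+ */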
+static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
+                              evtchn_port_t *ports)
+{
+       struct kvm *kvm = vcpu->kvm;
+       struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+       unsigned long *pending_bits;
+       unsigned long flags;
+       bool ret = true;
+       int idx, i;
+
+       read_lock_irqsave(&gpc->lock, flags);
+       idx = srcu_read_lock(&kvm->srcu);
+       if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+               goto out_rcu;
+
+       ret = false;
+       if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+               struct shared_info *shinfo = gpc->khva;
+               pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+       } else {
+               struct compat_shared_info *shinfo = gpc->khva;
+               pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+       }
+
+       for (i = 0; i < nr_ports; i++) {
+               if (test_bit(ports[i], pending_bits)) {
+                       ret = true;
+                       break;
+               }
+       }
+
+ out_rcu:
+       srcu_read_unlock(&kvm->srcu, idx);
+       read_unlock_irqrestore(&gpc->lock, flags);
+
+       return ret;
+}
+
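+/*
+ * Handle SCHEDOP_poll: copy in the guest's sched_poll argument and its
+ * port list, mark this vCPU as a poller, then halt it until one of the
+ * ports is raised or the (optional) timeout fires.
+ */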
+static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, u64 param, u64 *r)
+{
+       int idx, i;
+       struct sched_poll sched_poll;
+       evtchn_port_t port, *ports;
+       gpa_t gpa;
+
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+       if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
+                                       sizeof(sched_poll))) {
+               *r = -EFAULT;
+               return true;
+       }
+
+       if (unlikely(sched_poll.nr_ports > 1)) {
+               /* Xen (unofficially) limits number of pollers to 128 */
+               if (sched_poll.nr_ports > 128) {
+                       *r = -EINVAL;
+                       return true;
+               }
+
+               ports = kmalloc_array(sched_poll.nr_ports,
+                                     sizeof(*ports), GFP_KERNEL);
+               if (!ports) {
+                       *r = -ENOMEM;
+                       return true;
+               }
+       } else
+               ports = &port;
+
+       for (i = 0; i < sched_poll.nr_ports; i++) {
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
+               gpa = kvm_mmu_gva_to_gpa_system(vcpu,
+                                               (gva_t)(sched_poll.ports + i),
+                                               NULL);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+               if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
+                                               &ports[i], sizeof(port))) {
+                       *r = -EFAULT;
+                       goto out;
+               }
+       }
+
+       if (sched_poll.nr_ports == 1)
+               vcpu->arch.xen.poll_evtchn = port;
+       else
+               vcpu->arch.xen.poll_evtchn = -1;
+
+       set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+       if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
+               vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+
+               if (sched_poll.timeout)
+                       mod_timer(&vcpu->arch.xen.poll_timer,
+                                 jiffies + nsecs_to_jiffies(sched_poll.timeout));
+
+               kvm_vcpu_halt(vcpu);
+
+               if (sched_poll.timeout)
+                       del_timer(&vcpu->arch.xen.poll_timer);
+       }
+
+       vcpu->arch.xen.poll_evtchn = 0;
+       *r = 0;
+
+out:
+       /* Really, this is only needed in case of timeout */
+       clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+       if (unlikely(sched_poll.nr_ports > 1))
+               kfree(ports);
+       return true;
+}
+
+static void cancel_evtchn_poll(struct timer_list *t)
+{
+       struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);
+
+       kvm_make_request(KVM_REQ_UNHALT, vcpu);
+}
+
 static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param, u64 *r)
 {
        switch (cmd) {
+       case SCHEDOP_poll:
+               if ((vcpu->kvm->arch.xen_hvm_config.flags &
+                    KVM_XEN_HVM_CONFIG_EVTCHN_SEND) && lapic_in_kernel(vcpu))
+                       return kvm_xen_schedop_poll(vcpu, param, r);
+               fallthrough;
        case SCHEDOP_yield:
                kvm_vcpu_on_spin(vcpu, true);
                *r = 0;
@@ -888,6 +1013,17 @@ static inline int max_evtchn_port(struct kvm *kvm)
                return COMPAT_EVTCHN_2L_NR_CHANNELS;
 }
 
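+/*
+ * Wake the vCPU if it is blocked in SCHEDOP_poll waiting on this port
+ * (poll_evtchn == port) or on multiple ports (poll_evtchn == -1).
+ */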
+static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+{
+       int poll_evtchn = vcpu->arch.xen.poll_evtchn;
+
+       if ((poll_evtchn == port || poll_evtchn == -1) &&
+           test_and_clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask)) {
+               kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+               kvm_vcpu_kick(vcpu);
+       }
+}
+
 /*
  * The return value from this function is propagated to kvm_set_irq() API,
  * so it returns:
@@ -954,6 +1090,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                rc = 0; /* It was already raised */
        } else if (test_bit(xe->port, mask_bits)) {
                rc = -ENOTCONN; /* Masked */
+               kvm_xen_check_poller(vcpu, xe->port);
        } else {
                rc = 1; /* Delivered. But was the vCPU waking already? */
                if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
@@ -1327,6 +1464,12 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
        return true;
 }
 
+void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.xen.poll_evtchn = 0;
+       timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+}
+
 void kvm_xen_init_vm(struct kvm *kvm)
 {
        idr_init(&kvm->arch.xen.evtchn_ports);
index 66a830e7d55af7bab1100259142fd8971b92abde..e11a10fbdd6700918f22aa2f4a4505dbdb67506c 100644 (file)
@@ -23,6 +23,7 @@ int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *
 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
 void kvm_xen_init_vm(struct kvm *kvm);
+void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu);
 void kvm_xen_destroy_vm(struct kvm *kvm);
 
 int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
@@ -58,6 +59,10 @@ static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
        return 1;
 }
 
+static inline void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
+{
+}
+
 static inline void kvm_xen_init_vm(struct kvm *kvm)
 {
 }
index 865e17146815a6585d801a31abf4a63c853d1dff..376c611443cd33bfa0ba4a302ef753a5b313b9b2 100644 (file)
@@ -233,6 +233,12 @@ int main(int argc, char *argv[])
                .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
                .msr = XEN_HYPERCALL_MSR,
        };
+
+       /*
+        * Let the kernel know that we *will* use it for sending all
+        * event channels, which lets it intercept SCHEDOP_poll.
+        */
+       if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
+               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
        vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
 
        struct kvm_xen_hvm_attr lm = {