KVM: x86/xen: register runstate info

author Joao Martins <joao.m.martins@oracle.com>

Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)

committer David Woodhouse <dwmw@amazon.co.uk>

Sat, 12 Dec 2020 21:07:13 +0000 (21:07 +0000)
author Joao Martins <joao.m.martins@oracle.com>
Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)
committer David Woodhouse <dwmw@amazon.co.uk>
Sat, 12 Dec 2020 21:07:13 +0000 (21:07 +0000)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index b7dfcb4de92a16c2fcc5acfc89f3268c4a38047b..4b345a8945ea0ae1b155bf9b31658bef520c325b 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -523,10 +523,15 @@ struct kvm_vcpu_hv {
  /* Xen HVM per vcpu emulation context */
  struct kvm_vcpu_xen {
         u64 hypercall_rip;
+       u32 current_runstate;   /* last RUNSTATE_* recorded by kvm_xen_update_runstate() */
         bool vcpu_info_set;
         bool vcpu_time_info_set;
+       bool runstate_set;      /* true once runstate_cache has been initialised via the RUNSTATE attribute */
         struct gfn_to_hva_cache vcpu_info_cache;
         struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache; /* cached mapping of the guest's runstate area */
+       u64 last_steal;         /* sched_info.run_delay sampled when the vCPU was marked RUNSTATE_blocked */
+       u64 last_state_ns;      /* ktime_get_ns() taken at the last runstate change */
  };
  
  struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 2234fdf49d82723c66bc8cf3fbcde7cca54c559a..bd4bd9a818d89d0202aaaefd0735e48f052d4f4d 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2940,6 +2940,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
         struct kvm_host_map map;
         struct kvm_steal_time *st;
  
+       if (vcpu->arch.xen.runstate_set) {
+               kvm_xen_setup_runstate_page(vcpu);
+               return;
+       }
+
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                 return;
  
@@ -3968,6 +3973,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
         struct kvm_host_map map;
         struct kvm_steal_time *st;
  
+       if (vcpu->arch.xen.runstate_set) {
+               kvm_xen_runstate_set_preempted(vcpu);
+               return;
+       }
+
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                 return;
  
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c

index 1cca46effec850d72993a4d709db338b9f51702f..17cbb4462b7e05fa4e10256a0c9cd469cb567e49 100644 (file)
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -11,9 +11,11 @@
  #include "hyperv.h"
  
  #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
  
  #include <trace/events/kvm.h>
  #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
  
  #include "trace.h"
  
@@ -56,6 +58,124 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
         return 0;
  }
  
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state, u64 steal_ns)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       struct vcpu_runstate_info runstate;
+       unsigned int offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+       u64 now, delta;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference between the native and the compat layout is
+        * the alignment of uint64_t in 32-bit. The first field 'state' sits
+        * at the same offset in both layouts and is always written at its
+        * native offset. state_entry_time and time[] start 4 bytes later in
+        * the native layout; 'offset' carries that adjustment for every
+        * access to those fields below.
+        *
+        * NOTE(review): the native offset is selected unconditionally on
+        * 64-bit builds; nothing visible here keeps the compat offset for
+        * a guest using the 32-bit layout -- confirm this is intended.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * Record that the vCPU enters runstate 'state' and publish the
+        * change in the guest's runstate area. The sequence is: read the
+        * current state_entry_time and time[] values from the guest, write
+        * state_entry_time back with XEN_RUNSTATE_UPDATE set, write the new
+        * state and the adjusted time[] array, then write state_entry_time
+        * once more with the flag cleared. If any guest access fails the
+        * function returns at that point without completing the update.
+        *
+        * Although it's called "state_entry_time" and explicitly documented
+        * as being "the system time at which the VCPU was last scheduled to
+        * run", Xen just treats it as a counter for HVM domains too, so it
+        * is simply incremented by one on each update here.
+        */
+       if (kvm_read_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                        &runstate.state_entry_time, offset,
+                                        sizeof(u64) * 5))
+               return;
+
+       runstate.state_entry_time = XEN_RUNSTATE_UPDATE |
+               (runstate.state_entry_time + 1);
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+       smp_wmb();      /* flag store is issued before the state/time stores */
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(runstate.state)))
+               return;
+
+       now = ktime_get_ns();
+       delta = now - vcpu_xen->last_state_ns - steal_ns;       /* time since the last update, less steal_ns */
+       runstate.time[vcpu_xen->current_runstate] += delta;     /* credited to the state being left */
+       if (steal_ns)
+               runstate.time[RUNSTATE_runnable] += steal_ns;   /* steal_ns is accounted as runnable time */
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.time[0],
+                                         offset + sizeof(u64),
+                                         sizeof(runstate.time)))
+               return;
+       smp_wmb();      /* state/time stores are issued before the flag is cleared */
+       vcpu_xen->current_runstate = state;
+       vcpu_xen->last_state_ns = now;
+
+       runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+}
+
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       int new_state;  /* RUNSTATE_* value handed to kvm_xen_update_runstate() */
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+       if (v->preempted) {     /* a preempted vCPU is reported as runnable */
+               new_state = RUNSTATE_runnable;
+       } else {                /* otherwise it is reported as blocked */
+               new_state = RUNSTATE_blocked;
+               vcpu_xen->last_steal = current->sched_info.run_delay;   /* baseline read back by kvm_xen_setup_runstate_page() */
+       }
+
+       kvm_xen_update_runstate(v, new_state, 0);       /* no steal time is attributed on this path */
+}
+
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       u64 steal_time = 0;
+
+       /*
+        * Switch the vCPU's recorded runstate to RUNSTATE_running.
+        *
+        * If the CPU was blocked when it last stopped, presumably
+        * it became unblocked at some point because it's being run
+        * again now. The scheduler run_delay is the runnable time,
+        * to be subtracted from the blocked time: the growth of
+        * run_delay since last_steal was sampled is handed to
+        * kvm_xen_update_runstate() as steal_ns, which moves that
+        * amount from the previous state to RUNSTATE_runnable.
+        * For any other previous state no steal time is passed.
+        */
+       if (vcpu_xen->current_runstate == RUNSTATE_blocked)
+               steal_time = current->sched_info.run_delay - vcpu_xen->last_steal;
+
+       kvm_xen_update_runstate(v, RUNSTATE_running, steal_time);
+}
+
  int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
  {
         struct kvm_vcpu *v;
@@ -78,7 +198,6 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
                 if (!v)
                         return -EINVAL;
-
                 /* No compat necessary here. */
                 BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                              sizeof(struct compat_vcpu_info));
@@ -110,6 +229,22 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
                 break;
  
+       case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+               v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
+               if (!v)
+                       return -EINVAL;
+
+               r = kvm_gfn_to_hva_cache_init(kvm, &v->arch.xen.runstate_cache,
+                                             data->u.vcpu_attr.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (r)
+                       return r;
+
+               v->arch.xen.runstate_set = true;
+               v->arch.xen.current_runstate = RUNSTATE_blocked;
+               v->arch.xen.last_state_ns = ktime_get_ns();
+               break;
+
         default:
                 break;
         }
@@ -157,6 +292,17 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 }
                 break;
  
+       case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+               v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
+               if (!v)
+                       return -EINVAL;
+
+               if (v->arch.xen.runstate_set) {
+                       data->u.vcpu_attr.gpa = v->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
         default:
                 break;
         }
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h

index 120b7450252aed3a34ed7b2affcbff804cd27475..407e717476d66e8d50baf7d35d4e44224d146c2d 100644 (file)
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,8 @@
  #ifndef __ARCH_X86_KVM_XEN_H__
  #define __ARCH_X86_KVM_XEN_H__
  
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
  int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
  int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
  int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
@@ -54,7 +56,12 @@ struct compat_shared_info {
         uint32_t evtchn_mask[32];
         struct pvclock_wall_clock wc;
         struct compat_arch_shared_info arch;
-
  };
  
+struct compat_vcpu_runstate_info {     /* packed runstate layout; size 0x2c is asserted in kvm_xen_update_runstate() */
+    int state;                 /* same offset and size as 'state' in struct vcpu_runstate_info */
+    uint64_t state_entry_time; /* 4 bytes earlier than in the native layout on 64-bit builds */
+    uint64_t time[4];          /* immediately follows state_entry_time; indexed by RUNSTATE_* state */
+} __attribute__((packed));
+
  #endif /* __ARCH_X86_KVM_XEN_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

index f60c5c61761cd3c16edacfa13cc97f8ff2939c4f..ab83f35887190e2f21cbd0bf68bf7cbe5afee281 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1600,6 +1600,7 @@ struct kvm_xen_hvm_attr {
  #define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
  #define KVM_XEN_ATTR_TYPE_VCPU_INFO            0x2
  #define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO       0x3
+#define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE                0x4
  
  /* Secure Encrypted Virtualization command */
  enum sev_cmd_id {
author	Joao Martins <joao.m.martins@oracle.com>
	Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)
committer	David Woodhouse <dwmw@amazon.co.uk>
	Sat, 12 Dec 2020 21:07:13 +0000 (21:07 +0000)
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
arch/x86/kvm/xen.c		patch \| blob \| history
arch/x86/kvm/xen.h		patch \| blob \| history
include/uapi/linux/kvm.h		patch \| blob \| history