www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: x86/xen: register runstate info xenpv-post-6
author: Joao Martins <joao.m.martins@oracle.com>
Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)
committer: David Woodhouse <dwmw@amazon.co.uk>
Wed, 3 Feb 2021 14:45:58 +0000 (14:45 +0000)
Allow the emulator to register vCPU runstate areas, which Xen guests
can use for their steal clock. The 'preempted' state of the KVM steal
clock equates to the 'runnable' state; 'running' has similar meanings
for both, and 'offline' is used when the system admin needs to bring a
vCPU offline or hotplug it.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
include/uapi/linux/kvm.h

index 9693ec3c20427bbd8c219fde07274902c090a0a5..b1cc73a190219ef453b08d193f0ebb41d1cd4968 100644 (file)
@@ -523,10 +523,15 @@ struct kvm_vcpu_hv {
 /* Xen HVM per vcpu emulation context */
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
+       u32 current_runstate;   /* RUNSTATE_* value currently being accounted */
        bool vcpu_info_set;
        bool vcpu_time_info_set;
+       bool runstate_set;      /* true once a runstate area GPA is registered */
        struct gfn_to_hva_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache; /* guest vcpu_runstate_info area */
+       u64 last_steal;         /* sched_info.run_delay snapshot taken on block */
+       u64 last_state_ns;      /* ktime_get_ns() of the last runstate change */
 };
 
 struct kvm_vcpu_arch {
index 6dc0376beac5d1d501cd09c8ce6fd19c5b97bc30..16f916da81fcfc0bc4ef3047d41846c4c43ae9bf 100644 (file)
@@ -2947,6 +2947,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.runstate_set) {
+               kvm_xen_setup_runstate_page(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -4001,6 +4007,12 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.runstate_set) {
+               kvm_xen_runstate_set_preempted(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
index 39a7ffcdcf2280e329ba25590180b5099cbacc82..76049478a7fa469d4da62834f6efa6fa82f76245 100644 (file)
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -61,6 +63,124 @@ out:
        return ret;
 }
 
+/*
+ * Account the time spent in the current runstate and switch the vCPU's
+ * shared vcpu_runstate_info area to @state. @steal_ns is time already
+ * attributable to 'runnable' (stolen time): it is subtracted from the
+ * delta charged to the outgoing state and added to time[RUNSTATE_runnable].
+ *
+ * The XEN_RUNSTATE_UPDATE bit is set in state_entry_time while the
+ * state/time fields are rewritten, and cleared again afterwards, with
+ * write barriers between the phases so the guest can detect an update
+ * in progress.
+ */
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state, u64 steal_ns)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       struct vcpu_runstate_info runstate;
+       unsigned int offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+       u64 now, delta;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference is alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed via *runstate_state
+        * which is unmodified, while the other fields are accessed
+        * through 'runstate->' which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * Although it's called "state_entry_time" and explicitly documented
+        * as being "the system time at which the VCPU was last scheduled to
+        * run", Xen just treats it as a counter for HVM domains too.
+        */
+       if (kvm_read_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                        &runstate.state_entry_time, offset,
+                                        sizeof(u64) * 5))
+               return;
+
+       /* Bump the counter and flag an update in progress */
+       runstate.state_entry_time = XEN_RUNSTATE_UPDATE |
+               (runstate.state_entry_time + 1);
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+       /* Order the UPDATE flag write before the state/time writes below */
+       smp_wmb();
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(runstate.state)))
+               return;
+
+       /* Charge the elapsed time, minus the stolen part, to the old state */
+       now = ktime_get_ns();
+       delta = now - vcpu_xen->last_state_ns - steal_ns;
+       runstate.time[vcpu_xen->current_runstate] += delta;
+       if (steal_ns)
+               runstate.time[RUNSTATE_runnable] += steal_ns;
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.time[0],
+                                         offset + sizeof(u64),
+                                         sizeof(runstate.time)))
+               return;
+       /* Ensure the times are visible before the UPDATE flag is cleared */
+       smp_wmb();
+       vcpu_xen->current_runstate = state;
+       vcpu_xen->last_state_ns = now;
+
+       runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+}
+
+/*
+ * Called from kvm_steal_time_set_preempted() when the vCPU stops
+ * running. A preempted vCPU goes to 'runnable'; one that stopped
+ * voluntarily goes to 'blocked', and run_delay is snapshotted so
+ * kvm_xen_setup_runstate_page() can later subtract the runnable
+ * portion from the blocked interval.
+ */
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       int new_state;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+       if (v->preempted) {
+               new_state = RUNSTATE_runnable;
+       } else {
+               new_state = RUNSTATE_blocked;
+               /* Baseline for the steal-time calculation on wakeup */
+               vcpu_xen->last_steal = current->sched_info.run_delay;
+       }
+
+       kvm_xen_update_runstate(v, new_state, 0);
+}
+
+/*
+ * Called from record_steal_time() when the vCPU runs again: transition
+ * the guest runstate area to RUNSTATE_running, passing along any
+ * runnable (stolen) time accrued since the vCPU blocked.
+ */
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       u64 steal_time = 0;
+
+       /*
+        * If the CPU was blocked when it last stopped, presumably
+        * it became unblocked at some point because it's being run
+        * again now. The scheduler run_delay is the runnable time,
+        * to be subtracted from the blocked time.
+        */
+       if (vcpu_xen->current_runstate == RUNSTATE_blocked)
+               steal_time = current->sched_info.run_delay - vcpu_xen->last_steal;
+
+       kvm_xen_update_runstate(v, RUNSTATE_running, steal_time);
+}
+
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        u8 rc = 0;
@@ -205,6 +325,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE:
+               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                                             &vcpu->arch.xen.runstate_cache,
+                                             data->u.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (!r) {
+                       vcpu->arch.xen.runstate_set = true;
+                       vcpu->arch.xen.current_runstate = RUNSTATE_blocked;
+                       vcpu->arch.xen.last_state_ns = ktime_get_ns();
+               }
+               break;
+
        default:
                break;
        }
@@ -235,6 +367,13 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE:
+               if (vcpu->arch.xen.runstate_set) {
+                       data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
        default:
                break;
        }
index 4b32489c0cecd934f28e5cefa4160a366c6573cc..a78f72e33bd883edb119f2e8e0d2b30f15d1f4d0 100644 (file)
@@ -14,6 +14,8 @@
 extern struct static_key_false_deferred kvm_xen_enabled;
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -74,4 +76,10 @@ struct compat_shared_info {
        struct compat_arch_shared_info arch;
 };
 
+/*
+ * 32-bit guest ABI layout of vcpu_runstate_info (0x2c bytes): uint64_t
+ * is only 4-byte aligned for 32-bit guests, hence 'packed' here and
+ * the +4 offset adjustments in kvm_xen_update_runstate().
+ */
+struct compat_vcpu_runstate_info {
+    int state;                 /* RUNSTATE_* */
+    uint64_t state_entry_time; /* update counter; top bit is XEN_RUNSTATE_UPDATE */
+    uint64_t time[4];          /* ns accumulated in each RUNSTATE_* */
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
index c61c49bbe0aa3fd66b7bc1a638a43d6e9e7ef1e9..4fb6b73ffde6defa443d0a761acabe326f41c625 100644 (file)
@@ -1624,6 +1624,7 @@ struct kvm_xen_vcpu_attr {
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE   0x2
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {