]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: x86/xen: register runstate info
authorJoao Martins <joao.m.martins@oracle.com>
Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)
committerDavid Woodhouse <dwmw@amazon.co.uk>
Sun, 6 Dec 2020 10:52:11 +0000 (10:52 +0000)
Allow the emulator to register a per-vCPU runstate area, which Xen guests
use for steal-clock accounting. The 'preempted' state of the KVM steal
clock equates to the Xen 'runnable' state, 'running' has a similar meaning
for both, and 'offline' is used when the system administrator needs to
bring a vCPU offline or hotplug it.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
include/uapi/linux/kvm.h

index ec942528920922969a540d8c7acf1a05899e3f0b..d8716ef277283ee8cb9475cd0b571c028ed7373c 100644 (file)
@@ -527,6 +527,11 @@ struct kvm_vcpu_xen {
        struct vcpu_info *vcpu_info;
        struct kvm_host_map pv_time_map;
        struct pvclock_vcpu_time_info *pv_time;
+       struct kvm_host_map runstate_map;
+       void *runstate;
+       uint32_t current_runstate;
+       uint64_t last_steal;
+       uint64_t last_state_ns;
 };
 
 struct kvm_vcpu_arch {
index 27f3a59f45b1a23d4e53ed2b5d3eace291df3565..9cbe8ee0de47daec300ce127dd6a2799a0b48bee 100644 (file)
@@ -2935,6 +2935,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (vcpu->arch.xen.runstate) {
+               kvm_xen_setup_runstate_page(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -3963,6 +3968,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (vcpu->arch.xen.runstate) {
+               kvm_xen_runstate_set_preempted(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
index 74716c2b455fb2ce1aa5740d4b5da5598a31a838..4aa776c1ad57a05cc7173982382bf41a63d13dd0 100644 (file)
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -133,6 +135,102 @@ static void kvm_xen_update_vcpu_time(struct kvm_vcpu *v,
        guest_hv_clock->version = vcpu->hv_clock.version;
 }
 
+static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_ns)
+{      /*
+        * Publish a runstate transition for @vcpu in its registered runstate
+        * area. The time since the previous transition, less @steal_ns, is
+        * added to the counter of the state being left; @steal_ns itself is
+        * added to RUNSTATE_runnable; @state becomes the current state.
+        *
+        * vcpu_xen->runstate is dereferenced without a NULL check here.
+        *
+        * NOTE(review): delta is unsigned, so it wraps if @steal_ns exceeds
+        * the elapsed interval -- confirm callers cannot produce that.
+        */
+       struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+       struct compat_vcpu_runstate_info *runstate;
+       u32 *runstate_state;
+       u64 now, delta;
+       int idx;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       runstate = READ_ONCE(vcpu_xen->runstate);
+       runstate_state = &runstate->state;      /* same offset in both layouts */
+
+#ifdef CONFIG_64BIT
+       /*
+        * The only difference is the alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed via *runstate_state
+        * which is unmodified, while the other fields are accessed
+        * through 'runstate->' which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+       if (vcpu->kvm->arch.xen.long_mode)
+               runstate = ((void *)runstate) + 4;
+#endif
+       /*
+        * Although it's called "state_entry_time" and explicitly documented
+        * as being "the system time at which the VCPU was last scheduled to
+        * run", Xen just treats it as a counter for HVM domains too.
+        *
+        * XEN_RUNSTATE_UPDATE is set (and the counter bumped) before the
+        * fields below are rewritten, and cleared again once they have been
+        * written; the smp_wmb() calls keep those stores in that order.
+        */
+       runstate->state_entry_time = XEN_RUNSTATE_UPDATE |
+               (runstate->state_entry_time + 1);
+       smp_wmb();      /* flag store ordered before the field updates */
+
+       now = ktime_get_ns();
+       delta = now - vcpu_xen->last_state_ns - steal_ns;       /* time in old state, minus steal */
+
+       *runstate_state = state;
+       runstate->time[vcpu_xen->current_runstate] += delta;
+       if (steal_ns)
+               runstate->time[RUNSTATE_runnable] += steal_ns;
+       smp_wmb();      /* field updates ordered before the flag is cleared */
+       vcpu_xen->current_runstate = state;
+       vcpu_xen->last_state_ns = now;
+
+       runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       smp_wmb();
+
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+}
+
+/*
+ * Record in the runstate area that @vcpu has stopped running:
+ * RUNSTATE_runnable if vcpu->preempted is set, RUNSTATE_blocked otherwise.
+ *
+ * The compat/native layout assertions live in kvm_xen_update_runstate(),
+ * the only function that touches the guest-visible structure.
+ */
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+       int new_state;
+
+       if (vcpu->preempted) {
+               new_state = RUNSTATE_runnable;
+       } else {
+               /*
+                * Snapshot the scheduler's run_delay so that
+                * kvm_xen_setup_runstate_page() can later work out how
+                * much of the blocked interval was really spent runnable.
+                */
+               new_state = RUNSTATE_blocked;
+               vcpu_xen->last_steal = current->sched_info.run_delay;
+       }
+
+       /* No steal time is attributed on the way out. */
+       kvm_xen_update_runstate(vcpu, new_state, 0);
+}
+
+/*
+ * Switch @vcpu to RUNSTATE_running in its runstate area.
+ *
+ * A vCPU that was last recorded as blocked must have become runnable at
+ * some point before being run again. The growth of the scheduler's
+ * run_delay since it blocked is that runnable time; it is passed on as
+ * steal time so it is taken out of the blocked total.
+ */
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_xen *xen = vcpu_to_xen_vcpu(vcpu);
+       bool was_blocked = xen->current_runstate == RUNSTATE_blocked;
+       u64 runnable_ns;
+
+       runnable_ns = was_blocked ?
+               current->sched_info.run_delay - xen->last_steal : 0;
+
+       kvm_xen_update_runstate(vcpu, RUNSTATE_running, runnable_ns);
+}
+
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
 {
        struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(v);
@@ -176,6 +274,15 @@ static int vcpu_attr_loc(struct kvm_vcpu *vcpu, u16 type,
                if (sz)
                        *sz = sizeof(struct pvclock_vcpu_time_info);
                return 0;
+
+       case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+               *map = &vcpu->arch.xen.runstate_map;
+               *hva = (void **)&vcpu->arch.xen.runstate;
+               if (sz)
+                       *sz = vcpu->kvm->arch.xen.long_mode ?
+                               sizeof(struct shared_info) :
+                               sizeof(struct compat_shared_info);
+               return 0;
        }
        return -EINVAL;
 }
@@ -200,6 +307,10 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                break;
        }
 
+       case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+               if (unlikely(!sched_info_on()))
+                       return -ENOTSUPP;
+       /* fallthrough */
        case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
        case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
                gpa_t gpa = data->u.vcpu_attr.gpa;
@@ -217,9 +328,13 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                        return r;
 
                r = kvm_xen_map_guest_page(kvm, map, hva, gpa, sz);
-               if (!r)
-                       kvm_xen_setup_pvclock_page(v);
-
+               if (!r) {
+                       if (data->type == KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE) {
+                               v->arch.xen.current_runstate = RUNSTATE_blocked;
+                               v->arch.xen.last_state_ns = ktime_get_ns();
+                       } else
+                               kvm_xen_setup_pvclock_page(v);
+               }
                break;
        }
 
@@ -248,6 +363,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                break;
        }
 
+       case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
        case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
        case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
                struct kvm_host_map *map;
@@ -423,6 +539,11 @@ void kvm_xen_vcpu_uninit(struct kvm_vcpu *vcpu)
                              NULL, true, false);
                vcpu_xen->pv_time = NULL;
        }
+       if (vcpu_xen->runstate) {
+               kvm_unmap_gfn(vcpu->kvm, &vcpu_xen->runstate_map,
+                             NULL, true, false);
+               vcpu_xen->runstate = NULL;
+       }
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
index a4d80cc21ee4091a6de6cd3f8347d23b5d62c503..ccd6002f55bc029ef2fb4bab6a1425f91293aa7d 100644 (file)
@@ -23,6 +23,8 @@ static inline struct kvm_vcpu *xen_vcpu_to_vcpu(struct kvm_vcpu_xen *xen_vcpu)
 }
 
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *vcpu);
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
@@ -70,7 +72,12 @@ struct compat_shared_info {
        uint32_t evtchn_mask[32];
        struct pvclock_wall_clock wc;
        struct compat_arch_shared_info arch;
-
 };
 
+/*
+ * Runstate area layout for a guest that is not in long mode. The struct
+ * is packed, so the 64-bit members follow 'state' directly and the total
+ * size is 0x2c bytes; the native struct vcpu_runstate_info places the same
+ * members 4 bytes further on (both facts are asserted with BUILD_BUG_ON()
+ * in kvm_xen_update_runstate()).
+ */
+struct compat_vcpu_runstate_info {
+       int state;                      /* current RUNSTATE_* value */
+       uint64_t state_entry_time;      /* carries XEN_RUNSTATE_UPDATE while being updated */
+       uint64_t time[4];               /* accumulated ns, indexed by RUNSTATE_* */
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
index 3ff3f72ac6b33838a3d786ed36ad5773f450d496..98b6cd747a017b50180aaafe129d2e314e69249d 100644 (file)
@@ -1600,6 +1600,7 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
 #define KVM_XEN_ATTR_TYPE_VCPU_INFO            0x2
 #define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO       0x3
+#define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE                0x4
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {