www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: x86/xen: register runstate info xenpv-post-6
author: Joao Martins <joao.m.martins@oracle.com>
Tue, 24 Jul 2018 16:47:14 +0000 (12:47 -0400)
committer: David Woodhouse <dwmw@amazon.co.uk>
Wed, 3 Feb 2021 14:45:58 +0000 (14:45 +0000)
Allow the emulator to register vCPU runstate areas, which Xen guests
can use for their steal clock. The 'preempted' state of the KVM steal
clock equates to the 'runnable' state; 'running' has similar meanings
for both, and 'offline' is used when the system admin needs to bring a
vCPU offline or hotplug it.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
include/uapi/linux/kvm.h

index 9693ec3c20427bbd8c219fde07274902c090a0a5..b1cc73a190219ef453b08d193f0ebb41d1cd4968 100644 (file)
@@ -523,10 +523,15 @@ struct kvm_vcpu_hv {
 /* Xen HVM per vcpu emulation context */
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
+       u32 current_runstate;   /* RUNSTATE_* value currently being accounted */
        bool vcpu_info_set;
        bool vcpu_time_info_set;
+       bool runstate_set;      /* true once a runstate area GPA is registered */
        struct gfn_to_hva_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache; /* guest vcpu_runstate_info area */
+       u64 last_steal;         /* sched_info.run_delay snapshot taken on block */
+       u64 last_state_ns;      /* ktime_get_ns() of the last runstate change */
 };
 
 struct kvm_vcpu_arch {
index 6dc0376beac5d1d501cd09c8ce6fd19c5b97bc30..16f916da81fcfc0bc4ef3047d41846c4c43ae9bf 100644 (file)
@@ -2947,6 +2947,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.runstate_set) {
+               kvm_xen_setup_runstate_page(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -4001,6 +4007,12 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.runstate_set) {
+               kvm_xen_runstate_set_preempted(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
index 39a7ffcdcf2280e329ba25590180b5099cbacc82..76049478a7fa469d4da62834f6efa6fa82f76245 100644 (file)
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -61,6 +63,124 @@ out:
        return ret;
 }
 
+/*
+ * Account the time spent in the current runstate and switch the vCPU's
+ * shared vcpu_runstate_info area to @state. @steal_ns is time already
+ * attributable to 'runnable' (stolen time): it is subtracted from the
+ * delta charged to the outgoing state and added to time[RUNSTATE_runnable].
+ *
+ * The XEN_RUNSTATE_UPDATE bit is set in state_entry_time while the
+ * state/time fields are rewritten, and cleared again afterwards, with
+ * write barriers between the phases so the guest can detect an update
+ * in progress.
+ */
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state, u64 steal_ns)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       struct vcpu_runstate_info runstate;
+       unsigned int offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+       u64 now, delta;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference is alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed via *runstate_state
+        * which is unmodified, while the other fields are accessed
+        * through 'runstate->' which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * Although it's called "state_entry_time" and explicitly documented
+        * as being "the system time at which the VCPU was last scheduled to
+        * run", Xen just treats it as a counter for HVM domains too.
+        */
+       if (kvm_read_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                        &runstate.state_entry_time, offset,
+                                        sizeof(u64) * 5))
+               return;
+
+       /* Bump the counter and flag an update in progress */
+       runstate.state_entry_time = XEN_RUNSTATE_UPDATE |
+               (runstate.state_entry_time + 1);
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+       /* Order the UPDATE flag write before the state/time writes below */
+       smp_wmb();
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(runstate.state)))
+               return;
+
+       /* Charge the elapsed time, minus the stolen part, to the old state */
+       now = ktime_get_ns();
+       delta = now - vcpu_xen->last_state_ns - steal_ns;
+       runstate.time[vcpu_xen->current_runstate] += delta;
+       if (steal_ns)
+               runstate.time[RUNSTATE_runnable] += steal_ns;
+
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.time[0],
+                                         offset + sizeof(u64),
+                                         sizeof(runstate.time)))
+               return;
+       /* Ensure the times are visible before the UPDATE flag is cleared */
+       smp_wmb();
+       vcpu_xen->current_runstate = state;
+       vcpu_xen->last_state_ns = now;
+
+       runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &runstate.state_entry_time, offset,
+                                         sizeof(u64)))
+               return;
+}
+
+/*
+ * Called from kvm_steal_time_set_preempted() when the vCPU stops
+ * running. A preempted vCPU goes to 'runnable'; one that stopped
+ * voluntarily goes to 'blocked', and run_delay is snapshotted so
+ * kvm_xen_setup_runstate_page() can later subtract the runnable
+ * portion from the blocked interval.
+ */
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       int new_state;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+       if (v->preempted) {
+               new_state = RUNSTATE_runnable;
+       } else {
+               new_state = RUNSTATE_blocked;
+               /* Baseline for the steal-time calculation on wakeup */
+               vcpu_xen->last_steal = current->sched_info.run_delay;
+       }
+
+       kvm_xen_update_runstate(v, new_state, 0);
+}
+
+/*
+ * Called from record_steal_time() when the vCPU runs again: transition
+ * the guest runstate area to RUNSTATE_running, passing along any
+ * runnable (stolen) time accrued since the vCPU blocked.
+ */
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
+{
+       struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+       u64 steal_time = 0;
+
+       /*
+        * If the CPU was blocked when it last stopped, presumably
+        * it became unblocked at some point because it's being run
+        * again now. The scheduler run_delay is the runnable time,
+        * to be subtracted from the blocked time.
+        */
+       if (vcpu_xen->current_runstate == RUNSTATE_blocked)
+               steal_time = current->sched_info.run_delay - vcpu_xen->last_steal;
+
+       kvm_xen_update_runstate(v, RUNSTATE_running, steal_time);
+}
+
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        u8 rc = 0;
@@ -205,6 +325,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE:
+               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                                             &vcpu->arch.xen.runstate_cache,
+                                             data->u.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (!r) {
+                       vcpu->arch.xen.runstate_set = true;
+                       vcpu->arch.xen.current_runstate = RUNSTATE_blocked;
+                       vcpu->arch.xen.last_state_ns = ktime_get_ns();
+               }
+               break;
+
        default:
                break;
        }
@@ -235,6 +367,13 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE:
+               if (vcpu->arch.xen.runstate_set) {
+                       data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
        default:
                break;
        }
index 4b32489c0cecd934f28e5cefa4160a366c6573cc..a78f72e33bd883edb119f2e8e0d2b30f15d1f4d0 100644 (file)
@@ -14,6 +14,8 @@
 extern struct static_key_false_deferred kvm_xen_enabled;
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -74,4 +76,10 @@ struct compat_shared_info {
        struct compat_arch_shared_info arch;
 };
 
+/*
+ * 32-bit guest ABI layout of vcpu_runstate_info (0x2c bytes): uint64_t
+ * is only 4-byte aligned for 32-bit guests, hence 'packed' here and
+ * the +4 offset adjustments in kvm_xen_update_runstate().
+ */
+struct compat_vcpu_runstate_info {
+    int state;                 /* RUNSTATE_* */
+    uint64_t state_entry_time; /* update counter; top bit is XEN_RUNSTATE_UPDATE */
+    uint64_t time[4];          /* ns accumulated in each RUNSTATE_* */
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
index c61c49bbe0aa3fd66b7bc1a638a43d6e9e7ef1e9..4fb6b73ffde6defa443d0a761acabe326f41c625 100644 (file)
@@ -1624,6 +1624,7 @@ struct kvm_xen_vcpu_attr {
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_RUNSTATE   0x2
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {