#include "hyperv.h"
#include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
#include "trace.h"
guest_hv_clock->version = vcpu->hv_clock.version;
}
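+/*
+ * Update the guest-visible vcpu_runstate_info. The XEN_RUNSTATE_UPDATE
+ * bit in state_entry_time works like a seqlock: it is set (and the
+ * counter bumped) before the fields are modified, and cleared once they
+ * are consistent again, so a guest reader (e.g. Linux's
+ * xen_get_runstate_snapshot()) can retry until it sees a stable
+ * snapshot, roughly:
+ *
+ *	do {
+ *		entry = READ_ONCE(runstate->state_entry_time);
+ *		rmb();
+ *		snap = *runstate;
+ *		rmb();
+ *	} while (snap.state_entry_time != entry ||
+ *		 (entry & XEN_RUNSTATE_UPDATE));
+ */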
+static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_ns)
+{
+ struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+ struct compat_vcpu_runstate_info *runstate;
+ u32 *runstate_state;
+ u64 now, delta;
+ int idx;
+
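+ /*
+ * The 32-bit compat layout is fixed guest ABI (0x2c bytes), and
+ * 'state' must have the same offset and size in the native and
+ * compat structs, because it is written through the unadjusted
+ * pointer below.
+ */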
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+ offsetof(struct compat_vcpu_runstate_info, state));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ runstate = READ_ONCE(vcpu_xen->runstate);
+ runstate_state = &runstate->state;
+
+#ifdef CONFIG_64BIT
+ /*
+ * The only difference is the alignment of uint64_t in 32-bit.
+ * So the first field 'state' is accessed via *runstate_state,
+ * which is unmodified, while the other fields are accessed
+ * through 'runstate->', which we tweak here by adding 4.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) + 4);
+ if (vcpu->kvm->arch.xen.long_mode)
+ runstate = ((void *)runstate) + 4;
+#endif
+ /*
+ * Although it's called "state_entry_time" and explicitly documented
+ * as being "the system time at which the VCPU was last scheduled to
+ * run", Xen just treats it as a counter for HVM domains too.
+ */
+ runstate->state_entry_time = XEN_RUNSTATE_UPDATE |
+ (runstate->state_entry_time + 1);
+ smp_wmb();
+
+ now = ktime_get_ns();
+ delta = now - vcpu_xen->last_state_ns - steal_ns;
+
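+ /*
+ * Publish the new state and credit the elapsed time (minus any
+ * stolen time) to the state we are leaving.
+ */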
+ *runstate_state = state;
+ runstate->time[vcpu_xen->current_runstate] += delta;
+ if (steal_ns)
+ runstate->time[RUNSTATE_runnable] += steal_ns;
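+ /* Ensure the fields are visible before the update bit is cleared */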
+ smp_wmb();
+ vcpu_xen->current_runstate = state;
+ vcpu_xen->last_state_ns = now;
+
+ runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ smp_wmb();
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+}
+
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+ int new_state;
+
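+ /*
+ * A preempted vCPU is still runnable from the guest's point of
+ * view. If it blocked instead (e.g. on HLT), snapshot run_delay
+ * now: any time spent runnable while nominally blocked will be
+ * credited back to RUNSTATE_runnable when the vCPU runs again.
+ */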
+ if (vcpu->preempted) {
+ new_state = RUNSTATE_runnable;
+ } else {
+ new_state = RUNSTATE_blocked;
+ vcpu_xen->last_steal = current->sched_info.run_delay;
+ }
+
+ kvm_xen_update_runstate(vcpu, new_state, 0);
+}
+
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+ u64 steal_time = 0;
+
+ /*
+ * If the vCPU was blocked when it last stopped, presumably
+ * it became unblocked at some point because it's being run
+ * again now. The scheduler run_delay accumulated since then
+ * is time spent runnable, not blocked, so subtract it from
+ * the blocked time.
+ */
+ if (vcpu_xen->current_runstate == RUNSTATE_blocked)
+ steal_time = current->sched_info.run_delay - vcpu_xen->last_steal;
+
+ kvm_xen_update_runstate(vcpu, RUNSTATE_running, steal_time);
+}
+
void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
{
struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(v);
if (sz)
*sz = sizeof(struct pvclock_vcpu_time_info);
return 0;
+
+ case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+ *map = &vcpu->arch.xen.runstate_map;
+ *hva = (void **)&vcpu->arch.xen.runstate;
+ if (sz)
+ *sz = vcpu->kvm->arch.xen.long_mode ?
+ sizeof(struct vcpu_runstate_info) :
+ sizeof(struct compat_vcpu_runstate_info);
+ return 0;
}
return -EINVAL;
}
break;
}
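+ /* Runstate accounting needs the scheduler's run_delay statistics */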
+ case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+ if (unlikely(!sched_info_on()))
+ return -EOPNOTSUPP;
+ fallthrough;
case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
gpa_t gpa = data->u.vcpu_attr.gpa;
return r;
r = kvm_xen_map_guest_page(kvm, map, hva, gpa, sz);
- if (!r)
- kvm_xen_setup_pvclock_page(v);
-
+ if (!r) {
+ if (data->type == KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE) {
+ v->arch.xen.current_runstate = RUNSTATE_blocked;
+ v->arch.xen.last_state_ns = ktime_get_ns();
+ /* Baseline for steal time accrued while "blocked" */
+ v->arch.xen.last_steal = current->sched_info.run_delay;
+ } else {
+ kvm_xen_setup_pvclock_page(v);
+ }
+ }
break;
}
break;
}
+ case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
struct kvm_host_map *map;
NULL, true, false);
vcpu_xen->pv_time = NULL;
}
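+ /* Release the runstate mapping, marking the page dirty */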
+ if (vcpu_xen->runstate) {
+ kvm_unmap_gfn(vcpu->kvm, &vcpu_xen->runstate_map,
+ NULL, true, false);
+ vcpu_xen->runstate = NULL;
+ }
}
void kvm_xen_destroy_vm(struct kvm *kvm)