www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
xen: add steal_clock support on x86
author: Juergen Gross <jgross@suse.com>
Wed, 20 Apr 2016 12:03:32 +0000 (20:03 +0800)
committer: Bob Liu <bob.liu@oracle.com>
Thu, 20 Oct 2016 07:56:44 +0000 (03:56 -0400)
The pv_time_ops structure contains a function pointer for the
"steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
uses its own mechanism to account for the "stolen" time a thread wasn't
able to run due to hypervisor scheduling.

Add support in Xen arch independent time handling for this feature by
moving it out of the arm arch into drivers/xen and remove the x86 Xen
hack.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
(cherry picked from commit ecb23dc6f2eff0ce64dd60351a81f376f13b12cc)
Signed-off-by: Bob Liu <bob.liu@oracle.com>
Orabug: 24820937

 Conflicts:
arch/arm/xen/enlighten.c

arch/arm/xen/enlighten.c
arch/x86/xen/time.c
drivers/xen/time.c
include/linux/kernel_stat.h
include/xen/xen-ops.h
kernel/sched/cputime.c

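diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index fc7ea529f462de75149c31168f01f17a42908e76..27d42e80c3a101598c50c3fae5b5175ef490a254 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c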
@@ -79,6 +79,70 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
 
+static void xen_read_wallclock(struct timespec64 *ts)
+{
+       u32 version;
+       struct timespec64 now, ts_monotonic;
+       struct shared_info *s = HYPERVISOR_shared_info;
+       struct pvclock_wall_clock *wall_clock = &(s->wc);
+
+       /* get wallclock at system boot */
+       do {
+               version = wall_clock->version;
+               rmb();          /* fetch version before time */
+               now.tv_sec  = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec;
+               now.tv_nsec = wall_clock->nsec;
+               rmb();          /* fetch time before checking version */
+       } while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+       /* time since system boot */
+       ktime_get_ts64(&ts_monotonic);
+       *ts = timespec64_add(now, ts_monotonic);
+}
+
+static int xen_pvclock_gtod_notify(struct notifier_block *nb,
+                                  unsigned long was_set, void *priv)
+{
+       /* Protected by the calling core code serialization */
+       static struct timespec64 next_sync;
+
+       struct xen_platform_op op;
+       struct timespec64 now, system_time;
+       struct timekeeper *tk = priv;
+
+       now.tv_sec = tk->xtime_sec;
+       now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+       system_time = timespec64_add(now, tk->wall_to_monotonic);
+
+       /*
+        * We only take the expensive HV call when the clock was set
+        * or when the 11 minutes RTC synchronization time elapsed.
+        */
+       if (!was_set && timespec64_compare(&now, &next_sync) < 0)
+               return NOTIFY_OK;
+
+       op.cmd = XENPF_settime64;
+       op.u.settime64.mbz = 0;
+       op.u.settime64.secs = now.tv_sec;
+       op.u.settime64.nsecs = now.tv_nsec;
+       op.u.settime64.system_time = timespec64_to_ns(&system_time);
+       (void)HYPERVISOR_platform_op(&op);
+
+       /*
+        * Move the next drift compensation time 11 minutes
+        * ahead. That's emulating the sync_cmos_clock() update for
+        * the hardware RTC.
+        */
+       next_sync = now;
+       next_sync.tv_sec += 11 * 60;
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block xen_pvclock_gtod_notifier = {
+       .notifier_call = xen_pvclock_gtod_notify,
+};
+
 static void xen_percpu_init(void)
 {
        struct vcpu_register_vcpu_info info;
@@ -271,6 +335,11 @@ static int __init xen_guest_init(void)
 
        register_cpu_notifier(&xen_cpu_notifier);
 
+       xen_time_setup_guest();
+
+       if (xen_initial_domain())
+               pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
+
        return 0;
 }
 early_initcall(xen_guest_init);
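diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 11d2f09bdc284e940dfdd7f12229a4cfdad7102e..33659e3276138763560d4d17434130a65486759d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c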
@@ -11,8 +11,6 @@
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/pvclock_gtod.h>
 
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP     100000
-#define NS_PER_TICK    (1000000000LL / HZ)
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-static void do_stolen_accounting(void)
-{
-       struct vcpu_runstate_info state;
-       struct vcpu_runstate_info *snap;
-       s64 runnable, offline, stolen;
-       cputime_t ticks;
-
-       xen_get_runstate_snapshot(&state);
-
-       WARN_ON(state.state != RUNSTATE_running);
-
-       snap = this_cpu_ptr(&xen_runstate_snapshot);
-
-       /* work out how much time the VCPU has not been runn*ing*  */
-       runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
-       offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
-       *snap = state;
-
-       /* Add the appropriate number of ticks of stolen time,
-          including any left-overs from last time. */
-       stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
-       if (stolen < 0)
-               stolen = 0;
-
-       ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-       __this_cpu_write(xen_residual_stolen, stolen);
-       account_steal_ticks(ticks);
-}
 
 /* Get the TSC speed from Xen */
 static unsigned long xen_tsc_khz(void)
@@ -353,8 +313,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
                ret = IRQ_HANDLED;
        }
 
-       do_stolen_accounting();
-
        return ret;
 }
 
@@ -449,6 +407,8 @@ static void __init xen_time_init(void)
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
 
+       xen_time_setup_guest();
+
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 71078425c9ea04641cfde0a8c944b055b056ec1d..2257b666376647452186fbf5c50529edc03b1e3f 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -6,6 +6,7 @@
 #include <linux/math64.h>
 #include <linux/gfp.h>
 
+#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
        return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
 }
 
+static u64 xen_steal_clock(int cpu)
+{
+       struct vcpu_runstate_info state;
+
+       BUG_ON(cpu != smp_processor_id());
+       xen_get_runstate_snapshot(&state);
+       return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+
 void xen_setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
@@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
                BUG();
 }
 
+void __init xen_time_setup_guest(void)
+{
+       pv_time_ops.steal_clock = xen_steal_clock;
+
+       static_key_slow_inc(&paravirt_steal_enabled);
+       /*
+        * We can't set paravirt_steal_rq_enabled as this would require the
+        * capability to read another cpu's runstate info.
+        */
+}
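diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 25a822f6f0009f97c47d9f5fb0d27bed0e79fcd5..44fda64ad4344c9ea34eac09616a02eba44a479a 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h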
@@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
 extern void account_process_tick(struct task_struct *, int user);
 #endif
 
-extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
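diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 3491582bf50a317959504be93886e403c8e54459..355275bad2cff558851372717e9df6ab7b827875 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h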
@@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
 
 bool xen_vcpu_stolen(int vcpu);
 void xen_setup_runstate_info(int cpu);
+void xen_time_setup_guest(void);
 void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
 
 int xen_setup_shutdown_event(void);
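diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1ee600c38ba6e9144a6326369b6ef0cdacd..20e1790a40a8834de0b4bfd8a6038c822b89b435 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c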
@@ -485,16 +485,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
                account_idle_time(cputime_one_jiffy);
 }
 
-/*
- * Account multiple ticks of steal time.
- * @p: the process from which the cpu time has been stolen
- * @ticks: number of stolen ticks
- */
-void account_steal_ticks(unsigned long ticks)
-{
-       account_steal_time(jiffies_to_cputime(ticks));
-}
-
 /*
  * Account multiple ticks of idle time.
  * @ticks: number of stolen ticks