From: Juergen Gross Date: Wed, 20 Apr 2016 12:03:32 +0000 (+0800) Subject: xen: add steal_clock support on x86 X-Git-Tag: v4.1.12-92~55^2~34 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=5e9e4a098c52c0561629fa3d62e0c65738bf7df8;p=users%2Fjedix%2Flinux-maple.git xen: add steal_clock support on x86 The pv_time_ops structure contains a function pointer for the "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86 uses its own mechanism to account for the "stolen" time a thread wasn't able to run due to hypervisor scheduling. Add support in Xen arch independent time handling for this feature by moving it out of the arm arch into drivers/xen and remove the x86 Xen hack. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel (cherry picked from commit ecb23dc6f2eff0ce64dd60351a81f376f13b12cc) Signed-off-by: Bob Liu Orabug: 24820937 Conflicts: arch/arm/xen/enlighten.c --- diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index fc7ea529f462..27d42e80c3a1 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -79,6 +79,70 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); +static void xen_read_wallclock(struct timespec64 *ts) +{ + u32 version; + struct timespec64 now, ts_monotonic; + struct shared_info *s = HYPERVISOR_shared_info; + struct pvclock_wall_clock *wall_clock = &(s->wc); + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + /* time since system boot */ + ktime_get_ts64(&ts_monotonic); + *ts = timespec64_add(now, ts_monotonic); +} + +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) +{ + /* Protected by the calling core code serialization */ + static struct timespec64 next_sync; + + struct xen_platform_op op; + struct timespec64 now, system_time; + struct timekeeper *tk = priv; + + now.tv_sec = tk->xtime_sec; + now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); + system_time = timespec64_add(now, tk->wall_to_monotonic); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec64_compare(&now, &next_sync) < 0) + return NOTIFY_OK; + + op.cmd = XENPF_settime64; + op.u.settime64.mbz = 0; + op.u.settime64.secs = now.tv_sec; + op.u.settime64.nsecs = now.tv_nsec; + op.u.settime64.system_time = timespec64_to_ns(&system_time); + (void)HYPERVISOR_platform_op(&op); + + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; + + return NOTIFY_OK; +} + +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static void xen_percpu_init(void) { struct vcpu_register_vcpu_info info; @@ -271,6 +335,11 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); + xen_time_setup_guest(); + + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); + return 0; } early_initcall(xen_guest_init); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 11d2f09bdc28..33659e327613 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -353,8 +313,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -449,6 +407,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 71078425c9ea..2257b6663766 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } +static u64 xen_steal_clock(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + xen_get_runstate_snapshot(&state); + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu) BUG(); } +void __init xen_time_setup_guest(void) +{ + pv_time_ops.steal_clock = xen_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + /* + * We can't set paravirt_steal_rq_enabled as this would require the + * capability to read another cpu's runstate info. + */ +} diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 25a822f6f000..44fda64ad434 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user) extern void account_process_tick(struct task_struct *, int user); #endif -extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 3491582bf50a..355275bad2cf 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); +void xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); int xen_setup_shutdown_event(void); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8394b1ee600c..20e1790a40a8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -485,16 +485,6 @@ void account_process_tick(struct task_struct *p, int user_tick) account_idle_time(cputime_one_jiffy); } -/* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - /* * Account multiple ticks of idle time. * @ticks: number of stolen ticks