From: Joao Martins Date: Mon, 15 May 2017 16:51:10 +0000 (+0100) Subject: x86/xen/time: setup secondary time info for vdso X-Git-Tag: v4.1.12-102.0.20170601_1400~213^2~23 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=d5ae16b5584e1e01b1d4a17ae6b206a7eeceb498;p=users%2Fjedix%2Flinux-maple.git x86/xen/time: setup secondary time info for vdso In order to support pvclock vdso on xen we need to setup the time info page for each vcpu and register those pages with Xen using the VCPUOP_register_vcpu_time_memory_area hypercall. This hypercall will also forcefully update the pvti which will set some of the necessary flags for vdso. Afterwards we check if it supports the PVCLOCK_TSC_STABLE_BIT flag which is mandatory for having vdso/vsyscall support. And if so, it will set the cpu pvti's that will be later used when mapping the vdso image. Note that before setting up vdso we check for PVCLOCK_TSC_STABLE_BIT in the primary vcpu_info and, if it is supported, add this flag to the pvclock supported ones. This is to allow Xen clocksource to be faster irrespective of how the pvclock vdso pages are set up. This speeds up pvclock_clocksource_read() users. The xen headers are also updated to include the new hypercall for registering the secondary vcpu_time_info copy. Signed-off-by: Joao Martins Reviewed-by: Boris Ostrovsky Acked-by: Konrad Rzeszutek Wilk Orabug: 26107942 --- diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 9bbe9cb10b9c4..67260aa019009 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2327,6 +2327,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) case FIX_KMAP_BEGIN ... FIX_KMAP_END: # endif #elif defined(CONFIG_X86_VSYSCALL_EMULATION) + case PVCLOCK_FIXMAP_BEGIN ... 
PVCLOCK_FIXMAP_END: case VSYSCALL_PAGE: #endif case FIX_TEXT_POKE0: @@ -2368,7 +2369,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #ifdef CONFIG_X86_VSYSCALL_EMULATION /* Replicate changes to map the vsyscall page into the user pagetable vsyscall mapping. */ - if (idx == VSYSCALL_PAGE) { + if (idx == VSYSCALL_PAGE || + (idx >= PVCLOCK_FIXMAP_BEGIN && idx <= PVCLOCK_FIXMAP_END)) { unsigned long vaddr = __fix_to_virt(idx); set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 38ecdee525ebb..a3e44070d5d51 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -503,6 +503,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; + xen_setup_vcpu_vsyscall_time_info(cpu); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); BUG_ON(rc); @@ -794,6 +796,8 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) if (!rc) rc = native_cpu_up(cpu, tidle); + xen_setup_vcpu_vsyscall_time_info(cpu); + /* * We must initialize the slowpath CPU kicker _after_ the native * path has executed. If we initialized it before none of the diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d0d4ec3a102f2..376ac405dc8b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -379,6 +380,90 @@ static const struct pv_time_ops xen_time_ops __initconst = { .steal_clock = xen_steal_clock, }; +static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; + +void xen_setup_vcpu_vsyscall_time_info(int cpu) +{ + struct vcpu_register_time_memory_area t; + int ret; + + if (!xen_clock) + return; + + t.addr.v = &xen_clock[cpu].pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, + cpu, &t); + + /* + * We don't disable VCLOCK_PVCLOCK entirely if one of the vCPUS fails + * to register the secondary time info with Xen. 
If it does fail, the worst + * that can happen is a process seeing a zeroed out pvti. Userspace + * checks the PVCLOCK_TSC_STABLE_BIT and, if 0, discards the data + * in pvti and falls back to a system call for a reliable timestamp. + */ + WARN_ONCE(ret != 0, + "CPU%d: Cannot register secondary vcpu_time_info", cpu); +} + +static __init void xen_setup_vsyscall_time_info(void) +{ + struct vcpu_register_time_memory_area t; + struct pvclock_vsyscall_time_info *ti; + struct pvclock_vcpu_time_info *pvti; + int cpu = smp_processor_id(); + unsigned long size; + int ret; + + pvti = &__this_cpu_read(xen_vcpu)->time; + + /* + * We check ahead on the primary time info if this + * bit is supported hence speeding up Xen clocksource. + */ + if (!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)) + return; + + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS); + ti = (struct pvclock_vsyscall_time_info *) + alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (!ti) + return; + + t.addr.v = &ti->pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, + cpu, &t); + if (ret) { + pr_debug("xen: Cannot register secondary time_info err %d\n", + ret); + free_pages_exact(ti, size); + return; + } + + /* If the check above succeeded this one most likely too since it's the + * same data on both primary and secondary time infos just different + * memory regions. But we still check in case hypervisor is buggy. 
+ */ + pvti = &ti->pvti; + if (!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)) { + t.addr.v = NULL; + if (!HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, + cpu, &t)) + free_pages_exact(ti, size); + + pr_debug("xen: VCLOCK_PVCLOCK not supported\n"); + return; + } + + xen_clock = ti; + pvclock_init_vsyscall(xen_clock, size); + + xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK; +} + static void __init xen_time_init(void) { int cpu = smp_processor_id(); @@ -409,6 +494,7 @@ static void __init xen_time_init(void) xen_setup_cpu_clockevents(); xen_time_setup_guest(); + xen_setup_vsyscall_time_info(); if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8defcdd4b06d7..91ce183640509 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -78,6 +78,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_vcpu_setup(int cpu); void xen_setup_vcpu_info_placement(void); +void xen_setup_vcpu_vsyscall_time_info(int cpu); #ifdef CONFIG_SMP void xen_smp_init(void); diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index 98188c87f5c15..8da788c5bd4f5 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -178,4 +178,32 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ #define VCPUOP_send_nmi 11 + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. 
Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + GUEST_HANDLE(vcpu_time_info_t) h; + struct pvclock_vcpu_time_info *v; + uint64_t p; + } addr; +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area_t); + #endif /* __XEN_PUBLIC_VCPU_H__ */