From ec9cfbba5dd531c9afeb473f99fafbe5cb5e9570 Mon Sep 17 00:00:00 2001
From: Joao Martins
Date: Tue, 21 Aug 2018 12:14:20 -0400
Subject: [PATCH] i386/xen: evtchn VIRQ support

Add VIRQ support and handle the basic ops needed for event channels:
bind, unmask, close, status. For now we only implement the 2-level ABI,
which is limited to 4k event channels on 64-bit or 1k on 32-bit.

The basic mechanism works on the shared_info bit arrays: raising an
event sets the port's bit to 1 in evtchn_pending. Additionally, a
second per-vCPU word (evtchn_pending_sel) records which groups of
ports have pending events.

QEMU's current test_and_set_bit()/test_and_clear_bit() implementations
are non-atomic, but with Xen HVM guests on KVM a word can be accessed
simultaneously by QEMU, the hypervisor, and possibly the guest itself,
so we need atomic bitops. The new header is ported from the ones used
by Xen.

Co-developed-by: Boris Ostrovsky
Signed-off-by: Joao Martins
Signed-off-by: Boris Ostrovsky
---
 .../standard-headers/asm-x86/atomic_bitops.h  |  79 ++++
 target/i386/Makefile.objs                     |   2 +-
 target/i386/xen-proto.h                       |  15 +
 target/i386/xen.c                             |  42 ++-
 target/i386/xen.h                             |   2 +
 target/i386/xen_evtchn.c                      | 357 ++++++++++++++++++
 target/i386/xen_evtchn.h                      |  26 ++
 7 files changed, 514 insertions(+), 9 deletions(-)
 create mode 100644 include/standard-headers/asm-x86/atomic_bitops.h
 create mode 100644 target/i386/xen_evtchn.c
 create mode 100644 target/i386/xen_evtchn.h

diff --git a/include/standard-headers/asm-x86/atomic_bitops.h b/include/standard-headers/asm-x86/atomic_bitops.h
new file mode 100644
index 0000000000..99495b8ec6
--- /dev/null
+++ b/include/standard-headers/asm-x86/atomic_bitops.h
@@ -0,0 +1,79 @@
+/*
+ * x86-specific Atomic Bitops Module
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * Taken from Xen.
+ *
+ * This work is licensed under the terms of the GNU GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef X86_BITOPS_H
+#define X86_BITOPS_H
+
+/**
+ * clear_bit_atomic - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit_atomic() is atomic and may not be reordered.
+ */
+static inline void clear_bit_atomic(int nr, volatile void *addr)
+{
+    asm volatile ( "lock; btrl %1,%0"
+                   : "+m" ((*(volatile long *) addr)) : "Ir" (nr) : "memory");
+}
+
+/**
+ * test_and_set_bit_atomic - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit_atomic(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "lock; btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" ((*(volatile long *) addr))
+        : "Ir" (nr), "m" ((*(volatile long *) addr)) : "memory");
+    return oldbit;
+}
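+
+/*
+ * Illustrative usage only (an exposition aid, not code taken from Xen):
+ * the locked read-modify-write is what lets QEMU, the hypervisor and
+ * the guest touch the same word concurrently without losing updates,
+ * and it lets the caller detect whether it was the first setter:
+ *
+ *     if (!test_and_set_bit_atomic(port, pending)) {
+ *         ... only the first setter delivers the notification ...
+ *     }
+ */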
+
+/**
+ * test_and_clear_bit_atomic - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit_atomic(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "lock; btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" ((*(volatile long *) addr))
+        : "Ir" (nr), "m" ((*(volatile long *) addr)) : "memory");
+    return oldbit;
+}
+
+#endif /* X86_BITOPS_H */
+
diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 8496f1444e..58b74a6b0b 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -9,7 +9,7 @@ obj-$(CONFIG_KVM) += kvm.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 obj-$(CONFIG_HYPERV) += hyperv.o
 obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o
-obj-$(CONFIG_XEN) += xen.o
+obj-$(CONFIG_XEN) += xen.o xen_evtchn.o
 ifeq ($(CONFIG_WIN32),y)
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o
 endif
diff --git a/target/i386/xen-proto.h b/target/i386/xen-proto.h
index 27fb3a3035..18adfe2ddd 100644
--- a/target/i386/xen-proto.h
+++ b/target/i386/xen-proto.h
@@ -17,17 +17,32 @@ typedef struct XenCallbackVector {
     int virq;
 } XenCallbackVector;
 
+typedef struct XenEvtChn {
+    int notify_vcpu_id;
+    int port;
+    int virq;
+#define XEN_EVTCHN_TYPE_VIRQ 0
+    int type;
+#define XEN_EVTCHN_STATE_FREE 0
+#define XEN_EVTCHN_STATE_INUSE 1
+    int state;
+} XenEvtChn;
+
 typedef struct XenState {
     struct shared_info *shared_info;
     union {
        struct XenCallbackVector cb;
     };
+    int port;
+    QemuMutex port_lock;
 } XenState;
 
 typedef struct XenCPUState {
     struct vcpu_info *info;
     /* per cpu vector */
     struct XenCallbackVector cb;
+#define NR_VIRQS 24
+    struct XenEvtChn *virq_to_evtchn[NR_VIRQS];
 } XenCPUState;
 
 #endif
diff --git a/target/i386/xen.c b/target/i386/xen.c
index f9d309709c..23fda5b166 100644
--- a/target/i386/xen.c
+++ b/target/i386/xen.c
@@ -16,6 +16,7 @@
 #include "cpu.h"
 #include "xen.h"
 #include "trace.h"
+#include "xen_evtchn.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
 #include "qapi/qmp/qdict.h"
@@ -125,9 +126,12 @@ int kvm_xen_set_hypercall_page(CPUState *env)
 void kvm_xen_init(XenState *xen)
 {
     qemu_mutex_init(&xen_global_mutex);
+    qemu_mutex_init(&xen->port_lock);
+
+    kvm_xen_evtchn_init(xen);
 }
 
-static void kvm_xen_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void kvm_xen_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
 {
     do_run_on_cpu(cpu, func, RUN_ON_CPU_HOST_PTR(data), &xen_global_mutex);
 }
@@ -516,21 +520,43 @@ err:
     return err ? HCALL_ERR : 0;
 }
 
-static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit,
+static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
 {
     int err = -ENOSYS;
+    void *eop;
+
+    eop = gva_to_hva(CPU(cpu), arg);
+    if (!eop) {
+        err = -EFAULT;
+        goto err;
+    }
 
     switch (cmd) {
+    case EVTCHNOP_bind_virq:
+        err = kvm_xen_evtchn_bind_virq(cpu, eop);
+        break;
+    case EVTCHNOP_close:
+        err = kvm_xen_evtchn_close(cpu, eop);
+        break;
+    case EVTCHNOP_unmask:
+        err = kvm_xen_evtchn_unmask(cpu, eop);
+        break;
+    case EVTCHNOP_status:
+        err = kvm_xen_evtchn_status(cpu, eop);
+        break;
+    /* FIFO ABI only */
     case EVTCHNOP_init_control:
-        /* FIFO ABI */
+    case EVTCHNOP_expand_array:
+    case EVTCHNOP_set_priority:
     default:
-        exit->u.hcall.result = err;
-        return 0;
+        err = -ENOSYS;
+        break;
     }
 
+err:
     exit->u.hcall.result = err;
-    return err ? HCALL_ERR : 0;
+    return 0;
 }
 
 static int schedop_shutdown(CPUState *cs, uint64_t arg)
@@ -587,7 +613,7 @@ static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
         return kvm_xen_hcall_evtchn_op_compat(exit, cpu,
                                               exit->u.hcall.params[0]);
     case __HYPERVISOR_event_channel_op:
-        return kvm_xen_hcall_evtchn_op(exit, exit->u.hcall.params[0],
+        return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
                                        exit->u.hcall.params[1]);
     case __HYPERVISOR_vcpu_op:
         return kvm_xen_hcall_vcpu_op(exit, cpu,
@@ -626,7 +652,7 @@ int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
     }
 }
 
-static int kvm_xen_vcpu_inject_upcall(X86CPU *cpu)
+int kvm_xen_vcpu_inject_upcall(X86CPU *cpu)
 {
     XenCPUState *xcpu = &cpu->env.xen_vcpu;
     CPUState *cs = CPU(cpu);
diff --git a/target/i386/xen.h b/target/i386/xen.h
index 86f610f7f7..e1872c8518 100644
--- a/target/i386/xen.h
+++ b/target/i386/xen.h
@@ -23,8 +23,10 @@ int kvm_xen_set_hypercall_page(CPUState *env);
 
 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
+int kvm_xen_vcpu_inject_upcall(X86CPU *cpu);
 
 void kvm_xen_init(XenState *xen);
+void kvm_xen_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 void hmp_xen_inject_callback(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/target/i386/xen_evtchn.c b/target/i386/xen_evtchn.c
new file mode 100644
index 0000000000..5f62b7f77e
--- /dev/null
+++ b/target/i386/xen_evtchn.c
@@ -0,0 +1,357 @@
+/*
+ * Event channels implementation on Xen HVM guests in KVM.
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
+#include "linux/kvm.h"
+#include "exec/address-spaces.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/cpus.h"
+#include "cpu.h"
+#include "monitor/monitor.h"
+#include "qapi/qmp/qdict.h"
+#include "qom/cpu.h"
+#include "xen_evtchn.h"
+#include "xen.h"
+
+#ifndef __XEN_INTERFACE_VERSION__
+#define __XEN_INTERFACE_VERSION__ 0x00040400
+#endif
+
+#include "standard-headers/xen/xen.h"
+#include "standard-headers/xen/event_channel.h"
+#include "standard-headers/asm-x86/atomic_bitops.h"
+
+/*
+ * The 2-level ABI supports up to:
+ *  - 4096 event channels on 64-bit
+ *  - 1024 event channels on 32-bit
+ */
+#define EVTCHN_2L_MAX_ABI 4096
+#define EVTCHN_2L_PER_GROUP (sizeof(xen_ulong_t) * 8)
+
+#ifndef EVTCHN_MAX_ABI
+/* Maximum number of event channels in the 2-level ABI */
+#define EVTCHN_MAX_ABI EVTCHN_2L_MAX_ABI
+#define EVTCHN_PER_GROUP EVTCHN_2L_PER_GROUP
+#endif
+
+#define EVTCHN_MAX_GROUPS (EVTCHN_MAX_ABI / EVTCHN_PER_GROUP)
+
+#define groupid_from_port(p) ((p) / EVTCHN_PER_GROUP)
+#define group_from_port(p)   (evtchns[groupid_from_port(p)])
+#define bucket_from_port(p)  ((p) % EVTCHN_PER_GROUP)
+
+static struct XenEvtChn *evtchns[EVTCHN_MAX_GROUPS];
+
+static int alloc_group(XenState *xen_state, int port)
+{
+    struct XenEvtChn *group;
+    int i, g, p;
+
+    if (groupid_from_port(port) >= EVTCHN_MAX_GROUPS) {
+        return -ENOSPC;
+    }
+
+    if (group_from_port(port) != NULL) {
+        return 0;
+    }
+
+    qemu_mutex_lock(&xen_state->port_lock);
+    group = g_malloc0(sizeof(XenEvtChn) * EVTCHN_PER_GROUP);
+    if (!group) {
+        qemu_mutex_unlock(&xen_state->port_lock);
+        return -ENOMEM;
+    }
+
+    g = groupid_from_port(port);
+    p = g * EVTCHN_PER_GROUP;
+    for (i = 0; i < EVTCHN_PER_GROUP; i++) {
+        group[i].port = p + i;
+    }
+
+    evtchns[g] = group;
+    qemu_mutex_unlock(&xen_state->port_lock);
+
+    return 0;
+}
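+
+/*
+ * Worked example (assuming 64-bit xen_ulong_t, so EVTCHN_PER_GROUP and
+ * EVTCHN_MAX_GROUPS are both 64): port 70 has groupid 70 / 64 = 1 and
+ * bucket 70 % 64 = 6, so alloc_group() for that port populates
+ * evtchns[1] with ports 64..127.
+ */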
+
+static XenEvtChn *alloc_evtchn(XenState *xen_state)
+{
+    struct XenEvtChn *event = NULL;
+    int i, j;
+
+    /* Find the next free port */
+    for (i = 0; i < EVTCHN_MAX_GROUPS; i++) {
+        for (j = 0; j < EVTCHN_PER_GROUP; j++) {
+            struct XenEvtChn *e;
+
+            /* Port 0 is not valid */
+            if (!(i + j) || !evtchns[i]) {
+                continue;
+            }
+
+            e = &evtchns[i][j];
+            if (e->state == XEN_EVTCHN_STATE_FREE) {
+                event = e;
+                goto out;
+            }
+        }
+    }
+
+    /* Find the next group to be created */
+    for (i = 0; i < EVTCHN_MAX_GROUPS; i++) {
+        if (!evtchns[i]) {
+            break;
+        }
+    }
+
+    /* New group, hence the first port to be allocated */
+    j = i * EVTCHN_PER_GROUP;
+    if (!alloc_group(xen_state, j)) {
+        event = group_from_port(j);
+    }
+
+out:
+    if (event) {
+        event->state = XEN_EVTCHN_STATE_INUSE;
+    }
+
+    return event;
+}
+
+int kvm_xen_evtchn_init(XenState *xen_state)
+{
+    return alloc_group(xen_state, 1);
+}
+
+static struct XenEvtChn *evtchn_from_port(int port)
+{
+    if (port <= 0 || port >= EVTCHN_MAX_ABI || !group_from_port(port)) {
+        return NULL;
+    }
+
+    return &group_from_port(port)[bucket_from_port(port)];
+}
+
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t) * 8)
+
+static void evtchn_2l_vcpu_set_pending(X86CPU *cpu)
+{
+    struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+    unsigned long *upcall_pending;
+    int pending;
+
+    upcall_pending = (unsigned long *) &vcpu_info->evtchn_upcall_pending;
+    pending = test_and_set_bit_atomic(0, upcall_pending);
+    if (pending) {
+        return;
+    }
+
+    kvm_xen_vcpu_inject_upcall(cpu);
+}
+
+static void __attribute__((unused)) evtchn_2l_set_pending(X86CPU *cpu,
+                                                          XenEvtChn *evtchn)
+{
+    struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+    struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+    int port = evtchn->port;
+    unsigned long *pending;
+
+    pending = (unsigned long *) shared_info->evtchn_pending;
+    if (test_and_set_bit_atomic(port, pending)) {
+        return;
+    }
+
+    if (!test_bit(port, (unsigned long *) shared_info->evtchn_mask) &&
+        !test_and_set_bit_atomic(port / BITS_PER_EVTCHN_WORD,
+                          (unsigned long *) &vcpu_info->evtchn_pending_sel)) {
+        evtchn_2l_vcpu_set_pending(cpu);
+    }
+}
+
+static void evtchn_2l_clear_pending(X86CPU *cpu, XenEvtChn *evtchn)
+{
+    struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+    int port = evtchn->port;
+
+    clear_bit_atomic(port, (unsigned long *) shared_info->evtchn_pending);
+}
+
+static bool __attribute__((unused)) evtchn_2l_is_pending(X86CPU *cpu,
+                                                         XenEvtChn *evtchn)
+{
+    struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+    int port = evtchn->port;
+
+    return !!test_bit(port, (unsigned long *) shared_info->evtchn_pending);
+}
+
+static bool __attribute__((unused)) evtchn_2l_is_masked(X86CPU *cpu,
+                                                        XenEvtChn *evtchn)
+{
+    struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+    int port = evtchn->port;
+
+    return !!test_bit(port, (unsigned long *) shared_info->evtchn_mask);
+}
+
+static int __attribute__((unused)) evtchn_2l_state(X86CPU *cpu,
+                                                   XenEvtChn *evtchn)
+{
+    struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+    int port = evtchn->port;
+
+    return !!test_bit(port / BITS_PER_EVTCHN_WORD,
+                      (unsigned long *) &vcpu_info->evtchn_pending_sel);
+}
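+
+/*
+ * The 2-level hierarchy in one worked example (64-bit xen_ulong_t, so
+ * BITS_PER_EVTCHN_WORD is 64): raising port 70 sets bit 70 of
+ * shared_info->evtchn_pending (word 1, bit 6); if the port is not
+ * masked, bit 70 / 64 = 1 of vcpu_info->evtchn_pending_sel is set
+ * next, and finally vcpu_info->evtchn_upcall_pending, at which point
+ * the upcall is injected into the guest.
+ */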
+
+static void evtchn_2l_unmask(X86CPU *cpu, XenEvtChn *evtchn)
+{
+    struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+    struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+    unsigned long *masked = (unsigned long *) shared_info->evtchn_mask;
+    int port = evtchn->port;
+
+    if (test_and_clear_bit_atomic(port, masked) &&
+        test_bit(port, (unsigned long *) shared_info->evtchn_pending) &&
+        !test_and_set_bit_atomic(port / BITS_PER_EVTCHN_WORD,
+                          (unsigned long *) &vcpu_info->evtchn_pending_sel)) {
+        evtchn_2l_vcpu_set_pending(cpu);
+    }
+}
+
+static void xen_vcpu_set_evtchn(CPUState *cpu, run_on_cpu_data data)
+{
+    XenCPUState *xen_vcpu = &X86_CPU(cpu)->env.xen_vcpu;
+    struct XenEvtChn *evtchn = data.host_ptr;
+
+    xen_vcpu->virq_to_evtchn[evtchn->virq] = evtchn;
+}
+
+int kvm_xen_evtchn_bind_virq(X86CPU *cpu, void *arg)
+{
+    XenCPUState *destxcpu;
+    struct evtchn_bind_virq *out = arg;
+    struct evtchn_bind_virq bind_virq;
+    struct XenEvtChn *evtchn;
+    CPUState *dest;
+
+    memcpy(&bind_virq, arg, sizeof(bind_virq));
+
+    dest = qemu_get_cpu(bind_virq.vcpu);
+    if (!dest || bind_virq.virq >= NR_VIRQS) {
+        return -EINVAL;
+    }
+
+    destxcpu = &X86_CPU(dest)->env.xen_vcpu;
+    if (destxcpu->virq_to_evtchn[bind_virq.virq]) {
+        return -EEXIST;
+    }
+
+    evtchn = alloc_evtchn(CPU(cpu)->xen_state);
+    if (!evtchn) {
+        return -ENOMEM;
+    }
+
+    evtchn->type = XEN_EVTCHN_TYPE_VIRQ;
+    evtchn->virq = bind_virq.virq;
+    evtchn->notify_vcpu_id = bind_virq.vcpu;
+
+    kvm_xen_run_on_cpu(dest, xen_vcpu_set_evtchn, evtchn);
+
+    out->port = evtchn->port;
+
+    return 0;
+}
+
+int kvm_xen_evtchn_close(X86CPU *cpu, void *arg)
+{
+    struct evtchn_close close;
+    struct XenEvtChn *evtchn;
+
+    memcpy(&close, arg, sizeof(close));
+
+    evtchn = evtchn_from_port(close.port);
+    if (!evtchn) {
+        return -EINVAL;
+    }
+
+    evtchn_2l_clear_pending(cpu, evtchn);
+
+    evtchn->state = XEN_EVTCHN_STATE_FREE;
+    evtchn->notify_vcpu_id = 0;
+
+    return 0;
+}
+
+int kvm_xen_evtchn_unmask(X86CPU *cpu, void *arg)
+{
+    struct evtchn_unmask unmask;
+    struct XenEvtChn *evtchn;
+
+    memcpy(&unmask, arg, sizeof(unmask));
+
+    evtchn = evtchn_from_port(unmask.port);
+    if (!evtchn) {
+        return -EINVAL;
+    }
+
+    evtchn_2l_unmask(cpu, evtchn);
+
+    return 0;
+}
+
+int kvm_xen_evtchn_status(X86CPU *cpu, void *arg)
+{
+    struct evtchn_status status;
+    struct XenEvtChn *evtchn;
+    int type = -1;
+
+    memcpy(&status, arg, sizeof(status));
+
+    evtchn = evtchn_from_port(status.port);
+    if (!evtchn) {
+        return -EINVAL;
+    }
+
+    if (evtchn->state == XEN_EVTCHN_STATE_INUSE) {
+        type = evtchn->type;
+    }
+
+    status.status = EVTCHNSTAT_closed;
+    status.vcpu = evtchn->notify_vcpu_id;
+
+    switch (type) {
+    case XEN_EVTCHN_TYPE_VIRQ:
+        status.status = EVTCHNSTAT_virq;
+        status.u.virq = evtchn->virq;
+        break;
+    default:
+        break;
+    }
+
+    memcpy(arg, &status, sizeof(status));
+
+    return 0;
+}
+
+int kvm_xen_evtchn_vcpu_init(X86CPU *cpu, struct vcpu_info *vcpu)
+{
+    int i;
+
+    vcpu->evtchn_upcall_pending = 1;
+    for (i = 0; i < BITS_PER_EVTCHN_WORD; i++) {
+        set_bit(i, &vcpu->evtchn_pending_sel);
+    }
+    kvm_xen_vcpu_inject_upcall(cpu);
+
+    return 0;
+}
+
diff --git a/target/i386/xen_evtchn.h b/target/i386/xen_evtchn.h
new file mode 100644
index 0000000000..429dab5d7b
--- /dev/null
+++ b/target/i386/xen_evtchn.h
@@ -0,0 +1,26 @@
+/*
+ * Event channels implementation on Xen HVM guests in KVM
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ * + */ + +#ifndef TARGET_I386_XEN_EVTCHN_H +#define TARGET_I386_XEN_EVTCHN_H + +#include "cpu.h" +#include "sysemu/kvm.h" +#include "qemu/event_notifier.h" + +int kvm_xen_evtchn_init(XenState *xen_state); + +int kvm_xen_evtchn_bind_virq(X86CPU *cpu, void *arg); +int kvm_xen_evtchn_close(X86CPU *cpu, void *arg); +int kvm_xen_evtchn_unmask(X86CPU *cpu, void *arg); +int kvm_xen_evtchn_status(X86CPU *cpu, void *arg); +int kvm_xen_evtchn_vcpu_init(X86CPU *cpu, struct vcpu_info *info); + +#endif -- 2.50.1
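
Illustrative usage note (not part of the patch): a guest binds a VIRQ
through the event_channel_op hypercall using the standard Xen public
ABI from xen/event_channel.h, and kvm_xen_evtchn_bind_virq() above
writes the allocated port back into the guest-supplied structure. A
minimal guest-side sketch, assuming the usual
HYPERVISOR_event_channel_op() wrapper from the guest's hypercall glue:

    struct evtchn_bind_virq bind = {
        .virq = VIRQ_TIMER,     /* virtual timer interrupt, VIRQ 0 */
        .vcpu = 0,              /* notify vCPU 0 */
    };

    /* On success the allocated port is written back into bind.port. */
    if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind) == 0) {
        /* bind.port is a valid 2-level ABI port (1..4095 on 64-bit). */
    }

The returned port can then be passed to EVTCHNOP_unmask, EVTCHNOP_status
and EVTCHNOP_close, which map onto the corresponding kvm_xen_evtchn_*()
handlers added by this patch.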