--- /dev/null
+/*
+ * x86-specific Atomic Bitops Module
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * Taken from Xen.
+ *
+ * This work is licensed under the terms of the GNU GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef X86_BITOPS_H
+#define X86_BITOPS_H
+
+/**
+ * clear_bit_atomic - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit_atomic() is atomic and may not be reordered.
+ */
+static inline void clear_bit_atomic(int nr, volatile void *addr)
+{
+ asm volatile ( "lock; btrl %1,%0"
+ : "+m" ((*(volatile long *) addr)) : "Ir" (nr) : "memory");
+}
+
+/**
+ * test_and_set_bit_atomic - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit_atomic(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile (
+ "lock; btsl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" ((*(volatile long *) addr))
+ : "Ir" (nr), "m" ((*(volatile long *) addr)) : "memory");
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit_atomic - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit_atomic(int nr, volatile void *addr)
+{
+ int oldbit;
+
+ asm volatile (
+ "lock; btrl %2,%1\n\tsbbl %0,%0"
+ : "=r" (oldbit), "=m" ((*(volatile long *) addr))
+ : "Ir" (nr), "m" ((*(volatile long *) addr)) : "memory");
+ return oldbit;
+}
+
+#endif /* X86_BITOPS_H */
+
#include "cpu.h"
#include "xen.h"
#include "trace.h"
+#include "xen_evtchn.h"
#include "sysemu/sysemu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
void kvm_xen_init(XenState *xen)
{
qemu_mutex_init(&xen_global_mutex);
+ qemu_mutex_init(&xen->port_lock);
+
+ kvm_xen_evtchn_init(xen);
}
-static void kvm_xen_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void kvm_xen_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
do_run_on_cpu(cpu, func, RUN_ON_CPU_HOST_PTR(data), &xen_global_mutex);
}
return err ? HCALL_ERR : 0;
}
-static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit,
+static int kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
int cmd, uint64_t arg)
{
int err = -ENOSYS;
+ void *eop;
+
+ eop = gva_to_hva(CPU(cpu), arg);
+ if (!eop) {
+ err = -EFAULT;
+ goto err;
+ }
switch (cmd) {
+ case EVTCHNOP_bind_virq:
+ err = kvm_xen_evtchn_bind_virq(cpu, eop);
+ break;
+ case EVTCHNOP_close:
+ err = kvm_xen_evtchn_close(cpu, eop);
+ break;
+ case EVTCHNOP_unmask:
+ err = kvm_xen_evtchn_unmask(cpu, eop);
+ break;
+ case EVTCHNOP_status:
+ err = kvm_xen_evtchn_status(cpu, eop);
+ break;
+ /* FIFO ABI only */
case EVTCHNOP_init_control:
- /* FIFO ABI */
+ case EVTCHNOP_expand_array:
+ case EVTCHNOP_set_priority:
default:
- exit->u.hcall.result = err;
- return 0;
+ err = -ENOSYS;
+ break;
}
+err:
exit->u.hcall.result = err;
- return err ? HCALL_ERR : 0;
+ return 0;
}
static int schedop_shutdown(CPUState *cs, uint64_t arg)
return kvm_xen_hcall_evtchn_op_compat(exit, cpu,
exit->u.hcall.params[0]);
case __HYPERVISOR_event_channel_op:
- return kvm_xen_hcall_evtchn_op(exit, exit->u.hcall.params[0],
+ return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
exit->u.hcall.params[1]);
case __HYPERVISOR_vcpu_op:
return kvm_xen_hcall_vcpu_op(exit, cpu,
}
}
-static int kvm_xen_vcpu_inject_upcall(X86CPU *cpu)
+int kvm_xen_vcpu_inject_upcall(X86CPU *cpu)
{
XenCPUState *xcpu = &cpu->env.xen_vcpu;
CPUState *cs = CPU(cpu);
--- /dev/null
+/*
+ * Event channels implementation on Xen HVM guests in KVM.
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
+#include "linux/kvm.h"
+#include "exec/address-spaces.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/cpus.h"
+#include "cpu.h"
+#include "monitor/monitor.h"
+#include "qapi/qmp/qdict.h"
+#include "qom/cpu.h"
+#include "xen_evtchn.h"
+#include "xen.h"
+
+#ifndef __XEN_INTERFACE_VERSION__
+#define __XEN_INTERFACE_VERSION__ 0x00040400
+#endif
+
+#include "standard-headers/xen/xen.h"
+#include "standard-headers/xen/event_channel.h"
+#include "standard-headers/asm-x86/atomic_bitops.h"
+
+/*
+ * The 2-level ABI supports up to:
+ * - 4096 event channels on 64-bit (64 words of 64 bits)
+ * - 1024 event channels on 32-bit (32 words of 32 bits)
+ */
+#define EVTCHN_2L_MAX_ABI 4096
+#define EVTCHN_2L_PER_GROUP (sizeof(xen_ulong_t) * 8)
+
+#ifndef EVTCHN_MAX_ABI
+/* Maximum number of event channels in the 2-level ABI */
+#define EVTCHN_MAX_ABI EVTCHN_2L_MAX_ABI
+#define EVTCHN_PER_GROUP EVTCHN_2L_PER_GROUP
+#endif
+
+#define EVTCHN_MAX_GROUPS (EVTCHN_MAX_ABI / EVTCHN_PER_GROUP)
+
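+/*
+ * Ports are organized into groups of EVTCHN_PER_GROUP entries: a port's
+ * group index is port / EVTCHN_PER_GROUP and its slot within the group
+ * is port % EVTCHN_PER_GROUP.
+ */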
+#define groupid_from_port(p) ((p) / EVTCHN_PER_GROUP)
+#define group_from_port(p) (evtchns[groupid_from_port(p)])
+#define bucket_from_port(p) ((p) % EVTCHN_PER_GROUP)
+
+static struct XenEvtChn *evtchns[EVTCHN_MAX_GROUPS];
+
+static int alloc_group(XenState *xen_state, int port)
+{
+ struct XenEvtChn *group;
+ int i, g, p;
+
+ if ((port / EVTCHN_PER_GROUP) >= EVTCHN_MAX_GROUPS) {
+ return -ENOSPC;
+ }
+
+ if (group_from_port(port) != NULL) {
+ return 0;
+ }
+
+ qemu_mutex_lock(&xen_state->port_lock);
+ group = g_malloc0(sizeof(XenEvtChn) * EVTCHN_PER_GROUP);
+ if (!group) {
+ qemu_mutex_unlock(&xen_state->port_lock);
+ return -ENOMEM;
+ }
+
+ g = port / EVTCHN_PER_GROUP;
+ p = g * EVTCHN_PER_GROUP;
+ for (i = 0; i < EVTCHN_PER_GROUP; i++) {
+ group[i].port = p + i;
+ }
+
+ evtchns[g] = group;
+ qemu_mutex_unlock(&xen_state->port_lock);
+
+ return 0;
+}
+
+static XenEvtChn *alloc_evtchn(XenState *xen_state)
+{
+ struct XenEvtChn *event = NULL;
+ int i, j;
+
+ /* Find next free port */
+ for (i = 0; i < EVTCHN_MAX_GROUPS; i++) {
+ for (j = 0; j < EVTCHN_PER_GROUP; j++) {
+ struct XenEvtChn *e;
+
+ /* Port 0 is not valid */
+ if (!(i + j) || !evtchns[i]) {
+ continue;
+ }
+
+ e = &evtchns[i][j];
+ if (e->state == XEN_EVTCHN_STATE_FREE) {
+ event = e;
+ goto out;
+ }
+ }
+ }
+
+ /* Find next group to be created */
+ for (i = 0; i < EVTCHN_MAX_GROUPS; i++) {
+ if (!evtchns[i]) {
+ break;
+ }
+ }
+
+ /* New group, so allocate it and use its first port */
+ j = i * EVTCHN_PER_GROUP;
+ if (!alloc_group(xen_state, j)) {
+ event = group_from_port(j);
+ }
+
+ out:
+ if (event) {
+ event->state = XEN_EVTCHN_STATE_INUSE;
+ }
+
+ return event;
+}
+
+int kvm_xen_evtchn_init(XenState *xen_state)
+{
+ return alloc_group(xen_state, 1);
+}
+
+static struct XenEvtChn *evtchn_from_port(int port)
+{
+ if (port <= 0 || !group_from_port(port)) {
+ return NULL;
+ }
+
+ return &group_from_port(port)[bucket_from_port(port)];
+}
+
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t) * 8)
+
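+/*
+ * 2-level pending delivery: a port is marked pending in
+ * shared_info->evtchn_pending, its word is flagged in the vCPU's
+ * evtchn_pending_sel selector, and evtchn_upcall_pending is raised
+ * before an upcall is injected into the guest.
+ */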
+static void evtchn_2l_vcpu_set_pending(X86CPU *cpu)
+{
+ struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+ unsigned long *upcall_pending;
+ int pending;
+
+ upcall_pending = (unsigned long *) &vcpu_info->evtchn_upcall_pending;
+ pending = test_and_set_bit_atomic(0, upcall_pending);
+ if (pending) {
+ return;
+ }
+
+ kvm_xen_vcpu_inject_upcall(cpu);
+}
+
+static void __attribute__((unused)) evtchn_2l_set_pending(X86CPU *cpu,
+ XenEvtChn *evtchn)
+{
+ struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+ struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+ int port = evtchn->port;
+ unsigned long *pending;
+
+ pending = (unsigned long *) shared_info->evtchn_pending;
+ if (test_and_set_bit_atomic(port, pending)) {
+ return;
+ }
+
+ if (!test_bit(port, (unsigned long *) shared_info->evtchn_mask) &&
+ !test_and_set_bit_atomic(port / BITS_PER_EVTCHN_WORD,
+ (unsigned long *) &vcpu_info->evtchn_pending_sel)) {
+ evtchn_2l_vcpu_set_pending(cpu);
+ }
+}
+
+static void evtchn_2l_clear_pending(X86CPU *cpu, XenEvtChn *evtchn)
+{
+ struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+ int port = evtchn->port;
+
+ clear_bit_atomic(port, (unsigned long *) shared_info->evtchn_pending);
+}
+
+static bool __attribute__((unused)) evtchn_2l_is_pending(X86CPU *cpu,
+ XenEvtChn *evtchn)
+{
+ struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+ int port = evtchn->port;
+
+ return !!test_bit(port, (unsigned long *) shared_info->evtchn_pending);
+}
+
+static bool __attribute__((unused)) evtchn_2l_is_masked(X86CPU *cpu,
+ XenEvtChn *evtchn)
+{
+ struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+ int port = evtchn->port;
+
+ return !!test_bit(port, (unsigned long *) shared_info->evtchn_mask);
+}
+
+static int __attribute__((unused)) evtchn_2l_state(X86CPU *cpu,
+ XenEvtChn *evtchn)
+{
+ struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+ int port = evtchn->port;
+
+ return !!test_bit(port / BITS_PER_EVTCHN_WORD,
+ (unsigned long *) &vcpu_info->evtchn_pending_sel);
+}
+
+static void evtchn_2l_unmask(X86CPU *cpu, XenEvtChn *evtchn)
+{
+ struct shared_info *shared_info = CPU(cpu)->xen_state->shared_info;
+ struct vcpu_info *vcpu_info = cpu->env.xen_vcpu.info;
+ unsigned long *masked = (unsigned long *) shared_info->evtchn_mask;
+ int port = evtchn->port;
+
+ if (test_and_clear_bit_atomic(port, masked) &&
+ test_bit(port, (unsigned long *) shared_info->evtchn_pending) &&
+ !test_and_set_bit_atomic(port / BITS_PER_EVTCHN_WORD,
+ (unsigned long *) &vcpu_info->evtchn_pending_sel)) {
+ evtchn_2l_vcpu_set_pending(cpu);
+ }
+}
+
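+/*
+ * Runs on the destination vCPU (via kvm_xen_run_on_cpu) to record the
+ * event channel backing a given VIRQ for that vCPU.
+ */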
+static void xen_vcpu_set_evtchn(CPUState *cpu, run_on_cpu_data data)
+{
+ XenCPUState *xen_vcpu = &X86_CPU(cpu)->env.xen_vcpu;
+ struct XenEvtChn *evtchn = data.host_ptr;
+
+ xen_vcpu->virq_to_evtchn[evtchn->virq] = evtchn;
+}
+
+int kvm_xen_evtchn_bind_virq(X86CPU *cpu, void *arg)
+{
+ XenCPUState *destxcpu;
+ struct evtchn_bind_virq *out = arg;
+ struct evtchn_bind_virq bind_virq;
+ struct XenEvtChn *evtchn;
+ CPUState *dest;
+
+ memcpy(&bind_virq, arg, sizeof(bind_virq));
+
+ dest = qemu_get_cpu(bind_virq.vcpu);
+ if (!dest || bind_virq.virq >= NR_VIRQS) {
+ return -EINVAL;
+ }
+
+ destxcpu = &X86_CPU(dest)->env.xen_vcpu;
+ if (destxcpu->virq_to_evtchn[bind_virq.virq]) {
+ return -EEXIST;
+ }
+
+ evtchn = alloc_evtchn(CPU(cpu)->xen_state);
+ if (!evtchn) {
+ return -ENOMEM;
+ }
+
+ evtchn->type = XEN_EVTCHN_TYPE_VIRQ;
+ evtchn->virq = bind_virq.virq;
+ evtchn->notify_vcpu_id = bind_virq.vcpu;
+
+ kvm_xen_run_on_cpu(dest, xen_vcpu_set_evtchn, evtchn);
+
+ out->port = evtchn->port;
+
+ return 0;
+}
+
+int kvm_xen_evtchn_close(X86CPU *cpu, void *arg)
+{
+ struct evtchn_close close;
+ struct XenEvtChn *evtchn;
+
+ memcpy(&close, arg, sizeof(close));
+
+ evtchn = evtchn_from_port(close.port);
+ if (!evtchn) {
+ return -EINVAL;
+ }
+
+ evtchn_2l_clear_pending(cpu, evtchn);
+
+ evtchn->state = XEN_EVTCHN_STATE_FREE;
+ evtchn->notify_vcpu_id = 0;
+
+ return 0;
+}
+
+int kvm_xen_evtchn_unmask(X86CPU *cpu, void *arg)
+{
+ struct evtchn_unmask unmask;
+ struct XenEvtChn *evtchn;
+
+ memcpy(&unmask, arg, sizeof(unmask));
+
+ evtchn = evtchn_from_port(unmask.port);
+ if (!evtchn) {
+ return -EINVAL;
+ }
+
+ evtchn_2l_unmask(cpu, evtchn);
+
+ return 0;
+}
+
+int kvm_xen_evtchn_status(X86CPU *cpu, void *arg)
+{
+ struct evtchn_status status;
+ struct XenEvtChn *evtchn;
+ int type = -1;
+
+ memcpy(&status, arg, sizeof(status));
+
+ evtchn = evtchn_from_port(status.port);
+ if (!evtchn) {
+ return -EINVAL;
+ }
+
+ if (evtchn->state == XEN_EVTCHN_STATE_INUSE) {
+ type = evtchn->type;
+ }
+
+ status.status = EVTCHNSTAT_closed;
+ status.vcpu = evtchn->notify_vcpu_id;
+
+ switch (type) {
+ case XEN_EVTCHN_TYPE_VIRQ:
+ status.status = EVTCHNSTAT_virq;
+ status.u.virq = evtchn->virq;
+ break;
+ default:
+ break;
+ }
+
+ memcpy(arg, &status, sizeof(status));
+
+ return 0;
+}
+
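+/*
+ * Initialise per-vCPU event channel state: flag an upcall as pending and
+ * set every bit of evtchn_pending_sel before injecting an upcall.
+ */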
+int kvm_xen_evtchn_vcpu_init(X86CPU *cpu, struct vcpu_info *vcpu)
+{
+ int i;
+
+ vcpu->evtchn_upcall_pending = 1;
+ for (i = 0; i < BITS_PER_EVTCHN_WORD; i++) {
+ set_bit(i, (unsigned long *) &vcpu->evtchn_pending_sel);
+ }
+ kvm_xen_vcpu_inject_upcall(cpu);
+
+ return 0;
+}
+