This allows drivers to register a callback on a qemu_irq, which is
invoked when a level-triggered IRQ is acked on the irqchip.
This allows us to simulate level-triggered interrupts more efficiently,
by resampling the state of the interrupt only when it actually matters.
This can be used in two ways.
The example in the patch below shows the event source literally being
resampled from the callback — in this case the line level is tied to
the Xen evtchn_upcall_pending flag in vCPU0's vcpu_info, and the
callback from the irqchip allows us to avoid having to constantly poll
for that being cleared.
Alternatively, when hooked up to INTx interrupts on VFIO PCI devices, the callback would
unconditionally return 'true' to clear the level in the irqchip, and
also send an event on the 'resample' eventfd to the kernel, so that the
kernel will reraise the interrupt if it's still actually physically set
on the device.
There's theoretically a race condition there, if the kernel reraises
the interrupt before the callback even returns and the irqchip clears
its internal s->irr. But I think we get away with it by being single-
threaded for the I/O processing so we won't actually consume the event
until later?
It was the Xen part first that offended me, having to poll on vmexit:
https://git.infradead.org/users/dwmw2/qemu.git/commitdiff/
7bada5e4f#patch5
But then I looked at how VFIO handles this, and it offends me even
more; sending the resample eventfd down to the kernel on *every* MMIO
read/write.... having unmapped the device's BARs from the guest in
order to *trap* those MMIO accesses... with a timer to map it back
again...
It'll take a little more work to hook up the reverse path for the ack
back through PCI INTx handling, a bit like I've had to do it with
gsi_ack_handler to convey the ack events back from the {i8259,ioapic}
qemu_irq to the GSI qemu_irqs. And I'll need to do it for more than
just i8259 and ioapic. But I suspect it'll be worth it.
Opinions?
Tested by booting a (KVM) Xen guest with xen_no_vector_callback on its
command line, and sometimes also 'apic=off'. In PIC mode we still seem
to get two interrupts per event, but I think that's actually genuine
because printfs in the evtchn code confirm that ->evtchn_upcall_pending
for vCPU0 really *is* still set the first time the interrupt is acked
in the i8259 and genuinely doesn't get cleared.
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
struct IRQState {
Object parent_obj;
+ qemu_irq_ack_fn ack_cb;
+ void *ack_opaque;
+
qemu_irq_handler handler;
void *opaque;
int n;
irq->handler(irq->opaque, irq->n, level);
}
+/*
+ * Attach an ack callback and its opaque argument to @irq, replacing
+ * whatever was registered before. A NULL @irq is silently ignored;
+ * passing a NULL @cb leaves the irq without a callback.
+ */
+void qemu_set_irq_ack_callback(qemu_irq irq, qemu_irq_ack_fn cb, void *opaque)
+{
+    if (!irq) {
+        return;
+    }
+
+    irq->ack_opaque = opaque;
+    irq->ack_cb = cb;
+}
+
+/*
+ * Run the ack callback registered on @irq, if any, and hand back its
+ * result. Returns false when @irq is NULL or has no callback set.
+ */
+bool qemu_notify_irq_ack(qemu_irq irq)
+{
+    if (!irq || !irq->ack_cb) {
+        return false;
+    }
+
+    return irq->ack_cb(irq, irq->ack_opaque);
+}
+
qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
void *opaque, int n)
{
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
- error_setg(errp, "Xen event channel emulation not enabled\n");
+ error_setg(errp, "Xen event channel emulation not enabled");
return NULL;
}
void qmp_xen_event_inject(uint32_t port, Error **errp)
{
- error_setg(errp, "Xen event channel emulation not enabled\n");
+ error_setg(errp, "Xen event channel emulation not enabled");
}
}
}
+/*
+ * IRQ ack callback for the Xen event channel callback GSI.
+ *
+ * Resamples the line from the vcpu_info returned for vCPU 0: returns
+ * true only when that vcpu_info is available and its
+ * evtchn_upcall_pending flag is clear, and false in every other case
+ * (including when no vcpu_info is available). Neither @irq nor
+ * @opaque is used.
+ */
+static bool resample_evtchn_irq(qemu_irq irq, void *opaque)
+{
+    struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
+
+    return vi && !vi->evtchn_upcall_pending;
+}
+
int xen_evtchn_set_callback_param(uint64_t param)
{
XenEvtchnState *s = xen_evtchn_singleton;
if (gsi != s->callback_gsi) {
struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
- xen_evtchn_set_callback_level(0);
+ if (s->callback_gsi) {
+ xen_evtchn_set_callback_level(0);
+ qemu_set_irq_ack_callback(s->gsis[s->callback_gsi], NULL, NULL);
+ }
s->callback_gsi = gsi;
+ if (s->callback_gsi) {
+ qemu_set_irq_ack_callback(s->gsis[s->callback_gsi], resample_evtchn_irq, s);
+ }
if (gsi && vi && vi->evtchn_upcall_pending) {
/*
if (kvm_ioapic_in_kernel()) {
kvm_pc_setup_irq_routing(pci_enabled);
}
- *irqs = qemu_allocate_irqs(gsi_handler, s, GSI_NUM_PINS);
+ s->gsi_irq = *irqs = qemu_allocate_irqs(gsi_handler, s, GSI_NUM_PINS);
return s;
}
rom_reset_order_override();
}
-void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
+void pc_i8259_create(ISABus *isa_bus, GSIState *gsi_state)
{
qemu_irq *i8259;
}
for (size_t i = 0; i < ISA_NUM_IRQS; i++) {
- i8259_irqs[i] = i8259[i];
+ gsi_state->i8259_irq[i] = i8259[i];
+ qemu_set_irq_ack_callback(gsi_state->i8259_irq[i], gsi_ack_handler,
+ gsi_state->gsi_irq[i]);
+
}
g_free(i8259);
isa_bus_irqs(isa_bus, x86ms->gsi);
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
- pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ pc_i8259_create(isa_bus, gsi_state);
}
if (pcmc->pci_enabled) {
isa_bus = ich9_lpc->isa_bus;
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
- pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ pc_i8259_create(isa_bus, gsi_state);
}
if (pcmc->pci_enabled) {
}
}
+bool gsi_ack_handler(qemu_irq irq, void *opaque)
+{
+ /*
+ * This is a callback on the underlying PIC/IOAPIC irq but the
+ * opaque pointer that was registered is the GSI irq. Propagate
+ * the notification to that GSI irq and return whatever its own
+ * ack callback returns. The irq argument itself is not used.
+ */
+ return qemu_notify_irq_ack(opaque);
+}
+
void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
DeviceState *dev;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
+ qemu_set_irq_ack_callback(gsi_state->ioapic_irq[i], gsi_ack_handler,
+ gsi_state->gsi_irq[i]);
}
}
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i);
+ qemu_set_irq_ack_callback(gsi_state->ioapic2_irq[i], gsi_ack_handler,
+ gsi_state->gsi_irq[IO_APIC_SECONDARY_IRQBASE + i]);
}
return dev;
}
} else {
s->isr |= (1 << irq);
}
- /* We don't clear a level sensitive interrupt here */
+ /*
+ * We don't clear a level sensitive interrupt here, unless the
+ * ack notifier asks us to.
+ */
if (!(s->elcr & (1 << irq))) {
s->irr &= ~(1 << irq);
+ } else if (qemu_notify_irq_ack(qdev_get_gpio_in(DEVICE(s), irq))) {
+ s->irr &= ~(1 << irq);
+ s->last_irr &= ~(1 << irq);
}
pic_update_irq(s);
}
#include "sysemu/sysemu.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/x86-iommu.h"
+#include "hw/irq.h"
#include "trace.h"
#define APIC_DELIVERY_MODE_SHIFT 8
*/
kvm_resample_fd_notify(n);
#endif
-
+ if (qemu_notify_irq_ack(qdev_get_gpio_in(DEVICE(s), n))) {
+ s->irr &= ~(1 << n);
+ }
if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
continue;
}
ISADevice *s);
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus);
-void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
+void pc_i8259_create(ISABus *isa_bus, GSIState *gsi_state);
/* port92.c */
#define PORT92_A20_LINE "a20"
#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11))
typedef struct GSIState {
+ qemu_irq *gsi_irq;
qemu_irq i8259_irq[ISA_NUM_IRQS];
qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
qemu_irq ioapic2_irq[IOAPIC_NUM_PINS];
qemu_irq x86_allocate_cpu_irq(void);
void gsi_handler(void *opaque, int n, int level);
+bool gsi_ack_handler(qemu_irq, void *opaque);
void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
DeviceState *ioapic_init_secondary(GSIState *gsi_state);
qemu_set_irq(irq, 0);
}
+/*
+ * Allows a callback to be invoked when an IRQ is acked at the irqchip,
+ * allowing it to be resampled and reasserted as appropriate. If the
+ * callback function returns true, the interrupt is deasserted at the
+ * irqchip.
+ *
+ * qemu_set_irq_ack_callback() records the callback and its opaque
+ * argument on the irq; it does nothing when irq is NULL.
+ * qemu_notify_irq_ack() invokes the recorded callback and returns its
+ * result, or returns false when irq is NULL or no callback is set.
+ */
+typedef bool (*qemu_irq_ack_fn)(qemu_irq irq, void *opaque);
+void qemu_set_irq_ack_callback(qemu_irq irq, qemu_irq_ack_fn cb, void *opaque);
+bool qemu_notify_irq_ack(qemu_irq irq);
+
/* Returns an array of N IRQs. Each IRQ is assigned the argument handler and
* opaque data.
*/
/* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
if (!vi->evtchn_upcall_pending) {
env->xen_callback_asserted = false;
- xen_evtchn_set_callback_level(0);
+ //xen_evtchn_set_callback_level(0);
}
}
case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
if (vcpu_id == 0) {
xen_evtchn_set_callback_level(1);
- X86_CPU(cs)->env.xen_callback_asserted = true;
+ //X86_CPU(cs)->env.xen_callback_asserted = true;
}
break;
}