#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
#include "sysemu/block-backend.h"
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
}
}
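+
+/*
+ * Allocate a KVM irqchip route for this vector's MSI message and remember
+ * the resulting virq on the completion queue. The irqfd itself is attached
+ * later, when the guest unmasks the vector.
+ */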
+static int nvme_kvm_vector_use(NvmeCtrl *n, NvmeCQueue *cq, uint32_t vector)
+{
+ KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
+ int ret;
+
+ ret = kvm_irqchip_add_msi_route(&c, vector, &n->parent_obj);
+ if (ret < 0) {
+ return ret;
+ }
+
+ kvm_irqchip_commit_route_changes(&c);
+
+ cq->virq = ret;
+
+ return 0;
+}
+
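+/*
+ * MSI-X unmask notifier: scan the I/O completion queues for this vector,
+ * refresh the KVM route if the guest reprogrammed the MSI message while the
+ * vector was masked, then attach the irqfd so KVM injects the interrupt
+ * directly.
+ */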
+static int nvme_kvm_vector_unmask(PCIDevice *pci_dev, unsigned vector,
+ MSIMessage msg)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+ int ret;
+
+ trace_pci_nvme_irq_unmask(vector, msg.address, msg.data);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+
+ if (!cq) {
+ continue;
+ }
+
+ if (cq->vector == vector) {
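+ /*
+  * The MSI message may have changed while the vector was masked; keep
+  * the KVM route in sync before re-attaching the irqfd.
+  */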
+ if (cq->msg.data != msg.data || cq->msg.address != msg.address) {
+ ret = kvm_irqchip_update_msi_route(kvm_state, cq->virq, msg,
+ pci_dev);
+ if (ret < 0) {
+ return ret;
+ }
+
+ kvm_irqchip_commit_routes(kvm_state);
+
+ cq->msg = msg;
+ }
+
+ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ NULL, cq->virq);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
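+/*
+ * MSI-X mask notifier: detach the irqfd so interrupts for a masked vector
+ * are no longer injected by KVM; pending events are picked up by the poll
+ * notifier instead.
+ */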
+static void nvme_kvm_vector_mask(PCIDevice *pci_dev, unsigned vector)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_irq_mask(vector);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+
+ if (!cq) {
+ continue;
+ }
+
+ if (cq->vector == vector) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ cq->virq);
+ }
+ }
+}
+
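+/*
+ * MSI-X poll notifier: for each masked vector in the given range, test the
+ * CQ's assert notifier and latch any pending interrupt in the MSI-X PBA.
+ */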
+static void nvme_kvm_vector_poll(PCIDevice *pci_dev, unsigned int vector_start,
+ unsigned int vector_end)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_irq_poll(vector_start, vector_end);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+
+ if (!cq) {
+ continue;
+ }
+
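+ /* Unmasked vectors are delivered via irqfd; only masked ones can have
+  * undelivered events pending here. */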
+ if (!msix_is_masked(pci_dev, cq->vector)) {
+ continue;
+ }
+
+ if (cq->vector >= vector_start && cq->vector <= vector_end) {
+ if (event_notifier_test_and_clear(&cq->assert_notifier)) {
+ /* The PBA bit is per MSI-X vector, not per queue index. */
+ msix_set_pending(pci_dev, cq->vector);
+ }
+ }
+ }
+}
+
static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
{
+ bool with_irqfd = msix_enabled(&n->parent_obj) &&
+ kvm_msi_via_irqfd_enabled();
int ret;
ret = event_notifier_init(&cq->assert_notifier, 0);
return;
}
- event_notifier_set_handler(&cq->assert_notifier, nvme_irq_assert_notify);
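+ /*
+  * With irqfd, KVM injects the interrupt itself when the assert notifier
+  * fires; without it, handle the notifier in QEMU and assert the IRQ by
+  * hand.
+  */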
+ if (with_irqfd) {
+ ret = nvme_kvm_vector_use(n, cq, cq->vector);
+ if (ret < 0) {
+ event_notifier_cleanup(&cq->assert_notifier);
+
+ return;
+ }
+ } else {
+ event_notifier_set_handler(&cq->assert_notifier,
+ nvme_irq_assert_notify);
+ }
if (!msix_enabled(&n->parent_obj)) {
ret = event_notifier_init(&cq->deassert_notifier, 0);
if (ret < 0) {
+ if (with_irqfd) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ cq->virq);
+ }
+
event_notifier_set_handler(&cq->assert_notifier, NULL);
event_notifier_cleanup(&cq->assert_notifier);
NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
bool start_sqs = nvme_cq_full(cq);
+ trace_pci_nvme_cq_notifier(cq->cqid);
+
if (!event_notifier_test_and_clear(e)) {
return;
}
{
NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);
+ trace_pci_nvme_sq_notifier(sq->sqid);
+
if (!event_notifier_test_and_clear(e)) {
return;
}
static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
{
PCIDevice *pci = PCI_DEVICE(n);
+ bool with_irqfd = msix_enabled(pci) && kvm_msi_via_irqfd_enabled();
uint16_t offset = (cq->cqid << 3) + (1 << 2);
n->cq[cq->cqid] = NULL;
event_notifier_cleanup(&cq->notifier);
}
if (cq->assert_notifier.initialized) {
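+ /* Detach the irqfd and return the route to KVM before tearing the
+  * notifier down. */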
+ if (with_irqfd) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ cq->virq);
+ kvm_irqchip_release_virq(kvm_state, cq->virq);
+ }
event_notifier_set_handler(&cq->assert_notifier, NULL);
event_notifier_cleanup(&cq->assert_notifier);
}
uint32_t page_size = 1 << page_bits;
NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+ bool with_irqfd = msix_enabled(&n->parent_obj) &&
+ kvm_msi_via_irqfd_enabled();
+
if (pci_is_vf(PCI_DEVICE(n)) && !sctrl->scs) {
trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi),
le16_to_cpu(sctrl->nvq));
nvme_select_iocs(n);
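+ /*
+  * Register MSI-X vector notifiers so that irqfds are attached and detached
+  * as the guest masks and unmasks interrupt vectors.
+  */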
+ if (n->params.irq_eventfd && with_irqfd) {
+ return msix_set_vector_notifiers(PCI_DEVICE(n), nvme_kvm_vector_unmask,
+ nvme_kvm_vector_mask,
+ nvme_kvm_vector_poll);
+ }
+
return 0;
}
pcie_sriov_pf_exit(pci_dev);
}
+ /* Vector notifiers are only registered when irqfds are in use. */
+ if (pci_dev->msix_vector_use_notifier) {
+ msix_unset_vector_notifiers(pci_dev);
+ }
msix_uninit(pci_dev, &n->bar0, &n->bar0);
memory_region_del_subregion(&n->bar0, &n->iomem);
}
pci_nvme_irq_deassert_notify(uint16_t cqid) "cqid %"PRIu16""
pci_nvme_irq_do_assert(uint16_t cqid) "cqid %"PRIu16""
pci_nvme_irq_do_deassert(uint16_t cqid) "cqid %"PRIu16""
+pci_nvme_irq_mask(uint32_t vector) "IRQ %u gets masked"
+pci_nvme_irq_unmask(uint32_t vector, uint64_t addr, uint32_t data) "IRQ %u gets unmasked, addr=0x%"PRIx64" data=0x%"PRIx32""
+pci_nvme_irq_poll(uint32_t vector_start, uint32_t vector_end) "IRQ poll, start=%"PRIu32" end=%"PRIu32""
pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
pci_nvme_dbbuf_config(uint64_t dbs_addr, uint64_t eis_addr) "dbs_addr=0x%"PRIx64" eis_addr=0x%"PRIx64""
pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16""
+pci_nvme_cq_notifier(uint16_t cqid) "cqid %"PRIu16""
+pci_nvme_sq_notifier(uint16_t sqid) "sqid %"PRIu16""
pci_nvme_update_cq_eventidx(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16""
pci_nvme_update_sq_eventidx(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16""
pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"