hw/nvme: more cqe posting optimizations [WIP]
author     Klaus Jensen <k.jensen@samsung.com>
           Thu, 9 Mar 2023 07:01:41 +0000 (08:01 +0100)
committer  Klaus Jensen <k.jensen@samsung.com>
           Thu, 8 Jun 2023 19:15:34 +0000 (21:15 +0200)
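
Defer interrupt handling out of the cqe posting path: nvme_post_cqes()
now only posts entries and schedules a new per-queue do_irq bottom
half, which re-reads the shadow doorbell head (when enabled) and
asserts the interrupt only if the queue is still non-empty. The per-CQ
bottom half is split in two (do_irq and post_queued_cqes), the
cq_pending counter becomes atomic, and nvme_irq_check() is reduced to
the pin-based interrupt case and renamed nvme_irq_check_intx().
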
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
hw/nvme/ctrl.c
hw/nvme/nvme.h
hw/nvme/trace-events

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index b01637c2b70625474aedb13bdd67371aef5bf830..beeef2aeb90ba1151a9b36efad892c6858005383 100644
@@ -642,16 +642,15 @@ static inline bool nvme_sq_empty(NvmeSQueue *sq)
     return sq->head == sq->tail;
 }
 
-static void nvme_irq_check(NvmeCtrl *n)
+static void nvme_irq_check_intx(NvmeCtrl *n)
 {
     PCIDevice *pci = PCI_DEVICE(n);
     uint32_t intms = ldl_le_p(&n->bar.intms);
 
-    trace_pci_nvme_irq_check(intms, n->irq_status);
+    trace_pci_nvme_irq_check_intx(intms, n->irq_status);
+
+    assert(!msix_enabled(pci));
 
-    if (msix_enabled(pci)) {
-        return;
-    }
     if (~intms & n->irq_status) {
         trace_pci_nvme_irq_pin(1);
         pci_irq_assert(pci);
@@ -665,33 +664,39 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
     PCIDevice *pci = PCI_DEVICE(n);
 
-    if (cq->irq_enabled) {
-        if (msix_enabled(pci)) {
-            trace_pci_nvme_irq_msix(cq->vector);
-            msix_notify(pci, cq->vector);
-        } else {
-            assert(cq->vector < 32);
-            n->irq_status |= 1 << cq->vector;
-            nvme_irq_check(n);
-        }
-    } else {
-        trace_pci_nvme_irq_masked();
+    assert(cq->irq_enabled);
+
+    if (msix_enabled(pci)) {
+        trace_pci_nvme_irq_msix(cq->vector);
+
+        msix_notify(pci, cq->vector);
+
+        return;
     }
+
+    assert(cq->vector < 32);
+    n->irq_status |= 1 << cq->vector;
+
+    nvme_irq_check_intx(n);
 }
 
 static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 {
-    if (cq->irq_enabled) {
-        if (msix_enabled(PCI_DEVICE(n))) {
-            return;
-        } else {
-            assert(cq->vector < 32);
-            if (!n->cq_pending) {
-                n->irq_status &= ~(1 << cq->vector);
-            }
-            nvme_irq_check(n);
-        }
+    PCIDevice *pci = PCI_DEVICE(n);
+
+    assert(cq->irq_enabled);
+
+    if (msix_enabled(pci)) {
+        return;
+    }
+
+    assert(cq->vector < 32);
+
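+    /*
+     * cq_pending is updated atomically across contexts; only drop the
+     * pin-based interrupt once no completion queue has entries pending.
+     */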
+    if (qatomic_dec_fetch(&n->cq_pending) == 0) {
+        n->irq_status &= ~(1 << cq->vector);
     }
+
+    nvme_irq_check_intx(n);
 }
 
 static void nvme_req_clear(NvmeRequest *req)
@@ -1508,7 +1513,7 @@ static void nvme_post_cqe(NvmeCQueue *cq, NvmeRequest *req)
     }
 
     if (cq->irq_enabled && nvme_cq_empty(cq)) {
-        n->cq_pending++;
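+        /* updated atomically; also decremented in nvme_irq_deassert() */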
+        qatomic_inc(&n->cq_pending);
     }
 
     nvme_inc_cq_tail(cq);
@@ -1522,7 +1527,6 @@ static void nvme_post_cqes(void *opaque)
     NvmeCQueue *cq = opaque;
     NvmeCtrl *n = cq->ctrl;
     NvmeRequest *req, *next;
-    bool pending = cq->head != cq->tail;
 
     trace_pci_nvme_post_cqes(cq->cqid, cq->head, cq->tail);
 
@@ -1546,18 +1550,34 @@ static void nvme_post_cqes(void *opaque)
         }
     }
 
-    if (cq->tail != cq->head) {
-        if (cq->irq_enabled && !pending) {
-            n->cq_pending++;
-        }
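+    /* interrupt assertion is deferred to the do_irq bottom half */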
+    if (cq->irq_enabled) {
+        qemu_bh_schedule(cq->do_irq);
+    }
+}
 
-        nvme_irq_assert(n, cq);
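+/*
+ * Bottom half; if shadow doorbells are enabled, refresh the cached head
+ * before deciding whether to assert the interrupt, and bail out if the
+ * host has already consumed all posted entries.
+ */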
+static void nvme_do_irq(void *opaque)
+{
+    NvmeCQueue *cq = opaque;
+    NvmeCtrl *n = cq->ctrl;
+
+    if (n->dbbuf_enabled) {
+        nvme_update_cq_eventidx(cq);
+        nvme_update_cq_head(cq);
+    }
+
+    if (nvme_cq_empty(cq)) {
+        return;
     }
+
+    nvme_irq_assert(n, cq);
 }
 
 static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
 {
+    NvmeCtrl *n = cq->ctrl;
+
     assert(cq->cqid == req->sq->cqid);
+
     trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
                                           le32_to_cpu(req->cqe.result),
                                           le32_to_cpu(req->cqe.dw1),
@@ -1570,14 +1590,21 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
 
     QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
 
-    if (nvme_cq_full(cq)) {
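+    /* pick up any doorbell head update from the shadow buffer */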
+    if (n->dbbuf_enabled) {
+        nvme_update_cq_head(cq);
+    }
+
+    /* queue the cqe up for deferred posting to prevent starvation */
+    if (nvme_cq_full(cq) || !QTAILQ_EMPTY(&cq->req_list)) {
         QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
         return;
     }
 
     nvme_post_cqe(cq, req);
 
-    qemu_bh_schedule(cq->bh);
+    if (cq->irq_enabled) {
+        qemu_bh_schedule(cq->do_irq);
+    }
 }
 
 static void nvme_process_aers(void *opaque)
@@ -4492,29 +4519,28 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
     return NVME_INVALID_OPCODE | NVME_DNR;
 }
 
-static void nvme_cq_update(NvmeCQueue *cq, bool start_sqs)
+/* may be called from mmio trap context, so defer additional processing */
+static void nvme_cq_kick(NvmeCQueue *cq, bool start_sqs)
 {
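+    /* drain any CQEs that were queued up while the cq was full */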
+    if (!QTAILQ_EMPTY(&cq->req_list)) {
+        qemu_bh_schedule(cq->post_queued_cqes);
+    }
+
     if (start_sqs) {
         NvmeSQueue *sq;
 
-        /* cq was full; start processing sqes on affected sqs again */
+        /* cq was full; start processing associated sqs again */
         QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
             qemu_bh_schedule(sq->bh);
         }
-
-        /* if no sq has pending sqes, make sure any pending cqes are posted */
-        qemu_bh_schedule(cq->bh);
     }
 
-
     if (nvme_cq_empty(cq)) {
         NvmeCtrl *n = cq->ctrl;
 
         if (cq->irq_enabled) {
-            n->cq_pending--;
+            nvme_irq_deassert(n, cq);
         }
-
-        nvme_irq_deassert(n, cq);
     }
 }
 
@@ -4528,7 +4554,7 @@ static void nvme_cq_notifier(EventNotifier *e)
     }
 
     nvme_update_cq_head(cq);
-    nvme_cq_update(cq, start_sqs);
+    nvme_cq_kick(cq, start_sqs);
 }
 
 static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
@@ -4625,6 +4651,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
         QTAILQ_REMOVE(&cq->sq_list, sq, entry);
 
         nvme_post_cqes(cq);
+
         QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
             if (r->sq == sq) {
                 QTAILQ_REMOVE(&cq->req_list, r, entry);
@@ -5233,7 +5260,8 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     uint16_t offset = (cq->cqid << 3) + (1 << 2);
 
     n->cq[cq->cqid] = NULL;
-    qemu_bh_delete(cq->bh);
+    qemu_bh_delete(cq->do_irq);
+    qemu_bh_delete(cq->post_queued_cqes);
     if (cq->ioeventfd_enabled) {
         memory_region_del_eventfd(&n->iomem,
                                   0x1000 + offset, 4, false, 0, &cq->notifier);
@@ -5265,11 +5293,10 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
         return NVME_INVALID_QUEUE_DEL;
     }
 
-    if (cq->irq_enabled && cq->tail != cq->head) {
-        n->cq_pending--;
+    if (cq->irq_enabled) {
+        nvme_irq_deassert(n, cq);
     }
 
-    nvme_irq_deassert(n, cq);
     trace_pci_nvme_del_cq(qid);
     nvme_free_cq(cq, n);
     return NVME_SUCCESS;
@@ -5280,6 +5307,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
                          uint16_t irq_enabled)
 {
     PCIDevice *pci = PCI_DEVICE(n);
+    DeviceState *qdev = DEVICE(pci);
+    MemReentrancyGuard *guard = &qdev->mem_reentrancy_guard;
 
     if (msix_enabled(pci)) {
         msix_vector_use(pci, vector);
@@ -5304,9 +5333,11 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
             }
         }
     }
+
     n->cq[cqid] = cq;
-    cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
-                                 &DEVICE(cq->ctrl)->mem_reentrancy_guard);
+
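+    /* two bottom halves: one asserts the interrupt, one posts queued CQEs */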
+    cq->do_irq = qemu_bh_new_guarded(nvme_do_irq, cq, guard);
+    cq->post_queued_cqes = qemu_bh_new_guarded(nvme_post_cqes, cq, guard);
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -7360,7 +7391,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
         stl_le_p(&n->bar.intms, intms);
         n->bar.intmc = n->bar.intms;
         trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
-        nvme_irq_check(n);
+        nvme_irq_check_intx(n);
         break;
     case NVME_REG_INTMC:
         if (unlikely(msix_enabled(pci))) {
@@ -7373,7 +7404,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
         stl_le_p(&n->bar.intms, intms);
         n->bar.intmc = n->bar.intms;
         trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
-        nvme_irq_check(n);
+        nvme_irq_check_intx(n);
         break;
     case NVME_REG_CC:
         stl_le_p(&n->bar.cc, data);
@@ -7680,7 +7711,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head));
         }
 
-        nvme_cq_update(cq, start_sqs);
+        nvme_cq_kick(cq, start_sqs);
     } else {
         /* Submission queue doorbell write */
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 209e8f5b4c085777b94a081712bb33548b0f42b1..8e4b78492b8f3a5babfb6d3e1de791e45c234caa 100644
@@ -484,7 +484,7 @@ typedef struct NvmeCQueue {
     uint64_t    dma_addr;
     uint64_t    db_addr;
     uint64_t    ei_addr;
-    QEMUBH      *bh;
+    QEMUBH      *do_irq, *post_queued_cqes;
     EventNotifier notifier;
     bool        ioeventfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 08a32efa762d1e08a03d777e7258b9e9b85ea3d5..c19f756429e971e2cc5201eee7bd526013f81212 100644
@@ -1,6 +1,6 @@
 # successful events
 pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
-pci_nvme_irq_check(uint32_t intms, uint32_t irq_status) "intms 0x%"PRIx32" irq_status 0x%"PRIx32""
+pci_nvme_irq_check_intx(uint32_t intms, uint32_t irq_status) "intms 0x%"PRIx32" irq_status 0x%"PRIx32""
 pci_nvme_irq_pin(uint8_t assert) "assert %"PRIu8""
 pci_nvme_irq_masked(void) "IRQ is masked"
 pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""