nvmet: pci-epf: Keep completion queues mapped
author Damien Le Moal <dlemoal@kernel.org>
Thu, 13 Mar 2025 08:22:18 +0000 (17:22 +0900)
committer Keith Busch <kbusch@kernel.org>
Mon, 31 Mar 2025 15:48:25 +0000 (08:48 -0700)
Instead of mapping and unmapping the completion queues memory to the
host PCI address space whenever nvmet_pci_epf_cq_work() is called, map
a completion queue to the host PCI address space when the completion
queue is created with nvmet_pci_epf_create_cq() and unmap it when the
completion queue is deleted with nvmet_pci_epf_delete_cq().
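
In other words, the map/unmap pair moves from the per-work-item path
to the queue create/delete path. A condensed sketch of the new mapping
lifetime (simplified from the actual patch below, error handling
omitted):

    /* CQ creation: map once, keep the mapping for the queue lifetime. */
    nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
                          &cq->pci_map);
    set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);

    /* nvmet_pci_epf_cq_work(): uses cq->pci_map directly, with no
     * per-invocation map/unmap. */

    /* CQ deletion: drop the mapping. */
    nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);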

This removes the completion queue mapping/unmapping from
nvmet_pci_epf_cq_work() and significantly increases performance. For
a single-job 4K random read QD=1 workload, IOPS increase from
23 KIOPS to 25 KIOPS. A significant throughput increase can also be
seen for high queue depth and large I/O workloads.
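
The commit message does not give the exact benchmark command; a
plausible fio invocation for the single-job 4K random read QD=1 case
(the device path and I/O engine here are assumptions) would be:

    # Hypothetical reproduction of the QD=1 workload described above.
    fio --name=randread --filename=/dev/nvme0n1 --direct=1 \
        --ioengine=io_uring --rw=randread --bs=4k --iodepth=1 \
        --numjobs=1 --runtime=60 --time_based --group_reporting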

Since nvmet_pci_epf_map_queue() and nvmet_pci_epf_unmap_queue() are
now called only from nvmet_pci_epf_create_cq() and
nvmet_pci_epf_delete_cq() respectively, these functions are removed
and their bodies open-coded at their call sites.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/target/pci-epf.c

index b54b3fdbe3898a2fa003ea291856bac628d94fb2..51c27b32248d0137fd3a557728eaab72fc7fca0c 100644 (file)
@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
        struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
        struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
        u16 status;
+       int ret;
 
        if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
                return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
        if (status != NVME_SC_SUCCESS)
                goto err;
 
+       /*
+        * Map the CQ PCI address space and since PCI endpoint controllers may
+        * return a partial mapping, check that the mapping is large enough.
+        */
+       ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
+                                   &cq->pci_map);
+       if (ret) {
+               dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
+                       cq->qid, ret);
+               goto err_internal;
+       }
+
+       if (cq->pci_map.pci_size < cq->pci_size) {
+               dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
+                       cq->qid);
+               goto err_unmap_queue;
+       }
+
        set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
 
        dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 
        return NVME_SC_SUCCESS;
 
+err_unmap_queue:
+       nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
+err_internal:
+       status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
 err:
        if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
                nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
        cancel_delayed_work_sync(&cq->work);
        nvmet_pci_epf_drain_queue(cq);
        nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+       nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
 
        return NVME_SC_SUCCESS;
 }
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
        ctrl->cq = NULL;
 }
 
-static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
-                                  struct nvmet_pci_epf_queue *queue)
-{
-       struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
-       int ret;
-
-       ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
-                                     queue->pci_size, &queue->pci_map);
-       if (ret) {
-               dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
-                       queue->qid, ret);
-               return ret;
-       }
-
-       if (queue->pci_map.pci_size < queue->pci_size) {
-               dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
-                       queue->qid);
-               nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
-                                            struct nvmet_pci_epf_queue *queue)
-{
-       nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
-}
-
 static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
 {
        struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
        struct nvme_completion *cqe;
        struct nvmet_pci_epf_iod *iod;
        unsigned long flags;
-       int ret, n = 0;
-
-       ret = nvmet_pci_epf_map_queue(ctrl, cq);
-       if (ret)
-               goto again;
+       int ret = 0, n = 0;
 
        while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
 
@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
                n++;
        }
 
-       nvmet_pci_epf_unmap_queue(ctrl, cq);
-
        /*
         * We do not support precise IRQ coalescing time (100ns units as per
         * NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
        if (n)
                nvmet_pci_epf_raise_irq(ctrl, cq, true);
 
-again:
        if (ret < 0)
                queue_delayed_work(system_highpri_wq, &cq->work,
                                   NVMET_PCI_EPF_CQ_RETRY_INTERVAL);