nvme-rdma: serialize controller teardown sequences
Author:     Sagi Grimberg <sagi@grimberg.me>
AuthorDate: Thu, 6 Aug 2020 01:13:58 +0000 (18:13 -0700)
Commit:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CommitDate: Thu, 17 Sep 2020 11:47:47 +0000 (13:47 +0200)
[ Upstream commit 5110f40241d08334375eb0495f174b1d2c07657e ]

In the timeout handler we may need to complete a request because the
request that timed out may be an I/O that is part of a serial sequence
of controller teardown or initialization. In order to complete the
request, we need to fence off any other context that might race with us
to complete the request that is timing out.

Without such fencing we risk a double completion if a hard-irq or a
different competing context triggers error recovery and runs inflight
request cancellation concurrently with the timeout handler.

Protect using a ctrl teardown_lock to serialize contexts that may
complete a cancelled request due to error recovery or a reset.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Sasha Levin <sashal@kernel.org>
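
What follows is a minimal userspace sketch of the serialization idea, not
the driver code itself: the struct names, the "completed" flag and the
pthread mutex are illustrative stand-ins for the real ctrl/request state and
the new teardown_lock. Two threads playing "error recovery" and "timeout
handler" both try to complete the same request; the lock plus the flag make
the completion happen exactly once.

/*
 * Illustrative only: a mutex serializes two contexts that may each try to
 * complete (here: mark completed) the same request, avoiding a double
 * completion. Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct request {
	bool completed;
};

struct ctrl {
	pthread_mutex_t teardown_lock;	/* serializes completion contexts */
	struct request req;
};

/* Complete the request unless another context already did. */
static void complete_request(struct ctrl *ctrl, const char *who)
{
	pthread_mutex_lock(&ctrl->teardown_lock);
	if (!ctrl->req.completed) {
		ctrl->req.completed = true;
		printf("%s completed the request\n", who);
	} else {
		printf("%s: already completed, nothing to do\n", who);
	}
	pthread_mutex_unlock(&ctrl->teardown_lock);
}

static void *error_recovery(void *arg)
{
	complete_request(arg, "error recovery");
	return NULL;
}

static void *timeout_handler(void *arg)
{
	complete_request(arg, "timeout handler");
	return NULL;
}

int main(void)
{
	struct ctrl ctrl = { .req = { .completed = false } };
	pthread_t t1, t2;

	pthread_mutex_init(&ctrl.teardown_lock, NULL);
	pthread_create(&t1, NULL, error_recovery, &ctrl);
	pthread_create(&t2, NULL, timeout_handler, &ctrl);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	pthread_mutex_destroy(&ctrl.teardown_lock);
	return 0;
}

In the driver, the analogous effect is achieved by taking teardown_lock
around the teardown paths in the diff below, so that only one context at a
time can cancel and complete inflight requests.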
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d0336545e1fe0fec326ec7121641b98546a86dff..b164c662fed3020e278f4a31112f9741097ed037 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -110,6 +110,7 @@ struct nvme_rdma_ctrl {
        struct sockaddr_storage src_addr;
 
        struct nvme_ctrl        ctrl;
+       struct mutex            teardown_lock;
        bool                    use_inline_data;
        u32                     io_queues[HCTX_MAX_TYPES];
 };
@@ -920,6 +921,7 @@ out_free_io_queues:
 static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
+       mutex_lock(&ctrl->teardown_lock);
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        nvme_rdma_stop_queue(&ctrl->queues[0]);
        if (ctrl->ctrl.admin_tagset) {
@@ -930,11 +932,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
        if (remove)
                blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_rdma_destroy_admin_queue(ctrl, remove);
+       mutex_unlock(&ctrl->teardown_lock);
 }
 
 static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
+       mutex_lock(&ctrl->teardown_lock);
        if (ctrl->ctrl.queue_count > 1) {
                nvme_start_freeze(&ctrl->ctrl);
                nvme_stop_queues(&ctrl->ctrl);
@@ -948,6 +952,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                        nvme_start_queues(&ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, remove);
        }
+       mutex_unlock(&ctrl->teardown_lock);
 }
 
 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -1988,6 +1993,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                return ERR_PTR(-ENOMEM);
        ctrl->ctrl.opts = opts;
        INIT_LIST_HEAD(&ctrl->list);
+       mutex_init(&ctrl->teardown_lock);
 
        if (!(opts->mask & NVMF_OPT_TRSVCID)) {
                opts->trsvcid =