]> www.infradead.org Git - nvme.git/commitdiff
nvme-multipath: defer partition scanning
authorKeith Busch <kbusch@kernel.org>
Tue, 15 Oct 2024 14:30:17 +0000 (07:30 -0700)
committerKeith Busch <kbusch@kernel.org>
Tue, 15 Oct 2024 15:32:07 +0000 (08:32 -0700)
We need to suppress the partition scan from occurring within the
controller's scan_work context. If a path error occurs here, the IO will
wait until a path becomes available or all paths are torn down, but that
action also occurs within scan_work, so it would deadlock. Defer the
partition scan to a different context that does not block scan_work.

Reported-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h

index bad1620fbbfc1955dee91311335caabd52b286bd..6a15873055b9513f827709ad780bc7e18f75e439 100644 (file)
@@ -579,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
        return ret;
 }
 
+static void nvme_partition_scan_work(struct work_struct *work)
+{
+       struct nvme_ns_head *head =
+               container_of(work, struct nvme_ns_head, partition_scan_work);
+
+       if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
+                                            &head->disk->state)))
+               return;
+
+       mutex_lock(&head->disk->open_mutex);
+       bdev_disk_changed(head->disk, false);
+       mutex_unlock(&head->disk->open_mutex);
+}
+
 static void nvme_requeue_work(struct work_struct *work)
 {
        struct nvme_ns_head *head =
@@ -605,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
        bio_list_init(&head->requeue_list);
        spin_lock_init(&head->requeue_lock);
        INIT_WORK(&head->requeue_work, nvme_requeue_work);
+       INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
 
        /*
         * Add a multipath node if the subsystems supports multiple controllers.
@@ -628,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
                return PTR_ERR(head->disk);
        head->disk->fops = &nvme_ns_head_ops;
        head->disk->private_data = head;
+
+       /*
+        * We need to suppress the partition scan from occurring within the
+        * controller's scan_work context. If a path error occurs here, the IO
+        * will wait until a path becomes available or all paths are torn down,
+        * but that action also occurs within scan_work, so it would deadlock.
+        * Defer the partition scan to a different context that does not block
+        * scan_work.
+        */
+       set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
        sprintf(head->disk->disk_name, "nvme%dn%d",
                        ctrl->subsys->instance, head->instance);
        return 0;
@@ -654,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
                        return;
                }
                nvme_add_ns_head_cdev(head);
+               kblockd_schedule_work(&head->partition_scan_work);
        }
 
        mutex_lock(&head->lock);
@@ -973,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
                return;
        if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
                nvme_cdev_del(&head->cdev, &head->cdev_device);
+               /*
+                * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+                * to allow multipath to fail all I/O.
+                */
+               synchronize_srcu(&head->srcu);
+               kblockd_schedule_work(&head->requeue_work);
                del_gendisk(head->disk);
        }
-       /*
-        * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
-        * to allow multipath to fail all I/O.
-        */
-       synchronize_srcu(&head->srcu);
-       kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -990,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
        /* make sure all pending bios are cleaned up */
        kblockd_schedule_work(&head->requeue_work);
        flush_work(&head->requeue_work);
+       flush_work(&head->partition_scan_work);
        put_disk(head->disk);
 }
 
index 313a4f978a2cf3ce00fb3e9918fb17729a39ce90..093cb423f536bebb2272bf7985b0413f9c2b0237 100644 (file)
@@ -494,6 +494,7 @@ struct nvme_ns_head {
        struct bio_list         requeue_list;
        spinlock_t              requeue_lock;
        struct work_struct      requeue_work;
+       struct work_struct      partition_scan_work;
        struct mutex            lock;
        unsigned long           flags;
 #define NVME_NSHEAD_DISK_LIVE  0