www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
NVMe: Automatic namespace rescan
author Keith Busch <keith.busch@intel.com>
Thu, 6 Aug 2015 21:43:00 +0000 (14:43 -0700)
committer Santosh Shilimkar <santosh.shilimkar@oracle.com>
Fri, 7 Aug 2015 18:21:07 +0000 (11:21 -0700)
Namespaces may be dynamically allocated and deleted or attached and
detached. This has the driver rescan the device for namespace changes
after each device reset or namespace change asynchronous event.

There could potentially be many detached namespaces that we don't want
polluting /dev/ with unusable block handles, so this will delete disks
if the namespace is not active as indicated by the response from identify
namespace. This also skips adding the disk if no capacity is provisioned
to the namespace in the first place.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/block/nvme-core.c
include/linux/nvme.h
include/uapi/linux/nvme.h

index 6f24dcbaa979eed9bff05481cbd0d57cab619b50..7701b80884c3bf423e13fcd78d6bc6e6e808254d 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -315,9 +316,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
        if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
                ++nvmeq->dev->event_limit;
-       if (status == NVME_SC_SUCCESS)
-               dev_warn(nvmeq->q_dmadev,
-                       "async event result %08x\n", result);
+       if (status != NVME_SC_SUCCESS)
+               return;
+
+       switch (result & 0xff07) {
+       case NVME_AER_NOTICE_NS_CHANGED:
+               dev_info(nvmeq->q_dmadev, "rescanning\n");
+               schedule_work(&nvmeq->dev->scan_work);
+       default:
+               dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+       }
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -2018,7 +2026,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        if (!id) {
                dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n",
                                                                __func__);
-               return 0;
+               return -ENODEV;
        }
        if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
                dev_warn(&dev->pci_dev->dev,
@@ -2026,6 +2034,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
                        ns->ns_id);
                memset(id, 0, sizeof(*id));
        }
+       if (id->ncap == 0) {
+               dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+               return -ENODEV;
+       }
 
        old_ms = ns->ms;
        lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
@@ -2058,7 +2070,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
                                                                !ns->ext)
                nvme_init_integrity(ns);
 
-       if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+       if (ns->ms && !blk_get_integrity(disk))
                set_capacity(disk, 0);
        else
                set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2173,11 +2185,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
         * requires it.
         */
        set_capacity(disk, 0);
-       nvme_revalidate_disk(ns->disk);
+       if (nvme_revalidate_disk(ns->disk))
+               goto out_free_disk;
+
        add_disk(ns->disk);
        if (ns->ms)
                revalidate_disk(ns->disk);
        return;
+ out_free_disk:
+       kfree(disk);
+       list_del(&ns->list);
  out_free_queue:
        blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -2295,6 +2312,106 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        return result;
 }
 
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+       list_del(&ns->list);
+
+       spin_lock(&dev_list_lock);
+       ns->disk->private_data = NULL;
+       spin_unlock(&dev_list_lock);
+
+       put_disk(ns->disk);
+       kfree(ns);
+}
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+       struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+       return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+       struct nvme_ns *ns;
+
+       list_for_each_entry(ns, &dev->namespaces, list) {
+               if (ns->ns_id == nsid)
+                       return ns;
+               if (ns->ns_id > nsid)
+                       break;
+       }
+       return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+       return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+                                                       dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+       bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+       if (kill)
+               blk_set_queue_dying(ns->queue);
+       if (ns->disk->flags & GENHD_FL_UP) {
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
+               del_gendisk(ns->disk);
+       }
+       if (kill || !blk_queue_dying(ns->queue)) {
+               blk_mq_abort_requeue_list(ns->queue);
+               blk_cleanup_queue(ns->queue);
+        }
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+       struct nvme_ns *ns, *next;
+       unsigned i;
+
+       for (i = 1; i <= nn; i++) {
+               ns = nvme_find_ns(dev, i);
+               if (ns) {
+                       if (revalidate_disk(ns->disk)) {
+                               nvme_ns_remove(ns);
+                               nvme_free_namespace(ns);
+                       }
+               } else
+                       nvme_alloc_ns(dev, i);
+       }
+       list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+               if (ns->ns_id > nn) {
+                       nvme_ns_remove(ns);
+                       nvme_free_namespace(ns);
+               }
+       }
+       list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+       struct nvme_id_ctrl *ctrl;
+       dma_addr_t dma_addr;
+
+       if (!dev->tagset.tags)
+               return;
+
+       ctrl = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, GFP_KERNEL);
+       if (!ctrl)
+               return;
+       if (nvme_identify(dev, 0, 1, dma_addr)) {
+               dma_free_coherent(&dev->pci_dev->dev, 4096, ctrl, dma_addr);
+               return;
+       }
+       nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
+       dma_free_coherent(&dev->pci_dev->dev, 4096, ctrl, dma_addr);
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -2305,7 +2422,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
        struct pci_dev *pdev = dev->pci_dev;
        int res;
-       unsigned nn, i;
        struct nvme_id_ctrl *ctrl;
        void *mem;
        dma_addr_t dma_addr;
@@ -2323,7 +2439,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
        }
 
        ctrl = mem;
-       nn = le32_to_cpup(&ctrl->nn);
        dev->oncs = le16_to_cpup(&ctrl->oncs);
        dev->abort_limit = ctrl->acl + 1;
        dev->vwc = ctrl->vwc;
@@ -2359,9 +2474,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
        if (blk_mq_alloc_tag_set(&dev->tagset))
                return 0;
 
-       for (i = 1; i <= nn; i++)
-               nvme_alloc_ns(dev, i);
-
+       schedule_work(&dev->scan_work);
        return 0;
 }
 
@@ -2659,17 +2772,8 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 {
        struct nvme_ns *ns;
 
-       list_for_each_entry(ns, &dev->namespaces, list) {
-               if (ns->disk->flags & GENHD_FL_UP) {
-                       if (blk_get_integrity(ns->disk))
-                               blk_integrity_unregister(ns->disk);
-                       del_gendisk(ns->disk);
-               }
-               if (!blk_queue_dying(ns->queue)) {
-                       blk_mq_abort_requeue_list(ns->queue);
-                       blk_cleanup_queue(ns->queue);
-               }
-       }
+       list_for_each_entry(ns, &dev->namespaces, list)
+               nvme_ns_remove(ns);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2729,16 +2833,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
 {
        struct nvme_ns *ns, *next;
 
-       list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-               list_del(&ns->list);
-
-               spin_lock(&dev_list_lock);
-               ns->disk->private_data = NULL;
-               spin_unlock(&dev_list_lock);
-
-               put_disk(ns->disk);
-               kfree(ns);
-       }
+       list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+               nvme_free_namespace(ns);
 }
 
 static void nvme_free_dev(struct kref *kref)
@@ -2926,6 +3022,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
                spin_unlock(&dev_list_lock);
        } else {
                nvme_unfreeze_queues(dev);
+               schedule_work(&dev->scan_work);
                nvme_set_irq_hints(dev);
        }
        return 0;
@@ -3047,6 +3144,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto put_dev;
 
        INIT_LIST_HEAD(&dev->node);
+       INIT_WORK(&dev->scan_work, nvme_dev_scan);
        INIT_WORK(&dev->probe_work, nvme_async_probe);
        schedule_work(&dev->probe_work);
        return 0;
@@ -3119,6 +3217,7 @@ static void nvme_remove(struct pci_dev *pdev)
        pci_set_drvdata(pdev, NULL);
        flush_work(&dev->probe_work);
        flush_work(&dev->reset_work);
+       flush_work(&dev->scan_work);
        device_remove_file(dev->device, &dev_attr_reset_controller);
        nvme_dev_shutdown(dev);
        nvme_dev_remove(dev);
index 8dbd05e70f095b1bd7246677c695693b47e3f5fe..539e5e5e90fb1e61f3f94289430cd60827f0444e 100644 (file)
@@ -92,6 +92,7 @@ struct nvme_dev {
        work_func_t reset_workfn;
        struct work_struct reset_work;
        struct work_struct probe_work;
+       struct work_struct scan_work;
        char name[12];
        char serial[20];
        char model[40];
index b660dc2fadfba0393dd7e419f8beac34f13971e3..732b32e92b02efb257414a4bd24e552c078cdade 100644 (file)
@@ -179,6 +179,10 @@ enum {
        NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
 };
 
+enum {
+       NVME_AER_NOTICE_NS_CHANGED      = 0x0002,
+};
+
 struct nvme_lba_range_type {
        __u8                    type;
        __u8                    attributes;