blk-mq: avoid inserting requests before establishing new mapping

author Akinobu Mita <akinobu.mita@gmail.com>

Sat, 26 Sep 2015 17:09:23 +0000 (02:09 +0900)

committer Jens Axboe <axboe@fb.com>

Tue, 29 Sep 2015 17:32:50 +0000 (11:32 -0600)
author Akinobu Mita <akinobu.mita@gmail.com>
Sat, 26 Sep 2015 17:09:23 +0000 (02:09 +0900)
committer Jens Axboe <axboe@fb.com>
Tue, 29 Sep 2015 17:32:50 +0000 (11:32 -0600)
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c

index 1e28ddb656b891b92d7c135fa65914939b1451aa..8764c241e5bb44858e753b75f6c102c06a927171 100644 (file)
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
         return cpu;
  }
  
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
+int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+                           const struct cpumask *online_mask)
  {
         unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
         cpumask_var_t cpus;
@@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
  
         cpumask_clear(cpus);
         nr_cpus = nr_uniq_cpus = 0;
-       for_each_online_cpu(i) {
+       for_each_cpu(i, online_mask) {
                 nr_cpus++;
                 first_sibling = get_first_sibling(i);
                 if (!cpumask_test_cpu(first_sibling, cpus))
@@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
  
         queue = 0;
         for_each_possible_cpu(i) {
-               if (!cpu_online(i)) {
+               if (!cpumask_test_cpu(i, online_mask)) {
                         map[i] = 0;
                         continue;
                 }
@@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
         if (!map)
                 return NULL;
  
-       if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
+       if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
                 return map;
  
         kfree(map);
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 3a39184e82e50e993c0da682cfe7c213c8f8c33d..a5dbd069c9dab665cc87ac9dc0f3d4271ed45694 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1789,7 +1789,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
         }
  }
  
-static void blk_mq_map_swqueue(struct request_queue *q)
+static void blk_mq_map_swqueue(struct request_queue *q,
+                              const struct cpumask *online_mask)
  {
         unsigned int i;
         struct blk_mq_hw_ctx *hctx;
@@ -1806,7 +1807,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
          */
         queue_for_each_ctx(q, ctx, i) {
                 /* If the cpu isn't online, the cpu is mapped to first hctx */
-               if (!cpu_online(i))
+               if (!cpumask_test_cpu(i, online_mask))
                         continue;
  
                 hctx = q->mq_ops->map_queue(q, i);
@@ -1852,7 +1853,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
         }
  
         queue_for_each_ctx(q, ctx, i) {
-               if (!cpu_online(i))
+               if (!cpumask_test_cpu(i, online_mask))
                         continue;
  
                 hctx = q->mq_ops->map_queue(q, i);
@@ -2037,13 +2038,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         if (blk_mq_init_hw_queues(q, set))
                 goto err_hctxs;
  
+       get_online_cpus();
         mutex_lock(&all_q_mutex);
  
         list_add_tail(&q->all_q_node, &all_q_list);
         blk_mq_add_queue_tag_set(set, q);
-       blk_mq_map_swqueue(q);
+       blk_mq_map_swqueue(q, cpu_online_mask);
  
         mutex_unlock(&all_q_mutex);
+       put_online_cpus();
  
         return q;
  
@@ -2080,13 +2083,14 @@ void blk_mq_free_queue(struct request_queue *q)
  }
  
  /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q,
+                               const struct cpumask *online_mask)
  {
         WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
  
         blk_mq_sysfs_unregister(q);
  
-       blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+       blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
  
         /*
          * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
@@ -2094,7 +2098,7 @@ static void blk_mq_queue_reinit(struct request_queue *q)
          * involves free and re-allocate memory, worthy doing?)
          */
  
-       blk_mq_map_swqueue(q);
+       blk_mq_map_swqueue(q, online_mask);
  
         blk_mq_sysfs_register(q);
  }
@@ -2103,16 +2107,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
                                       unsigned long action, void *hcpu)
  {
         struct request_queue *q;
+       int cpu = (unsigned long)hcpu;
+       /*
+        * New online cpumask which is going to be set in this hotplug event.
+        * Declare this cpumask as static as cpu-hotplug operations are invoked
+        * one-by-one and dynamically allocating it could result in a failure.
+        */
+       static struct cpumask online_new;
  
         /*
-        * Before new mappings are established, hotadded cpu might already
-        * start handling requests. This doesn't break anything as we map
-        * offline CPUs to first hardware queue. We will re-init the queue
-        * below to get optimal settings.
+        * Before hotadded cpu starts handling requests, new mappings must
+        * be established.  Otherwise, these requests in hw queue might
+        * never be dispatched.
+        *
+        * For example, there is a single hw queue (hctx) and two CPU queues
+        * (ctx0 for CPU0, and ctx1 for CPU1).
+        *
+        * Now CPU1 is just onlined and a request is inserted into
+        * ctx1->rq_list, and bit0 is set in the pending bitmap because
+        * ctx1->index_hw is still zero.
+        *
+        * And then while running hw queue, flush_busy_ctxs() finds bit0 is
+        * set in pending bitmap and tries to retrieve requests in
+        * hctx->ctxs[0]->rq_list.  But hctx->ctxs[0] is a pointer to ctx0,
+        * so the request in ctx1->rq_list is ignored.
          */
-       if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
-           action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DEAD:
+       case CPU_UP_CANCELED:
+               cpumask_copy(&online_new, cpu_online_mask);
+               break;
+       case CPU_UP_PREPARE:
+               cpumask_copy(&online_new, cpu_online_mask);
+               cpumask_set_cpu(cpu, &online_new);
+               break;
+       default:
                 return NOTIFY_OK;
+       }
  
         mutex_lock(&all_q_mutex);
  
@@ -2136,7 +2167,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
         }
  
         list_for_each_entry(q, &all_q_list, all_q_node)
-               blk_mq_queue_reinit(q);
+               blk_mq_queue_reinit(q, &online_new);
  
         list_for_each_entry(q, &all_q_list, all_q_node)
                 blk_mq_unfreeze_queue(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h

index 6a48c4c0d8a2a6efb881ea29b772df3bba9d5540..f4fea79649105b4e134860b53294ef2dac90a95f 100644 (file)
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
   * CPU -> queue mappings
   */
  extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
-extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+                                  const struct cpumask *online_mask);
  extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
  
  /*
author	Akinobu Mita <akinobu.mita@gmail.com>
	Sat, 26 Sep 2015 17:09:23 +0000 (02:09 +0900)
committer	Jens Axboe <axboe@fb.com>
	Tue, 29 Sep 2015 17:32:50 +0000 (11:32 -0600)
block/blk-mq-cpumap.c		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
block/blk-mq.h		patch \| blob \| history