static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
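+/*
+ * Serializes adding/removing hctx cpuhp callback instances in the
+ * helpers below, so concurrent queue updates can't race on the
+ * hlist_unhashed() state of a hw queue.
+ */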
+static DEFINE_MUTEX(blk_mq_cpuhp_lock);
 
 static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
 static void blk_mq_request_bypass_insert(struct request *rq,
        return 0;
 }
 
-static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 {
-       if (!(hctx->flags & BLK_MQ_F_STACKING))
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
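+       /*
+        * hlist_unhashed() reports whether this hctx is currently
+        * registered; re-initializing the node after removal keeps that
+        * check valid for later add/remove cycles.
+        */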
+       if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+           !hlist_unhashed(&hctx->cpuhp_online)) {
                cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
                                                    &hctx->cpuhp_online);
-       cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
-                                           &hctx->cpuhp_dead);
+               INIT_HLIST_NODE(&hctx->cpuhp_online);
+       }
+
+       if (!hlist_unhashed(&hctx->cpuhp_dead)) {
+               cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+                                                   &hctx->cpuhp_dead);
+               INIT_HLIST_NODE(&hctx->cpuhp_dead);
+       }
+}
+
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+       mutex_lock(&blk_mq_cpuhp_lock);
+       __blk_mq_remove_cpuhp(hctx);
+       mutex_unlock(&blk_mq_cpuhp_lock);
+}
+
+static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
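+       /*
+        * Register each instance at most once: a hashed node means the
+        * hctx is already on the cpuhp list.  BLK_MQ_F_STACKING queues
+        * skip the online callback entirely.
+        */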
+       if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+           hlist_unhashed(&hctx->cpuhp_online))
+               cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+                               &hctx->cpuhp_online);
+
+       if (hlist_unhashed(&hctx->cpuhp_dead))
+               cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+                               &hctx->cpuhp_dead);
+}
+
+static void __blk_mq_remove_cpuhp_list(struct list_head *head)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+       list_for_each_entry(hctx, head, hctx_list)
+               __blk_mq_remove_cpuhp(hctx);
+}
+
+/*
+ * Unregister cpuhp callbacks from exited hw queues
+ *
+ * Safe to call while this `request_queue` is live
+ */
+static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q)
+{
+       LIST_HEAD(hctx_list);
+
+       spin_lock(&q->unused_hctx_lock);
+       list_splice_init(&q->unused_hctx_list, &hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
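+       /*
+        * The unused hctxs were spliced onto a private list above so the
+        * sleeping blk_mq_cpuhp_lock is never taken under the
+        * unused_hctx_lock spinlock.
+        */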
+
+       mutex_lock(&blk_mq_cpuhp_lock);
+       __blk_mq_remove_cpuhp_list(&hctx_list);
+       mutex_unlock(&blk_mq_cpuhp_lock);
+
+       spin_lock(&q->unused_hctx_lock);
+       list_splice(&hctx_list, &q->unused_hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
+}
+
+/*
+ * Register cpuhp callbacks for all hw queues
+ *
+ * Safe to call while this `request_queue` is live
+ */
+static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q)
+{
+       struct blk_mq_hw_ctx *hctx;
+       unsigned long i;
+
+       mutex_lock(&blk_mq_cpuhp_lock);
+       queue_for_each_hw_ctx(q, hctx, i)
+               __blk_mq_add_cpuhp(hctx);
+       mutex_unlock(&blk_mq_cpuhp_lock);
 }
 
 /*
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
 
-       blk_mq_remove_cpuhp(hctx);
-
        xa_erase(&q->hctx_table, hctx_idx);
 
        spin_lock(&q->unused_hctx_lock);
        queue_for_each_hw_ctx(q, hctx, i) {
                if (i == nr_queue)
                        break;
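+               /* blk_mq_exit_hctx() no longer unregisters cpuhp callbacks */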
+               blk_mq_remove_cpuhp(hctx);
                blk_mq_exit_hctx(q, set, hctx, i);
        }
 }
        if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
                goto exit_flush_rq;
 
-       if (!(hctx->flags & BLK_MQ_F_STACKING))
-               cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
-                               &hctx->cpuhp_online);
-       cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
-
        return 0;
 
  exit_flush_rq:
        INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
        spin_lock_init(&hctx->lock);
        INIT_LIST_HEAD(&hctx->dispatch);
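+       /* Start unhashed so hlist_unhashed() reads as "not registered" */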
+       INIT_HLIST_NODE(&hctx->cpuhp_dead);
+       INIT_HLIST_NODE(&hctx->cpuhp_online);
        hctx->queue = q;
        hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
 
        xa_for_each_start(&q->hctx_table, j, hctx, j)
                blk_mq_exit_hctx(q, set, hctx, j);
        mutex_unlock(&q->sysfs_lock);
+
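+       /*
+        * Both helpers below take blk_mq_cpuhp_lock and the cpuhp core
+        * locks; they are called only after q->sysfs_lock was dropped
+        * above.
+        */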
+       /* unregister cpuhp callbacks for exited hctxs */
+       blk_mq_remove_hw_queues_cpuhp(q);
+
+       /* register cpuhp callbacks for newly initialized hctxs */
+       blk_mq_add_hw_queues_cpuhp(q);
 }
 
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,