--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ ... @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
        ws = bt_wait_ptr(bt, data->hctx);
        drop_ctx = data->ctx == NULL;
        do {
+               struct sbitmap_queue *bt_prev;
+
                /*
                 * We're out of tags on this hardware queue, kick any
                 * pending IO submits before going to sleep waiting for
                 * some to complete.
                 */
@@ ... @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                if (data->ctx)
                        blk_mq_put_ctx(data->ctx);
 
+               bt_prev = bt;
                io_schedule();
 
                data->ctx = blk_mq_get_ctx(data->q);
@@ ... @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                        bt = &tags->bitmap_tags;
 
                finish_wait(&ws->wait, &wait);
+
+               /*
+                * If the destination hw queue changed while we slept, fake a
+                * wake-up on the previous queue to compensate for the missed
+                * wake-up, so other allocations on the previous queue are not
+                * starved.
+                */
+               if (bt != bt_prev)
+                       sbitmap_queue_wake_up(bt_prev);
+
                ws = bt_wait_ptr(bt, data->hctx);
        } while (1);
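The hunk above closes a wake-up lost across CPU migration: a task sleeps
on the waitqueue of one hardware queue's sbitmap, migrates while inside
io_schedule(), and resumes allocation on a different hardware queue, so
the wake-up accounted to the old queue is consumed without restarting
anyone still parked there. A minimal sketch of the pattern, assuming a
hypothetical pick_bitmap_for_current_cpu() helper in place of the
blk_mq_get_ctx()/blk_mq_map_queue() re-lookup the real code performs:

	struct sbitmap_queue *bt_prev = bt;	/* queue we slept on */

	io_schedule();				/* may migrate while asleep */

	bt = pick_bitmap_for_current_cpu();	/* hypothetical re-lookup */
	if (bt != bt_prev)
		/*
		 * The wake-up that restarted this task was charged to
		 * bt_prev's waitqueue; replay it so waiters still parked
		 * on bt_prev are not starved.
		 */
		sbitmap_queue_wake_up(bt_prev);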

--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ ... @@
  */
 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
 
+/**
+ * sbitmap_queue_wake_up() - Wake up some of the waiters in one waitqueue
+ * on a &struct sbitmap_queue.
+ * @sbq: Bitmap queue to wake up.
+ */
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
+
 /**
  * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
  * seq_file.
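With the declaration above and the EXPORT_SYMBOL_GPL() below, sbitmap
users outside the core code can replay a wake-up themselves. A
hypothetical sketch (migrate_alloc_domain() is illustrative, not part
of the patch):

	#include <linux/sbitmap.h>

	/*
	 * After abandoning old_sbq in favour of new_sbq, kick the
	 * waiters still parked on old_sbq so any wake-up owed to it
	 * is not lost.
	 */
	static void migrate_alloc_domain(struct sbitmap_queue *old_sbq,
					 struct sbitmap_queue *new_sbq)
	{
		if (new_sbq != old_sbq)
			sbitmap_queue_wake_up(old_sbq);
	}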
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ ... @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
        if (sbq->wake_batch != wake_batch) {
                WRITE_ONCE(sbq->wake_batch, wake_batch);
                /*
-                * Pairs with the memory barrier in sbq_wake_up() to ensure that
-                * the batch size is updated before the wait counts.
+                * Pairs with the memory barrier in sbitmap_queue_wake_up()
+                * to ensure that the batch size is updated before the wait
+                * counts.
                 */
                smp_mb__before_atomic();
                for (i = 0; i < SBQ_WAIT_QUEUES; i++)
@@ ... @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
        unsigned int wake_batch;
        int wait_cnt;
 
-       /*
-        * Pairs with the memory barrier in set_current_state() to ensure the
-        * proper ordering of clear_bit()/waitqueue_active() in the waker and
-        * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-        * waiter. See the comment on waitqueue_active(). This is __after_atomic
-        * because we just did clear_bit_unlock() in the caller.
-        */
-       smp_mb__after_atomic();
-
        ws = sbq_wake_ptr(sbq);
        if (!ws)
                return false;
@@ ... @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
        return false;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
 {
        while (__sbq_wake_up(sbq))
                ;
 }
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
                         unsigned int cpu)
 {
        sbitmap_clear_bit_unlock(&sbq->sb, nr);
-       sbq_wake_up(sbq);
+       /*
+        * Pairs with the memory barrier in set_current_state() to ensure the
+        * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+        * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+        * waiter. See the comment on waitqueue_active().
+        */
+       smp_mb__after_atomic();
+       sbitmap_queue_wake_up(sbq);
+
        if (likely(!sbq->round_robin && nr < sbq->sb.depth))
                *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
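The barrier move above follows the classic waitqueue_active() pattern:
a full barrier must sit between releasing the bit and checking for
waiters, otherwise the check can be reordered before the release and a
waiter that registered in between sleeps forever. A userspace C11
analogy with illustrative names only (the kernel primitives differ):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool busy = true;
	static atomic_int waiters;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

	static void release(void)	/* cf. sbitmap_queue_clear() */
	{
		/* cf. clear_bit_unlock() */
		atomic_store_explicit(&busy, false, memory_order_release);
		/*
		 * cf. smp_mb__after_atomic(): order the store above
		 * against the load of waiters below, so a waiter that
		 * registered before our recheck cannot be missed.
		 */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&waiters, memory_order_relaxed)) {
			pthread_mutex_lock(&lock);
			pthread_cond_broadcast(&cond);
			pthread_mutex_unlock(&lock);
		}
	}

	static void acquire_slowpath(void)	/* cf. the sbitmap waiter */
	{
		pthread_mutex_lock(&lock);
		atomic_fetch_add(&waiters, 1);	/* cf. prepare_to_wait() */
		while (atomic_load(&busy))	/* recheck after registering */
			pthread_cond_wait(&cond, &lock);
		atomic_fetch_sub(&waiters, 1);	/* cf. finish_wait() */
		pthread_mutex_unlock(&lock);
	}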
@@ ... @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)

        /*
         * Pairs with the memory barrier in set_current_state() like in
-        * sbq_wake_up().
+        * sbitmap_queue_wake_up().
         */
        smp_mb();
        wake_index = atomic_read(&sbq->wake_index);