}
 EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
 
+void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
+{
+       struct task_struct *tsk = current;
+
+       /*
+        * If this is a nested plug, don't actually assign it.
+        */
+       if (tsk->plug)
+               return;
+
+       INIT_LIST_HEAD(&plug->mq_list);
+       plug->cached_rq = NULL;
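+       /* cap the batch at BLK_MAX_REQUEST_COUNT, the normal plug limit */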
+       plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
+       plug->rq_count = 0;
+       plug->multiple_queues = false;
+       plug->nowait = false;
+       INIT_LIST_HEAD(&plug->cb_list);
+
+       /*
+        * Store ordering should not be needed here, since a potential
+        * preempt will imply a full memory barrier
+        */
+       tsk->plug = plug;
+}
+
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:      The &struct blk_plug that needs to be initialized
  */
 void blk_start_plug(struct blk_plug *plug)
 {
-       struct task_struct *tsk = current;
-
-       /*
-        * If this is a nested plug, don't actually assign it.
-        */
-       if (tsk->plug)
-               return;
-
-       INIT_LIST_HEAD(&plug->mq_list);
-       INIT_LIST_HEAD(&plug->cb_list);
-       plug->rq_count = 0;
-       plug->multiple_queues = false;
-       plug->nowait = false;
-
-       /*
-        * Store ordering should not be needed here, since a potential
-        * preempt will imply a full memory barrier
-        */
-       tsk->plug = plug;
+       blk_start_plug_nr_ios(plug, 1);
 }
 EXPORT_SYMBOL(blk_start_plug);
 
 
        if (!list_empty(&plug->mq_list))
                blk_mq_flush_plug_list(plug, from_schedule);
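+       /*
+        * Requests that were batch-allocated for the plug but never used
+        * are only freed when the plug is finished, not when it is flushed
+        * because the task is scheduling out.
+        */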
+       if (unlikely(!from_schedule && plug->cached_rq))
+               blk_mq_free_plug_rqs(plug);
 }
 
 /**
 
        struct request_queue *q = data->q;
        struct elevator_queue *e = q->elevator;
        u64 alloc_time_ns = 0;
+       struct request *rq;
        unsigned int tag;
 
        /* alloc_time includes depth and tag waits */
         * case just retry the hctx assignment and tag allocation as CPU hotplug
         * should have migrated us to an online CPU by now.
         */
-       tag = blk_mq_get_tag(data);
-       if (tag == BLK_MQ_NO_TAG) {
+       do {
+               tag = blk_mq_get_tag(data);
+               if (tag != BLK_MQ_NO_TAG) {
+                       rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+                       if (!--data->nr_tags)
+                               return rq;
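+                       /*
+                        * Don't cache extra requests if an I/O scheduler is
+                        * attached or the tag set is shared across queues.
+                        */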
+                       if (e || data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+                               return rq;
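+                       /*
+                        * Stash this request on the caller's cache list and
+                        * set NOWAIT so filling the rest of the batch never
+                        * blocks waiting for a tag.
+                        */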
+                       rq->rq_next = *data->cached_rq;
+                       *data->cached_rq = rq;
+                       data->flags |= BLK_MQ_REQ_NOWAIT;
+                       continue;
+               }
                if (data->flags & BLK_MQ_REQ_NOWAIT)
-                       return NULL;
+                       break;
 
                /*
                 * Give up the CPU and sleep for a random short time to ensure
                 */
                msleep(3);
                goto retry;
+       } while (1);
+
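+       /*
+        * Tag allocation failed with NOWAIT set; hand out one of the
+        * requests already allocated for this batch, if there are any.
+        */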
+       if (data->cached_rq) {
+               rq = *data->cached_rq;
+               *data->cached_rq = rq->rq_next;
+               return rq;
        }
-       return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+
+       return NULL;
 }
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
                .q              = q,
                .flags          = flags,
                .cmd_flags      = op,
+               .nr_tags        = 1,
        };
        struct request *rq;
        int ret;
                .q              = q,
                .flags          = flags,
                .cmd_flags      = op,
+               .nr_tags        = 1,
        };
        u64 alloc_time_ns = 0;
        unsigned int cpu;
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
+void blk_mq_free_plug_rqs(struct blk_plug *plug)
+{
+       while (plug->cached_rq) {
+               struct request *rq;
+
+               rq = plug->cached_rq;
+               plug->cached_rq = rq->rq_next;
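+               /*
+                * Take a queue usage reference to balance the
+                * blk_queue_exit() that blk_mq_free_request() does for
+                * this request.
+                */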
+               percpu_ref_get(&rq->q->q_usage_counter);
+               blk_mq_free_request(rq);
+       }
+}
+
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
        u64 now = 0;
        const int is_flush_fua = op_is_flush(bio->bi_opf);
        struct blk_mq_alloc_data data = {
                .q              = q,
+               .nr_tags        = 1,
        };
        struct request *rq;
        struct blk_plug *plug;
 
        hipri = bio->bi_opf & REQ_HIPRI;
 
-       data.cmd_flags = bio->bi_opf;
-       rq = __blk_mq_alloc_request(&data);
-       if (unlikely(!rq)) {
-               rq_qos_cleanup(q, bio);
-               if (bio->bi_opf & REQ_NOWAIT)
-                       bio_wouldblock_error(bio);
-               goto queue_exit;
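+       /*
+        * Use a request pre-allocated for this plug if one is cached;
+        * otherwise allocate one, asking for plug->nr_ios tags so that
+        * the rest of the batch can be cached for later submissions.
+        */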
+       plug = blk_mq_plug(q, bio);
+       if (plug && plug->cached_rq) {
+               rq = plug->cached_rq;
+               plug->cached_rq = rq->rq_next;
+               INIT_LIST_HEAD(&rq->queuelist);
+               data.hctx = rq->mq_hctx;
+       } else {
+               data.cmd_flags = bio->bi_opf;
+               if (plug) {
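+                       /*
+                        * Only the first allocation under this plug tries
+                        * to batch; reset nr_ios so later ones don't.
+                        */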
+                       data.nr_tags = plug->nr_ios;
+                       plug->nr_ios = 1;
+                       data.cached_rq = &plug->cached_rq;
+               }
+               rq = __blk_mq_alloc_request(&data);
+               if (unlikely(!rq)) {
+                       rq_qos_cleanup(q, bio);
+                       if (bio->bi_opf & REQ_NOWAIT)
+                               bio_wouldblock_error(bio);
+                       goto queue_exit;
+               }
        }
 
        trace_block_getrq(bio);
                return BLK_QC_T_NONE;
        }
 
-       plug = blk_mq_plug(q, bio);
        if (unlikely(is_flush_fua)) {
                /* Bypass scheduler for flush requests */
                blk_insert_flush(rq);
 
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
+void blk_mq_free_plug_rqs(struct blk_plug *plug);
 
 void blk_mq_release(struct request_queue *q);
 
        unsigned int shallow_depth;
        unsigned int cmd_flags;
 
+       /* allocate multiple requests/tags in one go */
+       unsigned int nr_tags;
+       struct request **cached_rq;
+
        /* input & output parameter */
        struct blk_mq_ctx *ctx;
        struct blk_mq_hw_ctx *hctx;
 
        struct bio *bio;
        struct bio *biotail;
 
-       struct list_head queuelist;
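+       /* rq_next chains requests cached on a plug until they are queued */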
+       union {
+               struct list_head queuelist;
+               struct request *rq_next;
+       };
 
        /*
         * The hash is used inside the scheduler, and killed once the
 
  */
 struct blk_plug {
        struct list_head mq_list; /* blk-mq requests */
-       struct list_head cb_list; /* md requires an unplug callback */
+
+       /* if nr_ios is > 1, we can batch tag/rq allocations */
+       struct request *cached_rq;
+       unsigned short nr_ios;
+
        unsigned short rq_count;
+
        bool multiple_queues;
        bool nowait;
+
+       struct list_head cb_list; /* md requires an unplug callback */
 };
 
 struct blk_plug_cb;
 extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
                                             void *data, int size);
 extern void blk_start_plug(struct blk_plug *);
+extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
 extern void blk_finish_plug(struct blk_plug *);
 extern void blk_flush_plug_list(struct blk_plug *, bool);
 
 struct blk_plug {
 };
 
+static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
+                                        unsigned short nr_ios)
+{
+}
+
 static inline void blk_start_plug(struct blk_plug *plug)
 {
 }