        q->flush_pending_idx ^= 1;
 
        blk_rq_init(q, q->flush_rq);
-       if (q->mq_ops) {
-               /*
-                * Reuse the tag value from the first waiting request;
-                * with blk-mq the tag is generated during request
-                * allocation and drivers can rely on it being inside
-                * the range they asked for.
-                */
-               q->flush_rq->mq_ctx = first_rq->mq_ctx;
-               q->flush_rq->tag = first_rq->tag;
-       }
+       if (q->mq_ops)
+               blk_mq_clone_flush_request(q->flush_rq, first_rq);
 
        q->flush_rq->cmd_type = REQ_TYPE_FS;
        q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
 
        __blk_mq_free_request(hctx, ctx, rq);
 }
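
For context on the first hunk: blk-flush.c keeps two staging lists for flush requests and uses flush_pending_idx to pick the active one, so the `^= 1` at the top swaps the roles of the two lists each time a flush is kicked. Below is a minimal sketch of that double-buffering scheme, assuming the request_queue fields of this era (flush_queue[2] and a one-bit flush_pending_idx); the struct and helper names are invented for illustration.

#include <linux/list.h>

/*
 * Illustration only: double-buffered flush staging as done by blk-flush.c.
 * While one list holds the batch being flushed, newly arriving requests
 * are staged on the other; toggling the index (under the queue lock in
 * the real code) swaps the two roles.
 */
struct flush_machine {
        struct list_head        flush_queue[2];
        unsigned long           flush_pending_idx:1;
};

/* Requests entering the flush sequence wait on the pending list. */
static struct list_head *flush_pending_list(struct flush_machine *fm)
{
        return &fm->flush_queue[fm->flush_pending_idx];
}

/*
 * Kicking a flush flips the index: the batch collected so far becomes
 * the in-flight batch, and later requests stage on the other list
 * until this flush completes.
 */
static void flush_kick(struct flush_machine *fm)
{
        fm->flush_pending_idx ^= 1;
}
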
 
+/*
+ * Clone all relevant state from a request that has been put on hold in
+ * the flush state machine into the preallocated flush request that hangs
+ * off the request queue.
+ *
+ * For a driver the flush request should be invisible; that's why we
+ * impersonate the original request here.
+ */
+void blk_mq_clone_flush_request(struct request *flush_rq,
+               struct request *orig_rq)
+{
+       struct blk_mq_hw_ctx *hctx =
+               orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
+
+       flush_rq->mq_ctx = orig_rq->mq_ctx;
+       flush_rq->tag = orig_rq->tag;
+       memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
+               hctx->cmd_size);
+}
+
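
The memcpy in the new blk-mq.c helper is what completes the impersonation: blk-mq allocates each driver's per-request payload (PDU) of hctx->cmd_size bytes directly behind struct request, and blk_mq_rq_to_pdu() returns a pointer to it, so one copy carries the whole driver-private state over to the flush request. A hedged sketch of the driver-side view under the blk-mq API of this era follows; struct my_cmd and my_queue_rq are hypothetical, while blk_mq_rq_to_pdu(), rq->tag, the queue_rq signature, and the BLK_MQ_RQ_QUEUE_OK return code come from blk-mq itself.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Hypothetical driver PDU; blk-mq reserved cmd_size bytes for it. */
struct my_cmd {
        u16     hw_tag;
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
        struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

        /*
         * Works the same whether rq is a normal request or the cloned
         * flush request: the tag lies within the range the driver asked
         * for, and the PDU contents match the original request.
         */
        cmd->hw_tag = rq->tag;

        /* ... submit cmd to hardware as usual ... */
        return BLK_MQ_RQ_QUEUE_OK;
}
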
 bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes)
 {
        if (blk_update_request(rq, error, nr_bytes))
 
 void blk_mq_init_flush(struct request_queue *q);
 void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
+void blk_mq_clone_flush_request(struct request *flush_rq,
+               struct request *orig_rq);
 
 /*
  * CPU hotplug helpers