u16 q_depth;
        s16 cq_vector;
        u16 sq_tail;
+       u16 last_sq_tail;
        u16 cq_head;
        u16 last_cq_head;
        u16 qid;
        return 0;
 }
 
+/*
+ * Write sq tail if we are asked to, or if the next command would wrap.
+ */
+static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
+{
+       if (!write_sq) {
+               u16 next_tail = nvmeq->sq_tail + 1;
+
+               if (next_tail == nvmeq->q_depth)
+                       next_tail = 0;
+               if (next_tail != nvmeq->last_sq_tail)
+                       return;
+       }
+
+       if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+                       nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+               writel(nvmeq->sq_tail, nvmeq->q_db);
+       nvmeq->last_sq_tail = nvmeq->sq_tail;
+}
+
 /**
  * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
  * @cmd: The command to send
+ * @write_sq: whether to write to the SQ doorbell
  */
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+                           bool write_sq)
 {
        spin_lock(&nvmeq->sq_lock);
-
        memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
-
        if (++nvmeq->sq_tail == nvmeq->q_depth)
                nvmeq->sq_tail = 0;
-       if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
-                       nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
-               writel(nvmeq->sq_tail, nvmeq->q_db);
+       nvme_write_sq_db(nvmeq, write_sq);
+       spin_unlock(&nvmeq->sq_lock);
+}
+
+static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+       struct nvme_queue *nvmeq = hctx->driver_data;
+
+       spin_lock(&nvmeq->sq_lock);
+       if (nvmeq->sq_tail != nvmeq->last_sq_tail)
+               nvme_write_sq_db(nvmeq, true);
        spin_unlock(&nvmeq->sq_lock);
 }
 
        }
 
        blk_mq_start_request(req);
-       nvme_submit_cmd(nvmeq, &cmnd);
+       nvme_submit_cmd(nvmeq, &cmnd, bd->last);
        return BLK_STS_OK;
 out_cleanup_iod:
        nvme_free_iod(dev, req);
        memset(&c, 0, sizeof(c));
        c.common.opcode = nvme_admin_async_event;
        c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
-       nvme_submit_cmd(nvmeq, &c);
+       nvme_submit_cmd(nvmeq, &c, true);
 }
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 
        spin_lock_irq(&nvmeq->cq_lock);
        nvmeq->sq_tail = 0;
+       nvmeq->last_sq_tail = 0;
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 
 #define NVME_SHARED_MQ_OPS                                     \
        .queue_rq               = nvme_queue_rq,                \
+       .commit_rqs             = nvme_commit_rqs,              \
        .rq_flags_to_type       = nvme_rq_flags_to_type,        \
        .complete               = nvme_pci_complete_rq,         \
        .init_hctx              = nvme_init_hctx,               \