struct nvme_dev *dev;
        spinlock_t sq_lock;
        struct nvme_command *sq_cmds;
-       spinlock_t cq_lock ____cacheline_aligned_in_smp;
+       /* only used for poll queues: */
+       spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
        volatile struct nvme_completion *cqes;
        struct blk_mq_tags **tags;
        dma_addr_t sq_dma_addr;
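The renamed lock is now taken only by pollers, and ____cacheline_aligned_in_smp keeps it on its own cache line so completion-side contention never bounces the line holding the submission fields. A minimal userspace sketch of the same layout idea, with hypothetical names and an assumed 64-byte line size:

#include <pthread.h>
#include <stdalign.h>

struct queue_sketch {
	pthread_spinlock_t sq_lock;	/* producer side: submissions */
	unsigned int sq_tail;
	/* consumer side starts on a fresh cache line, so pollers
	 * taking cq_poll_lock do not false-share with submitters: */
	alignas(64) pthread_spinlock_t cq_poll_lock;
	unsigned int cq_head;
};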
        irqreturn_t ret = IRQ_NONE;
        u16 start, end;
 
-       spin_lock(&nvmeq->cq_lock);
+       /*
+        * The rmb/wmb pair ensures we see all updates from a previous run of
+        * the irq handler, even if that was on another CPU.
+        */
+       rmb();
        if (nvmeq->cq_head != nvmeq->last_cq_head)
                ret = IRQ_HANDLED;
        nvme_process_cq(nvmeq, &start, &end, -1);
        nvmeq->last_cq_head = nvmeq->cq_head;
-       spin_unlock(&nvmeq->cq_lock);
+       wmb();
 
        if (start != end) {
                nvme_complete_cqes(nvmeq, start, end);
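The lock can go because genirq never runs one queue's handler concurrently with itself; all that is left is making one run's stores visible to the next run, which may land on another CPU. A hedged userspace sketch of the pattern, using C11 fences in place of the kernel's rmb()/wmb() and a hypothetical cq_state in place of the nvme_queue fields:

#include <stdatomic.h>

struct cq_state {
	unsigned short head;
	unsigned short last_head;
};

/* Never called concurrently with itself (as the kernel guarantees for
 * an irq handler), but successive calls may run on different CPUs. */
void handler_run(struct cq_state *st)
{
	atomic_thread_fence(memory_order_acquire);	/* ~rmb(): see the prior run */
	if (st->head != st->last_head) {
		/* the previous run left work that is now visible */
	}
	/* ... consume completion entries, advancing st->head ... */
	st->last_head = st->head;
	atomic_thread_fence(memory_order_release);	/* ~wmb(): publish for the next run */
}

Strictly, C11 would also want those fields accessed atomically; the fences are shown only to mirror where the barriers sit in the hunk above.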
  */
 static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
 {
-       unsigned long flags;
+       struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
        u16 start, end;
        int found;
 
-       spin_lock_irqsave(&nvmeq->cq_lock, flags);
+       /*
+        * For a poll queue we need to protect against the polling thread
+        * using the cq_poll_lock.  For normal interrupt driven queues we
+        * have to disable the interrupt to avoid racing with it.
+        */
+       if (nvmeq->cq_vector == -1)
+               spin_lock(&nvmeq->cq_poll_lock);
+       else
+               disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
        found = nvme_process_cq(nvmeq, &start, &end, tag);
-       spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
+       if (nvmeq->cq_vector == -1)
+               spin_unlock(&nvmeq->cq_poll_lock);
+       else
+               enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
 
        nvme_complete_cqes(nvmeq, start, end);
        return found;
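For interrupt driven queues the key property is that disable_irq() does not just mask the vector: it also waits for a handler already executing on another CPU to finish, so on return the caller owns the CQ without taking any lock. A sketch of the two strategies, with my_queue, process_cq() and is_poll_queue as hypothetical stand-ins for the driver's types and helpers:

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_queue {
	spinlock_t cq_poll_lock;
	unsigned int irq;	/* Linux irq number of the queue's vector */
	bool is_poll_queue;	/* true when no irq vector is assigned */
};
int process_cq(struct my_queue *q);

int poll_irqdisable_sketch(struct my_queue *q)
{
	int found;

	if (q->is_poll_queue) {
		/* no irq ever touches this CQ; only other pollers do,
		 * and the plain spinlock serializes against them */
		spin_lock(&q->cq_poll_lock);
		found = process_cq(q);
		spin_unlock(&q->cq_poll_lock);
	} else {
		/* disable_irq() masks the vector and synchronizes with
		 * a running handler, so afterwards the CQ is ours */
		disable_irq(q->irq);
		found = process_cq(q);
		enable_irq(q->irq);
	}
	return found;
}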
        if (!nvme_cqe_pending(nvmeq))
                return 0;
 
-       spin_lock(&nvmeq->cq_lock);
+       spin_lock(&nvmeq->cq_poll_lock);
        found = nvme_process_cq(nvmeq, &start, &end, -1);
-       spin_unlock(&nvmeq->cq_lock);
+       spin_unlock(&nvmeq->cq_poll_lock);
 
        nvme_complete_cqes(nvmeq, start, end);
        return found;
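Pure poll queues never raise interrupts, so the only racers here are other pollers, which cq_poll_lock serializes; nvme_cqe_pending() runs first as a lockless fast path because a stale peek is harmless. A sketch of that check-then-lock shape, again with hypothetical my_queue, cqe_pending() and process_cq():

#include <linux/spinlock.h>
#include <linux/types.h>

struct my_queue {
	spinlock_t cq_poll_lock;
};
bool cqe_pending(struct my_queue *q);
int process_cq(struct my_queue *q);

int poll_sketch(struct my_queue *q)
{
	int found;

	/* racy peek at the phase bit: missing a just-posted entry only
	 * means we return 0 and the next poll will pick it up */
	if (!cqe_pending(q))
		return 0;

	spin_lock(&q->cq_poll_lock);
	found = process_cq(q);
	spin_unlock(&q->cq_poll_lock);
	return found;
}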
        nvmeq->q_dmadev = dev->dev;
        nvmeq->dev = dev;
        spin_lock_init(&nvmeq->sq_lock);
-       spin_lock_init(&nvmeq->cq_lock);
+       spin_lock_init(&nvmeq->cq_poll_lock);
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 {
        struct nvme_dev *dev = nvmeq->dev;
 
-       spin_lock_irq(&nvmeq->cq_lock);
        nvmeq->sq_tail = 0;
        nvmeq->last_sq_tail = 0;
        nvmeq->cq_head = 0;
        memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
        nvme_dbbuf_init(dev, nvmeq, qid);
        dev->online_queues++;
-       spin_unlock_irq(&nvmeq->cq_lock);
+       wmb(); /* ensure the first interrupt sees the initialization */
 }
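The closing wmb() publishes the reset head, phase and zeroed CQ memory before the queue can deliver its first interrupt, pairing with the rmb() at the top of nvme_irq(). A hedged sketch of that publish-then-enable ordering, where make_queue_live() is a hypothetical stand-in for whatever step (such as the create-CQ admin command) lets the irq start firing:

#include <linux/string.h>
#include <linux/types.h>
#include <asm/barrier.h>

struct my_queue {
	u16 head, phase;
	void *cqes;
	size_t cqes_bytes;
};
void make_queue_live(struct my_queue *q);

void init_queue_sketch(struct my_queue *q)
{
	q->head = 0;
	q->phase = 1;
	memset(q->cqes, 0, q->cqes_bytes);
	wmb();			/* pairs with the rmb() in the irq handler */
	make_queue_live(q);	/* from here the irq may run on any CPU */
}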
 
 static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)