/**
  * struct iowait - linkage for delayed progress/waiting
  * @list: used to add/insert into QP/PQ wait lists
+ * @lock: used to record the list head lock
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
  * @wakeup: space callback wakeup
  * so sleeping is not allowed.
  *
  * The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicitly know that, but the destroy
+ * code that unwaits QPs does not.
  */
 
 struct iowait {
                unsigned seq);
        void (*wakeup)(struct iowait *wait, int reason);
        void (*sdma_drained)(struct iowait *wait);
+       seqlock_t *lock;
        struct work_struct iowork;
        wait_queue_head_t wait_dma;
        wait_queue_head_t wait_pio;
        void (*sdma_drained)(struct iowait *wait))
 {
        wait->count = 0;
+       wait->lock = NULL;
        INIT_LIST_HEAD(&wait->list);
        INIT_LIST_HEAD(&wait->tx_head);
        INIT_WORK(&wait->iowork, func);
 
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                /* refcount held until actual wake up */
                qps[n++] = qp;
        }
 
 static void flush_iowait(struct rvt_qp *qp)
 {
        struct hfi1_qp_priv *priv = qp->priv;
-       struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;
+       seqlock_t *lock = priv->s_iowait.lock;
 
-       write_seqlock_irqsave(&dev->iowait_lock, flags);
+       if (!lock)
+               return;
+       write_seqlock_irqsave(lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
-       write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+       write_sequnlock_irqrestore(lock, flags);
 }
 
 static inline int opa_mtu_enum_to_int(int mtu)
                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        rvt_get_qp(qp);
                }
        if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
                qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
        write_sequnlock(&dev->iowait_lock);
 
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                /* refcount held until actual wake up */
                if (!list_empty(list))
                        mod_timer(&dev->mem_timer, jiffies + 1);
                                mod_timer(&dev->mem_timer, jiffies + 1);
                        qp->s_flags |= RVT_S_WAIT_KMEM;
                        list_add_tail(&priv->s_iowait.list, &dev->memwait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
                        rvt_get_qp(qp);
                }
                        qp->s_flags |= flag;
                        was_empty = list_empty(&sc->piowait);
                        list_add_tail(&priv->s_iowait.list, &sc->piowait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
                        rvt_get_qp(qp);
                        /* counting: only call wantpiobuf_intr if first user */
        setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
        seqlock_init(&dev->iowait_lock);
+       seqlock_init(&dev->txwait_lock);
        INIT_LIST_HEAD(&dev->txwait);
        INIT_LIST_HEAD(&dev->memwait);
 
 
        struct rvt_dev_info rdi; /* Must be first */
 
        /* QP numbers are shared by all IB ports */
-       /* protect wait lists */
-       seqlock_t iowait_lock;
+       /* protect txwait list */
+       seqlock_t txwait_lock ____cacheline_aligned_in_smp;
        struct list_head txwait;        /* list for wait verbs_txreq */
        struct list_head memwait;       /* list for wait kernel memory */
-       struct list_head txreq_free;
        struct kmem_cache *verbs_txreq_cache;
-       struct timer_list mem_timer;
+       u64 n_txwait;
+       u64 n_kmem_wait;
 
+       /* protect iowait lists */
+       seqlock_t iowait_lock ____cacheline_aligned_in_smp;
        u64 n_piowait;
        u64 n_piodrain;
-       u64 n_txwait;
-       u64 n_kmem_wait;
+       struct timer_list mem_timer;
 
 #ifdef CONFIG_DEBUG_FS
        /* per HFI debugfs */
 
        kmem_cache_free(dev->verbs_txreq_cache, tx);
 
        do {
-               seq = read_seqbegin(&dev->iowait_lock);
+               seq = read_seqbegin(&dev->txwait_lock);
                if (!list_empty(&dev->txwait)) {
                        struct iowait *wait;
 
-                       write_seqlock_irqsave(&dev->iowait_lock, flags);
+                       write_seqlock_irqsave(&dev->txwait_lock, flags);
                        wait = list_first_entry(&dev->txwait, struct iowait,
                                                list);
                        qp = iowait_to_qp(wait);
                        priv = qp->priv;
                        list_del_init(&priv->s_iowait.list);
                        /* refcount held until actual wake up */
-                       write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+                       write_sequnlock_irqrestore(&dev->txwait_lock, flags);
                        hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
                        break;
                }
-       } while (read_seqretry(&dev->iowait_lock, seq));
+       } while (read_seqretry(&dev->txwait_lock, seq));
 }
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 {
        struct verbs_txreq *tx = ERR_PTR(-EBUSY);
 
-       write_seqlock(&dev->iowait_lock);
+       write_seqlock(&dev->txwait_lock);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                struct hfi1_qp_priv *priv;
 
                        dev->n_txwait++;
                        qp->s_flags |= RVT_S_WAIT_TX;
                        list_add_tail(&priv->s_iowait.list, &dev->txwait);
+                       priv->s_iowait.lock = &dev->txwait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
                        rvt_get_qp(qp);
                }
                qp->s_flags &= ~RVT_S_BUSY;
        }
 out:
-       write_sequnlock(&dev->iowait_lock);
+       write_sequnlock(&dev->txwait_lock);
        return tx;
 }