ptr = cq->sw_rptr;
        while (!Q_EMPTY(ptr, cq->sw_wptr)) {
                cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
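+               /*
+                * A READ_RESP CQE with no read pending on the WQ completes
+                * the driver's own 0B peer2peer read, so don't count it.
+                */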
-               if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+               if ((SQ_TYPE(*cqe) ||
+                    ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
                    (CQE_QPID(*cqe) == wq->qpid))
                        (*count)++;
                ptr++;
        wqe->mpaattrs = attr->mpaattrs;
        wqe->qpcaps = attr->qpcaps;
        wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
-       wqe->flags = cpu_to_be32(attr->flags);
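+       /*
+        * Peer2peer additions: the number of receives already posted on the
+        * QP and the type of RTR message, packed alongside the flags.
+        */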
+       wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+       wqe->flags_rtr_type = cpu_to_be16(attr->flags |
+                                         V_RTR_TYPE(attr->rtr_type));
        wqe->ord = cpu_to_be32(attr->ord);
        wqe->ird = cpu_to_be32(attr->ird);
        wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
         */
        if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
 
+               /*
+                * If this is an unsolicited read response, then the read
+                * was generated by the kernel driver as part of peer-2-peer
+                * connection setup.  So ignore the completion.
+                */
+               if (!wq->oldest_read) {
+                       if (CQE_STATUS(*hw_cqe))
+                               wq->error = 1;
+                       ret = -1;
+                       goto skip_cqe;
+               }
+
                /*
                 * Don't write to the HWCQ, so create a new read req CQE
                 * in local memory.
 
        uP_RI_QP_STAG0_ENABLE = 0x10
 } __attribute__ ((packed));
 
+enum rdma_init_rtr_types {
+       RTR_READ = 1,
+       RTR_WRITE = 2,
+       RTR_SEND = 3,
+};
+
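+/* rtr_type occupies bits 3:2 of the rdma_init WR flags_rtr_type field */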
+#define S_RTR_TYPE     2
+#define M_RTR_TYPE     0x3
+#define V_RTR_TYPE(x)  ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x)  ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
 struct t3_rdma_init_attr {
        u32 tid;
        u32 qpid;
        u32 ird;
        u64 qp_dma_addr;
        u32 qp_dma_size;
-       u32 flags;
+       enum rdma_init_rtr_types rtr_type;
+       u16 flags;
+       u16 rqe_count;
        u32 irs;
 };
 
        u8 mpaattrs;            /* 5 */
        u8 qpcaps;
        __be16 ulpdu_size;
-       __be32 flags;           /* bits 31-1 - reservered */
-                               /* bit     0 - set if RECV posted */
+       __be16 flags_rtr_type;
+       __be16 rqe_count;
        __be32 ord;             /* 6 */
        __be32 ird;
        __be64 qp_dma_addr;     /* 7 */
 };
 
 enum rdma_init_wr_flags {
-       RECVS_POSTED = (1<<0),
+       MPA_INITIATOR = (1<<0),
        PRIV_QP = (1<<1),
 };
 
 
        NULL,
 };
 
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
 static int ep_timeout_secs = 10;
 module_param(ep_timeout_secs, int, 0644);
 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
        skb_reset_transport_header(skb);
        len = skb->len;
        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
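+       /* F_WR_COMPL: request a completion (wr_ack) for this work request */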
+       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
        set_arp_failure_handler(skb, arp_failure_discard);
        skb_reset_transport_header(skb);
        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(mpalen);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
        skb_reset_transport_header(skb);
        len = skb->len;
        req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+       req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
        req->wr_lo = htonl(V_WR_TID(ep->hwtid));
        req->len = htonl(len);
        req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
         * the MPA header is valid.
         */
        state_set(&ep->com, FPDU_MODE);
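+       /* we sent the MPA start request, so this side is the MPA initiator */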
+       ep->mpa_attr.initiator = 1;
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
        /* bind QP and TID with INIT_WR */
        err = iwch_modify_qp(ep->com.qp->rhp,
                             ep->com.qp, mask, &attrs, 1);
-       if (!err)
-               goto out;
+       if (err)
+               goto err;
+
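+       /*
+        * Peer2peer: the MPA initiator is responsible for sending the RTR
+        * (a 0B read); post it now if no receives were pre-posted.
+        */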
+       if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0)
+               iwch_post_zb_read(ep->com.qp);
+
+       goto out;
 err:
        abort_connection(ep, skb, GFP_KERNEL);
 out:
         * If we get here we have accumulated the entire mpa
         * start reply message including private data.
         */
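+       /* we received the MPA start request, so this side is the responder */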
+       ep->mpa_attr.initiator = 0;
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 
        PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 
-       if (credits == 0)
+       if (credits == 0) {
+               printk(KERN_ERR "%s 0 credit ack ep %p state %u\n",
+                       __func__, ep, state_read(&ep->com));
                return CPL_RET_BUF_DONE;
+       }
+
        BUG_ON(credits != 1);
-       BUG_ON(ep->mpa_skb == NULL);
-       kfree_skb(ep->mpa_skb);
-       ep->mpa_skb = NULL;
        dst_confirm(ep->dst);
-       if (state_read(&ep->com) == MPA_REP_SENT) {
-               ep->com.rpl_done = 1;
-               PDBG("waking up ep %p\n", ep);
-               wake_up(&ep->com.waitq);
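+       /*
+        * A wr_ack with no MPA message outstanding is the ack for the
+        * rdma_init WR: the initiator may now send the peer2peer RTR, and
+        * the responder wakes the thread waiting in iwch_accept_cr().
+        */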
+       if (!ep->mpa_skb) {
+               PDBG("%s rdma_init wr_ack ep %p state %u\n",
+                       __func__, ep, state_read(&ep->com));
+               if (ep->mpa_attr.initiator) {
+                       PDBG("%s initiator ep %p state %u\n",
+                               __func__, ep, state_read(&ep->com));
+                       if (peer2peer)
+                               iwch_post_zb_read(ep->com.qp);
+               } else {
+                       PDBG("%s responder ep %p state %u\n",
+                               __func__, ep, state_read(&ep->com));
+                       ep->com.rpl_done = 1;
+                       wake_up(&ep->com.waitq);
+               }
+       } else {
+               PDBG("%s lsm ack ep %p state %u freeing skb\n",
+                       __func__, ep, state_read(&ep->com));
+               kfree_skb(ep->mpa_skb);
+               ep->mpa_skb = NULL;
        }
        return CPL_RET_BUF_DONE;
 }
        if (err)
                goto err;
 
+       /* if receives are posted, wait for the rdma_init WR ack first */
+       if (iwch_rqes_posted(qp)) {
+               wait_event(ep->com.waitq, ep->com.rpl_done);
+               err = ep->com.rpl_err;
+               if (err)
+                       goto err;
+       }
+
        err = send_mpa_reply(ep, conn_param->private_data,
                             conn_param->private_data_len);
        if (err)
                goto err;
 
-       /* wait for wr_ack */
-       wait_event(ep->com.waitq, ep->com.rpl_done);
-       err = ep->com.rpl_err;
-       if (err)
-               goto err;
 
        state_set(&ep->com, FPDU_MODE);
        established_upcall(ep);
 
 
 int __init iwch_cm_init(void);
 void __exit iwch_cm_term(void);
+extern int peer2peer;
 
 #endif                         /* _IWCH_CM_H_ */
 
 };
 
 struct iwch_mpa_attributes {
+       u8 initiator;
        u8 recv_marker_enabled;
        u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
        u8 crc_enabled;
        IWCH_QP_QUERY_TEST_USERWRITE = 0x32     /* Test special */
 };
 
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
 int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                             struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
 int iwch_register_device(struct iwch_dev *dev);
 void iwch_unregister_device(struct iwch_dev *dev);
 int iwch_quiesce_qps(struct iwch_cq *chp);
 
        }
 }
 
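+/*
+ * Post a 0B RDMA READ on the connection on behalf of the driver.  This is
+ * the RTR message used for peer2peer connection setup: the stags, addresses
+ * and length are dummies since no data moves, and the read response is
+ * silently dropped by the CQ poll code.
+ */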
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+       union t3_wr *wqe;
+       struct sk_buff *skb;
+       u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+       PDBG("%s enter\n", __func__);
+       skb = alloc_skb(sizeof(struct t3_rdma_read_wr), GFP_KERNEL);
+       if (!skb) {
+               printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+               return -ENOMEM;
+       }
+       wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+       memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+       wqe->read.rdmaop = T3_READ_REQ;
+       wqe->read.reserved[0] = 0;
+       wqe->read.reserved[1] = 0;
+       wqe->read.reserved[2] = 0;
+       wqe->read.rem_stag = cpu_to_be32(1);
+       wqe->read.rem_to = cpu_to_be64(1);
+       wqe->read.local_stag = cpu_to_be32(1);
+       wqe->read.local_len = cpu_to_be32(0);
+       wqe->read.local_to = cpu_to_be64(1);
+       wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+       wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+                                               V_FW_RIWR_LEN(flit_cnt));
+       skb->priority = CPL_PRIORITY_DATA;
+       return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
 /*
  * This posts a TERMINATE with layer=RDMA, type=catastrophic.
  */
 
 
 /*
- * Return non zero if at least one RECV was pre-posted.
+ * Return the number of RECV WRs currently posted on the QP.
  */
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
 {
-       return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+       union t3_wr *wqe = qhp->wq.queue;
+       u16 count = 0;
+       while ((u16)(count + 1) != 0 &&
+              fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+               count++;
+               wqe++;
+       }
+       PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+       return count;
 }
 
 static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
        init_attr.ird = qhp->attr.max_ird;
        init_attr.qp_dma_addr = qhp->wq.dma_addr;
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-       init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+       init_attr.rqe_count = iwch_rqes_posted(qhp);
+       init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
        init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
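+       /*
+        * For peer2peer the RTR is a 0B read: the initiator needs ORD >= 1
+        * to issue it and the responder needs IRD >= 1 to accept it, so
+        * bump them if the consumer left them at zero.
+        */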
+       if (peer2peer) {
+               init_attr.rtr_type = RTR_READ;
+               if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+                       init_attr.ord = 1;
+               if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+                       init_attr.ird = 1;
+       } else {
+               init_attr.rtr_type = 0;
+       }
        init_attr.irs = qhp->ep->rcv_seq;
        PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
             "flags 0x%x qpcaps 0x%x\n", __func__,
 
 #define DRV_VERSION "1.0-ko"
 
 /* Firmware version */
-#define FW_VERSION_MAJOR 5
+#define FW_VERSION_MAJOR 6
 #define FW_VERSION_MINOR 0
 #define FW_VERSION_MICRO 0
 #endif                         /* __CHELSIO_VERSION_H */