/*
         * The RMT entries are currently allocated as shown below:
         * 1. QOS (0 to 128 entries);
-        * 2. FECN for PSM (num_user_contexts + num_vnic_contexts);
+        * 2. FECN (num_kernel_contexts - 1 + num_user_contexts +
+        *    num_vnic_contexts);
         * 3. VNIC (num_vnic_contexts).
-        * It should be noted that PSM FECN oversubscribe num_vnic_contexts
+        * It should be noted that FECN oversubscribes num_vnic_contexts
         * entries of RMT because both VNIC and PSM could allocate any receive
         * context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
         * and PSM FECN must reserve an RMT entry for each possible PSM receive
         * context.
         */
        rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+       if (HFI1_CAP_IS_KSET(TID_RDMA))
+               rmt_count += num_kernel_contexts - 1;
        if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
                user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
                dd_dev_err(dd,
        init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
 }
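
To make the budget in the comment above concrete, here is a minimal sketch of
the same arithmetic, not driver code: the helper name rmt_budget() is an
illustrative assumption, and the terms mirror qos_rmt_entries(), the VNIC and
FECN counts, and the TID RDMA adjustment in the hunk.

    #include <linux/types.h>

    /*
     * RMT entry budget: QOS entries, one FECN entry per possible receive
     * context (kernel contexts minus context 0 when TID RDMA is enabled,
     * plus user and VNIC contexts), and one entry per VNIC context.
     */
    static unsigned int rmt_budget(unsigned int qos_entries,
                                   unsigned int num_kernel_contexts,
                                   unsigned int n_usr_ctxts,
                                   unsigned int num_vnic_contexts,
                                   bool tid_rdma)
    {
            unsigned int count = qos_entries + 2 * num_vnic_contexts;

            if (tid_rdma)
                    count += num_kernel_contexts - 1;
            return count + n_usr_ctxts;
    }

The overflow check in the hunk compares exactly this total against
NUM_MAP_ENTRIES and falls back to a reduced user context count
(user_rmt_reduced) when it does not fit.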
 
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
-                                   struct rsm_map_table *rmt)
+static void init_fecn_handling(struct hfi1_devdata *dd,
+                              struct rsm_map_table *rmt)
 {
        struct rsm_rule_data rrd;
        u64 reg;
-       int i, idx, regoff, regidx;
+       int i, idx, regoff, regidx, start;
        u8 offset;
        u32 total_cnt;
 
+       if (HFI1_CAP_IS_KSET(TID_RDMA))
+               /* Exclude context 0 */
+               start = 1;
+       else
+               start = dd->first_dyn_alloc_ctxt;
+
+       total_cnt = dd->num_rcv_contexts - start;
+
        /* there needs to be enough room in the map table */
-       total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
        if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
-               dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+               dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
                return;
        }
 
        /*
         * RSM will extract the destination context as an index into the
         * map table.  The destination contexts are a sequential block
-        * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
+        * in the range start...num_rcv_contexts-1 (inclusive).
         * Map entries are accessed as offset + extracted value.  Adjust
         * the added offset so this sequence can be placed anywhere in
         * the table - as long as the entries themselves do not wrap.
         * There are only enough bits in offset for the table size, so
         * start with that to allow for a "negative" offset.
         */
-       offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
-                                               (int)dd->first_dyn_alloc_ctxt);
+       offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
 
-       for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
-                               i < dd->num_rcv_contexts; i++, idx++) {
+       for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
+            i++, idx++) {
                /* replace with identity mapping */
                regoff = (idx % 8) * 8;
                regidx = idx / 8;
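
A worked example of the "negative" offset above, with illustrative numbers
(assuming the usual 256-entry map table): if rmt->used is 130 and start is 1,
then

    offset = (u8)(256 + 130 - 1) = (u8)385 = 129;

so context i is mapped to entry (129 + i) mod 256 = 130 + (i - 1), i.e. the
FECN entries land immediately after the 130 entries already in use, with the
8-bit truncation providing the "negative" adjustment the comment describes.
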
        rmt = alloc_rsm_map_table(dd);
        /* set up QOS, including the QPN map table */
        init_qos(dd, rmt);
-       init_user_fecn_handling(dd, rmt);
+       init_fecn_handling(dd, rmt);
        complete_rsm_map_table(dd, rmt);
        /* record number of used rsm map entries for vnic */
        dd->vnic.rmt_start = rmt->used;
 
                             struct hfi1_pkt_state *ps);
 static void hfi1_do_tid_send(struct rvt_qp *qp);
 static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+                            struct ib_other_headers *ohdr,
+                            struct rvt_qp *qp, u32 psn, int diff, bool fecn);
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+                                  struct hfi1_qp_priv *priv,
+                                  struct hfi1_ctxtdata *rcd,
+                                  struct tid_rdma_flow *flow,
+                                  bool fecn);
 
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
        struct ib_reth *reth;
        struct hfi1_qp_priv *qpriv = qp->priv;
        u32 bth0, psn, len, rkey;
-       bool is_fecn;
+       bool fecn;
        u8 next;
        u64 vaddr;
        int diff;
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
 
 
        diff = delta_psn(psn, qp->r_psn);
        if (unlikely(diff)) {
-               if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff))
-                       return;
-               goto send_ack;
+               tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+               return;
        }
 
        /* We've verified the request, insert it into the ack queue. */
 
        /* Schedule the send tasklet. */
        qp->s_flags |= RVT_S_RESP_PENDING;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        hfi1_schedule_send(qp);
 
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               goto send_ack;
        return;
 
 nack_inv_unlock:
        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
-send_ack:
-       hfi1_send_rc_ack(packet, is_fecn);
 }
 
 u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
        struct tid_rdma_request *req;
        struct tid_rdma_flow *flow;
        u32 opcode, aeth;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
        u32 kpsn, ipsn;
 
        trace_hfi1_sender_rcv_tid_read_resp(qp);
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
        flow = &req->flows[req->clear_tail];
        /* When header suppression is disabled */
        if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
+               update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
                if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
                        goto ack_done;
                flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
+               /*
+                * Copy the payload to the destination buffer if this packet
+                * is delivered as an eager packet due to the RSM rule and
+                * FECN. The RSM rule selects the FECN bit in the BTH and the
+                * SH bit in the KDETH header and therefore will not match
+                * the last packet of each segment, which has the SH bit
+                * cleared.
+                */
+               if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+                       struct rvt_sge_state ss;
+                       u32 len;
+                       u32 tlen = packet->tlen;
+                       u16 hdrsize = packet->hlen;
+                       u8 pad = packet->pad;
+                       u8 extra_bytes = pad + packet->extra_byte +
+                               (SIZE_OF_CRC << 2);
+                       u32 pmtu = qp->pmtu;
+
+                       if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+                               goto ack_op_err;
+                       len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
+                       if (unlikely(len < pmtu))
+                               goto ack_op_err;
+                       rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+                                    false);
+                       /* Raise the sw sequence check flag for the next packet */
+                       priv->s_flags |= HFI1_R_TID_SW_PSN;
+               }
+
                goto ack_done;
        }
        flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
 
 ack_done:
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               hfi1_send_rc_ack(packet, is_fecn);
 }
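
The eager-copy branch in the READ RESP handler above accepts only full-MTU
packets before copying. A minimal sketch of that length check follows; the
helper name eager_len_ok() is an illustrative assumption, and it presumes
SIZE_OF_CRC counts the ICRC in dwords, so SIZE_OF_CRC << 2 is 4 bytes:

    #include <linux/types.h>

    /* Total length must be header + one payload MTU + pad/extra bytes + ICRC. */
    static bool eager_len_ok(u32 tlen, u16 hdrsize, u32 pmtu,
                             u8 pad, u8 extra_byte)
    {
            u32 extra_bytes = pad + extra_byte + 4;

            return tlen == hdrsize + pmtu + extra_bytes;
    }

Anything else is treated as an operation error, since a start/middle packet
redirected to the eager path by the FECN rule is expected to carry exactly
one MTU of payload.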
 
 void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
        struct hfi1_qp_priv *qpriv = qp->priv;
        struct tid_rdma_request *req;
        u32 bth0, psn, len, rkey, num_segs;
-       bool is_fecn;
+       bool fecn;
        u8 next;
        u64 vaddr;
        int diff;
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
 
        num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
        diff = delta_psn(psn, qp->r_psn);
        if (unlikely(diff)) {
-               if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff))
-                       return;
-               goto send_ack;
+               tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+               return;
        }
 
        /*
 
        /* Schedule the send tasklet. */
        qp->s_flags |= RVT_S_RESP_PENDING;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        hfi1_schedule_send(qp);
 
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               goto send_ack;
        return;
 
 nack_inv_unlock:
        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
-send_ack:
-       hfi1_send_rc_ack(packet, is_fecn);
 }
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
        struct tid_rdma_flow *flow;
        enum ib_wc_status status;
        u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
                qpriv->s_tid_cur = i;
        }
        qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
-
        hfi1_schedule_tid_send(qp);
        goto ack_done;
 
 ack_err:
        rvt_error_qp(qp, status);
 ack_done:
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               hfi1_send_rc_ack(packet, is_fecn);
 }
 
 bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
        unsigned long flags;
        u32 psn, next;
        u8 opcode;
+       bool fecn;
 
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
 
        req = ack_to_tid_req(e);
        flow = &req->flows[req->clear_tail];
        if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
+               update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
                if (cmp_psn(psn, flow->flow_state.r_next_psn))
                        goto send_nak;
+
                flow->flow_state.r_next_psn = mask_psn(psn + 1);
+               /*
+                * Copy the payload to the destination buffer if this packet
+                * is delivered as an eager packet due to the RSM rule and
+                * FECN. The RSM rule selects the FECN bit in the BTH and the
+                * SH bit in the KDETH header and therefore will not match
+                * the last packet of each segment, which has the SH bit
+                * cleared.
+                */
+               if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+                       struct rvt_sge_state ss;
+                       u32 len;
+                       u32 tlen = packet->tlen;
+                       u16 hdrsize = packet->hlen;
+                       u8 pad = packet->pad;
+                       u8 extra_bytes = pad + packet->extra_byte +
+                               (SIZE_OF_CRC << 2);
+                       u32 pmtu = qp->pmtu;
+
+                       if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+                               goto send_nak;
+                       len = req->comp_seg * req->seg_len;
+                       len += delta_psn(psn,
+                               full_flow_psn(flow, flow->flow_state.spsn)) *
+                               pmtu;
+                       if (unlikely(req->total_len - len < pmtu))
+                               goto send_nak;
+
+                       /*
+                        * The e->rdma_sge field is set when TID RDMA WRITE REQ
+                        * is first received and is never modified thereafter.
+                        */
+                       ss.sge = e->rdma_sge;
+                       ss.sg_list = NULL;
+                       ss.num_sge = 1;
+                       ss.total_len = req->total_len;
+                       rvt_skip_sge(&ss, len, false);
+                       rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+                                    false);
+                       /* Raise the sw sequence check flag for the next packet */
+                       priv->r_next_psn_kdeth = mask_psn(psn + 1);
+                       priv->s_flags |= HFI1_R_TID_SW_PSN;
+               }
                goto exit;
        }
        flow->flow_state.r_next_psn = mask_psn(psn + 1);
        hfi1_schedule_tid_send(qp);
 exit:
        priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
        return;
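
In the WRITE DATA branch above, the number of bytes to skip before copying is
the payload already placed for this request: whole completed segments plus
the full MTUs received so far within the current flow,

    len = req->comp_seg * req->seg_len +
          delta_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) * pmtu;

Unlike the READ RESP path, which rebuilds its SGE state with restart_sge(),
this path starts from the e->rdma_sge saved when the TID RDMA WRITE REQ was
first received and advances it with rvt_skip_sge() before rvt_copy_sge().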
 
        struct tid_rdma_request *req;
        struct tid_rdma_flow *flow;
        u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
-       bool is_fecn;
        unsigned long flags;
        u16 fidx;
 
        trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
-       is_fecn = process_ecn(qp, packet);
+       process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
        req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
        struct tid_rdma_flow *flow;
        struct tid_flow_state *fs = &qpriv->flow_state;
        u32 psn, generation, idx, gen_next;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
 
        generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
        qpriv->s_flags |= RVT_S_ACK_PENDING;
        hfi1_schedule_tid_send(qp);
 bail:
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
        reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
        return mask_psn(reg);
 }
+
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+                            struct ib_other_headers *ohdr,
+                            struct rvt_qp *qp, u32 psn, int diff, bool fecn)
+{
+       unsigned long flags;
+
+       tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
+       if (fecn) {
+               spin_lock_irqsave(&qp->s_lock, flags);
+               qp->s_flags |= RVT_S_ECN;
+               spin_unlock_irqrestore(&qp->s_lock, flags);
+       }
+}
+
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+                                  struct hfi1_qp_priv *priv,
+                                  struct hfi1_ctxtdata *rcd,
+                                  struct tid_rdma_flow *flow,
+                                  bool fecn)
+{
+       /*
+        * If a start/middle packet is delivered here due to the RSM rule
+        * and FECN, update r_next_psn from the hardware flow table.
+        */
+       if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
+           !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
+               struct hfi1_devdata *dd = rcd->dd;
+
+               flow->flow_state.r_next_psn =
+                       read_r_next_psn(dd, rcd->ctxt, flow->idx);
+       }
+}
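
All of the receive handlers above now record congestion the same way instead
of calling hfi1_send_rc_ack() directly: note the FECN, set RVT_S_ECN under
qp->s_lock, and leave the actual notification to the send side. A minimal
sketch of that pattern; the helper name note_fecn() is an illustrative
assumption, not part of the patch:

    #include <linux/spinlock.h>
    #include <rdma/rdmavt_qp.h>

    /* Defer FECN handling: mark the QP so a later send carries the notification. */
    static void note_fecn(struct rvt_qp *qp, bool fecn)
    {
            unsigned long flags;

            if (!fecn)
                    return;
            spin_lock_irqsave(&qp->s_lock, flags);
            qp->s_flags |= RVT_S_ECN;
            spin_unlock_irqrestore(&qp->s_lock, flags);
    }

tid_rdma_rcv_err() above combines exactly this pattern with the existing
tid_rdma_rcv_error() call, while the handlers that already hold s_lock set
the flag inline before unlocking.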