From: Wei Lin Guay Date: Thu, 4 Aug 2016 09:02:44 +0000 (+0200) Subject: sif: sqflush: Handle duplicate completions in poll_cq X-Git-Tag: v4.1.12-92~80^2~8 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=9db36855abfdbd7bdc05e1e2aaa66f3ed32cda8a;p=users%2Fjedix%2Flinux-maple.git sif: sqflush: Handle duplicate completions in poll_cq Orabug: 23759723 During the QP transition from RTS -> ERR, the HW might generate duplicate FLUSHED-IN-ERR completions. The SIF driver inverts the sq_seq in a dedicated completion entry and sets the CQ_POLLING_IGNORED_SEQ bit in the cq_sw flags. Nevertheless, this bit is cleared once a duplicate FLUSHED-IN-ERR completion is detected in poll_cq. The above-mentioned method cannot handle a scenario where the HW generates multiple duplicate completions. Thus, this patch moves the detection of the duplicate completions to translate_wr_id. The SIF driver will then only return non-duplicate completions to the user. Signed-off-by: Wei Lin Guay Reviewed-by: Knut Omang --- diff --git a/drivers/infiniband/hw/sif/sif_cq.c b/drivers/infiniband/hw/sif/sif_cq.c index 6bc61e49ad3e..6dee19d520f5 100644 --- a/drivers/infiniband/hw/sif/sif_cq.c +++ b/drivers/infiniband/hw/sif/sif_cq.c @@ -50,12 +50,13 @@ static inline int translate_wr_id( return -EFAULT; } if (!unlikely(wh->used)) { - if (sq_seq_num == wh->sq_seq) - sif_log(sdev, SIF_INFO, + if (sq_seq_num == wh->sq_seq) { + sif_log(sdev, SIF_WCE, "dupl cqe 0x%x for cq %d: got sq_seq 0x%x, last exp.0x%x, sts %d opc 0x%x", cqe->seq_num, cq->index, sq_seq_num, wh->sq_seq, cqe->status, cqe->opcode); - else + return -EIO; + } else sif_log(sdev, SIF_INFO, "unexp. 
cqe 0x%x for cq %d: got sq_seq 0x%x, last exp.0x%x, sts %d opc 0x%x", cqe->seq_num, cq->index, sq_seq_num, wh->sq_seq, @@ -63,10 +64,12 @@ static inline int translate_wr_id( return -EFAULT; } if (unlikely(wh->sq_seq != sq_seq_num)) { - sif_log(sdev, SIF_INFO, + bool duplicate_comp_wrap_case = (wh->sq_seq - sq_seq_num == sq->entries); + int log_level = duplicate_comp_wrap_case ? SIF_WCE : SIF_INFO; + sif_log(sdev, log_level, "wrong cqe 0x%x for cq %d: got sq_seq 0x%x, expected 0x%x, sts %d opc 0x%x", cqe->seq_num, cq->index, sq_seq_num, wh->sq_seq, cqe->status, cqe->opcode); - return -EFAULT; + return duplicate_comp_wrap_case ? -EIO : -EFAULT; } *wr_id = wh->wr_id; wh->used = false; @@ -401,7 +404,7 @@ static int handle_send_wc(struct sif_dev *sdev, struct sif_cq *cq, wc->wr_id = cqe->wc_id.rq_id; /* No more work, when QP is gone */ - return 0; + return cqe->status == PSIF_WC_STATUS_DUPL_COMPL_ERR ? -EIO : 0; } ret = translate_wr_id(&wc->wr_id, sdev, cq, sq, cqe, sq_seq_num, cqe->qp); @@ -790,7 +793,9 @@ int sif_fixup_cqes(struct sif_cq *cq, struct sif_sq *sq, struct sif_qp *qp) /* If a send completion, handle the wr_id */ ret = translate_wr_id(&wr_id_host_order, sdev, cq, sq, &lcqe, lcqe.wc_id.sq_id.sq_seq_num, lcqe.qp); - if (ret) + if (ret == -EIO) + set_psif_cq_entry__status(cqe, PSIF_WC_STATUS_DUPL_COMPL_ERR); + else if (ret) goto err; set_psif_cq_entry__wc_id(cqe, wr_id_host_order); @@ -845,12 +850,6 @@ int sif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) polled_value = get_psif_cq_entry__seq_num(cqe); - if ((test_bit(CQ_POLLING_IGNORED_SEQ, &cq_sw->flags)) && ~seqno == polled_value) { - seqno = ++cq_sw->next_seq; - clear_bit(CQ_POLLING_IGNORED_SEQ, &cq_sw->flags); - continue; - } - if (seqno == polled_value) npolled++; else @@ -858,7 +857,15 @@ int sif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) if (likely(wc)) { ret = handle_wc(sdev, cq, cqe, wc); - if (ret < 0) + if (unlikely(ret == -EIO)) { + /* -EIO indicates that this 
is the duplicate + * FLUSH-IN-ERR completion generated by the HW. + */ + seqno = ++cq_sw->next_seq; + npolled--; + ret = 0; + continue; + } else if (ret < 0) goto handle_failed; wc++; seqno = ++cq_sw->next_seq; diff --git a/drivers/infiniband/hw/sif/sif_defs.c b/drivers/infiniband/hw/sif/sif_defs.c index 22a402d63fec..f68a7a38753e 100644 --- a/drivers/infiniband/hw/sif/sif_defs.c +++ b/drivers/infiniband/hw/sif/sif_defs.c @@ -246,6 +246,11 @@ enum ib_wc_status sif2ib_wc_status(enum psif_wc_status status) return IB_WC_RESP_TIMEOUT_ERR; case PSIF_WC_STATUS_GENERAL_ERR: return IB_WC_GENERAL_ERR; + /* A software defined state to indicate duplicate completion + * generated by HW. + * + * case PSIF_WC_STATUS_DUPL_COMPL_ERR: + */ case PSIF_WC_STATUS_FIELD_MAX: return -1; } diff --git a/drivers/infiniband/hw/sif/sif_defs.h b/drivers/infiniband/hw/sif/sif_defs.h index f0a06db3fe8b..3c1975e823c2 100644 --- a/drivers/infiniband/hw/sif/sif_defs.h +++ b/drivers/infiniband/hw/sif/sif_defs.h @@ -36,6 +36,8 @@ struct xchar { }; #define GREATER_16(a, b) ((s16)((s16)(a) - (s16)(b)) > 0) +#define LESS_OR_EQUAL_16(a, b) (!(GREATER_16((a), (b)))) +#define PSIF_WC_STATUS_DUPL_COMPL_ERR (PSIF_WC_STATUS_FIELD_MAX - 1) #define XFILE struct xchar diff --git a/drivers/infiniband/hw/sif/sif_int_user.h b/drivers/infiniband/hw/sif/sif_int_user.h index 1654268208d5..0fd6fe33d7b3 100644 --- a/drivers/infiniband/hw/sif/sif_int_user.h +++ b/drivers/infiniband/hw/sif/sif_int_user.h @@ -38,6 +38,7 @@ enum sq_sw_state { FLUSH_SQ_IN_PROGRESS = 0, FLUSH_SQ_IN_FLIGHT = 1, + FLUSH_SQ_FIRST_TIME = 2, }; struct sif_sq_sw { @@ -47,6 +48,7 @@ struct sif_sq_sw { __u16 head_seq; /* Last sq seq.number seen in a compl (req. 
cq->lock) */ __u16 trusted_seq; /* Last next_seq that was either generate or exist in the cq */ __u8 tsl; /* Valid after transition to RTR */ + bool need_flush; /* user level flag to indicate SQ needs flushing*/ unsigned long flags; /* Flags, using unsigned long due to test_set/test_and_set_bit */ }; @@ -69,8 +71,6 @@ struct sif_rq_sw { enum cq_sw_state { CQ_POLLING_NOT_ALLOWED = 0, - CQ_POLLING_IGNORED_SEQ = 1, - FLUSH_SQ_FIRST_TIME = 2, }; struct sif_cq_sw { diff --git a/drivers/infiniband/hw/sif/sif_qp.c b/drivers/infiniband/hw/sif/sif_qp.c index 835108ba0e7d..890b04099195 100644 --- a/drivers/infiniband/hw/sif/sif_qp.c +++ b/drivers/infiniband/hw/sif/sif_qp.c @@ -863,6 +863,12 @@ int modify_qp_hw_wa_qp_retry(struct sif_dev *sdev, struct sif_qp *qp, } /* Restore QP SW state to ERROR */ qp->last_set_state = qp->tracked_state = IB_QPS_ERR; + if (qp->flags & SIF_QPF_USER_MODE) { + struct sif_sq *sq = get_sq(sdev, qp); + struct sif_sq_sw *sq_sw = sq ? get_sif_sq_sw(sdev, qp->qp_idx) : NULL; + if (sq_sw) + sq_sw->need_flush = true; + } } qp->flags &= ~SIF_QPF_HW_OWNED; @@ -974,6 +980,8 @@ int modify_qp(struct sif_dev *sdev, struct sif_qp *qp, } return ret; case FLUSH_SQ: + sif_log(sdev, SIF_WCE_V, "user trying to flush SQ %d", qp->qp_idx); + if (unlikely(!sq)) { ret = -EINVAL; sif_log(sdev, SIF_INFO, diff --git a/drivers/infiniband/hw/sif/sif_r3.c b/drivers/infiniband/hw/sif/sif_r3.c index 133f952a31d7..20e224da5693 100644 --- a/drivers/infiniband/hw/sif/sif_r3.c +++ b/drivers/infiniband/hw/sif/sif_r3.c @@ -583,7 +583,7 @@ int post_process_wa4074(struct sif_dev *sdev, struct sif_qp *qp) } copy_conv_to_sw(&lqqp, &qp->d, sizeof(lqqp)); - last_seq = sq_sw->last_seq; + last_seq = READ_ONCE(sq_sw->last_seq); set_bit(CQ_POLLING_NOT_ALLOWED, &cq_sw->flags); @@ -687,13 +687,13 @@ flush_sq_again: * completed before generating a sq_flush_cqe. 
*/ spin_lock_irqsave(&sq->lock, flags); - last_gen_seq = sq_sw->last_seq; + last_gen_seq = READ_ONCE(sq_sw->last_seq); spin_unlock_irqrestore(&sq->lock, flags); sif_log(sdev, SIF_WCE_V, "generate completion from %x to %x", last_seq, last_gen_seq); - for (; (!GREATER_16(last_seq, last_gen_seq)); ++last_seq) { + for (; (LESS_OR_EQUAL_16(last_seq, last_gen_seq)); ++last_seq) { if (unlikely(cq->entries < ((u32) (last_seq - sq_sw->head_seq)))) { sif_log(sdev, SIF_INFO, "cq (%d) is full! (len = %d, used = %d)", cq->index, cq->entries, last_seq - sq_sw->head_seq - 1 ); @@ -748,6 +748,12 @@ flush_sq_again: sq_sw->trusted_seq = last_seq; check_in_flight_and_return: + last_gen_seq = READ_ONCE(sq_sw->last_seq); + + + if (LESS_OR_EQUAL_16(last_seq, last_gen_seq)) + goto flush_sq_again; + if (test_and_clear_bit(FLUSH_SQ_IN_FLIGHT, &sq_sw->flags)) goto flush_sq_again; @@ -799,21 +805,15 @@ static u16 walk_and_update_cqes(struct sif_dev *sdev, struct sif_qp *qp, u16 hea last_seq = lcqe.wc_id.sq_id.sq_seq_num; sif_log(sdev, SIF_WCE_V, "last_seq %x updated_seq %x lcqe.seq_num %x", last_seq, updated_seq, lcqe.seq_num); - if (last_seq != updated_seq) { + if (last_seq != updated_seq) lcqe.wc_id.sq_id.sq_seq_num = updated_seq; - if (GREATER_16(updated_seq, end)) { - /* A scenario might be that an additional CQE - * must be generated to flush all the HW - * generated completions. Thus, ignore the polling of the cqe. 
- */ - lcqe.seq_num = ~lcqe.seq_num; - sif_log(sdev, SIF_WCE_V, "corrupt: lcqe.seq_num %x", - lcqe.seq_num); - set_bit(CQ_POLLING_IGNORED_SEQ, &cq_sw->flags); - } - copy_conv_to_hw(cqe, &lcqe, sizeof(lcqe)); - } - if (!GREATER_16(updated_seq, end)) + + if (GREATER_16(updated_seq, end)) + lcqe.wc_id.sq_id.sq_seq_num = end; + + copy_conv_to_hw(cqe, &lcqe, sizeof(lcqe)); + + if (LESS_OR_EQUAL_16(updated_seq, end)) updated_seq++; ++n; } diff --git a/drivers/infiniband/hw/sif/sif_user.h b/drivers/infiniband/hw/sif/sif_user.h index b46211bc608b..f355f5e3f634 100644 --- a/drivers/infiniband/hw/sif/sif_user.h +++ b/drivers/infiniband/hw/sif/sif_user.h @@ -21,7 +21,7 @@ * */ #define SIF_UVERBS_ABI_MAJOR_VERSION 3 -#define SIF_UVERBS_ABI_MINOR_VERSION 4 +#define SIF_UVERBS_ABI_MINOR_VERSION 5 #define SIF_UVERBS_VERSION(x, y) ((x) << 8 | (y))