From: Triviño Date: Thu, 30 Jun 2016 14:30:07 +0000 (+0200) Subject: sif: sif_r3: implemented WA#4074 stats counters X-Git-Tag: v4.1.12-92~96^2~13 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=7df48f2a9e4da85ac1d5367dae71cef51056ca24;p=users%2Fjedix%2Flinux-maple.git sif: sif_r3: implemented WA#4074 stats counters This commit adds both wa4074 and wa4059 statistics to help identify potential issues when the workarounds are applied. The wa4074 stats implementation is based on: a) pre_wa4074_cnt == post_wa4074_cnt. This means the w/a is triggered only from modify_qp_hw. b) pre_wa4074_cnt != post_wa4074_cnt. This means post_wa4074 is triggered from other scenarios too. c) post_wa4074_err_cnt != 0. This means that post_wa4074 failed. d) wrs_csum_corr_wa4074_cnt indicates the number of WRs that were csum corrupted. e) rcv_snd_gen_wa4074_cnt shows the number of recv and send cqe's that were generated. The wa4059 stats indicate the number of keep-alive events that have been sent. This commit also improves the wa3714 stats implementation by using atomic64 counters and enumeration values, and makes other minor changes such as cleanups and typo fixes in comments. 
Orabug: 23760170 Signed-off-by: Triviño Reviewed-by: Håkon Bugge --- diff --git a/drivers/infiniband/hw/sif/sif_epsc.c b/drivers/infiniband/hw/sif/sif_epsc.c index 01f79c87114e..95a213dbd711 100644 --- a/drivers/infiniband/hw/sif/sif_epsc.c +++ b/drivers/infiniband/hw/sif/sif_epsc.c @@ -1511,7 +1511,11 @@ static int __sif_eps_send_keep_alive(struct sif_dev *sdev, enum psif_mbox_type e int ret = 0; if (sif_eps_keep_alive_timeout(es) || force) { - sif_log(sdev, SIF_INFO, "Sending keep-alive (force=%i)", force); + sif_log(sdev, SIF_INTR, "Sending keep-alive (force=%i)", force); + if (force) + atomic64_inc(&sdev->wa_stats.wa4059[SND_INTR_KEEP_ALIVE_WA4059_CNT]); + else + atomic64_inc(&sdev->wa_stats.wa4059[SND_THREAD_KEEP_ALIVE_WA4059_CNT]); /* prevent infinite loop with __sif_post_eps_wr */ es->last_req_posted = jiffies; diff --git a/drivers/infiniband/hw/sif/sif_r3.c b/drivers/infiniband/hw/sif/sif_r3.c index 2446fbc56916..f60e6082eda8 100644 --- a/drivers/infiniband/hw/sif/sif_r3.c +++ b/drivers/infiniband/hw/sif/sif_r3.c @@ -292,7 +292,7 @@ static int sif_hw_allocate_flush_qp(struct sif_dev *sdev, u8 flush_idx) } sdev->flush_qp[flush_idx] = qp->qp_idx; - sif_log(sdev, SIF_INFO, "Allocated flush-retry qp port %d, index %d", port, sdev->flush_qp[flush_idx]); + sif_log(sdev, SIF_QP, "Allocated flush-retry qp port %d, index %d", port, sdev->flush_qp[flush_idx]); return ret; @@ -455,18 +455,18 @@ int reset_qp_flush_retry(struct sif_dev *sdev, u8 flush_idx) } } - sdev->wa_stats.wa3714[0]++; + atomic64_inc(&sdev->wa_stats.wa3714[FLUSH_RETRY_WA3714_CNT]); mutex_unlock(&sdev->flush_lock[flush_idx]); return ret; fail: - sdev->wa_stats.wa3714[1]++; + atomic64_inc(&sdev->wa_stats.wa3714[FLUSH_RETRY_WA3714_ERR_CNT]); sif_hw_free_flush_qp(sdev, flush_idx); sif_hw_allocate_flush_qp(sdev, flush_idx); mutex_unlock(&sdev->flush_lock[flush_idx]); return ret; err_flush_qp: - sdev->wa_stats.wa3714[1]++; + atomic64_inc(&sdev->wa_stats.wa3714[FLUSH_RETRY_WA3714_ERR_CNT]); 
mutex_unlock(&sdev->flush_lock[flush_idx]); return ret; } @@ -519,10 +519,13 @@ int pre_process_wa4074(struct sif_dev *sdev, struct sif_qp *qp) set_psif_wr__checksum(&sqe->wr, ~get_psif_wr__checksum(&sqe->wr)); len--; } + atomic64_add(len, &sdev->wa_stats.wa4074[WRS_CSUM_CORR_WA4074_CNT]); spin_unlock_irqrestore(&sq->lock, flags); if (cq) set_bit(CQ_POLLING_NOT_ALLOWED, &cq_sw->flags); + atomic64_inc(&sdev->wa_stats.wa4074[PRE_WA4074_CNT]); + return 0; } @@ -694,6 +697,8 @@ flush_sq_again: "sq %d, last_seq %x, sif_gen_sq_flush_cqe returned %d", sq->index, last_seq, ret); + atomic64_inc(&sdev->wa_stats.wa4074[RCV_SND_GEN_WA4074_CNT]); + if (ret == -EAGAIN) { ret = gen_pqp_cqe(&lcqe); if (ret < 0) @@ -738,6 +743,12 @@ err_post_wa4074: clear_bit(CQ_POLLING_NOT_ALLOWED, &cq_sw->flags); clear_bit(FLUSH_SQ_IN_FLIGHT, &sq_sw->flags); clear_bit(FLUSH_SQ_IN_PROGRESS, &sq_sw->flags); + + if (ret < 0) + atomic64_inc(&sdev->wa_stats.wa4074[POST_WA4074_ERR_CNT]); + else + atomic64_inc(&sdev->wa_stats.wa4074[POST_WA4074_CNT]); + return ret = ret > 0 ? 0 : ret; } @@ -781,7 +792,7 @@ static u16 walk_and_update_cqes(struct sif_dev *sdev, struct sif_qp *qp, u16 hea if (GREATER_16(updated_seq, end)) { /* A scenario might be that an additional CQE * must be generated to flush all the HW - * generated completions. Thus, igore the polling the cqe. + * generated completions. Thus, ignore the polling of the cqe. 
*/ lcqe.seq_num = ~lcqe.seq_num; sif_log(sdev, SIF_WCE_V, "corrupt: lcqe.seq_num %x", @@ -799,6 +810,7 @@ static u16 walk_and_update_cqes(struct sif_dev *sdev, struct sif_qp *qp, u16 hea sq->index, cq->index, n); spin_unlock_irqrestore(&cq->lock, flags); + return updated_seq; } @@ -857,12 +869,25 @@ static u16 cq_walk_wa4074(struct sif_dev *sdev, struct sif_qp *qp, bool *last_se void sif_dfs_print_wa_stats(struct sif_dev *sdev, char *buf) { - /* Header */ - sprintf(buf, "#%7s %10s %10s %20s\n", "WA", "ok", "err", "desc"); - /* Content */ - sprintf(buf + strlen(buf), "#%8s %9llu %10llu %20s\n", - "WA3714", - sdev->wa_stats.wa3714[0], - sdev->wa_stats.wa3714[1], - "Destroying QPs with a retry in progress"); + /* Header WA#3714 */ + sprintf(buf, "\nWA3714: Destroying QPs with a retry in progress\n"); + /* Content WA#3714 */ + sprintf(buf + strlen(buf), "%s: %lu\n%s: %lu\n", + "ok", atomic64_read(&sdev->wa_stats.wa3714[FLUSH_RETRY_WA3714_CNT]), + "err", atomic64_read(&sdev->wa_stats.wa3714[FLUSH_RETRY_WA3714_ERR_CNT])); + /* Header WA#4074 */ + sprintf(buf + strlen(buf), "\nWA4074: Duplicate flushed in error completions\n"); + /* Content WA#4074 */ + sprintf(buf + strlen(buf), "%s: %lu\n%s: %lu\n%s: %lu\n%s: %lu\n%s: %lu\n", + "pre-ok", atomic64_read(&sdev->wa_stats.wa4074[PRE_WA4074_CNT]), + "post-ok", atomic64_read(&sdev->wa_stats.wa4074[POST_WA4074_CNT]), + "post-err", atomic64_read(&sdev->wa_stats.wa4074[POST_WA4074_ERR_CNT]), + "wr-csum-corr", atomic64_read(&sdev->wa_stats.wa4074[WRS_CSUM_CORR_WA4074_CNT]), + "rcv-snd-gen", atomic64_read(&sdev->wa_stats.wa4074[RCV_SND_GEN_WA4074_CNT])); + /* Header WA#4059 */ + sprintf(buf + strlen(buf), "\nWA4059: Mailbox writes from host to EPS sometimes get misplaced\n"); + /* Content WA#4059 */ + sprintf(buf + strlen(buf), "%s: %lu\n%s: %lu\n", + "keep-alive-int", atomic64_read(&sdev->wa_stats.wa4059[SND_INTR_KEEP_ALIVE_WA4059_CNT]), + "keep-alive-thread", 
atomic64_read(&sdev->wa_stats.wa4059[SND_THREAD_KEEP_ALIVE_WA4059_CNT])); } diff --git a/drivers/infiniband/hw/sif/sif_r3.h b/drivers/infiniband/hw/sif/sif_r3.h index b6cf44c8f81b..bb694b40f5d1 100644 --- a/drivers/infiniband/hw/sif/sif_r3.h +++ b/drivers/infiniband/hw/sif/sif_r3.h @@ -14,9 +14,34 @@ #ifndef _SIF_R3_H #define _SIF_R3_H +enum wa4059_stats_counter { + SND_INTR_KEEP_ALIVE_WA4059_CNT = 0, + SND_THREAD_KEEP_ALIVE_WA4059_CNT = 1, + WA4059_CNT_MAX = 2, +}; + +enum wa3714_stats_counter { + FLUSH_RETRY_WA3714_CNT = 0, + FLUSH_RETRY_WA3714_ERR_CNT = 1, + WA3714_CNT_MAX = 2, +}; + +enum wa4074_stats_counter { + PRE_WA4074_CNT = 0, + POST_WA4074_CNT = 1, + POST_WA4074_ERR_CNT = 2, + WRS_CSUM_CORR_WA4074_CNT = 3, + RCV_SND_GEN_WA4074_CNT = 4, + WA4074_CNT_MAX = 5, +}; + struct sif_wa_stats { /* Destroying QPs with a retry in progress */ - u64 wa3714[2]; + atomic64_t wa3714[WA3714_CNT_MAX]; + /* Duplicate flushed in error completions */ + atomic64_t wa4074[WA4074_CNT_MAX]; + /* Mailbox writes from host to EPS sometimes get misplaced */ + atomic64_t wa4059[WA4059_CNT_MAX]; }; void sif_r3_pre_init(struct sif_dev *sdev); @@ -30,7 +55,6 @@ void sif_r3_recreate_flush_qp(struct sif_dev *sdev, u8 flush_idx); /* WA for #4074 */ int pre_process_wa4074(struct sif_dev *sdev, struct sif_qp *qp); int post_process_wa4074(struct sif_dev *sdev, struct sif_qp *qp); -int sq_flush_wa4074(struct sif_dev *sdev, struct sif_qp *qp); /* Single file for the wa statistics */ void sif_dfs_print_wa_stats(struct sif_dev *sdev, char *buf);