From c4cc97a581e18156308f375d21d632f686601b43 Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Wed, 26 Oct 2016 11:44:00 +0200 Subject: [PATCH] sif: pqp: Implement handling of PQPs in error. The assumption is that any such situation that can arise in production is due to an application that causes its CQ to go to error and where the PQP subsequently tries to post a CQ operation that affects the CQ that is in error. In these cases, the PQP itself goes to error and an event is generated. This commit refactors the modify_qp logic slightly and implements a modification cycle to bring a privileged QP back up again. It also adds a new pqp debugfs file and some statistics to help monitor the new PQP-specific state. The resurrect operation is queued on the sif workqueue by the new handle_pqp_event function, which is now properly wired up to accept all PQP events. When a PQP is detected as being in error, its last_set_state is updated and the write_only flag is set, which causes new send reqs not to touch any collect buffer as part of the operation. This flag was introduced to allow the resurrect to set the PQP in RTS again while still not triggering any sends. This way the implementation allows clients to continue to post requests to the PQP while it is in error or in transition back to RTS: these requests are simply accepted into the PQP send queue without any writes to the collect buffer. When in the INIT state, the resurrect worker updates the SQ pointers to skip the request that triggered the PQP error. Once back in RTS, the resurrect worker can take the single SQ lock which serializes posts, check the size of the send queue and, if it is > 0, trigger the send queue scheduler to start processing these requests. Once the QP is in SQS mode, or just idle if the queue was empty, it is safe for ordering purposes to let normal posting with collect buffer writes commence. Orabug: 24715634 Signed-off-by: Knut Omang Reviewed-by: Francisco Trivino-Garcia --- drivers/infiniband/hw/sif/psif_hw_macro.h | 9 + drivers/infiniband/hw/sif/psif_hw_setget.h | 19 ++ drivers/infiniband/hw/sif/sif_base.c | 5 +- drivers/infiniband/hw/sif/sif_cq.c | 7 +- drivers/infiniband/hw/sif/sif_cq.h | 2 +- drivers/infiniband/hw/sif/sif_debug.c | 31 +- drivers/infiniband/hw/sif/sif_eq.c | 35 +- drivers/infiniband/hw/sif/sif_eq.h | 4 + drivers/infiniband/hw/sif/sif_pqp.c | 370 ++++++++++++++++++--- drivers/infiniband/hw/sif/sif_pqp.h | 26 ++ drivers/infiniband/hw/sif/sif_qp.c | 151 +++++---- drivers/infiniband/hw/sif/sif_qp.h | 14 + drivers/infiniband/hw/sif/sif_sq.c | 2 +- 13 files changed, 539 insertions(+), 136 deletions(-) diff --git a/drivers/infiniband/hw/sif/psif_hw_macro.h b/drivers/infiniband/hw/sif/psif_hw_macro.h index ff529315deff..43b81b42f153 100644 --- a/drivers/infiniband/hw/sif/psif_hw_macro.h +++ b/drivers/infiniband/hw/sif/psif_hw_macro.h @@ -226,6 +226,15 @@ #define PSIF_QP_CORE_3_MAGIC_BITS 32 #define PSIF_QP_CORE_3_MAGIC_MASK 0x00000000ffffffffull +/* + * Completion queue sequence number. This is used for privileged requests, + * where sequence number for one CQ is added to a different completion. + */ +#define PSIF_QP_CORE_CQ_SEQ_OFFSET 3 +#define PSIF_QP_CORE_3_CQ_SEQ_SHIFT 32 +#define PSIF_QP_CORE_3_CQ_SEQ_BITS 32 +#define PSIF_QP_CORE_3_CQ_SEQ_MASK 0xffffffff00000000ull + /* * Q-Key received in incoming IB packet is checked towards this Q-Key. Q-Key * used on transmit if top bit of Q-Key in WR is set.
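A note on the generated-header convention used in the hunk above: each descriptor field is described by a 64-bit word OFFSET plus a SHIFT/BITS/MASK triple within that big-endian word. A minimal sketch of how the new cq_seq field could be read through the generic mask-and-shift route follows; the function name is illustrative only and is not part of the patch:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Sketch: read qp_core word 3 (stored big-endian) and extract bits 63:32,
 * i.e. the cq_seq field, using the macros defined above.
 */
static inline u32 example_get_qp_core_cq_seq(const volatile __be64 *qp_core)
{
	u64 w = be64_to_cpu(qp_core[PSIF_QP_CORE_CQ_SEQ_OFFSET]);

	return (u32)((w & PSIF_QP_CORE_3_CQ_SEQ_MASK) >>
		     PSIF_QP_CORE_3_CQ_SEQ_SHIFT);
}

The setget accessors added in the next file reach the same result more cheaply by addressing the upper 32-bit half of word 3 directly, which is equivalent for a 32-bit field at shift 32 in a big-endian 64-bit word.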
diff --git a/drivers/infiniband/hw/sif/psif_hw_setget.h b/drivers/infiniband/hw/sif/psif_hw_setget.h index 88beafc6eaf7..3a3dbe233155 100644 --- a/drivers/infiniband/hw/sif/psif_hw_setget.h +++ b/drivers/infiniband/hw/sif/psif_hw_setget.h @@ -511,6 +511,25 @@ static inline u32 get_psif_qp_core__magic(volatile struct psif_qp_core *ptr) return((u32)be32_to_cpu(*pte)); } +/* + * Completion queue sequence number. This is used for privileged requests, + * where sequence number for one CQ is added to a different completion. + */ +static inline void set_psif_qp_core__cq_seq( + volatile struct psif_qp_core *ptr, + u32 data) +{ + /* group=3 shift=32 bits=32 */ + volatile u32 * const pte = (u32 *)((u8 *)((__be64 *)ptr + 3) + 0); + *pte = cpu_to_be32(data); +} +static inline u32 get_psif_qp_core__cq_seq(volatile struct psif_qp_core *ptr) +{ + /* group=3 shift=32 bits=32 */ + volatile u32 * const pte = (u32 *)((u8 *)((__be64 *)ptr + 3) + 0); + return((u32)be32_to_cpu(*pte)); +} + /* * Q-Key received in incoming IB packet is checked towards this Q-Key. Q-Key * used on transmit if top bit of Q-Key in WR is set. diff --git a/drivers/infiniband/hw/sif/sif_base.c b/drivers/infiniband/hw/sif/sif_base.c index b912d82c4fe8..18b5f4c8201d 100644 --- a/drivers/infiniband/hw/sif/sif_base.c +++ b/drivers/infiniband/hw/sif/sif_base.c @@ -782,8 +782,9 @@ int sif_write_invalidate(struct sif_pqp *pqp, enum sif_tab_type type, int index, if (inv_op == -1) return -ENODEV; - sif_log(sdev, SIF_PQP, "sending inv.req. type %s (0x%x) target queue index %d", - sif_table_name(type), inv_op, index); + if (likely(p_mode != PM_WRITE)) /* Only log actual operations to the PQP */ + sif_log(sdev, SIF_PQP, "sending inv.req. type %s (0x%x) target queue index %d", + sif_table_name(type), inv_op, index); memset(&wr, 0, sizeof(struct psif_wr)); /* For this table type we need to send an explicit diff --git a/drivers/infiniband/hw/sif/sif_cq.c b/drivers/infiniband/hw/sif/sif_cq.c index 46b1ee9da9ae..19f796cab38e 100644 --- a/drivers/infiniband/hw/sif/sif_cq.c +++ b/drivers/infiniband/hw/sif/sif_cq.c @@ -933,8 +933,11 @@ int sif_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) if (flags & IB_CQ_SOLICITED) wr.se = 1; - /* Do not rearm a CQ if it is not valid or is in error */ - if (unlikely(!get_psif_cq_hw__valid(&cq->d) || READ_ONCE(cq->in_error))) { + /* Do not rearm a CQ if it is not valid or is in error - except for small queues + * (detects no_x_cqe case..) 
+ */ + if (unlikely(!get_psif_cq_hw__valid(&cq->d) || + (READ_ONCE(cq->in_error) && cq->entries > SIF_SW_RESERVED_DUL_CQE))) { sif_log(sdev, SIF_NCQ, "cq %d, flags 0x%x (ignored - CQ in error)", cq->index, flags); return 0; } diff --git a/drivers/infiniband/hw/sif/sif_cq.h b/drivers/infiniband/hw/sif/sif_cq.h index d69113280ac8..73f04902d133 100644 --- a/drivers/infiniband/hw/sif/sif_cq.h +++ b/drivers/infiniband/hw/sif/sif_cq.h @@ -96,6 +96,6 @@ int sif_release_cq(struct sif_dev *sdev, int index); void sif_dfs_print_cq_hw(struct seq_file *s, struct sif_dev *sdev, loff_t pos); -extern int sif_fixup_cqes(struct sif_cq *cq, struct sif_sq *sq, struct sif_qp *qp); +int sif_fixup_cqes(struct sif_cq *cq, struct sif_sq *sq, struct sif_qp *qp); #endif diff --git a/drivers/infiniband/hw/sif/sif_debug.c b/drivers/infiniband/hw/sif/sif_debug.c index 67db215c0696..37c1d9f7f024 100644 --- a/drivers/infiniband/hw/sif/sif_debug.c +++ b/drivers/infiniband/hw/sif/sif_debug.c @@ -27,6 +27,7 @@ struct sif_dfs_ref { struct sif_dev *sdev; bool is_eq; + bool no_table_info; /* If set, no table extent/size info printed */ enum sif_tab_type type; sif_dfs_printer dfs_print; }; @@ -43,6 +44,7 @@ struct sif_dfs { struct sif_dfs_ref sd_irq_ch; struct sif_dfs_ref sd_ipoffload; struct sif_dfs_ref sd_wa_stats; + struct sif_dfs_ref sd_pqp; }; /* A simple iterator */ @@ -106,7 +108,7 @@ static int sif_seq_show(struct seq_file *s, void *v) sif_log(sd->sdev, SIF_DFS, "%lld", it->pos); if (!it->pos || !it->started) { - if (!sd->is_eq) + if (!sd->no_table_info) seq_printf(s, "# %s state (entries %d, extent %d):\n", sif_table_name(sd->type), sd->sdev->ba[sd->type].entry_cnt, @@ -533,45 +535,60 @@ int sif_dfs_register(struct sif_dev *sdev) /* Single file for the event queues */ sdr = &sdev->dfs->sd_eq; sdr->sdev = sdev; - sdr->is_eq = true; + sdr->is_eq = sdr->no_table_info = true; sdr->dfs_print = sif_dfs_print_eq; df = debugfs_create_file("eq", S_IRUGO, sdev->dfs->root, (void *)sdr, &table_fops); if (!df) { sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for event queues"); - return -ENOMEM; + goto sif_dfs_reg_failed; } /* Single file for the ipoffload qp-statistics */ sdr = &sdev->dfs->sd_ipoffload; sdr->sdev = sdev; sdr->dfs_print = sif_dfs_print_ipoffload; sdr->type = qp; + sdr->no_table_info = true; df = debugfs_create_file("ipoffload", S_IRUGO, sdev->dfs->root, (void *)sdr, &table_fops); if (!df) { sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for ipoffload qp stat"); - return -ENOMEM; + goto sif_dfs_reg_failed; } /* Single file for the wa statistics */ sdr = &sdev->dfs->sd_wa_stats; sdr->sdev = sdev; + sdr->no_table_info = true; df = debugfs_create_file("wa_stats", S_IRUGO, sdev->dfs->root, (void *)sdr, &wa_fops); if (!df) { sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for wa stat"); - return -ENOMEM; + goto sif_dfs_reg_failed; } /* Single file for the int channel coalescing settings */ sdr = &sdev->dfs->sd_irq_ch; sdr->sdev = sdev; - sdr->is_eq = true; + sdr->is_eq = sdr->no_table_info = true; sdr->dfs_print = sif_dfs_print_irq_ch; df = debugfs_create_file("irq_ch", S_IWUSR | S_IRUGO, sdev->dfs->root, (void *)sdr, &table_fops_rw); if (!df) { sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for interrupt channels coalescing settings"); - return -ENOMEM; + goto sif_dfs_reg_failed; + } + + /* Single file for detailed pqp statistics */ + sdr = &sdev->dfs->sd_pqp; + sdr->sdev = sdev; + sdr->dfs_print = sif_dfs_print_pqp; + sdr->type = qp; + sdr->no_table_info = true; + df = 
debugfs_create_file("pqp", S_IRUGO, sdev->dfs->root, + (void *)sdr, &table_fops); + if (!df) { + sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for pqp stat"); + goto sif_dfs_reg_failed; } /* Create a directory for raw qp dump info */ diff --git a/drivers/infiniband/hw/sif/sif_eq.c b/drivers/infiniband/hw/sif/sif_eq.c index fec8534a34b7..4af42e225b88 100644 --- a/drivers/infiniband/hw/sif/sif_eq.c +++ b/drivers/infiniband/hw/sif/sif_eq.c @@ -879,11 +879,6 @@ static u32 handle_srq_event(struct sif_eq *eq, struct ib_event *ibe) } -#define dump_eq_entry(level, _s, _eqe) \ - sif_logs(level, printk("%s: ", _s); \ - write_struct_psif_eq_entry(NULL, 0, &leqe); printk("\n")) - - /* Called from interrupt threads */ static bool dispatch_eq(struct sif_eq *eq, int irq, unsigned int msecs) { @@ -975,10 +970,9 @@ static bool dispatch_eq(struct sif_eq *eq, int irq, unsigned int msecs) goto only_cne; } - /* silently drop the event if it is a PQP. */ - if (unlikely(sif_qp_elem->type == PSIF_QP_TRANSPORT_MANSP1) && - !leqe.event_status_srq_limit_reached) { - sif_log(eq->ba.sdev, SIF_INFO, "Received async event on PQP!"); + /* Capture PQP events which must be handled separately */ + if (unlikely(sif_qp_elem->type == PSIF_QP_TRANSPORT_MANSP1)) { + handle_pqp_event(eq, &leqe, sif_qp_elem); goto only_cne; } @@ -1025,16 +1019,25 @@ static bool dispatch_eq(struct sif_eq *eq, int irq, unsigned int msecs) /* Handle CQ errors early, as they may affect what we need to do on QPs */ if (leqe.event_status_cq_error) { struct sif_cq *cq = get_sif_cq(sdev, leqe.cqd_id); + struct sif_qp *qp = safe_get_sif_qp(sdev, leqe.qp); + + /* Capture PQP events which must be handled separately */ + if (unlikely(qp && qp->type == PSIF_QP_TRANSPORT_MANSP1)) { + handle_pqp_event(eq, &leqe, qp); + goto only_cne; + } ibe.event = IB_EVENT_CQ_ERR; ibe.element.cq = &get_sif_cq(sdev, leqe.cqd_id)->ibcq; WRITE_ONCE(cq->in_error, true); if (leqe.vendor_error == TSU_CBLD_CQ_FULL_ERR) - sif_log(sdev, SIF_INFO, "CQ overrun on CQ %d", cq->index); + sif_log(sdev, SIF_INFO, "CQ overrun on CQ %d (QP %d)", + cq->index, leqe.qp); else if (leqe.vendor_error == TSU_CBLD_CQ_ALREADY_IN_ERR) - sif_log(sdev, SIF_INTR, "CQ %d already in error event", cq->index); + sif_log(sdev, SIF_INTR, "CQ %d already in error event (QP %d)", + cq->index, leqe.qp); else - dump_eq_entry(SIF_INFO, "Got cq_error", &leqe); + dump_eq_entry(SIF_INFO, "Got other cq_error", &leqe); nevents += handle_event(eq, &ibe); } if (leqe.event_status_local_work_queue_catastrophic_error || @@ -1046,7 +1049,13 @@ static bool dispatch_eq(struct sif_eq *eq, int irq, unsigned int msecs) ibe.event = IB_EVENT_QP_FATAL; ibe.element.qp = ibqp; nevents += handle_event(eq, &ibe); - dump_eq_entry(SIF_INFO, "Got fatal QP error", &leqe); + + switch (leqe.vendor_error) { + case TSU_CBLD_CQ_FULL_ERR: + break; + default: + dump_eq_entry(SIF_INFO, "Got fatal QP error", &leqe); + } } if (leqe.event_status_srq_catastrophic_error) { ibe.event = IB_EVENT_SRQ_ERR; diff --git a/drivers/infiniband/hw/sif/sif_eq.h b/drivers/infiniband/hw/sif/sif_eq.h index 8d6b3c3541db..9e35a8aa2aaa 100644 --- a/drivers/infiniband/hw/sif/sif_eq.h +++ b/drivers/infiniband/hw/sif/sif_eq.h @@ -68,4 +68,8 @@ void sif_dfs_print_irq_ch(struct seq_file *s, struct sif_dev *sdev, loff_t pos); u32 sif_get_eq_channel(struct sif_dev *sdev, struct sif_cq *cq); bool sif_check_valid_eq_channel(struct sif_dev *sdev, int comp_vector); +#define dump_eq_entry(level, _s, _eqe) \ + sif_logs(level, printk("%s: ", _s); \ +
write_struct_psif_eq_entry(NULL, 0, _eqe); printk("\n")) + #endif diff --git a/drivers/infiniband/hw/sif/sif_pqp.c b/drivers/infiniband/hw/sif/sif_pqp.c index c71b8f4dd993..1c167a4bdec6 100644 --- a/drivers/infiniband/hw/sif/sif_pqp.c +++ b/drivers/infiniband/hw/sif/sif_pqp.c @@ -25,6 +25,10 @@ #include "sif_ibqp.h" #include "sif_checksum.h" #include "sif_defs.h" +#include + +static int _pqp_queue_resurrect(struct sif_pqp *pqp); +static void pqp_resurrect(struct work_struct *work); static inline struct sif_qp *__create_init_qp(struct sif_dev *sdev, struct sif_cq *cq) { @@ -57,6 +61,91 @@ static inline struct sif_qp *__create_init_qp(struct sif_dev *sdev, struct sif_c } +/* Sync HW pointers to the first req we did not receive any completion for. + * This must be done from INIT state, eg. before we attempt to move to + * a HW owned state again, but after the implicit logic for resetting values + * in the qp state: + */ +static inline void _resync_pointers(struct sif_qp *qp) +{ + unsigned long flags; + struct sif_dev *sdev = to_sdev(qp->ibqp.device); + struct sif_sq *sq = get_sq(sdev, qp); + struct sif_cq *cq = get_sif_cq(sdev, qp->send_cq_indx); + struct sif_cq_sw *cq_sw = get_sif_cq_sw(sdev, qp->send_cq_indx); + struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, qp->qp_idx); + struct sif_sq_hdl *wh; + + spin_lock_irqsave(&cq->lock, flags); + sq_sw->head_seq++; + spin_unlock_irqrestore(&cq->lock, flags); + + spin_lock_irqsave(&sq->lock, flags); + + /* Terminate the failed request in error, then replay any + * inflicted 3rd party reqs: + */ + wh = get_sq_hdl(sq, sq_sw->head_seq); + if (wh) { + struct sif_cqe *lcqe = (struct sif_cqe *)wh->wr_id; + + if (lcqe) { + sif_log(sdev, SIF_PQP, "Complete cqe %p for sq_seq %d for qp %d", + lcqe, wh->sq_seq, qp->qp_idx); + lcqe->cqe.status = PSIF_WC_STATUS_SUCCESS; + WRITE_ONCE(lcqe->written, true); + wh->wr_id = 0; + wh->used = false; + } + } + + /* This seqno got reset by the generic RESET->INIT code, set it back + * to the value we want here, which is +1 compared to sq_hw::last_seq + */ + set_psif_qp_core__sq_seq(&qp->d.state, sq_sw->head_seq + 1); + set_psif_qp_core__retry_sq_seq(&qp->d.state, sq_sw->head_seq + 1); + + /* We also need to set QP::cq_seq back to where we expect it: */ + set_psif_qp_core__cq_seq(&qp->d.state, cq_sw->next_seq); + + set_psif_sq_sw__tail_indx(&sq_sw->d, sq_sw->last_seq + 1); + set_psif_sq_hw__destroyed(&sq->d, 0); + set_psif_sq_hw__last_seq(&sq->d, sq_sw->head_seq + 1); + spin_unlock_irqrestore(&sq->lock, flags); + mb(); +} + +/* Take a priv.QP from RESET to RTS */ +static int _modify_reset_to_rts(struct sif_dev *sdev, struct sif_qp *qp, bool resurrect) +{ + int ret; + + /* The privileged QP only supports state in modify_qp */ + struct ib_qp_attr mod_attr = { + .qp_state = IB_QPS_INIT + }; + + /* Run the required qp modify sequence */ + ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL); + if (ret) + return ret; + + if (resurrect) + _resync_pointers(qp); + + mod_attr.qp_state = IB_QPS_RTR; + ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL); + if (ret) + return ret; + + mod_attr.qp_state = IB_QPS_RTS; + mod_attr.sq_psn = 0; + + /* Modify the QP to RTS, but don't reflect it to last_set_state yet.. 
*/ + ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL); + return ret; +} + static struct sif_pqp *_sif_create_pqp(struct sif_dev *sdev, size_t alloc_sz, int comp_vector) { @@ -66,11 +155,6 @@ static struct sif_pqp *_sif_create_pqp(struct sif_dev *sdev, size_t alloc_sz, in struct sif_sq *sq = NULL; int ret = 0; - /* The privileged QP only supports state in modify_qp */ - struct ib_qp_attr mod_attr = { - .qp_state = IB_QPS_INIT - }; - pqp = kzalloc(alloc_sz, GFP_KERNEL); if (!pqp) { sif_log(sdev, SIF_INFO, "Failed to allocate memory for priv.qp"); @@ -97,35 +181,24 @@ static struct sif_pqp *_sif_create_pqp(struct sif_dev *sdev, size_t alloc_sz, in } pqp->qp = qp; + qp->pqp = pqp; sq = get_sif_sq(sdev, qp->qp_idx); /* Reserve 1/2 or at least 1 entry for pqp requests with completion on the PQP */ pqp->lowpri_lim = sq->entries - min_t(int, sq->entries/2, 2); - /* Run the required qp modify sequence */ - ret = sif_modify_qp(&qp->ibqp, &mod_attr, - IB_QP_STATE, NULL); - if (ret) - goto qp_alloc_failed; - - mod_attr.qp_state = IB_QPS_RTR; - ret = sif_modify_qp(&qp->ibqp, &mod_attr, - IB_QP_STATE, NULL); + mutex_lock(&qp->lock); + ret = _modify_reset_to_rts(sdev, qp, false); if (ret) goto qp_alloc_failed; - - mod_attr.qp_state = IB_QPS_RTS; - mod_attr.sq_psn = 0; - ret = sif_modify_qp(&qp->ibqp, &mod_attr, - IB_QP_STATE, NULL); - if (ret) - goto qp_alloc_failed; - atomic64_set(&pqp->qp->arm_srq_holdoff_time, 0); + mutex_unlock(&qp->lock); sif_log(sdev, SIF_QP, "success"); return pqp; qp_alloc_failed: + mutex_unlock(&qp->lock); + /* Special destruction order, see below: */ destroy_cq(cq); if (sq) @@ -298,6 +371,10 @@ static int __pqp_process_cqe(struct sif_pqp *pqp, struct sif_cqe *first_err) lcqe = (struct sif_cqe *)wh->wr_id; if (lcqe) { + unsigned long elapsed = jiffies - lcqe->t_start; + + if (unlikely(elapsed > pqp->max_cmpl_time)) + pqp->max_cmpl_time = elapsed; wh->wr_id = 0; cqe_cnt++; mb(); @@ -337,6 +414,8 @@ cont_no_wh: } ql = sq_length(sq, sq_seq, sq_sw->last_seq); + if (unlikely(ql > pqp->max_qlen)) + pqp->max_qlen = ql; if (ql <= sq->mask) pqp_complete_nonfull(pqp); mb(); @@ -464,11 +543,6 @@ int sif_pqp_write_send(struct sif_pqp *pqp, struct psif_wr *wr, struct sif_cqe * struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, qp_idx); unsigned long timeout = sdev->min_resp_ticks * 4; u16 limit = pqp_req_gets_completion(pqp, wr, mode) ? sq->entries : pqp->lowpri_lim; - /* Per IBTA 11.4.1.1, error is only returned - * when the QP is in the RESET, INIT or RTR states. - */ - if (qp->last_set_state < IB_QPS_RTS) - return -EINVAL; /* The pqp is not ready */ pqp->timeout = jiffies + timeout; @@ -477,7 +551,9 @@ int sif_pqp_write_send(struct sif_pqp *pqp, struct psif_wr *wr, struct sif_cqe * wr->tsu_sl = qp->tsl; restart: - /* Make sure emptying the queue takes preference over filling it up: */ + /* Make sure emptying the queue takes preference over filling it up. 
+ * This will also make us exit if the pqp is not up and running: + */ if (mode != PM_WRITE) ret = pqp_process_cqe(pqp, NULL); if (ret > 0 || ret == -EBUSY) @@ -552,13 +628,13 @@ restart: sqe = get_sq_entry(sq, sq_seq); - sif_log(sdev, SIF_PQP, "pd %d cq_idx %d sq_idx %d sq.seqn %d op %s", - pd->idx, wr->cq_desc_vlan_pri_union.cqd_id, sq->index, sq_seq, - string_enum_psif_wr_type(wr->op)); - if (likely(mode != PM_WRITE)) { u64 csum; + sif_log(sdev, SIF_PQP, "pd %d cq_idx %d sq_idx %d sq.seqn %d op %s", + pd->idx, wr->cq_desc_vlan_pri_union.cqd_id, sq->index, sq_seq, + string_enum_psif_wr_type(wr->op)); + wr->sq_seq = sq_seq; /* Collect_length is always 0 for privileged wr's - they have no data */ @@ -574,7 +650,9 @@ restart: /* update send queue */ copy_conv_to_hw(sqe, wr, sizeof(struct psif_wr)); - if (likely(mode != PM_WRITE)) { + if (unlikely(READ_ONCE(pqp->write_only))) + wmb(); + else if (likely(mode != PM_WRITE)) { /* Flush writes before updating the sw pointer, * This is necessary to ensure that the sqs do not see * an incomplete entry: @@ -662,7 +740,8 @@ int poll_cq_waitfor(struct sif_cqe *lcqe) if (sif_feature(pcie_trigger)) force_pcie_link_retrain(sdev); sif_log(sdev, SIF_INFO, - "cq %d: poll for cqe %p timed out", cq->index, lcqe); + "cq %d: poll for cqe for sq %d, sq.seq %d timed out", + cq->index, pqp->qp->qp_idx, lcqe->sq_seq); atomic_inc(&cq->timeout_cnt); sif_logs(SIF_PQPT, @@ -683,13 +762,6 @@ int poll_cq_waitfor(struct sif_cqe *lcqe) else cpu_relax(); - if (unlikely(READ_ONCE(pqp->qp->last_set_state) != IB_QPS_RTS)) { - sif_log(sdev, SIF_INFO, - "cq %d: poll for cqe %p failed - pqp %d not operational\n", - cq->index, lcqe, pqp->qp->qp_idx); - ret = -EINTR; - break; - } if (sdev->min_resp_ticks != min_resp_ticks) { /* Give us a quick way out by changing min_resp_ticks */ pqp->timeout -= (min_resp_ticks - sdev->min_resp_ticks) * 4; @@ -1148,6 +1220,11 @@ int sif_pqp_init(struct sif_dev *sdev) int ret = 0; uint n_pqps = es->eqs.cnt - 2; + /* Use a sensible default value for when to query a PQP to see if it got + * set to error without an event - will be adjusted dynamically: + */ + pqi->pqp_query_ticks = max(1ULL, sdev->min_resp_ticks / 50); + pqi->pqp = sif_kmalloc(sdev, sizeof(struct sif_pqp *) * n_pqps, GFP_KERNEL | __GFP_ZERO); if (!pqi->pqp) return -ENOMEM; @@ -1206,3 +1283,214 @@ void sif_pqp_fini(struct sif_dev *sdev) kfree(pqi->pqp); pqi->pqp = NULL; } + +void handle_pqp_event(struct sif_eq *eq, struct psif_eq_entry *eqe, struct sif_qp *qp) +{ + struct sif_dev *sdev = eq->ba.sdev; + unsigned long flags; + struct sif_pqp *pqp = qp->pqp; + + if (eqe->event_status_cq_error) { + struct sif_cq *cq = get_sif_cq(sdev, eqe->cqd_id); + + if (eqe->vendor_error == TSU_CBLD_CQ_FULL_ERR) + sif_log(sdev, SIF_INFO, "PQP error due to CQ overrun on CQ %d", cq->index); + else if (eqe->vendor_error == TSU_CBLD_CQ_ALREADY_IN_ERR) + sif_log(sdev, SIF_INFO, "PQP error due to CQ %d already in error event", + cq->index); + else + dump_eq_entry(SIF_INFO, "Got unexpected cq_error", eqe); + + WRITE_ONCE(qp->last_set_state, IB_QPS_ERR); + WRITE_ONCE(pqp->write_only, true); + + spin_lock_irqsave(&pqp->cq->lock, flags); + _pqp_queue_resurrect(pqp); + spin_unlock_irqrestore(&pqp->cq->lock, flags); + } else { + sif_log(sdev, SIF_INFO, "Received unexpected async event on PQP %d: ", qp->qp_idx); + sif_logs(SIF_INFO, write_struct_psif_eq_entry(NULL, 0, eqe); printk("\n")); + } + atomic_inc(&pqp->ev_cnt); +} + +/* NB! 
Assumed called with cq lock held: + * Trigger a resurrect operation on the PQP - + * if necessary - eg. if the QP is in error + * and no resurrect operation is already queued: + */ +static int _pqp_queue_resurrect(struct sif_pqp *pqp) +{ + enum ib_qp_state s; + struct pqp_work *work; + struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device); + struct sif_qp *qp = pqp->qp; + + if (READ_ONCE(pqp->res_queued)) + return -EAGAIN; + s = qp->last_set_state; + if (s != IB_QPS_ERR && s != IB_QPS_SQE) + return 0; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + work->pqp = pqp; + INIT_WORK(&work->ws, pqp_resurrect); + WRITE_ONCE(pqp->res_queued, true); + + queue_work(sdev->wq, &work->ws); + return -EAGAIN; +} + +/* Simplified version of _reset_qp() for PQPs already in error + * as none of the other workarounds are needed for PQPs in error + * due to a CQ error. Assumes QP lock is held. + */ +static int _reset_qp_pqp(struct sif_pqp *pqp) +{ + struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device); + struct sif_qp *qp = pqp->qp; + struct sif_sq *sq = get_sq(sdev, qp); + int ret = 0; + struct ib_qp_attr mod_attr = { + .qp_state = IB_QPS_RESET + }; + + ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL); + if (ret) + goto out; + + /* Bring down order needed by rev2 according to bug #3480 */ + ret = poll_wait_for_qp_writeback(sdev, qp); + if (ret) + goto out; + + ret = sif_flush_sqs(sdev, sq); + if (ret) + goto out; + + /* Re-initialize the HW QP state as after create_qp() */ + init_hw_qp_state(sdev, qp); +out: + return ret; +} + + +/* Called with QP lock held. Check SQ state and + * trigger SQ mode if there are outstanding requests + * that failed in the previous "life" of this PQP. + * The QP is already in RTS but new reqs are held back + * by pqp->write_only being set: + */ +static int _retrigger_sq(struct sif_pqp *pqp) +{ + unsigned long flags; + struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device); + struct sif_sq *sq = get_sq(sdev, pqp->qp); + struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, pqp->qp->qp_idx); + int ql; + + spin_lock_irqsave(&sq->lock, flags); + ql = sq_length(sq, sq_sw->head_seq, sq_sw->last_seq); + + if (ql > 0) { + u32 sq_seq = sq_sw->head_seq + 1; + + sif_log(sdev, SIF_INFO, "Queue length %d, start at %u", ql, sq_seq); + + /* Outstanding requests, must trigger SQ mode: */ + sif_doorbell_from_sqe(pqp->qp, sq_seq, true); + + sif_logs(SIF_PQPT, + struct sif_sq *sq = get_sif_sq(sdev, pqp->qp->qp_idx); + struct psif_sq_entry *sqe = get_sq_entry(sq, sq_seq); + + write_struct_psif_sq_entry(NULL, 1, sqe)); + } + + /* Finally reset the write only flag: + * While the flag is set, post operations will just be written + * to the send queue without any collect buffer write: + */ + WRITE_ONCE(pqp->write_only, false); + spin_unlock_irqrestore(&sq->lock, flags); + return 0; +} + + +/* Process a pqp_work work element to resurrect a PQP that is in error. 
+ * This function assumes this is due to a CQ error on a user CQ: + */ +static void pqp_resurrect(struct work_struct *work) +{ + struct pqp_work *rw = container_of(work, struct pqp_work, ws); + struct sif_pqp *pqp = rw->pqp; + struct sif_qp *qp = pqp->qp; + struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device); + unsigned long flags; + int ret; + + sif_log(sdev, SIF_PQP, " PQP %d", qp->qp_idx); + + mutex_lock(&qp->lock); + ret = _reset_qp_pqp(pqp); + if (ret) + goto out; + + ret = _modify_reset_to_rts(sdev, qp, true); + if (ret) + goto out; + + /* Now retrigger any accidental 3rd party requests + * that failed in the previous "life" of the PQP: + */ + ret = _retrigger_sq(pqp); + if (ret) + goto out; + + spin_lock_irqsave(&pqp->cq->lock, flags); + /* Avoid losing an accidental update to the state */ + if (qp->last_set_state == IB_QPS_RTS) + WRITE_ONCE(pqp->res_queued, false); + spin_unlock_irqrestore(&pqp->cq->lock, flags); +out: + mutex_unlock(&qp->lock); + kfree(rw); + if (ret) + sif_log(sdev, SIF_INFO, "Fatal error: Failed to resurrect PQP %d", + qp->qp_idx); + + sif_log(sdev, SIF_PQP, " PQP %d done", qp->qp_idx); +} + +void sif_dfs_print_pqp(struct seq_file *s, struct sif_dev *sdev, loff_t pos) +{ + struct sif_qp *qp; + struct sif_pqp *pqp; + struct sif_pqp_info *pqi = &sdev->pqi; + + if (unlikely(pos < 0)) { + seq_printf(s, "# Global PQP config:\n# Number of normal PQPs: %u\n# Next PQP in RR: %u\n" + "#\n# Per PQP stats:\n# qmax = Longest PQP send queue observed (during poll)\n" + "# tmax = Max time in msec observed for a PQP req\n" + "# evc = #of PQP async events seen\n#\n" + "#Index State %6s qmax tmax evc\n", pqi->cnt, + atomic_read(&pqi->next), "CQ"); + return; + } + + qp = get_sif_qp(sdev, pos); + if (qp->type != PSIF_QP_TRANSPORT_MANSP1) + return; + pqp = qp->pqp; + + seq_printf(s, "%6llu %5u %6u %4u %6u %u", pos, qp->last_set_state, + get_psif_qp_core__send_cq_indx(&qp->d.state), + pqp->max_qlen, jiffies_to_msecs(pqp->max_cmpl_time), atomic_read(&pqp->ev_cnt)); + if (qp->flags & SIF_QPF_KI_STENCIL) + seq_puts(s, " [ki_stencil]\n"); + else + seq_puts(s, "\n"); +} diff --git a/drivers/infiniband/hw/sif/sif_pqp.h b/drivers/infiniband/hw/sif/sif_pqp.h index bac7c399b307..a4c07c4c9426 100644 --- a/drivers/infiniband/hw/sif/sif_pqp.h +++ b/drivers/infiniband/hw/sif/sif_pqp.h @@ -18,8 +18,10 @@ struct sif_qp; struct sif_cq; struct sif_rq; struct sif_sq; +struct sif_eq; struct completion; enum post_mode; +struct psif_eq_entry; /* Data structure used by PQP requesters to get the completion information, * and optionally block waiting for it to arrive: */ @@ -31,6 +33,7 @@ struct sif_cqe { bool need_complete; /* cmpl is initialized and a waiter is present */ bool written; /* Set to true when a completion has been copied here */ u16 sq_seq; /* set by post_send to allow us to reset ourselves */ + unsigned long t_start; /* jiffies when request was posted */ }; /* @@ -43,6 +46,7 @@ struct sif_cqe { .pqp = get_pqp(d_),\ .need_complete = false,\ .written = false,\ + .t_start = jiffies,\ } #define DECLARE_SIF_CQE_WITH_SAME_EQ(d_, c_, e_) \ @@ -51,6 +55,7 @@ struct sif_cqe { .pqp = get_pqp_same_eq(d_, e_), \ .need_complete = false,\ .written = false,\ + .t_start = jiffies,\ } @@ -60,6 +65,7 @@ struct sif_cqe { .pqp = get_pqp(d_),\ .need_complete = true,\ .written = false,\ + .t_start = jiffies,\ };\ init_completion(&c_.cmpl) @@ -69,8 +75,13 @@ struct sif_cqe { .pqp = get_next_pqp(d_),\ .need_complete = false,\ .written = false,\ + .t_start = jiffies,\ } +struct pqp_work { + struct work_struct ws; +
struct sif_pqp *pqp; /* The pqp that needs work */ +}; /* Per PQP state/configuration info */ @@ -83,6 +94,13 @@ struct sif_pqp { u16 last_full_seq; /* For logging purposes, record when last observed full */ u16 last_nc_full; /* Track when to return EAGAIN to flush non-compl.entries */ u16 lowpri_lim; /* Max number of outstanding low priority reqs */ + + /* Error recovery handling state of the PQP (access to these protected by CQ lock) */ + bool res_queued; /* Queued for resurrect after a QP error */ + bool write_only; /* The PQP is temporarily disabled and only writing entries is legal */ + atomic_t ev_cnt; /* #of async events seen for this PQP */ + u32 max_qlen; /* Longest PQP send queue observed during poll */ + unsigned long max_cmpl_time; /* Highest number of ticks recorded for a PQP completion */ }; /* Stencil PQP support - pre-populated PQPs for special performance sensitive use cases */ @@ -118,6 +136,9 @@ struct sif_pqp_info { int cnt; /* Number of PQPs set up */ atomic_t next; /* Used for round robin assignment of pqp */ + /* Support for resurrecting PQPs */ + unsigned long pqp_query_ticks; /* #of ticks to wait before querying a PQP for error */ + /* Stencil PQPs for key invalidates */ struct sif_spqp_pool ki_s; }; @@ -205,4 +226,9 @@ int sif_inv_key_update_st(struct sif_st_pqp *spqp, int index, enum wr_mode mode) int sif_destroy_st_pqp(struct sif_dev *sdev, struct sif_st_pqp *spqp); +/* Called from interrupt level to handle events on privileged QPs: */ +void handle_pqp_event(struct sif_eq *eq, struct psif_eq_entry *eqe, struct sif_qp *qp); + +void sif_dfs_print_pqp(struct seq_file *s, struct sif_dev *sdev, loff_t pos); + #endif diff --git a/drivers/infiniband/hw/sif/sif_qp.c b/drivers/infiniband/hw/sif/sif_qp.c index 93e36d91adbe..6eb602291755 100644 --- a/drivers/infiniband/hw/sif/sif_qp.c +++ b/drivers/infiniband/hw/sif/sif_qp.c @@ -71,8 +71,6 @@ static unsigned char bug_3646_conv_table[32] = { 0, }; -static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp); - static int sif_create_pma_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct sif_qp_init_attr sif_attr); @@ -89,7 +87,7 @@ struct sif_rq *get_rq(struct sif_dev *sdev, struct sif_qp *qp) NULL : get_sif_rq(sdev, qp->rq_idx); } -static int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp) +int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp) { unsigned long timeout = sdev->min_resp_ticks; unsigned long timeout_real = jiffies + timeout; @@ -157,8 +155,7 @@ static int send_epsa_proxy_qp_sq_key(struct sif_dev *sdev, u32 lkey, * To be called from create_qp and when QP is modified to RESET, in * case it is resurrected */ - -static void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp) +void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp) { struct psif_qp qpi; @@ -974,64 +971,17 @@ int sif_modify_qp(struct ib_qp *ibqp, } -int modify_qp(struct sif_dev *sdev, struct sif_qp *qp, +/* The actual modify_qp operation: + * assuming qp->lock is held at entry + */ +int _modify_qp(struct sif_dev *sdev, struct sif_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, - bool fail_on_same_state, struct ib_udata *udata) + bool fail_on_same_state, enum ib_qp_state *new_statep) { - int ret = 0; - struct ib_qp *ibqp = &qp->ibqp; - struct sif_rq *rq = get_rq(sdev, qp); - struct sif_sq *sq = get_sq(sdev, qp); enum ib_qp_state cur_state, new_state; + struct ib_qp *ibqp = &qp->ibqp; enum sif_mqp_type mqp_type = SIF_MQP_IGN; - - sif_log(sdev, SIF_QP, "Enter: qpn %d qp_idx %d mask 
0x%x", - ibqp->qp_num, qp->qp_idx, qp_attr_mask); - - /* WA for Bug 622, RQ flush from error completion in userspace */ - if (udata) { - struct sif_modify_qp_ext cmd; - - ret = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); - if (ret) { - sif_log(sdev, SIF_INFO, "ib_copy_from_udata failed, sts %d, qp %d, size %ld", - ret, qp->qp_idx, sizeof(cmd)); - return ret; - } - - switch (cmd.flush) { - case FLUSH_RQ: - if (unlikely(!rq)) { - ret = -EINVAL; - sif_log(sdev, SIF_INFO, - "flush requested for qp(type %s) with no rq defined", - string_enum_psif_qp_trans(qp->type)); - } else { - ret = sif_flush_rq_wq(sdev, rq, qp, rq->entries); - if (ret) - sif_log(sdev, SIF_INFO, "failed to flush RQ %d", rq->index); - } - return ret; - case FLUSH_SQ: - sif_log(sdev, SIF_WCE_V, "user trying to flush SQ %d", qp->qp_idx); - - if (unlikely(!sq)) { - ret = -EINVAL; - sif_log(sdev, SIF_INFO, - "flush requested for qp(type %s) with no sq defined", - string_enum_psif_qp_trans(qp->type)); - } else { - ret = post_process_wa4074(sdev, qp); - if (ret) - sif_log(sdev, SIF_INFO, "failed to flush SQ %d", qp->qp_idx); - } - return ret; - default: - break; - } - } - - mutex_lock(&qp->lock); + int ret = 0; cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state : qp->last_set_state; @@ -1085,7 +1035,70 @@ sif_mqp_ret: /* No extra actions needed */ break; } + if (new_statep) + *new_statep = new_state; + return ret; +} + + +int modify_qp(struct sif_dev *sdev, struct sif_qp *qp, + struct ib_qp_attr *qp_attr, int qp_attr_mask, + bool fail_on_same_state, struct ib_udata *udata) +{ + int ret = 0; + struct sif_rq *rq = get_rq(sdev, qp); + struct sif_sq *sq = get_sq(sdev, qp); + struct ib_qp *ibqp = &qp->ibqp; + enum ib_qp_state new_state; + + sif_log(sdev, SIF_QP, "Enter: qpn %d qp_idx %d mask 0x%x", + ibqp->qp_num, qp->qp_idx, qp_attr_mask); + + /* WA for Bug 622, RQ flush from error completion in userspace */ + if (udata) { + struct sif_modify_qp_ext cmd; + + ret = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); + if (ret) { + sif_log(sdev, SIF_INFO, "ib_copy_from_udata failed, sts %d, qp %d, size %ld", + ret, qp->qp_idx, sizeof(cmd)); + return ret; + } + + switch (cmd.flush) { + case FLUSH_RQ: + if (unlikely(!rq)) { + ret = -EINVAL; + sif_log(sdev, SIF_INFO, + "flush requested for qp(type %s) with no rq defined", + string_enum_psif_qp_trans(qp->type)); + } else { + ret = sif_flush_rq_wq(sdev, rq, qp, rq->entries); + if (ret) + sif_log(sdev, SIF_INFO, "failed to flush RQ %d", rq->index); + } + return ret; + case FLUSH_SQ: + sif_log(sdev, SIF_WCE_V, "user trying to flush SQ %d", qp->qp_idx); + + if (unlikely(!sq)) { + ret = -EINVAL; + sif_log(sdev, SIF_INFO, + "flush requested for qp(type %s) with no sq defined", + string_enum_psif_qp_trans(qp->type)); + } else { + ret = post_process_wa4074(sdev, qp); + if (ret) + sif_log(sdev, SIF_INFO, "failed to flush SQ %d", qp->qp_idx); + } + return ret; + default: + break; + } + } + mutex_lock(&qp->lock); + ret = _modify_qp(sdev, qp, qp_attr, qp_attr_mask, fail_on_same_state, &new_state); mutex_unlock(&qp->lock); if (ret) @@ -1119,7 +1132,7 @@ sif_mqp_ret: break; case IB_QPS_RESET: /* clean all state associated with this QP */ - ret = reset_qp(sdev, qp); + ret = _reset_qp(sdev, qp); break; default: /* No extra actions needed */ @@ -2284,9 +2297,9 @@ int destroy_qp(struct sif_dev *sdev, struct sif_qp *qp) } /* Set this QP back to the initial state - * (called by modify_qp after a successful modify to reset + * (called by modify_qp after a successful modify to reset) */ -static int 
reset_qp(struct sif_dev *sdev, struct sif_qp *qp) +int _reset_qp(struct sif_dev *sdev, struct sif_qp *qp) { struct sif_rq *rq = get_rq(sdev, qp); struct sif_sq *sq = get_sq(sdev, qp); @@ -2319,14 +2332,14 @@ static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp) struct sif_cq *recv_cq = rq ? get_sif_cq(sdev, cq_idx) : NULL; - /* clean-up the SQ/RQ CQ before resetting the SQ */ + /* clean up the SQ/RQ CQ before resetting the SQ */ if (send_cq) { nfixup = sif_fixup_cqes(send_cq, sq, qp); if (nfixup < 0) { sif_log(sdev, SIF_INFO, "fixup cqes on qp %d send cq %d failed with error %d", qp->qp_idx, sq->cq_idx, nfixup); - goto fixup_failed; + goto failed; } sif_log(sdev, SIF_QP, "fixup cqes fixed %d CQEs in sq.cq %d", nfixup, sq->cq_idx); @@ -2337,7 +2350,7 @@ static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp) sif_log(sdev, SIF_INFO, "fixup cqes on qp %d recv cq %d failed with error %d", qp->qp_idx, cq_idx, nfixup); - goto fixup_failed; + goto failed; } sif_log(sdev, SIF_QP, "fixup cqes fixed %d CQEs in rq.cq %d", nfixup, cq_idx); @@ -2345,7 +2358,7 @@ static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp) } } -fixup_failed: +failed: /* if the send queue scheduler is running, wait for * it to terminate: */ @@ -2353,13 +2366,13 @@ fixup_failed: if (qp->ibqp.qp_type != IB_QPT_XRC_TGT) { ret = sif_flush_sqs(sdev, sq); if (ret) - goto failed; + goto sqs_flush_failed; } sif_logs(SIF_DUMP, write_struct_psif_qp(NULL, 1, (struct psif_qp *)&qp->d)); -failed: +sqs_flush_failed: if (ret) { /* TBD: Debug case - should never fail? */ if (qp->type != PSIF_QP_TRANSPORT_MANSP1) @@ -2471,7 +2484,7 @@ void sif_dfs_print_qp(struct seq_file *s, struct sif_dev *sdev, sq = get_sq(sdev, qp); rq = get_rq(sdev, qp); - seq_printf(s, "%llu\t%d\t", pos, qp->last_set_state); + seq_printf(s, "%llu\t%d\t", pos, qp->last_set_state); if (!rq) seq_puts(s, "[none]"); diff --git a/drivers/infiniband/hw/sif/sif_qp.h b/drivers/infiniband/hw/sif/sif_qp.h index 8985867d633c..3cfca395bd55 100644 --- a/drivers/infiniband/hw/sif/sif_qp.h +++ b/drivers/infiniband/hw/sif/sif_qp.h @@ -135,6 +135,7 @@ struct sif_qp { enum ib_mtu mtu; /* Currently set mtu */ enum ib_qp_state tracked_state; /* TBD: This is stupid: Make SQD fail as MLX for SQD */ struct dentry *dfs_qp; /* Raw qp dump debugfs handle - used by sif_debug.c */ + struct sif_pqp *pqp; /* Set if this QP is used as a PQP */ bool sq_cmpl_map_valid; int srq_idx; /* WA #3952: Track SRQ for modify_srq(used only for pQP) */ @@ -225,6 +226,14 @@ int modify_qp(struct sif_dev *sdev, struct sif_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, bool fail_on_same_state, struct ib_udata *udata); +/* Internal kernel only modify QP operation that assumes qp->lock is held */ +int _modify_qp(struct sif_dev *sdev, struct sif_qp *qp, + struct ib_qp_attr *qp_attr, int qp_attr_mask, + bool fail_on_same_state, enum ib_qp_state *new_statep); + +/* Reset data structures of a QP - exposed for PQP only usage */ +int _reset_qp(struct sif_dev *sdev, struct sif_qp *qp); + enum ib_qp_state get_qp_state(struct sif_qp *qp); /* Line printers for debugfs files */ @@ -292,4 +301,9 @@ static inline bool ib_legal_path_mtu(enum ib_mtu mtu) struct sif_sq *get_sq(struct sif_dev *sdev, struct sif_qp *qp); struct sif_rq *get_rq(struct sif_dev *sdev, struct sif_qp *qp); +int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp); + +/* Initialization of qp state via local copy (exposed for use with PQP) */ +void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp); + #endif diff --git 
a/drivers/infiniband/hw/sif/sif_sq.c b/drivers/infiniband/hw/sif/sif_sq.c index 9c13976eecdf..23b57b624ffb 100644 --- a/drivers/infiniband/hw/sif/sif_sq.c +++ b/drivers/infiniband/hw/sif/sif_sq.c @@ -449,7 +449,7 @@ void sif_dfs_print_sq_hw(struct seq_file *s, struct sif_dev *sdev, loff_t pos) pos, sq->cq_idx, head, tail, sq->entries, qlen, sq->max_outstanding, sq->sg_entries, qp->max_inline_data, - get_psif_sq_sw__tail_indx(&sq_sw->d), lhw.last_seq, + lhw.last_seq, get_psif_sq_sw__tail_indx(&sq_sw->d), lhw.sq_next.next_qp_num, lhw.sq_next.next_null, tsv, lhw.sq_done, lhw.destroyed); } -- 2.50.1
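A closing note on the deferral pattern used by handle_pqp_event() and _pqp_queue_resurrect() above: the work item is allocated with GFP_ATOMIC because queuing happens under the CQ lock at interrupt level, a queued flag guards against double-queuing, and the worker both clears the flag and frees the item. A minimal self-contained sketch of that pattern, with hypothetical ex_* names standing in for the sif structures and assuming a dedicated workqueue like sdev->wq:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical stand-ins for struct sif_pqp and the driver workqueue: */
struct ex_obj {
	bool queued;			/* protected by the caller's lock */
	struct workqueue_struct *wq;	/* dedicated workqueue */
};

struct ex_work {
	struct work_struct ws;
	struct ex_obj *obj;
};

static void ex_worker(struct work_struct *work)
{
	struct ex_work *w = container_of(work, struct ex_work, ws);

	/* ...recovery steps run here in process context... */

	w->obj->queued = false;	/* cleared here for brevity; the patch does
				 * this under the CQ lock */
	kfree(w);		/* the worker owns and frees the work item */
}

/* Called with the object's lock held, possibly from interrupt level,
 * hence GFP_ATOMIC:
 */
static int ex_queue_recovery(struct ex_obj *obj)
{
	struct ex_work *w;

	if (obj->queued)
		return -EAGAIN;

	w = kzalloc(sizeof(*w), GFP_ATOMIC);
	if (!w)
		return -ENOMEM;

	w->obj = obj;
	INIT_WORK(&w->ws, ex_worker);
	obj->queued = true;
	queue_work(obj->wq, &w->ws);
	return 0;
}

In the patch itself, pqp->res_queued is deliberately cleared only once the QP has been observed back in RTS, under the CQ lock, so that a state update arriving while recovery is in flight is not lost.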