#define PSIF_QP_CORE_3_MAGIC_BITS 32
#define PSIF_QP_CORE_3_MAGIC_MASK 0x00000000ffffffffull
+/*
+ * Completion queue sequence number. This is used for privileged requests,
+ * where the sequence number for one CQ is added to a different completion.
+ */
+#define PSIF_QP_CORE_CQ_SEQ_OFFSET 3
+#define PSIF_QP_CORE_3_CQ_SEQ_SHIFT 32
+#define PSIF_QP_CORE_3_CQ_SEQ_BITS 32
+#define PSIF_QP_CORE_3_CQ_SEQ_MASK 0xffffffff00000000ull
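+
+/*
+ * Illustration only (not part of the generated accessors): cq_seq occupies
+ * bits 63:32 of 64-bit word PSIF_QP_CORE_CQ_SEQ_OFFSET (3) of psif_qp_core,
+ * so given a host-endian copy qw3 of that word it could be decoded as
+ *   (u32)((qw3 & PSIF_QP_CORE_3_CQ_SEQ_MASK) >> PSIF_QP_CORE_3_CQ_SEQ_SHIFT)
+ */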
+
/*
* Q-Key received in incoming IB packet is checked towards this Q-Key. Q-Key
* used on transmit if top bit of Q-Key in WR is set.
return((u32)be32_to_cpu(*pte));
}
+/*
+ * Completion queue sequence number. This is used for privileged requests,
+ * where the sequence number for one CQ is added to a different completion.
+ */
+static inline void set_psif_qp_core__cq_seq(
+ volatile struct psif_qp_core *ptr,
+ u32 data)
+{
+ /* group=3 shift=32 bits=32 */
+ volatile u32 * const pte = (u32 *)((u8 *)((__be64 *)ptr + 3) + 0);
+ *pte = cpu_to_be32(data);
+}
+static inline u32 get_psif_qp_core__cq_seq(volatile struct psif_qp_core *ptr)
+{
+ /* group=3 shift=32 bits=32 */
+ volatile u32 * const pte = (u32 *)((u8 *)((__be64 *)ptr + 3) + 0);
+ return((u32)be32_to_cpu(*pte));
+}
+
/*
* Q-Key received in incoming IB packet is checked towards this Q-Key. Q-Key
* used on transmit if top bit of Q-Key in WR is set.
if (inv_op == -1)
return -ENODEV;
- sif_log(sdev, SIF_PQP, "sending inv.req. type %s (0x%x) target queue index %d",
- sif_table_name(type), inv_op, index);
+ if (likely(p_mode != PM_WRITE)) /* Only log actual operations to the PQP */
+ sif_log(sdev, SIF_PQP, "sending inv.req. type %s (0x%x) target queue index %d",
+ sif_table_name(type), inv_op, index);
memset(&wr, 0, sizeof(struct psif_wr));
/* For this table type we need to send an explicit
if (flags & IB_CQ_SOLICITED)
wr.se = 1;
- /* Do not rearm a CQ if it is not valid or is in error */
- if (unlikely(!get_psif_cq_hw__valid(&cq->d) || READ_ONCE(cq->in_error))) {
+ /* Do not rearm a CQ if it is not valid or is in error, except for small queues
+ * (detects the no_x_cqe case)
+ */
+ if (unlikely(!get_psif_cq_hw__valid(&cq->d) ||
+ (READ_ONCE(cq->in_error) && cq->entries > SIF_SW_RESERVED_DUL_CQE))) {
sif_log(sdev, SIF_NCQ, "cq %d, flags 0x%x (ignored - CQ in error)", cq->index, flags);
return 0;
}
void sif_dfs_print_cq_hw(struct seq_file *s, struct sif_dev *sdev,
loff_t pos);
-extern int sif_fixup_cqes(struct sif_cq *cq, struct sif_sq *sq, struct sif_qp *qp);
+int sif_fixup_cqes(struct sif_cq *cq, struct sif_sq *sq, struct sif_qp *qp);
#endif
struct sif_dfs_ref {
struct sif_dev *sdev;
bool is_eq;
+ bool no_table_info; /* If set, no table extent/size info printed */
enum sif_tab_type type;
sif_dfs_printer dfs_print;
};
struct sif_dfs_ref sd_irq_ch;
struct sif_dfs_ref sd_ipoffload;
struct sif_dfs_ref sd_wa_stats;
+ struct sif_dfs_ref sd_pqp;
};
/* A simple iterator */
sif_log(sd->sdev, SIF_DFS, "%lld", it->pos);
if (!it->pos || !it->started) {
- if (!sd->is_eq)
+ if (!sd->no_table_info)
seq_printf(s, "# %s state (entries %d, extent %d):\n",
sif_table_name(sd->type),
sd->sdev->ba[sd->type].entry_cnt,
/* Single file for the event queues */
sdr = &sdev->dfs->sd_eq;
sdr->sdev = sdev;
- sdr->is_eq = true;
+ sdr->is_eq = sdr->no_table_info = true;
sdr->dfs_print = sif_dfs_print_eq;
df = debugfs_create_file("eq", S_IRUGO, sdev->dfs->root,
(void *)sdr, &table_fops);
if (!df) {
sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for event queues");
- return -ENOMEM;
+ goto sif_dfs_reg_failed;
}
/* Single file for the ipoffload qp-statistics */
sdr = &sdev->dfs->sd_ipoffload;
sdr->sdev = sdev;
sdr->dfs_print = sif_dfs_print_ipoffload;
sdr->type = qp;
+ sdr->no_table_info = true;
df = debugfs_create_file("ipoffload", S_IRUGO, sdev->dfs->root,
(void *)sdr, &table_fops);
if (!df) {
sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for ipoffload qp stat");
- return -ENOMEM;
+ goto sif_dfs_reg_failed;
}
/* Single file for the wa statistics */
sdr = &sdev->dfs->sd_wa_stats;
sdr->sdev = sdev;
+ sdr->no_table_info = true;
df = debugfs_create_file("wa_stats", S_IRUGO, sdev->dfs->root,
(void *)sdr, &wa_fops);
if (!df) {
sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for wa stat");
- return -ENOMEM;
+ goto sif_dfs_reg_failed;
}
/* Single file for the int channel coalescing settings */
sdr = &sdev->dfs->sd_irq_ch;
sdr->sdev = sdev;
- sdr->is_eq = true;
+ sdr->is_eq = sdr->no_table_info = true;
sdr->dfs_print = sif_dfs_print_irq_ch;
df = debugfs_create_file("irq_ch", S_IWUSR | S_IRUGO, sdev->dfs->root,
(void *)sdr, &table_fops_rw);
if (!df) {
sif_log(sdev, SIF_INFO,
"Unable to set up debugfs file for interrupt channels coalescing settings");
- return -ENOMEM;
+ goto sif_dfs_reg_failed;
+ }
+
+ /* Single file for detailed pqp statistics */
+ sdr = &sdev->dfs->sd_pqp;
+ sdr->sdev = sdev;
+ sdr->dfs_print = sif_dfs_print_pqp;
+ sdr->type = qp;
+ sdr->no_table_info = true;
+ df = debugfs_create_file("pqp", S_IRUGO, sdev->dfs->root,
+ (void *)sdr, &table_fops);
+ if (!df) {
+ sif_log(sdev, SIF_INFO, "Unable to set up debugfs file for pqp stat");
+ goto sif_dfs_reg_failed;
}
/* Create a directory for raw qp dump info */
}
-#define dump_eq_entry(level, _s, _eqe) \
- sif_logs(level, printk("%s: ", _s); \
- write_struct_psif_eq_entry(NULL, 0, &leqe); printk("\n"))
-
-
/* Called from interrupt threads */
static bool dispatch_eq(struct sif_eq *eq, int irq, unsigned int msecs)
{
goto only_cne;
}
- /* silently drop the event if it is a PQP. */
- if (unlikely(sif_qp_elem->type == PSIF_QP_TRANSPORT_MANSP1) &&
- !leqe.event_status_srq_limit_reached) {
- sif_log(eq->ba.sdev, SIF_INFO, "Received async event on PQP!");
+ /* Capture PQP events which must be handled separately */
+ if (unlikely(sif_qp_elem->type == PSIF_QP_TRANSPORT_MANSP1)) {
+ handle_pqp_event(eq, &leqe, sif_qp_elem);
goto only_cne;
}
/* Handle CQ errors early, as they may affect what we need to do on QPs */
if (leqe.event_status_cq_error) {
struct sif_cq *cq = get_sif_cq(sdev, leqe.cqd_id);
+ struct sif_qp *qp = safe_get_sif_qp(sdev, leqe.qp);
+
+ /* Capture PQP events which must be handled separately */
+ if (unlikely(qp && qp->type == PSIF_QP_TRANSPORT_MANSP1)) {
+ handle_pqp_event(eq, &leqe, qp);
+ goto only_cne;
+ }
ibe.event = IB_EVENT_CQ_ERR;
ibe.element.cq = &get_sif_cq(sdev, leqe.cqd_id)->ibcq;
WRITE_ONCE(cq->in_error, true);
if (leqe.vendor_error == TSU_CBLD_CQ_FULL_ERR)
- sif_log(sdev, SIF_INFO, "CQ overrun on CQ %d", cq->index);
+ sif_log(sdev, SIF_INFO, "CQ overrun on CQ %d (QP %d)",
+ leqe.qp, cq->index);
else if (leqe.vendor_error == TSU_CBLD_CQ_ALREADY_IN_ERR)
- sif_log(sdev, SIF_INTR, "CQ %d already in error event", cq->index);
+ sif_log(sdev, SIF_INTR, "CQ %d already in error event (QP %d)",
+ cq->index, leqe.qp);
else
- dump_eq_entry(SIF_INFO, "Got cq_error", &leqe);
+ dump_eq_entry(SIF_INFO, "Got other cq_error", &leqe);
nevents += handle_event(eq, &ibe);
}
if (leqe.event_status_local_work_queue_catastrophic_error ||
ibe.event = IB_EVENT_QP_FATAL;
ibe.element.qp = ibqp;
nevents += handle_event(eq, &ibe);
- dump_eq_entry(SIF_INFO, "Got fatal QP error", &leqe);
+
+ switch (leqe.vendor_error) {
+ case TSU_CBLD_CQ_FULL_ERR:
+ break;
+ default:
+ dump_eq_entry(SIF_INFO, "Got fatal QP error", &leqe);
+ }
}
if (leqe.event_status_srq_catastrophic_error) {
ibe.event = IB_EVENT_SRQ_ERR;
u32 sif_get_eq_channel(struct sif_dev *sdev, struct sif_cq *cq);
bool sif_check_valid_eq_channel(struct sif_dev *sdev, int comp_vector);
+#define dump_eq_entry(level, _s, _eqe) \
+ sif_logs(level, printk("%s: ", _s); \
+ write_struct_psif_eq_entry(NULL, 0, _eqe); printk("\n"))
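+
+/* Typical use, given a local copy "leqe" of the EQ entry (as in the EQ
+ * dispatch code in this patch):
+ * dump_eq_entry(SIF_INFO, "Got fatal QP error", &leqe);
+ */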
+
#endif
#include "sif_ibqp.h"
#include "sif_checksum.h"
#include "sif_defs.h"
+#include <linux/seq_file.h>
+
+static int _pqp_queue_resurrect(struct sif_pqp *pqp);
+static void pqp_resurrect(struct work_struct *work);
static inline struct sif_qp *__create_init_qp(struct sif_dev *sdev, struct sif_cq *cq)
{
}
+/* Sync HW pointers to the first req for which we did not receive any completion.
+ * This must be done from the INIT state, i.e. before we attempt to move to
+ * a HW owned state again, but after the implicit logic for resetting values
+ * in the qp state:
+ */
+static inline void _resync_pointers(struct sif_qp *qp)
+{
+ unsigned long flags;
+ struct sif_dev *sdev = to_sdev(qp->ibqp.device);
+ struct sif_sq *sq = get_sq(sdev, qp);
+ struct sif_cq *cq = get_sif_cq(sdev, qp->send_cq_indx);
+ struct sif_cq_sw *cq_sw = get_sif_cq_sw(sdev, qp->send_cq_indx);
+ struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, qp->qp_idx);
+ struct sif_sq_hdl *wh;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ sq_sw->head_seq++;
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ spin_lock_irqsave(&sq->lock, flags);
+
+ /* Terminate the failed request in error, then replay any
+ * affected 3rd party reqs:
+ */
+ wh = get_sq_hdl(sq, sq_sw->head_seq);
+ if (wh) {
+ struct sif_cqe *lcqe = (struct sif_cqe *)wh->wr_id;
+
+ if (lcqe) {
+ sif_log(sdev, SIF_PQP, "Complete cqe %p for sq_seq %d for qp %d",
+ lcqe, wh->sq_seq, qp->qp_idx);
+ lcqe->cqe.status = PSIF_WC_STATUS_SUCCESS;
+ WRITE_ONCE(lcqe->written, true);
+ wh->wr_id = 0;
+ wh->used = false;
+ }
+ }
+
+ /* This seqno got reset by the generic RESET->INIT code, set it back
+ * to the value we want here, which is +1 compared to sq_hw::last_seq
+ */
+ set_psif_qp_core__sq_seq(&qp->d.state, sq_sw->head_seq + 1);
+ set_psif_qp_core__retry_sq_seq(&qp->d.state, sq_sw->head_seq + 1);
+
+ /* We also need to set QP::cq_seq back to where we expect it: */
+ set_psif_qp_core__cq_seq(&qp->d.state, cq_sw->next_seq);
+
+ set_psif_sq_sw__tail_indx(&sq_sw->d, sq_sw->last_seq + 1);
+ set_psif_sq_hw__destroyed(&sq->d, 0);
+ set_psif_sq_hw__last_seq(&sq->d, sq_sw->head_seq + 1);
+ spin_unlock_irqrestore(&sq->lock, flags);
+ mb();
+}
+
+/* Take a priv.QP from RESET to RTS */
+static int _modify_reset_to_rts(struct sif_dev *sdev, struct sif_qp *qp, bool resurrect)
+{
+ int ret;
+
+ /* The privileged QP only supports state in modify_qp */
+ struct ib_qp_attr mod_attr = {
+ .qp_state = IB_QPS_INIT
+ };
+
+ /* Run the required qp modify sequence */
+ ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL);
+ if (ret)
+ return ret;
+
+ if (resurrect)
+ _resync_pointers(qp);
+
+ mod_attr.qp_state = IB_QPS_RTR;
+ ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL);
+ if (ret)
+ return ret;
+
+ mod_attr.qp_state = IB_QPS_RTS;
+ mod_attr.sq_psn = 0;
+
+ /* Modify the QP to RTS, but don't reflect it to last_set_state yet.. */
+ ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL);
+ return ret;
+}
+
static struct sif_pqp *_sif_create_pqp(struct sif_dev *sdev, size_t alloc_sz, int comp_vector)
{
struct sif_sq *sq = NULL;
int ret = 0;
- /* The privileged QP only supports state in modify_qp */
- struct ib_qp_attr mod_attr = {
- .qp_state = IB_QPS_INIT
- };
-
pqp = kzalloc(alloc_sz, GFP_KERNEL);
if (!pqp) {
sif_log(sdev, SIF_INFO, "Failed to allocate memory for priv.qp");
}
pqp->qp = qp;
+ qp->pqp = pqp;
sq = get_sif_sq(sdev, qp->qp_idx);
/* Reserve 1/2 or at least 1 entry for pqp requests with completion on the PQP */
pqp->lowpri_lim = sq->entries - min_t(int, sq->entries/2, 2);
- /* Run the required qp modify sequence */
- ret = sif_modify_qp(&qp->ibqp, &mod_attr,
- IB_QP_STATE, NULL);
- if (ret)
- goto qp_alloc_failed;
-
- mod_attr.qp_state = IB_QPS_RTR;
- ret = sif_modify_qp(&qp->ibqp, &mod_attr,
- IB_QP_STATE, NULL);
+ mutex_lock(&qp->lock);
+ ret = _modify_reset_to_rts(sdev, qp, false);
if (ret)
goto qp_alloc_failed;
-
- mod_attr.qp_state = IB_QPS_RTS;
- mod_attr.sq_psn = 0;
- ret = sif_modify_qp(&qp->ibqp, &mod_attr,
- IB_QP_STATE, NULL);
- if (ret)
- goto qp_alloc_failed;
-
atomic64_set(&pqp->qp->arm_srq_holdoff_time, 0);
+ mutex_unlock(&qp->lock);
sif_log(sdev, SIF_QP, "success");
return pqp;
qp_alloc_failed:
+ mutex_unlock(&qp->lock);
+
/* Special destruction order, see below: */
destroy_cq(cq);
if (sq)
lcqe = (struct sif_cqe *)wh->wr_id;
if (lcqe) {
+ unsigned long elapsed = jiffies - lcqe->t_start;
+
+ if (unlikely(elapsed > pqp->max_cmpl_time))
+ pqp->max_cmpl_time = elapsed;
wh->wr_id = 0;
cqe_cnt++;
mb();
}
ql = sq_length(sq, sq_seq, sq_sw->last_seq);
+ if (unlikely(ql > pqp->max_qlen))
+ pqp->max_qlen = ql;
if (ql <= sq->mask)
pqp_complete_nonfull(pqp);
mb();
struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, qp_idx);
unsigned long timeout = sdev->min_resp_ticks * 4;
u16 limit = pqp_req_gets_completion(pqp, wr, mode) ? sq->entries : pqp->lowpri_lim;
- /* Per IBTA 11.4.1.1, error is only returned
- * when the QP is in the RESET, INIT or RTR states.
- */
- if (qp->last_set_state < IB_QPS_RTS)
- return -EINVAL; /* The pqp is not ready */
pqp->timeout = jiffies + timeout;
wr->tsu_sl = qp->tsl;
restart:
- /* Make sure emptying the queue takes preference over filling it up: */
+ /* Make sure emptying the queue takes preference over filling it up.
+ * This will also make us exit if the pqp is not up and running:
+ */
if (mode != PM_WRITE)
ret = pqp_process_cqe(pqp, NULL);
if (ret > 0 || ret == -EBUSY)
sqe = get_sq_entry(sq, sq_seq);
- sif_log(sdev, SIF_PQP, "pd %d cq_idx %d sq_idx %d sq.seqn %d op %s",
- pd->idx, wr->cq_desc_vlan_pri_union.cqd_id, sq->index, sq_seq,
- string_enum_psif_wr_type(wr->op));
-
if (likely(mode != PM_WRITE)) {
u64 csum;
+ sif_log(sdev, SIF_PQP, "pd %d cq_idx %d sq_idx %d sq.seqn %d op %s",
+ pd->idx, wr->cq_desc_vlan_pri_union.cqd_id, sq->index, sq_seq,
+ string_enum_psif_wr_type(wr->op));
+
wr->sq_seq = sq_seq;
/* Collect_length is always 0 for privileged wr's - they have no data */
/* update send queue */
copy_conv_to_hw(sqe, wr, sizeof(struct psif_wr));
- if (likely(mode != PM_WRITE)) {
+ if (unlikely(READ_ONCE(pqp->write_only)))
+ wmb();
+ else if (likely(mode != PM_WRITE)) {
/* Flush writes before updating the sw pointer,
* This is necessary to ensure that the sqs do not see
* an incomplete entry:
if (sif_feature(pcie_trigger))
force_pcie_link_retrain(sdev);
sif_log(sdev, SIF_INFO,
- "cq %d: poll for cqe %p timed out", cq->index, lcqe);
+ "cq %d: poll for cqe for sq %d, sq.seq %d timed out",
+ cq->index, pqp->qp->qp_idx, lcqe->sq_seq);
atomic_inc(&cq->timeout_cnt);
sif_logs(SIF_PQPT,
else
cpu_relax();
- if (unlikely(READ_ONCE(pqp->qp->last_set_state) != IB_QPS_RTS)) {
- sif_log(sdev, SIF_INFO,
- "cq %d: poll for cqe %p failed - pqp %d not operational\n",
- cq->index, lcqe, pqp->qp->qp_idx);
- ret = -EINTR;
- break;
- }
if (sdev->min_resp_ticks != min_resp_ticks) {
/* Give us a quick way out by changing min_resp_ticks */
pqp->timeout -= (min_resp_ticks - sdev->min_resp_ticks) * 4;
int ret = 0;
uint n_pqps = es->eqs.cnt - 2;
+ /* Use a sensible default value for when to query a PQP to see if it got
+ * set to error without an event - will be adjusted dynamically:
+ */
+ pqi->pqp_query_ticks = max(1ULL, sdev->min_resp_ticks / 50);
+
pqi->pqp = sif_kmalloc(sdev, sizeof(struct sif_pqp *) * n_pqps, GFP_KERNEL | __GFP_ZERO);
if (!pqi->pqp)
return -ENOMEM;
kfree(pqi->pqp);
pqi->pqp = NULL;
}
+
+void handle_pqp_event(struct sif_eq *eq, struct psif_eq_entry *eqe, struct sif_qp *qp)
+{
+ struct sif_dev *sdev = eq->ba.sdev;
+ unsigned long flags;
+ struct sif_pqp *pqp = qp->pqp;
+
+ if (eqe->event_status_cq_error) {
+ struct sif_cq *cq = get_sif_cq(sdev, eqe->cqd_id);
+
+ if (eqe->vendor_error == TSU_CBLD_CQ_FULL_ERR)
+ sif_log(sdev, SIF_INFO, "PQP error due to CQ overrun on CQ %d", cq->index);
+ else if (eqe->vendor_error == TSU_CBLD_CQ_ALREADY_IN_ERR)
+ sif_log(sdev, SIF_INFO, "PQP error due to CQ %d already in error event",
+ cq->index);
+ else
+ dump_eq_entry(SIF_INFO, "Got unexpected cq_error", eqe);
+
+ WRITE_ONCE(qp->last_set_state, IB_QPS_ERR);
+ WRITE_ONCE(pqp->write_only, true);
+
+ spin_lock_irqsave(&pqp->cq->lock, flags);
+ _pqp_queue_resurrect(pqp);
+ spin_unlock_irqrestore(&pqp->cq->lock, flags);
+ } else {
+ sif_log(sdev, SIF_INFO, "Received unexpected async event on PQP %d: ", qp->qp_idx);
+ sif_logs(SIF_INFO, write_struct_psif_eq_entry(NULL, 0, eqe); printk("\n"));
+ }
+ atomic_inc(&pqp->ev_cnt);
+}
+
+/* NB! Assumed called with the CQ lock held.
+ * Trigger a resurrect operation on the PQP
+ * if necessary, e.g. if the QP is in error
+ * and no resurrect operation is already queued:
+ */
+static int _pqp_queue_resurrect(struct sif_pqp *pqp)
+{
+ enum ib_qp_state s;
+ struct pqp_work *work;
+ struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device);
+ struct sif_qp *qp = pqp->qp;
+
+ if (READ_ONCE(pqp->res_queued))
+ return -EAGAIN;
+ s = qp->last_set_state;
+ if (s != IB_QPS_ERR && s != IB_QPS_SQE)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ work->pqp = pqp;
+ INIT_WORK(&work->ws, pqp_resurrect);
+ WRITE_ONCE(pqp->res_queued, true);
+
+ queue_work(sdev->wq, &work->ws);
+ return -EAGAIN;
+}
+
+/* Simplified version of _reset_qp() for PQPs already in error
+ * as none of the other workarounds are needed for PQPs in error
+ * due to a CQ error. Assumes QP lock is held.
+ */
+static int _reset_qp_pqp(struct sif_pqp *pqp)
+{
+ struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device);
+ struct sif_qp *qp = pqp->qp;
+ struct sif_sq *sq = get_sq(sdev, qp);
+ int ret = 0;
+ struct ib_qp_attr mod_attr = {
+ .qp_state = IB_QPS_RESET
+ };
+
+ ret = _modify_qp(sdev, qp, &mod_attr, IB_QP_STATE, true, NULL);
+ if (ret)
+ goto out;
+
+ /* Bring down order needed by rev2 according to bug #3480 */
+ ret = poll_wait_for_qp_writeback(sdev, qp);
+ if (ret)
+ goto out;
+
+ ret = sif_flush_sqs(sdev, sq);
+ if (ret)
+ goto out;
+
+ /* Re-initialize the HW QP state as after create_qp() */
+ init_hw_qp_state(sdev, qp);
+out:
+ return ret;
+}
+
+
+/* Called with QP lock held. Check SQ state and
+ * trigger SQ mode if there are outstanding requests
+ * that failed in the previous "life" of this PQP.
+ * The QP is already in RTS but new reqs are held back
+ * by pqp->write_only being set:
+ */
+static int _retrigger_sq(struct sif_pqp *pqp)
+{
+ unsigned long flags;
+ struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device);
+ struct sif_sq *sq = get_sq(sdev, pqp->qp);
+ struct sif_sq_sw *sq_sw = get_sif_sq_sw(sdev, pqp->qp->qp_idx);
+ int ql;
+
+ spin_lock_irqsave(&sq->lock, flags);
+ ql = sq_length(sq, sq_sw->head_seq, sq_sw->last_seq);
+
+ if (ql > 0) {
+ u32 sq_seq = sq_sw->head_seq + 1;
+
+ sif_log(sdev, SIF_INFO, "Queue length %d, start at %u", ql, sq_seq);
+
+ /* Outstanding requests, must trigger SQ mode: */
+ sif_doorbell_from_sqe(pqp->qp, sq_seq, true);
+
+ sif_logs(SIF_PQPT,
+ struct sif_sq *sq = get_sif_sq(sdev, pqp->qp->qp_idx);
+ struct psif_sq_entry *sqe = get_sq_entry(sq, sq_seq);
+
+ write_struct_psif_sq_entry(NULL, 1, sqe));
+ }
+
+ /* Finally reset the write only flag:
+ * While the flag is set, post operations will just be written
+ * to the send queue without any collect buffer write:
+ */
+ WRITE_ONCE(pqp->write_only, false);
+ spin_unlock_irqrestore(&sq->lock, flags);
+ return 0;
+}
+
+
+/* Process a pqp_work work element to resurrect a PQP that is in error.
+ * This function assumes this is due to a CQ error on a user CQ:
+ */
+static void pqp_resurrect(struct work_struct *work)
+{
+ struct pqp_work *rw = container_of(work, struct pqp_work, ws);
+ struct sif_pqp *pqp = rw->pqp;
+ struct sif_qp *qp = pqp->qp;
+ struct sif_dev *sdev = to_sdev(pqp->qp->ibqp.device);
+ unsigned long flags;
+ int ret;
+
+ sif_log(sdev, SIF_PQP, " PQP %d", qp->qp_idx);
+
+ mutex_lock(&qp->lock);
+ ret = _reset_qp_pqp(pqp);
+ if (ret)
+ goto out;
+
+ ret = _modify_reset_to_rts(sdev, qp, true);
+ if (ret)
+ goto out;
+
+ /* Now retrigger any accidental 3rd party requests
+ * that failed in the previous "life" of the PQP:
+ */
+ ret = _retrigger_sq(pqp);
+ if (ret)
+ goto out;
+
+ spin_lock_irqsave(&pqp->cq->lock, flags);
+ /* Avoid losing an accidental update to the state */
+ if (qp->last_set_state == IB_QPS_RTS)
+ WRITE_ONCE(pqp->res_queued, false);
+ spin_unlock_irqrestore(&pqp->cq->lock, flags);
+out:
+ mutex_unlock(&qp->lock);
+ kfree(rw);
+ if (ret)
+ sif_log(sdev, SIF_INFO, "Fatal error: Failed to resurrect PQP %d",
+ qp->qp_idx);
+
+ sif_log(sdev, SIF_PQP, " PQP %d done", qp->qp_idx);
+}
+
+void sif_dfs_print_pqp(struct seq_file *s, struct sif_dev *sdev, loff_t pos)
+{
+ struct sif_qp *qp;
+ struct sif_pqp *pqp;
+ struct sif_pqp_info *pqi = &sdev->pqi;
+
+ if (unlikely(pos < 0)) {
+ seq_printf(s, "# Global PQP config:\n# Number of normal PQPs: %u\n# Next PQP in RR: %u\n"
+ "#\n# Per PQP stats:\n# qmax = Longest PQP send queue observed (during poll)\n"
+ "# tmax = Max time in msec observed for a PQP req\n"
+ "# evc = #of PQP async events seen\n#\n"
+ "#Index State %6s qmax tmax evc\n", pqi->cnt,
+ atomic_read(&pqi->next), "CQ");
+ return;
+ }
+
+ qp = get_sif_qp(sdev, pos);
+ if (qp->type != PSIF_QP_TRANSPORT_MANSP1)
+ return;
+ pqp = qp->pqp;
+
+ seq_printf(s, "%6llu %5u %6u %4u %6u %u", pos, qp->last_set_state,
+ get_psif_qp_core__send_cq_indx(&qp->d.state),
+ pqp->max_qlen, jiffies_to_msecs(pqp->max_cmpl_time), atomic_read(&pqp->ev_cnt));
+ if (qp->flags & SIF_QPF_KI_STENCIL)
+ seq_puts(s, " [ki_stencil]\n");
+ else
+ seq_puts(s, "\n");
+}
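+
+/*
+ * Illustrative output only (column spacing approximate), assuming a single
+ * PQP at QP index 4 in RTS (state 3) using send CQ 5:
+ *
+ * # Global PQP config:
+ * # Number of normal PQPs: 1
+ * # Next PQP in RR: 0
+ * #
+ * # Per PQP stats:
+ * # qmax = Longest PQP send queue observed (during poll)
+ * # tmax = Max time in msec observed for a PQP req
+ * # evc = #of PQP async events seen
+ * #
+ * #Index State     CQ qmax tmax evc
+ *      4     3      5    2    1   0
+ */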
struct sif_cq;
struct sif_rq;
struct sif_sq;
+struct sif_eq;
struct completion;
enum post_mode;
+struct psif_eq_entry;
/* Data structure used by PQP requesters to get the completion information,
* and optionally block waiting for it to arrive:
bool need_complete; /* cmpl is initialized and a waiter is present */
bool written; /* Set to true when a completion has been copied here */
u16 sq_seq; /* set by post_send to allow us to reset ourselves */
+ unsigned long t_start; /* jiffies when request was posted */
};
/*
.pqp = get_pqp(d_),\
.need_complete = false,\
.written = false,\
+ .t_start = jiffies,\
}
#define DECLARE_SIF_CQE_WITH_SAME_EQ(d_, c_, e_) \
.pqp = get_pqp_same_eq(d_, e_), \
.need_complete = false,\
.written = false,\
+ .t_start = jiffies,\
}
.pqp = get_pqp(d_),\
.need_complete = true,\
.written = false,\
+ .t_start = jiffies,\
};\
init_completion(&c_.cmpl)
.pqp = get_next_pqp(d_),\
.need_complete = false,\
.written = false,\
+ .t_start = jiffies,\
}
+struct pqp_work {
+ struct work_struct ws;
+ struct sif_pqp *pqp; /* The pqp that needs work */
+};
/* Per PQP state/configuration info */
u16 last_full_seq; /* For logging purposes, record when last observed full */
u16 last_nc_full; /* Track when to return EAGAIN to flush non-compl.entries */
u16 lowpri_lim; /* Max number of outstanding low priority reqs */
+
+ /* Error recovery handling state of the PQP (access to these protected by CQ lock) */
+ bool res_queued; /* Queued for resurrect after a QP error */
+ bool write_only; /* The PQP is temporarily disabled and only writing entries is legal */
+ atomic_t ev_cnt; /* #of async events seen for this PQP */
+ u32 max_qlen; /* Longest PQP send queue observed during poll */
+ unsigned long max_cmpl_time; /* Highest number of ticks recorded for a PQP completion */
};
/* Stencil PQP support - pre-populated PQPs for special performance sensitive use cases */
int cnt; /* Number of PQPs set up */
atomic_t next; /* Used for round robin assignment of pqp */
+ /* Support for resurrecting PQPs */
+ unsigned long pqp_query_ticks; /* #of ticks to wait before querying a PQP for error */
+
/* Stencil PQPs for key invalidates */
struct sif_spqp_pool ki_s;
};
int sif_destroy_st_pqp(struct sif_dev *sdev, struct sif_st_pqp *spqp);
+/* Called from interrupt level to handle events on privileged QPs: */
+void handle_pqp_event(struct sif_eq *eq, struct psif_eq_entry *eqe, struct sif_qp *qp);
+
+void sif_dfs_print_pqp(struct seq_file *s, struct sif_dev *sdev, loff_t pos);
+
#endif
0,
};
-static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp);
-
static int sif_create_pma_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct sif_qp_init_attr sif_attr);
NULL : get_sif_rq(sdev, qp->rq_idx);
}
-static int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp)
+int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp)
{
unsigned long timeout = sdev->min_resp_ticks;
unsigned long timeout_real = jiffies + timeout;
* To be called from create_qp and when QP is modified to RESET, in
* case it is resurrected
*/
-
-static void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp)
+void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp)
{
struct psif_qp qpi;
}
-int modify_qp(struct sif_dev *sdev, struct sif_qp *qp,
+/* The actual modify_qp operation:
+ * assuming qp->lock is held at entry
+ */
+int _modify_qp(struct sif_dev *sdev, struct sif_qp *qp,
struct ib_qp_attr *qp_attr, int qp_attr_mask,
- bool fail_on_same_state, struct ib_udata *udata)
+ bool fail_on_same_state, enum ib_qp_state *new_statep)
{
- int ret = 0;
- struct ib_qp *ibqp = &qp->ibqp;
- struct sif_rq *rq = get_rq(sdev, qp);
- struct sif_sq *sq = get_sq(sdev, qp);
enum ib_qp_state cur_state, new_state;
+ struct ib_qp *ibqp = &qp->ibqp;
enum sif_mqp_type mqp_type = SIF_MQP_IGN;
-
- sif_log(sdev, SIF_QP, "Enter: qpn %d qp_idx %d mask 0x%x",
- ibqp->qp_num, qp->qp_idx, qp_attr_mask);
-
- /* WA for Bug 622, RQ flush from error completion in userspace */
- if (udata) {
- struct sif_modify_qp_ext cmd;
-
- ret = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
- if (ret) {
- sif_log(sdev, SIF_INFO, "ib_copy_from_udata failed, sts %d, qp %d, size %ld",
- ret, qp->qp_idx, sizeof(cmd));
- return ret;
- }
-
- switch (cmd.flush) {
- case FLUSH_RQ:
- if (unlikely(!rq)) {
- ret = -EINVAL;
- sif_log(sdev, SIF_INFO,
- "flush requested for qp(type %s) with no rq defined",
- string_enum_psif_qp_trans(qp->type));
- } else {
- ret = sif_flush_rq_wq(sdev, rq, qp, rq->entries);
- if (ret)
- sif_log(sdev, SIF_INFO, "failed to flush RQ %d", rq->index);
- }
- return ret;
- case FLUSH_SQ:
- sif_log(sdev, SIF_WCE_V, "user trying to flush SQ %d", qp->qp_idx);
-
- if (unlikely(!sq)) {
- ret = -EINVAL;
- sif_log(sdev, SIF_INFO,
- "flush requested for qp(type %s) with no sq defined",
- string_enum_psif_qp_trans(qp->type));
- } else {
- ret = post_process_wa4074(sdev, qp);
- if (ret)
- sif_log(sdev, SIF_INFO, "failed to flush SQ %d", qp->qp_idx);
- }
- return ret;
- default:
- break;
- }
- }
-
- mutex_lock(&qp->lock);
+ int ret = 0;
cur_state = qp_attr_mask & IB_QP_CUR_STATE ?
qp_attr->cur_qp_state : qp->last_set_state;
/* No extra actions needed */
break;
}
+ if (new_statep)
+ *new_statep = new_state;
+ return ret;
+}
+
+
+int modify_qp(struct sif_dev *sdev, struct sif_qp *qp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ bool fail_on_same_state, struct ib_udata *udata)
+{
+ int ret = 0;
+ struct sif_rq *rq = get_rq(sdev, qp);
+ struct sif_sq *sq = get_sq(sdev, qp);
+ struct ib_qp *ibqp = &qp->ibqp;
+ enum ib_qp_state new_state;
+
+ sif_log(sdev, SIF_QP, "Enter: qpn %d qp_idx %d mask 0x%x",
+ ibqp->qp_num, qp->qp_idx, qp_attr_mask);
+
+ /* WA for Bug 622, RQ flush from error completion in userspace */
+ if (udata) {
+ struct sif_modify_qp_ext cmd;
+
+ ret = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (ret) {
+ sif_log(sdev, SIF_INFO, "ib_copy_from_udata failed, sts %d, qp %d, size %ld",
+ ret, qp->qp_idx, sizeof(cmd));
+ return ret;
+ }
+
+ switch (cmd.flush) {
+ case FLUSH_RQ:
+ if (unlikely(!rq)) {
+ ret = -EINVAL;
+ sif_log(sdev, SIF_INFO,
+ "flush requested for qp(type %s) with no rq defined",
+ string_enum_psif_qp_trans(qp->type));
+ } else {
+ ret = sif_flush_rq_wq(sdev, rq, qp, rq->entries);
+ if (ret)
+ sif_log(sdev, SIF_INFO, "failed to flush RQ %d", rq->index);
+ }
+ return ret;
+ case FLUSH_SQ:
+ sif_log(sdev, SIF_WCE_V, "user trying to flush SQ %d", qp->qp_idx);
+
+ if (unlikely(!sq)) {
+ ret = -EINVAL;
+ sif_log(sdev, SIF_INFO,
+ "flush requested for qp(type %s) with no sq defined",
+ string_enum_psif_qp_trans(qp->type));
+ } else {
+ ret = post_process_wa4074(sdev, qp);
+ if (ret)
+ sif_log(sdev, SIF_INFO, "failed to flush SQ %d", qp->qp_idx);
+ }
+ return ret;
+ default:
+ break;
+ }
+ }
+ mutex_lock(&qp->lock);
+ ret = _modify_qp(sdev, qp, qp_attr, qp_attr_mask, fail_on_same_state, &new_state);
mutex_unlock(&qp->lock);
if (ret)
break;
case IB_QPS_RESET:
/* clean all state associated with this QP */
- ret = reset_qp(sdev, qp);
+ ret = _reset_qp(sdev, qp);
break;
default:
/* No extra actions needed */
}
/* Set this QP back to the initial state
- * (called by modify_qp after a successful modify to reset
+ * (called by modify_qp after a successful modify to reset)
*/
-static int reset_qp(struct sif_dev *sdev, struct sif_qp *qp)
+int _reset_qp(struct sif_dev *sdev, struct sif_qp *qp)
{
struct sif_rq *rq = get_rq(sdev, qp);
struct sif_sq *sq = get_sq(sdev, qp);
struct sif_cq *recv_cq = rq ? get_sif_cq(sdev, cq_idx) : NULL;
- /* clean-up the SQ/RQ CQ before resetting the SQ */
+ /* clean up the SQ/RQ CQ before resetting the SQ */
if (send_cq) {
nfixup = sif_fixup_cqes(send_cq, sq, qp);
if (nfixup < 0) {
sif_log(sdev, SIF_INFO,
"fixup cqes on qp %d send cq %d failed with error %d",
qp->qp_idx, sq->cq_idx, nfixup);
- goto fixup_failed;
+ goto failed;
}
sif_log(sdev, SIF_QP, "fixup cqes fixed %d CQEs in sq.cq %d",
nfixup, sq->cq_idx);
sif_log(sdev, SIF_INFO,
"fixup cqes on qp %d recv cq %d failed with error %d",
qp->qp_idx, cq_idx, nfixup);
- goto fixup_failed;
+ goto failed;
}
sif_log(sdev, SIF_QP, "fixup cqes fixed %d CQEs in rq.cq %d",
nfixup, cq_idx);
}
}
-fixup_failed:
+failed:
/* if the send queue scheduler is running, wait for
* it to terminate:
*/
if (qp->ibqp.qp_type != IB_QPT_XRC_TGT) {
ret = sif_flush_sqs(sdev, sq);
if (ret)
- goto failed;
+ goto sqs_flush_failed;
}
sif_logs(SIF_DUMP,
write_struct_psif_qp(NULL, 1, (struct psif_qp *)&qp->d));
-failed:
+sqs_flush_failed:
if (ret) {
/* TBD: Debug case - should never fail? */
if (qp->type != PSIF_QP_TRANSPORT_MANSP1)
sq = get_sq(sdev, qp);
rq = get_rq(sdev, qp);
- seq_printf(s, "%llu\t%d\t", pos, qp->last_set_state);
+ seq_printf(s, "%llu\t%d\t", pos, qp->last_set_state);
if (!rq)
seq_puts(s, "[none]");
enum ib_mtu mtu; /* Currently set mtu */
enum ib_qp_state tracked_state; /* TBD: This is stupid: Make SQD fail as MLX for SQD */
struct dentry *dfs_qp; /* Raw qp dump debugfs handle - used by sif_debug.c */
+ struct sif_pqp *pqp; /* Set if this QP is used as a PQP */
bool sq_cmpl_map_valid;
int srq_idx; /* WA #3952: Track SRQ for modify_srq(used only for pQP) */
struct ib_qp_attr *qp_attr, int qp_attr_mask,
bool fail_on_same_state, struct ib_udata *udata);
+/* Internal kernel only modify QP operation that assumes qp->lock is held */
+int _modify_qp(struct sif_dev *sdev, struct sif_qp *qp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ bool fail_on_same_state, enum ib_qp_state *new_statep);
+
+/* Reset data structures of a QP - exposed for PQP only usage */
+int _reset_qp(struct sif_dev *sdev, struct sif_qp *qp);
+
enum ib_qp_state get_qp_state(struct sif_qp *qp);
/* Line printers for debugfs files */
struct sif_sq *get_sq(struct sif_dev *sdev, struct sif_qp *qp);
struct sif_rq *get_rq(struct sif_dev *sdev, struct sif_qp *qp);
+int poll_wait_for_qp_writeback(struct sif_dev *sdev, struct sif_qp *qp);
+
+/* Initialization of qp state via local copy (exposed for use with PQP) */
+void init_hw_qp_state(struct sif_dev *sdev, struct sif_qp *qp);
+
#endif
pos,
sq->cq_idx, head, tail, sq->entries, qlen, sq->max_outstanding,
sq->sg_entries, qp->max_inline_data,
- get_psif_sq_sw__tail_indx(&sq_sw->d), lhw.last_seq,
+ lhw.last_seq, get_psif_sq_sw__tail_indx(&sq_sw->d),
lhw.sq_next.next_qp_num, lhw.sq_next.next_null,
tsv, lhw.sq_done, lhw.destroyed);
}