IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
+ IPOIB_CM_MAX_BAD_CONNS = 8, /* Per-connection send queue divisor: keeps the netdev TX queue running even with 4 bad connections */
IPOIB_RX_RING_SIZE = 256,
IPOIB_TX_RING_SIZE = 128,
IPOIB_MAX_QUEUE_SIZE = 8192,
extern int ipoib_sendq_size;
extern int ipoib_recvq_size;
+extern int ipoib_cm_sendq_size;
extern struct ib_sa_client ipoib_sa_client;
if (ipoib_linearize_skb(dev, skb, priv, tx->max_send_sge) < 0)
return;
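+ /* Per-connection cap: if this RC connection already has
+  * ipoib_cm_sendq_size sends outstanding, drop the packet instead of
+  * stopping the netdev TX queue, so one bad connection cannot stall
+  * traffic to healthy peers.
+  */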
+ if ((tx->tx_head - tx->tx_tail) >= ipoib_cm_sendq_size) {
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ ipoib_dbg_data(priv, "dropping packet: length %d connection 0x%x\n",
+ skb->len, tx->qp->qp_num);
+ return;
+ }
+
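For reference, a stand-alone model of the occupancy test above (hypothetical, not part of the patch): tx_head and tx_tail are free-running unsigned counters, so head - tail gives the number of in-flight sends even after 32-bit wraparound.

#include <assert.h>

/* Model of the check in the CM send path: with free-running unsigned
 * counters, head - tail is the ring occupancy, wraparound included. */
static unsigned int occupancy(unsigned int head, unsigned int tail)
{
	return head - tail;
}

int main(void)
{
	unsigned int tail = 4294967290u;     /* near UINT_MAX */
	unsigned int head = tail + 10;       /* wraps around to 4 */

	assert(occupancy(head, tail) == 10); /* occupancy still correct */
	return 0;
}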
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num);
* means we have to make sure everything is properly recorded and
* our state is consistent before we call post_send().
*/
- tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
+ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_cm_sendq_size - 1)];
tx_req->skb = skb;
/* Calculate checksum if we support ibcrc_as_csum but peer is not */
return;
}
rc = post_send_sg(priv, tx, tx->tx_head &
- (ipoib_sendq_size - 1),
+ (ipoib_cm_sendq_size - 1),
skb, tx_req->mapping);
} else {
addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
skb_orphan(skb);
skb_dst_drop(skb);
- rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
+ rc = post_send(priv, tx, tx->tx_head & (ipoib_cm_sendq_size - 1),
addr, skb->len);
}
if (unlikely(rc)) {
ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
wr_id, wc->status);
- if (unlikely(wr_id >= ipoib_sendq_size)) {
+ if (unlikely(wr_id >= ipoib_cm_sendq_size)) {
ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
- wr_id, ipoib_sendq_size);
+ wr_id, ipoib_cm_sendq_size);
return;
}
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
.srq = priv->cm.srq,
- .cap.max_send_wr = ipoib_sendq_size,
+ .cap.max_send_wr = ipoib_cm_sendq_size,
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
int ret;
- p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
- GFP_NOIO, PAGE_KERNEL);
+ p->tx_ring = __vmalloc(ipoib_cm_sendq_size * sizeof(*p->tx_ring),
+ GFP_NOIO | __GFP_ZERO, PAGE_KERNEL);
if (!p->tx_ring) {
ipoib_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
- memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
if (IS_ERR(p->qp)) {
timeout:
while ((int) p->tx_tail - (int) p->tx_head < 0) {
- tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+ tx_req = &p->tx_ring[p->tx_tail & (ipoib_cm_sendq_size - 1)];
ipoib_cm_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
int unload_allowed __read_mostly = 1;
+/* The IPOIB_CM_MAX_BAD_CONNS default (8) is in line with the current
+ * Exadata-ZFS deployment, which usually has 2 ZFS heads.
+ * Given that, at most four connections can go bad (assuming the
+ * unlikely scenario where all of them fail simultaneously).
+ * With each CM connection's send queue capped at 1/8th of the device
+ * send queue (ipoib_sendq_size), as defined below, the netdev TX
+ * queue should hold up; see the sizing sketch after these declarations.
+ * Orabug: 22287489
+ */
+int ipoib_cm_sendq_size __read_mostly = IPOIB_TX_RING_SIZE / IPOIB_CM_MAX_BAD_CONNS;
+int ipoib_cm_max_bad_conns = IPOIB_CM_MAX_BAD_CONNS;
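As a sanity check of these defaults (a stand-alone sketch, not part of the module): with the IPOIB_TX_RING_SIZE default of 128 and a divisor of 8, each CM connection may pin at most 16 sends, so four simultaneously bad connections hold 64 WRs and half the device send queue stays available for healthy peers.

#include <stdio.h>

enum {
	TX_RING_SIZE  = 128, /* IPOIB_TX_RING_SIZE */
	MAX_BAD_CONNS = 8,   /* IPOIB_CM_MAX_BAD_CONNS */
};

int main(void)
{
	int per_conn = TX_RING_SIZE / MAX_BAD_CONNS; /* 16 WRs per connection */
	int pinned   = 4 * per_conn;                 /* 4 bad connections pin 64 */

	printf("per-connection cap %d, worst-case pinned %d of %d\n",
	       per_conn, pinned, TX_RING_SIZE);
	return 0;
}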
module_param_named(module_unload_allowed, unload_allowed, int, 0444);
MODULE_PARM_DESC(module_unload_allowed, "Allow this module to be unloaded or not (default 1 for YES)");
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+module_param_named(cm_max_bad_conns, ipoib_cm_max_bad_conns, int, 0444);
+MODULE_PARM_DESC(cm_max_bad_conns, "Number of bad CM connections to tolerate while keeping data transfer to other nodes running (the default of 8 keeps traffic flowing with up to 4 bad connections)");
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;
}
#ifdef CONFIG_INFINIBAND_IPOIB_CM
+ if (ipoib_cm_max_bad_conns <= 0) {
+ pr_err("invalid value for cm_max_bad_conns %d, setting to default %d\n",
+ ipoib_cm_max_bad_conns, IPOIB_CM_MAX_BAD_CONNS);
+ ipoib_cm_max_bad_conns = IPOIB_CM_MAX_BAD_CONNS;
+ }
+
+ /* tx slots are indexed with tx_head & (ipoib_cm_sendq_size - 1),
+ * so round the per-connection depth up to a power of two.
+ */
+ ipoib_cm_sendq_size = ipoib_sendq_size / ipoib_cm_max_bad_conns;
+ if (ipoib_cm_sendq_size < 2)
+ ipoib_cm_sendq_size = 2;
+ ipoib_cm_sendq_size = roundup_pow_of_two(ipoib_cm_sendq_size);
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif
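Since tx slots are selected with tx_head & (ipoib_cm_sendq_size - 1), the depth must be a power of two for the mask to act as a modulo; a stand-alone demonstration (hypothetical, not part of the patch):

#include <assert.h>

int main(void)
{
	unsigned int head = 13;

	/* Power-of-two depth: mask and modulo agree. */
	assert((head & (16u - 1)) == head % 16u); /* both yield 13 */

	/* Non-power-of-two depth: they diverge. */
	assert((head & (12u - 1)) != head % 12u); /* 9 vs 1 */
	return 0;
}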