From 998a09fddada31d5fd0f31a3c5f4df3a409c74b3 Mon Sep 17 00:00:00 2001 From: Hans Westgaard Ry Date: Wed, 16 Mar 2016 14:01:02 +0100 Subject: [PATCH] IB/ipoib: Add handling for sending of skb with many frags MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Orabug: 21498734 IPoIB puts skb-fragments in SGEs adding 1 extra SGE when SG is enabled. Current codepath assumes that the max number of SGEs a device supports is at least MAX_SKB_FRAGS+1, there is no interaction with upper layers to limit number of fragments in an skb if a device supports fewer SGEs. The assumptions also lead to requesting a fixed number of SGEs when IPoIB creates queue-pairs with SG enabled. A fallback/slowpath is implemented using skb_linearize to handle cases where the conversion would result in more SGEs than supported. Change-Id: Ia81e69d7231987208ac298300fc5b9734f193a2d Signed-off-by: Hans Westgaard Ry Reviewed-by: Håkon Bugge Reviewed-by: Wei Lin Guay --- drivers/infiniband/ulp/ipoib/ipoib.h | 43 +++++++++++++++++++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 +++- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 + drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 5 ++- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 693b9da6a2637..1343ea796e880 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -260,6 +260,8 @@ struct ipoib_cm_tx { unsigned long flags; u32 mtu; u16 caps; + /* Used when checking for need to linearize SKBs with many frags */ + unsigned max_send_sge; }; struct ipoib_cm_rx_buf { @@ -407,6 +409,10 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + /* Used when checking for need to linearize SKBs with many frags */ + unsigned max_send_sge; + /* Device specific; obtained from query_device */ + unsigned max_sge; }; struct ipoib_ah { @@ -765,11 +771,15 @@ 
static inline int ipoib_register_debugfs(void) { return 0; } static inline void ipoib_unregister_debugfs(void) { } #endif +#define ipoib_dev_name(priv) (((struct ipoib_dev_priv *) priv)->dev->name) #define ipoib_printk(level, priv, format, arg...) \ - printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) + printk(level "%s: " format, ipoib_dev_name(priv), ## arg) #define ipoib_warn(priv, format, arg...) \ ipoib_printk(KERN_WARNING, priv, format , ## arg) +#define ipoib_warn_ratelimited(priv, format, arg...) \ + pr_warn_ratelimited("%s: " format, ipoib_dev_name(priv), ## arg) + extern int ipoib_sendq_size; extern int ipoib_recvq_size; @@ -810,4 +820,35 @@ extern int ipoib_debug_level; extern const char ipoib_driver_version[]; +static inline int ipoib_linearize_skb(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_dev_priv *priv, + unsigned max_send_sge) +{ + unsigned usable_sge = max_send_sge - !!skb_headlen(skb); + + if (skb_shinfo(skb)->nr_frags > usable_sge) { + if (skb_linearize(skb) < 0) { + ipoib_warn_ratelimited(priv, + "skb could not be linearized\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return -1; + } + + /* skb_linearize returned ok but still not reducing nr_frags */ + if (skb_shinfo(skb)->nr_frags > usable_sge) { + ipoib_warn_ratelimited(priv, + "too many frags after skb linearize\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return -1; + } + } + return 0; + +} + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d84531d49722b..e59d4ce87ddaa 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -794,6 +794,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); return; } + if (ipoib_linearize_skb(dev, skb, priv, 
tx->max_send_sge) < 0) + return; ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", tx->tx_head, skb->len, tx->qp->qp_num); @@ -1152,7 +1154,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + attr.cap.max_send_sge = + min_t(u32, priv->max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { @@ -1161,6 +1164,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } + tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } @@ -1714,6 +1718,7 @@ int ipoib_cm_dev_init(struct net_device *dev) ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge); + priv->max_sge = attr.max_sge; attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge); ipoib_cm_create_srq(dev, attr.max_srq_sge); if (ipoib_cm_has_srq(dev)) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 1fb82d84aaa4f..8f28dbda80db7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -578,6 +578,8 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, phead = NULL; hlen = 0; } + if (ipoib_linearize_skb(dev, skb, priv, priv->max_send_sge) < 0) + return; ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", skb->len, address, qpn); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 66d140358ba7f..bc5f636b74eb7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -207,7 +207,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; if (dev->features & NETIF_F_SG) - 
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + init_attr.cap.max_send_sge = + min_t(u32, priv->max_sge, MAX_SKB_FRAGS + 1); priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { @@ -234,6 +235,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; + priv->max_send_sge = init_attr.cap.max_send_sge; + return 0; out_free_send_cq: -- 2.50.1