From: Pradeep Gopanapalli
Date: Tue, 12 Jul 2016 20:10:56 +0000 (-0700)
Subject: xsigo: Fixed Path locking issues
X-Git-Tag: v4.1.12-92~108^2~4
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=e4c2a242704148bb823f8faac397a7e5b2152139;p=users%2Fjedix%2Flinux-maple.git

xsigo: Fixed Path locking issues

Orabug: 23514725

Changed xve_put_path() so that it can also be called when the caller
already holds the private lock, priv->lock.

Removed the path_free() function and moved all of its functionality
into xve_put_path().

There is no need to decide on scatter-gather from the multicast MTU;
base the decision on the admin MTU instead, as the admin MTU is the
driving factor for the vnic.

Signed-off-by: Pradeep Gopanapalli
Reviewed-by: sajid zia
Reviewed-by: Haakon Bugge
Reviewed-by: Asmund Ostvold
Reviewed-by: Yuval Shaia
---

diff --git a/drivers/infiniband/ulp/xsigo/xve/xve.h b/drivers/infiniband/ulp/xsigo/xve/xve.h
index f6339baad42e3..c30038d1df2e0 100644
--- a/drivers/infiniband/ulp/xsigo/xve/xve.h
+++ b/drivers/infiniband/ulp/xsigo/xve/xve.h
@@ -1230,8 +1230,7 @@ void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt,
 struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val);
 bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
                          struct xve_fwt_entry *fwt_entry);
-void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
-                          int do_lock);
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path);
 int xve_aging_task_machine(struct xve_dev_priv *priv);
 void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb);
 void xve_tables_exit(void);
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_ib.c b/drivers/infiniband/ulp/xsigo/xve/xve_ib.c
index ff2268685bb9f..1266e752db2fb 100644
--- a/drivers/infiniband/ulp/xsigo/xve/xve_ib.c
+++ b/drivers/infiniband/ulp/xsigo/xve/xve_ib.c
@@ -64,17 +64,14 @@ void xve_free_ah(struct kref *kref)
 {
     struct xve_ah *ah = container_of(kref, struct xve_ah, ref);
     struct xve_dev_priv *priv = netdev_priv(ah->dev);
-    unsigned long flags;
 
-    spin_lock_irqsave(&priv->lock, flags);
     list_add_tail(&ah->list, &priv->dead_ahs);
-    spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
                                 u64 mapping[XVE_UD_RX_EDR_SG])
 {
-    if (xve_ud_need_sg(priv->max_ib_mtu)) {
+    if (xve_ud_need_sg(priv->admin_mtu)) {
         ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
                             DMA_FROM_DEVICE);
         ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
@@ -92,7 +89,7 @@ static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
                                  struct sk_buff *skb, unsigned int length)
 {
-    if (xve_ud_need_sg(priv->max_ib_mtu)) {
+    if (xve_ud_need_sg(priv->admin_mtu)) {
         skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
         unsigned int size;
         /*
@@ -140,7 +137,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
     u64 *mapping;
     int tailroom;
 
-    if (xve_ud_need_sg(priv->max_ib_mtu)) {
+    if (xve_ud_need_sg(priv->admin_mtu)) {
         /* reserve some tailroom for IP/TCP headers */
         buf_size = XVE_UD_HEAD_SIZE;
         tailroom = 128;
@@ -168,7 +165,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
     if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
         goto error;
 
-    if (xve_ud_need_sg(priv->max_ib_mtu)) {
+    if (xve_ud_need_sg(priv->admin_mtu)) {
         struct page *page = xve_alloc_page(GFP_ATOMIC);
 
         if (!page)
@@ -319,7 +316,7 @@ xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
     if (unlikely(wc->status != IB_WC_SUCCESS)) {
         if (wc->status != IB_WC_WR_FLUSH_ERR) {
             xve_warn(priv, "failed recv event ");
-            xve_warn(priv, "(status=%d, wrid=%d vend_err 0x%x)\n",
xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); } xve_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); @@ -525,7 +522,7 @@ static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) { xve_warn(priv, "failed send event "); - xve_warn(priv, "(status=%d, wrid=%d vend_err 0x%x)\n", + xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); } } @@ -585,7 +582,6 @@ int xve_poll(struct napi_struct *napi, int budget) container_of(napi, struct xve_dev_priv, napi); struct net_device *dev = priv->netdev; int done, n, t; - unsigned long flags = 0; done = 0; @@ -603,9 +599,25 @@ int xve_poll(struct napi_struct *napi, int budget) poll_more: while (done < budget) { int max = (budget - done); + int i; t = min(XVE_NUM_WC, max); - n = poll_rx(priv, t, &done, 0); + + n = ib_poll_cq(priv->recv_cq, t, priv->ibwc); + for (i = 0; i < n; i++) { + struct ib_wc *wc = priv->ibwc + i; + + if (wc->wr_id & XVE_OP_RECV) { + ++done; + if (wc->wr_id & XVE_OP_CM) + xve_cm_handle_rx_wc(priv->netdev, + wc); + else + xve_ib_handle_rx_wc(priv->netdev, + wc); + } else + xve_cm_handle_tx_wc(priv->netdev, wc); + } if (n != t) break; } @@ -616,31 +628,20 @@ poll_more: napi_complete(napi); clear_bit(XVE_OVER_QUOTA, &priv->state); - } else { - set_bit(XVE_OVER_QUOTA, &priv->state); - priv->counters[XVE_RX_QUOTA_EXCEEDED_COUNTER]++; - return done; - } - - spin_lock_irqsave(&priv->lock, flags); - if (test_bit(XVE_OS_ADMIN_UP, &priv->state) && - test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) && - (test_bit(XVE_OPER_UP, &priv->state) || - test_bit(XVE_HBEAT_LOST, &priv->state)) && - !test_bit(XVE_DELETING, &priv->state)) { - set_bit(XVE_INTR_ENABLED, &priv->state); - if (unlikely - (ib_req_notify_cq - (priv->recv_cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) - && napi_reschedule(napi)) { - priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++; - spin_unlock_irqrestore(&priv->lock, flags); - goto poll_more; + if (test_bit(XVE_OS_ADMIN_UP, &priv->state) && + test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) && + (test_bit(XVE_OPER_UP, &priv->state) || + test_bit(XVE_HBEAT_LOST, &priv->state)) && + !test_bit(XVE_DELETING, &priv->state)) { + set_bit(XVE_INTR_ENABLED, &priv->state); + if (unlikely(ib_req_notify_cq(priv->recv_cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) + && napi_reschedule(napi)) { + priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++; + goto poll_more; + } } } - spin_unlock_irqrestore(&priv->lock, flags); - return done; } @@ -724,7 +725,8 @@ static inline int post_send(struct xve_dev_priv *priv, return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); } -/* type argument is used to differentiate between the GATEWAY +/* Always called with priv->lock held + * type argument is used to differentiate between the GATEWAY * and UVNIC packet. 
  * 1 -> GATEWAY PACKET
  * 0 -> normal UVNIC PACKET
@@ -747,7 +749,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
              __func__, dev->stats.tx_dropped, dev->name);
         INC_TX_DROP_STATS(priv, dev);
-        INC_TX_ERROR_STATS(priv, dev);
         xve_put_ah_refcnt(address);
         dev_kfree_skb_any(skb);
         return ret;
@@ -759,7 +760,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
         xve_warn(priv, "send,dropping %ld packets %s\n",
                  dev->stats.tx_dropped, dev->name);
         INC_TX_DROP_STATS(priv, dev);
-        INC_TX_ERROR_STATS(priv, dev);
         xve_put_ah_refcnt(address);
         dev_kfree_skb_any(skb);
         return ret;
@@ -771,33 +771,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
     xve_dbg_data(priv,
                  "%s sending packet, length=%d address=%p qpn=0x%06x\n",
                  __func__, skb->len, address, qpn);
-
-    if (++priv->tx_outstanding == priv->xve_sendq_size) {
-        if (type != 1) {
-            /* UVNIC PACKET */
-            xve_dbg_data(priv,
-                         "%s TX ring full, stopping kernel net queue\n",
-                         __func__);
-            if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
-                xve_warn(priv, "%s Req notify on send CQ failed\n",
-                         __func__);
-            priv->counters[XVE_TX_RING_FULL_COUNTER]++;
-            priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
-            netif_stop_queue(dev);
-        } else {
-            /* GATEWAY PACKET */
-            xve_dbg_data(priv,
-                         "%s TX ring full, Dropping the Gateway Packet\n",
-                         __func__);
-            xve_put_ah_refcnt(address);
-            dev_kfree_skb(skb);
-            poll_tx(priv);
-            INC_TX_DROP_STATS(priv, dev);
-            priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
-            priv->counters[XVE_TX_RING_FULL_COUNTER]++;
-            return ret;
-        }
-    }
     /*
      * We put the skb into the tx_ring _before_ we call post_send()
      * because it's entirely possible that the completion handler will
@@ -815,24 +788,50 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
         memset(tx_req, 0, sizeof(struct xve_tx_buf));
         return ret;
     }
+
+    /* Queue almost full */
+    if (++priv->tx_outstanding == priv->xve_sendq_size) {
+        xve_dbg_data(priv,
+                     "%s stop queue head%d out%d tail%d type%d",
+                     __func__, priv->tx_head, priv->tx_tail,
+                     priv->tx_outstanding, type);
+        if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+            xve_warn(priv, "%s Req notify on send CQ failed\n",
+                     __func__);
+        priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+        netif_stop_queue(dev);
+    }
+
+    if (netif_queue_stopped(dev)) {
+        int rc;
+
+        rc = ib_req_notify_cq(priv->send_cq,
+                              IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+        if (rc < 0)
+            xve_warn(priv, "request notify on send CQ failed\n");
+        else if (rc)
+            poll_tx(priv);
+    }
+
+    skb_orphan(skb);
+    skb_dst_drop(skb);
     if (unlikely(post_send(priv, priv->tx_head & (priv->xve_sendq_size - 1),
                            address->ah, qpn, tx_req, phead, hlen))) {
-        xve_warn(priv, "%s post_send failed\n", __func__);
-        INC_TX_ERROR_STATS(priv, dev);
+        xve_warn(priv, "%s post_send failed head%d tail%d out%d type%d\n",
+                 __func__, priv->tx_head, priv->tx_tail,
+                 priv->tx_outstanding, type);
         --priv->tx_outstanding;
         priv->counters[XVE_TX_RING_FULL_COUNTER]++;
         xve_put_ah_refcnt(address);
-        xve_free_txbuf_memory(priv, tx_req);
         if (netif_queue_stopped(dev)) {
            priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
            netif_wake_queue(dev);
         }
-    } else {
+        xve_free_txbuf_memory(priv, tx_req);
+    } else
         ++priv->tx_head;
-        skb_orphan(skb);
-    }
+
     priv->send_hbeat_flag = 0;
-    if (unlikely(priv->tx_outstanding > priv->xve_max_send_cqe))
+    if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
         poll_tx(priv);
     return ret;
 }
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_main.c b/drivers/infiniband/ulp/xsigo/xve/xve_main.c
index ef1639c683019..344e1a1d7e569 100644
--- a/drivers/infiniband/ulp/xsigo/xve/xve_main.c
+++ b/drivers/infiniband/ulp/xsigo/xve/xve_main.c
@@ -136,8 +136,6 @@ MODULE_PARM_DESC(xve_ud_mode, "Always use UD mode irrespective of xsmp.vnet_mode
 static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
                                    int len);
 
-static void path_free(struct net_device *netdev, struct xve_path *path);
-
 struct xve_path_iter {
     struct net_device *dev;
     struct xve_path path;
@@ -339,10 +337,39 @@ inline void xve_get_path(struct xve_path *path)
     atomic_inc(&path->users);
 }
 
-inline void xve_put_path(struct xve_path *path)
+inline void xve_put_path(struct xve_path *path, int do_lock)
 {
-    if (atomic_dec_and_test(&path->users))
-        path_free(path->dev, path);
+    struct xve_dev_priv *priv;
+    struct net_device *netdev;
+    struct sk_buff *skb;
+    unsigned long flags = 0;
+
+    if (atomic_dec_and_test(&path->users)) {
+        netdev = path->dev;
+        priv = netdev_priv(netdev);
+        while ((skb = __skb_dequeue(&path->queue)))
+            dev_kfree_skb_irq(skb);
+
+        while ((skb = __skb_dequeue(&path->uplink_queue)))
+            dev_kfree_skb_irq(skb);
+
+        if (do_lock)
+            spin_lock_irqsave(&priv->lock, flags);
+        if (xve_cmtx_get(path)) {
+            if (do_lock)
+                spin_unlock_irqrestore(&priv->lock, flags);
+            xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
+            if (do_lock)
+                spin_lock_irqsave(&priv->lock, flags);
+        }
+        xve_flush_l2_entries(netdev, path);
+        if (path->ah)
+            xve_put_ah(path->ah);
+        if (do_lock)
+            spin_unlock_irqrestore(&priv->lock, flags);
+
+        kfree(path);
+    }
 }
 
 struct xve_path *__path_find(struct net_device *netdev, void *gid)
@@ -399,47 +426,14 @@ static int __path_add(struct net_device *netdev, struct xve_path *path)
     return 0;
 }
 
-void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
-                          int do_lock)
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path)
 {
     struct xve_dev_priv *priv = netdev_priv(netdev);
     struct xve_fwt_entry *fwt_entry, *tn;
-    unsigned long flags = 0;
-
-    if (do_lock)
-        spin_lock_irqsave(&priv->lock, flags);
     list_for_each_entry_safe(fwt_entry, tn, &path->fwt_list, list)
         xve_fwt_entry_destroy(priv, fwt_entry);
-    if (do_lock)
-        spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-static void path_free(struct net_device *netdev, struct xve_path *path)
-{
-    struct xve_dev_priv *priv = netdev_priv(netdev);
-    struct sk_buff *skb;
-    unsigned long flags;
-
-    while ((skb = __skb_dequeue(&path->queue)))
-        dev_kfree_skb_irq(skb);
-
-    while ((skb = __skb_dequeue(&path->uplink_queue)))
-        dev_kfree_skb_irq(skb);
-
-    spin_lock_irqsave(&priv->lock, flags);
-    if (xve_cmtx_get(path)) {
-        spin_unlock_irqrestore(&priv->lock, flags);
-        xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
-        spin_lock_irqsave(&priv->lock, flags);
-    }
-    xve_flush_l2_entries(netdev, path, 0);
-    spin_unlock_irqrestore(&priv->lock, flags);
-
-    if (path->ah)
-        xve_put_ah(path->ah);
-    kfree(path);
 }
 
 /*
@@ -531,7 +525,7 @@ void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid)
     wait_for_completion(&path->done);
 
     list_del(&path->list);
-    xve_put_path(path);
+    xve_put_path(path, 1);
 }
 
 void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
@@ -601,10 +595,10 @@ static void path_rec_completion(int status,
     path->query = NULL;
     complete(&path->done);
-    spin_unlock_irqrestore(&priv->lock, flags);
-
     if (old_ah)
         xve_put_ah(old_ah);
+    spin_unlock_irqrestore(&priv->lock, flags);
+
     while ((skb = __skb_dequeue(&skqueue))) {
         if (xve_is_edr(priv)) {
@@ -765,7 +759,7 @@ xve_path_lookup(struct net_device *dev,
     spin_unlock_irqrestore(&xve_fwt->lock, flags);
     if (!path->ah) {
         if (!path->query && path_rec_start(dev, path)) {
-            xve_put_path(path);
+            xve_put_path(path, 0);
             return NULL;
         }
     }
@@ -823,7 +817,7 @@ int xve_gw_send(struct net_device *dev, struct sk_buff *skb)
     priv->counters[XVE_GW_MCAST_TX]++;
 
 out:
-    xve_put_path(path);
+    xve_put_path(path, 0);
     return ret;
 }
@@ -1039,7 +1033,7 @@ stats:
     priv->counters[XVE_TX_COUNTER]++;
 free_fwt_ctx:
     if (path)
-        xve_put_path(path);
+        xve_put_path(path, 0);
     xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
 unlock:
     if (inc_drop_cnt)
@@ -1599,11 +1593,10 @@ static int xve_state_machine(struct xve_dev_priv *priv)
                 XVE_HBEAT_LOSS_THRES*priv->hb_interval)) {
             unsigned long flags = 0;
 
-            xve_warn(priv, "Heart Beat Loss: %lu:%lu\n", jiffies,
-                     (unsigned long)priv->last_hbeat +
+            xve_warn(priv, "Heart Beat Loss: %lu:%lu\n",
+                     jiffies, (unsigned long)priv->last_hbeat +
                      3*priv->hb_interval*HZ);
-            xve_flush_paths(priv->netdev);
             spin_lock_irqsave(&priv->lock, flags);
             /* Disjoin from multicast Group */
             set_bit(XVE_HBEAT_LOST, &priv->state);
@@ -1745,6 +1738,8 @@ xve_set_edr_features(struct xve_dev_priv *priv)
     priv->netdev->hw_features =
         NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_GRO;
 
+    priv->lro_mode = 1;
+
     if (xve_enable_offload) {
         if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM)
             priv->netdev->hw_features |=
@@ -1756,6 +1751,13 @@
     }
 
     priv->netdev->features |= priv->netdev->hw_features;
+    if (priv->lro_mode && lro) {
+        priv->netdev->features |= NETIF_F_LRO;
+        xve_lro_setup(priv);
+    } else {
+        priv->lro_mode = 0;
+    }
+
     /* Reserve extra space for EoIB header */
     priv->netdev->hard_header_len += sizeof(struct xve_eoib_hdr);
 }
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_stats.c b/drivers/infiniband/ulp/xsigo/xve/xve_stats.c
index a6d09d9728e31..1401762c9c760 100755
--- a/drivers/infiniband/ulp/xsigo/xve/xve_stats.c
+++ b/drivers/infiniband/ulp/xsigo/xve/xve_stats.c
@@ -457,7 +457,7 @@ static int xve_proc_read_device(struct seq_file *m, void *data)
     seq_printf(m, "Admin mtu:\t\t\t%d\n", vp->admin_mtu);
     seq_printf(m, "MCAST mtu:\t\t\t%d\n", vp->mcast_mtu);
     seq_printf(m, "IB MAX MTU: \t\t\t%d\n", vp->max_ib_mtu);
-    seq_printf(m, "SG for UD:\t\t\t%d\n", xve_ud_need_sg(vp->max_ib_mtu));
+    seq_printf(m, "SG UD Mode:\t\t\t%d\n", xve_ud_need_sg(vp->admin_mtu));
     seq_printf(m, "Max SG supported(HCA):\t\t%d\n",
                vp->dev_attr.max_sge);
     seq_printf(m, "Receive Queue size: \t\t%d\n", xve_recvq_size);
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c b/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c
index 3cb7948bbe2f4..9cfbfc76223cc 100644
--- a/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c
+++ b/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c
@@ -231,7 +231,7 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
     priv->tx_wr.send_flags = IB_SEND_SIGNALED;
 
     priv->rx_sge[0].lkey = priv->mr->lkey;
-    if (xve_ud_need_sg(priv->max_ib_mtu)) {
+    if (xve_ud_need_sg(priv->admin_mtu)) {
         priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
         priv->rx_sge[1].length = PAGE_SIZE;
         priv->rx_sge[1].lkey = priv->mr->lkey;
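
Editor's note: the sketch below is illustrative only and is not part of the patch.
It shows the caller-side contract of the new xve_put_path(path, do_lock) signature
introduced above; the two example_* functions are hypothetical, only xve_put_path()
and priv->lock come from the driver.

    /* Caller that does NOT hold priv->lock: let xve_put_path() take it. */
    static void example_drop_path_unlocked(struct xve_path *path)
    {
        list_del(&path->list);
        xve_put_path(path, 1);    /* do_lock = 1: locks priv->lock internally */
    }

    /* Caller that is already running under priv->lock (e.g. a transmit context). */
    static void example_drop_path_locked(struct xve_path *path)
    {
        /* priv->lock is already held, so do_lock = 0 avoids a self-deadlock */
        xve_put_path(path, 0);
    }

This mirrors the real callers in the patch: xve_flush_single_path_by_gid() passes 1,
while xve_path_lookup(), xve_gw_send() and the transmit path pass 0.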