xsigo: Fixed Path locking issues
author    Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
          Tue, 12 Jul 2016 20:10:56 +0000 (13:10 -0700)
committer Chuck Anderson <chuck.anderson@oracle.com>
          Tue, 12 Jul 2016 20:10:56 +0000 (13:10 -0700)
Orabug: 23514725

Changed xve_put_path() to take a do_lock argument so that it can also
be called from a context where the caller already holds the private
lock, priv->lock.

Removed the path_free() function and folded all of its functionality
into xve_put_path().

Based the UD scatter-gather decision on the admin MTU instead of the
multicast MTU: scatter-gather is not needed when the admin MTU is
small enough, as the admin MTU is the driving factor for the vnic.
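
The do_lock argument exists so that xve_put_path() can be called both
from contexts that already hold priv->lock and from contexts that do
not. Below is a minimal userspace sketch of that convention, with a
pthread mutex standing in for the kernel spinlock; the struct and
helper names are illustrative, not the driver's internals.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct vnic_priv {
        pthread_mutex_t lock;           /* stands in for priv->lock */
};

struct vnic_path {
        struct vnic_priv *priv;
        int users;                      /* stands in for the atomic path->users */
};

/* Drop one reference; tear the path down when the count reaches zero. */
static void put_path(struct vnic_path *path, int do_lock)
{
        if (--path->users)
                return;

        /*
         * The teardown (flushing L2 entries, dropping the address
         * handle) must run under the private lock.  A caller that
         * already holds it passes do_lock = 0; every other caller
         * passes do_lock = 1 and the lock is taken and released here.
         */
        if (do_lock)
                pthread_mutex_lock(&path->priv->lock);

        printf("flushing L2 entries and freeing the path\n");

        if (do_lock)
                pthread_mutex_unlock(&path->priv->lock);

        free(path);
}

int main(void)
{
        struct vnic_priv priv = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct vnic_path *path = calloc(1, sizeof(*path));

        if (!path)
                return 1;
        path->priv = &priv;
        path->users = 1;

        put_path(path, 1);              /* caller does not hold priv.lock */
        return 0;
}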

Signed-off-by: Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
Reviewed-by: sajid zia <szia@oracle.com>
Reviewed-by: Haakon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Asmund Ostvold <asmund.ostvold@oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
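
The MTU change bases the UD scatter-gather decision on the admin MTU
rather than the multicast/IB MTU. A rough, self-contained model of
that decision follows, assuming xve_ud_need_sg() has the same shape as
IPoIB's ipoib_ud_need_sg() (a second page fragment is needed only when
one datagram plus the GRH no longer fits in a single page); the
constants are examples, not the driver's values.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE       4096
#define IB_GRH_BYTES    40      /* global route header on UD receives */

static bool ud_need_sg(unsigned int mtu)
{
        /* Assumed decision: use a second (page) fragment only when a
         * whole datagram plus the GRH exceeds one page. */
        return mtu + IB_GRH_BYTES > PAGE_SIZE;
}

int main(void)
{
        /*
         * After this patch the admin MTU drives the decision: a vnic
         * administratively limited to 1500 bytes never needs SG, even
         * if the multicast/IB MTU would have allowed 4K datagrams.
         */
        printf("mtu=1500 -> need_sg=%d\n", ud_need_sg(1500));
        printf("mtu=4096 -> need_sg=%d\n", ud_need_sg(4096));
        return 0;
}
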
drivers/infiniband/ulp/xsigo/xve/xve.h
drivers/infiniband/ulp/xsigo/xve/xve_ib.c
drivers/infiniband/ulp/xsigo/xve/xve_main.c
drivers/infiniband/ulp/xsigo/xve/xve_stats.c
drivers/infiniband/ulp/xsigo/xve/xve_verbs.c

index f6339baad42e314964c33fca35792d92870a7e87..c30038d1df2e081a8beea65330485128caa64e6c 100644 (file)
@@ -1230,8 +1230,7 @@ void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt,
 struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val);
 bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
                         struct xve_fwt_entry *fwt_entry);
-void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
-                         int do_lock);
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path);
 int xve_aging_task_machine(struct xve_dev_priv *priv);
 void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb);
 void xve_tables_exit(void);
index ff2268685bb9fe1548afcecf1f8ae9d7e3ffcf58..1266e752db2fb905f625002539379de5b35cc203 100644 (file)
@@ -64,17 +64,14 @@ void xve_free_ah(struct kref *kref)
 {
        struct xve_ah *ah = container_of(kref, struct xve_ah, ref);
        struct xve_dev_priv *priv = netdev_priv(ah->dev);
-       unsigned long flags;
 
-       spin_lock_irqsave(&priv->lock, flags);
        list_add_tail(&ah->list, &priv->dead_ahs);
-       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
                                u64 mapping[XVE_UD_RX_EDR_SG])
 {
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+       if (xve_ud_need_sg(priv->admin_mtu)) {
                ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
                                    DMA_FROM_DEVICE);
                ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
@@ -92,7 +89,7 @@ static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
                struct sk_buff *skb,
                unsigned int length)
 {
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+       if (xve_ud_need_sg(priv->admin_mtu)) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
                unsigned int size;
                /*
@@ -140,7 +137,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
        u64 *mapping;
        int tailroom;
 
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+       if (xve_ud_need_sg(priv->admin_mtu)) {
                /* reserve some tailroom for IP/TCP headers */
                buf_size = XVE_UD_HEAD_SIZE;
                tailroom = 128;
@@ -168,7 +165,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
        if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
                goto error;
 
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+       if (xve_ud_need_sg(priv->admin_mtu)) {
                struct page *page = xve_alloc_page(GFP_ATOMIC);
 
                if (!page)
@@ -319,7 +316,7 @@ xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        if (unlikely(wc->status != IB_WC_SUCCESS)) {
                if (wc->status != IB_WC_WR_FLUSH_ERR) {
                        xve_warn(priv, "failed recv event ");
-                       xve_warn(priv, "(status=%d, wrid=%d vend_err 0x%x)\n",
+                       xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n",
                                 wc->status, wr_id, wc->vendor_err);
                }
                xve_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
@@ -525,7 +522,7 @@ static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
        if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) {
                xve_warn(priv, "failed send event ");
-               xve_warn(priv, "(status=%d, wrid=%d vend_err 0x%x)\n",
+               xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n",
                         wc->status, wr_id, wc->vendor_err);
        }
 }
@@ -585,7 +582,6 @@ int xve_poll(struct napi_struct *napi, int budget)
            container_of(napi, struct xve_dev_priv, napi);
        struct net_device *dev = priv->netdev;
        int done, n, t;
-       unsigned long flags = 0;
 
        done = 0;
 
@@ -603,9 +599,25 @@ int xve_poll(struct napi_struct *napi, int budget)
 poll_more:
        while (done < budget) {
                int max = (budget - done);
+               int i;
 
                t = min(XVE_NUM_WC, max);
-               n = poll_rx(priv, t, &done, 0);
+
+               n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
+               for (i = 0; i < n; i++) {
+                       struct ib_wc *wc = priv->ibwc + i;
+
+                       if (wc->wr_id & XVE_OP_RECV) {
+                               ++done;
+                               if (wc->wr_id & XVE_OP_CM)
+                                       xve_cm_handle_rx_wc(priv->netdev,
+                                                       wc);
+                               else
+                                       xve_ib_handle_rx_wc(priv->netdev,
+                                                       wc);
+                       } else
+                               xve_cm_handle_tx_wc(priv->netdev, wc);
+               }
                if (n != t)
                        break;
        }
@@ -616,31 +628,20 @@ poll_more:
 
                napi_complete(napi);
                clear_bit(XVE_OVER_QUOTA, &priv->state);
-       } else {
-               set_bit(XVE_OVER_QUOTA, &priv->state);
-               priv->counters[XVE_RX_QUOTA_EXCEEDED_COUNTER]++;
-               return done;
-       }
-
-       spin_lock_irqsave(&priv->lock, flags);
-       if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
-           test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
-           (test_bit(XVE_OPER_UP, &priv->state) ||
-               test_bit(XVE_HBEAT_LOST, &priv->state)) &&
-           !test_bit(XVE_DELETING, &priv->state)) {
-               set_bit(XVE_INTR_ENABLED, &priv->state);
-               if (unlikely
-                   (ib_req_notify_cq
-                    (priv->recv_cq,
-                     IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
-                   && napi_reschedule(napi)) {
-                       priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++;
-                       spin_unlock_irqrestore(&priv->lock, flags);
-                       goto poll_more;
+               if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+                               test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+                               (test_bit(XVE_OPER_UP, &priv->state) ||
+                                test_bit(XVE_HBEAT_LOST, &priv->state)) &&
+                               !test_bit(XVE_DELETING, &priv->state)) {
+                       set_bit(XVE_INTR_ENABLED, &priv->state);
+                       if (unlikely(ib_req_notify_cq(priv->recv_cq,
+                               IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
+                               && napi_reschedule(napi)) {
+                               priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++;
+                               goto poll_more;
+                       }
                }
        }
-       spin_unlock_irqrestore(&priv->lock, flags);
-
        return done;
 }
 
@@ -724,7 +725,8 @@ static inline int post_send(struct xve_dev_priv *priv,
 
        return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
-/* type argument is used to differentiate between the GATEWAY
+/* Always called with priv->lock held
+ * type argument is used to differentiate between the GATEWAY
  * and UVNIC packet.
  * 1 -> GATEWAY PACKET
  * 0 -> normal UVNIC PACKET
@@ -747,7 +749,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
                                 __func__, dev->stats.tx_dropped,
                                 dev->name);
                        INC_TX_DROP_STATS(priv, dev);
-                       INC_TX_ERROR_STATS(priv, dev);
                        xve_put_ah_refcnt(address);
                        dev_kfree_skb_any(skb);
                        return ret;
@@ -759,7 +760,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
                        xve_warn(priv, "send,dropping %ld packets %s\n",
                                        dev->stats.tx_dropped, dev->name);
                        INC_TX_DROP_STATS(priv, dev);
-                       INC_TX_ERROR_STATS(priv, dev);
                        xve_put_ah_refcnt(address);
                        dev_kfree_skb_any(skb);
                        return ret;
@@ -771,33 +771,6 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
        xve_dbg_data(priv,
                     "%s sending packet, length=%d address=%p qpn=0x%06x\n",
                     __func__, skb->len, address, qpn);
-
-       if (++priv->tx_outstanding  == priv->xve_sendq_size) {
-               if (type != 1) {
-                       /* UVNIC PACKET */
-                       xve_dbg_data(priv,
-                                    "%s TX ring full, stopping kernel net queue\n",
-                                    __func__);
-                       if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
-                               xve_warn(priv, "%s Req notify on send CQ failed\n",
-                                               __func__);
-                       priv->counters[XVE_TX_RING_FULL_COUNTER]++;
-                       priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
-                       netif_stop_queue(dev);
-               } else {
-                       /* GATEWAY PACKET */
-                       xve_dbg_data(priv,
-                               "%s TX ring full, Dropping the Gateway Packet\n",
-                                       __func__);
-                       xve_put_ah_refcnt(address);
-                       dev_kfree_skb(skb);
-                       poll_tx(priv);
-                       INC_TX_DROP_STATS(priv, dev);
-                       priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
-                       priv->counters[XVE_TX_RING_FULL_COUNTER]++;
-                       return ret;
-               }
-       }
        /*
         * We put the skb into the tx_ring _before_ we call post_send()
         * because it's entirely possible that the completion handler will
@@ -815,24 +788,50 @@ int xve_send(struct net_device *dev, struct sk_buff *skb,
                memset(tx_req, 0, sizeof(struct xve_tx_buf));
                return ret;
        }
+
+       /* Queue almost full */
+       if (++priv->tx_outstanding == priv->xve_sendq_size) {
+               xve_dbg_data(priv,
+                               "%s stop queue head%d out%d tail%d type%d",
+                               __func__, priv->tx_head, priv->tx_tail,
+                               priv->tx_outstanding, type);
+               if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+                       xve_warn(priv, "%s Req notify on send CQ failed\n",
+                                       __func__);
+               priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+               netif_stop_queue(dev);
+       }
+
+       if (netif_queue_stopped(dev)) {
+               int rc;
+
+               rc = ib_req_notify_cq(priv->send_cq,
+                               IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+               if (rc < 0)
+                       xve_warn(priv, "request notify on send CQ failed\n");
+               else if (rc)
+                       poll_tx(priv);
+        }
+       skb_orphan(skb);
+       skb_dst_drop(skb);
        if (unlikely(post_send(priv, priv->tx_head & (priv->xve_sendq_size - 1),
                               address->ah, qpn, tx_req, phead, hlen))) {
-               xve_warn(priv, "%s post_send failed\n", __func__);
-               INC_TX_ERROR_STATS(priv, dev);
+               xve_warn(priv, "%s post_send failed head%d tail%d out%d type%d\n",
+                               __func__, priv->tx_head, priv->tx_tail,
+                               priv->tx_outstanding, type);
                --priv->tx_outstanding;
                priv->counters[XVE_TX_RING_FULL_COUNTER]++;
                xve_put_ah_refcnt(address);
-               xve_free_txbuf_memory(priv, tx_req);
                if (netif_queue_stopped(dev)) {
                        priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
                        netif_wake_queue(dev);
                }
-       } else {
+               xve_free_txbuf_memory(priv, tx_req);
+       } else
                ++priv->tx_head;
-               skb_orphan(skb);
-       }
+
        priv->send_hbeat_flag = 0;
-       if (unlikely(priv->tx_outstanding > priv->xve_max_send_cqe))
+       if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
                poll_tx(priv);
        return ret;
 }
index ef1639c683019f249c7ffb07a91d561d873df0a3..344e1a1d7e569bff6c09ed133dc663e47712add2 100644 (file)
@@ -136,8 +136,6 @@ MODULE_PARM_DESC(xve_ud_mode, "Always use UD mode irrespective of xsmp.vnet_mode
 
 static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
                                   int len);
-static void path_free(struct net_device *netdev, struct xve_path *path);
-
 struct xve_path_iter {
        struct net_device *dev;
        struct xve_path path;
@@ -339,10 +337,39 @@ inline void xve_get_path(struct xve_path *path)
        atomic_inc(&path->users);
 }
 
-inline void xve_put_path(struct xve_path *path)
+inline void xve_put_path(struct xve_path *path, int do_lock)
 {
-       if (atomic_dec_and_test(&path->users))
-               path_free(path->dev, path);
+       struct xve_dev_priv *priv;
+       struct net_device *netdev;
+       struct sk_buff *skb;
+       unsigned long flags = 0;
+
+       if (atomic_dec_and_test(&path->users)) {
+               netdev = path->dev;
+               priv = netdev_priv(netdev);
+               while ((skb = __skb_dequeue(&path->queue)))
+                       dev_kfree_skb_irq(skb);
+
+               while ((skb = __skb_dequeue(&path->uplink_queue)))
+                       dev_kfree_skb_irq(skb);
+
+               if (do_lock)
+                       spin_lock_irqsave(&priv->lock, flags);
+               if (xve_cmtx_get(path)) {
+                       if (do_lock)
+                               spin_unlock_irqrestore(&priv->lock, flags);
+                       xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
+                       if (do_lock)
+                               spin_lock_irqsave(&priv->lock, flags);
+               }
+               xve_flush_l2_entries(netdev, path);
+               if (path->ah)
+                       xve_put_ah(path->ah);
+               if (do_lock)
+                       spin_unlock_irqrestore(&priv->lock, flags);
+
+               kfree(path);
+       }
 }
 
 struct xve_path *__path_find(struct net_device *netdev, void *gid)
@@ -399,47 +426,14 @@ static int __path_add(struct net_device *netdev, struct xve_path *path)
        return 0;
 }
 
-void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
-                         int do_lock)
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path)
 {
        struct xve_dev_priv *priv = netdev_priv(netdev);
        struct xve_fwt_entry *fwt_entry, *tn;
-       unsigned long flags = 0;
-
-       if (do_lock)
-               spin_lock_irqsave(&priv->lock, flags);
 
        list_for_each_entry_safe(fwt_entry, tn, &path->fwt_list, list)
                xve_fwt_entry_destroy(priv, fwt_entry);
 
-       if (do_lock)
-               spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-static void path_free(struct net_device *netdev, struct xve_path *path)
-{
-       struct xve_dev_priv *priv = netdev_priv(netdev);
-       struct sk_buff *skb;
-       unsigned long flags;
-
-       while ((skb = __skb_dequeue(&path->queue)))
-               dev_kfree_skb_irq(skb);
-
-       while ((skb = __skb_dequeue(&path->uplink_queue)))
-               dev_kfree_skb_irq(skb);
-
-       spin_lock_irqsave(&priv->lock, flags);
-       if (xve_cmtx_get(path)) {
-               spin_unlock_irqrestore(&priv->lock, flags);
-               xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
-               spin_lock_irqsave(&priv->lock, flags);
-       }
-       xve_flush_l2_entries(netdev, path, 0);
-       spin_unlock_irqrestore(&priv->lock, flags);
-
-       if (path->ah)
-               xve_put_ah(path->ah);
-       kfree(path);
 }
 
 /*
@@ -531,7 +525,7 @@ void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid)
 
        wait_for_completion(&path->done);
        list_del(&path->list);
-       xve_put_path(path);
+       xve_put_path(path, 1);
 }
 
 void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
@@ -601,10 +595,10 @@ static void path_rec_completion(int status,
        path->query = NULL;
        complete(&path->done);
 
-       spin_unlock_irqrestore(&priv->lock, flags);
-
        if (old_ah)
                xve_put_ah(old_ah);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
 
        while ((skb = __skb_dequeue(&skqueue))) {
                if (xve_is_edr(priv)) {
@@ -765,7 +759,7 @@ xve_path_lookup(struct net_device *dev,
        spin_unlock_irqrestore(&xve_fwt->lock, flags);
        if (!path->ah) {
                if (!path->query && path_rec_start(dev, path)) {
-                       xve_put_path(path);
+                       xve_put_path(path, 0);
                        return NULL;
                }
        }
@@ -823,7 +817,7 @@ int xve_gw_send(struct net_device *dev, struct sk_buff *skb)
        priv->counters[XVE_GW_MCAST_TX]++;
 
 out:
-       xve_put_path(path);
+       xve_put_path(path, 0);
        return ret;
 }
 
@@ -1039,7 +1033,7 @@ stats:
        priv->counters[XVE_TX_COUNTER]++;
 free_fwt_ctx:
        if (path)
-               xve_put_path(path);
+               xve_put_path(path, 0);
        xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
 unlock:
        if (inc_drop_cnt)
@@ -1599,11 +1593,10 @@ static int xve_state_machine(struct xve_dev_priv *priv)
                                XVE_HBEAT_LOSS_THRES*priv->hb_interval)) {
                        unsigned long flags = 0;
 
-                       xve_warn(priv, "Heart Beat Loss: %lu:%lu\n", jiffies,
-                               (unsigned long)priv->last_hbeat +
+                       xve_warn(priv, "Heart Beat Loss: %lu:%lu\n",
+                               jiffies, (unsigned long)priv->last_hbeat +
                                3*priv->hb_interval*HZ);
 
-                       xve_flush_paths(priv->netdev);
                        spin_lock_irqsave(&priv->lock, flags);
                        /* Disjoin from multicast Group */
                        set_bit(XVE_HBEAT_LOST, &priv->state);
@@ -1745,6 +1738,8 @@ xve_set_edr_features(struct xve_dev_priv *priv)
        priv->netdev->hw_features =
                NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_GRO;
 
+       priv->lro_mode = 1;
+
        if (xve_enable_offload) {
                if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM)
                        priv->netdev->hw_features |=
@@ -1756,6 +1751,13 @@ xve_set_edr_features(struct xve_dev_priv *priv)
        }
        priv->netdev->features |= priv->netdev->hw_features;
 
+       if (priv->lro_mode && lro) {
+               priv->netdev->features |= NETIF_F_LRO;
+               xve_lro_setup(priv);
+       } else {
+               priv->lro_mode = 0;
+       }
+
        /* Reserve extra space for EoIB header */
        priv->netdev->hard_header_len += sizeof(struct xve_eoib_hdr);
 }
index a6d09d9728e31d6a7f52bae8b2ace2e3c169b4c8..1401762c9c760864c9f49c198c15948f54ea8c77 100755 (executable)
@@ -457,7 +457,7 @@ static int xve_proc_read_device(struct seq_file *m, void *data)
        seq_printf(m, "Admin mtu:\t\t\t%d\n", vp->admin_mtu);
        seq_printf(m, "MCAST mtu:\t\t\t%d\n", vp->mcast_mtu);
        seq_printf(m, "IB MAX MTU: \t\t\t%d\n", vp->max_ib_mtu);
-       seq_printf(m, "SG for UD:\t\t\t%d\n", xve_ud_need_sg(vp->max_ib_mtu));
+       seq_printf(m, "SG UD Mode:\t\t\t%d\n", xve_ud_need_sg(vp->admin_mtu));
        seq_printf(m, "Max SG supported(HCA):\t\t%d\n", vp->dev_attr.max_sge);
 
        seq_printf(m, "Receive Queue size: \t\t%d\n", xve_recvq_size);
index 3cb7948bbe2f46e173ee9107e86b8a75c0d0488f..9cfbfc76223cca8b98de7c1bfd9eb7593951ed29 100644 (file)
@@ -231,7 +231,7 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        priv->tx_wr.send_flags = IB_SEND_SIGNALED;
 
        priv->rx_sge[0].lkey = priv->mr->lkey;
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+       if (xve_ud_need_sg(priv->admin_mtu)) {
                priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
                priv->rx_sge[1].length = PAGE_SIZE;
                priv->rx_sge[1].lkey = priv->mr->lkey;