www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
1) Support vnic for EDR-based platforms (uVnic) 2) Supported Types now: Type 0 - XSMP_XC...
author Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
Thu, 5 Nov 2015 02:58:15 +0000 (18:58 -0800)
committer Qing Huang <qing.huang@oracle.com>
Mon, 9 Nov 2015 20:36:18 +0000 (12:36 -0800)
Reviewed-by: Sajid Zia <sajid.zia@oracle.com>
Signed-off-by: Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
Signed-off-by: Qing Huang <qing.huang@oracle.com>
drivers/infiniband/ulp/xsigo/xve/Makefile
drivers/infiniband/ulp/xsigo/xve/xve.h
drivers/infiniband/ulp/xsigo/xve/xve_cm.c
drivers/infiniband/ulp/xsigo/xve/xve_compat.h
drivers/infiniband/ulp/xsigo/xve/xve_ib.c
drivers/infiniband/ulp/xsigo/xve/xve_main.c
drivers/infiniband/ulp/xsigo/xve/xve_multicast.c
drivers/infiniband/ulp/xsigo/xve/xve_stats.c
drivers/infiniband/ulp/xsigo/xve/xve_tables.c
drivers/infiniband/ulp/xsigo/xve/xve_verbs.c
drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h

index 13f4dec4af4b0928c562e014ca382674998791f2..89d4f857dec788fea476d1975212e7dc2bd4245b 100644 (file)
@@ -2,7 +2,7 @@ obj-$(CONFIG_INFINIBAND_XVE) := xve.o
 xve-y := xve_main.o xve_verbs.o xve_multicast.o xve_ib.o xve_tables.o \
         xve_ethtool.o xve_cm.o xve_stats.o
 
-ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8008\"
 ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
 ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
 ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
index 5bd33b2831941e96db63498cc180bde24e4672da..fd6ea4e3f595f6e7b79798dc092bd1622cabd6cb 100644 (file)
 #define PREFIX_MULTI_ADDR      0x33
 /* ethernet header length */
 #define ETH_HDR_LEN            14
+#define        XVE_EOIB_MAGIC  0x8919
+#define        ETH_P_XVE_CTRL  0x8919
+#define        XVE_EOIB_LEN    4
+
+#define XVE_VNET_MODE_RC 1
+#define XVE_VNET_MODE_UD 2
+
+#define        XVE_MAX_RX_QUEUES       16
+#define        XVE_MAX_TX_QUEUES       16
 
 /* constants */
 enum xve_flush_level {
@@ -142,8 +151,9 @@ enum xve_flush_level {
 };
 
 enum {
-       XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN,
-       XVE_UD_RX_SG = 2,       /* max buffer needed for 4K mtu */
+       XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN + XVE_EOIB_LEN + 2048,
+       XVE_UD_RX_OVN_SG = 2,   /* max buffer needed for 4K mtu */
+       XVE_UD_RX_EDR_SG = 3,   /* max buffer needed for 10K mtu */
        XVE_CM_MTU = 0x10000 - 0x20,    /* padding to align header to 16 */
        XVE_CM_BUF_SIZE = XVE_CM_MTU + VLAN_ETH_HLEN,
        XVE_CM_HEAD_SIZE = XVE_CM_BUF_SIZE % PAGE_SIZE,
@@ -300,6 +310,11 @@ enum {
        XVE_EVENT_PKEY_CHANGE_COUNTER,
        XVE_INVALID_EVENT_COUNTER,
 
+       XVE_GW_MCAST_TX,
+       XVE_HBEAT_COUNTER,
+       XVE_LINK_STATUS_COUNTER,
+       XVE_RX_NOGRH,
+
        XVE_MAX_COUNTERS
 };
 
@@ -400,7 +415,8 @@ enum {
        DEBUG_CONTINUE_UNLOAD = 0x00002000,
        DEBUG_MISC_INFO = 0x00004000,
        DEBUG_IBDEV_INFO = 0x00008000,
-       DEBUG_CM_INFO = 0x00010000
+       DEBUG_CM_INFO = 0x00010000,
+       DEBUG_CTRL_INFO = 0x00020000
 };
 
 #define        XVE_OP_RECV   (1ul << 31)
@@ -433,13 +449,31 @@ enum {
 #define        XVE_OVER_QUOTA                  23
 #define        XVE_TSO_CHANGE                  24
 #define        XVE_RXBATCH_CHANGE              25
+#define        XVE_VNIC_READY_PENDING          26
+#define        XVE_HBEAT_LOST                  27
+#define        XVE_GW_STATE_UP                 28
+
 #define MODULE_NAME "XVE"
 #define ALIGN_TO_FF(a) (a & 0xff)
 #define XVE_FWT_ENTRY_VALID 1
 #define XVE_FWT_ENTRY_REFRESH 2
-#define XVE_UD_MTU(ib_mtu)             (ib_mtu - VLAN_ETH_HLEN)
-#define XVE_UD_BUF_SIZE(ib_mtu)        (ib_mtu + IB_GRH_BYTES + VLAN_ETH_HLEN)
-#define XVE_MIN_PACKET_LEN 60
+#define XVE_UD_MTU(ib_mtu)     (ib_mtu - (VLAN_ETH_HLEN + XVE_EOIB_LEN))
+#define XVE_UD_BUF_SIZE(ib_mtu)        (ib_mtu + IB_GRH_BYTES + \
+                               (VLAN_ETH_HLEN + XVE_EOIB_LEN))
+#define XVE_MIN_PACKET_LEN 64
+
+enum xcm_type {
+       XSMP_XCM_OVN,
+       XSMP_XCM_NOUPLINK,
+       XSMP_XCM_UPLINK
+};
+
+#define        xve_is_uplink(priv) ((priv)->vnic_type == XSMP_XCM_UPLINK)
+#define        xve_is_ovn(priv) ((priv)->vnic_type == XSMP_XCM_OVN)
+#define        xve_is_edr(priv) (!xve_is_ovn(priv))
+#define xve_gw_linkup(priv) test_bit(XVE_GW_STATE_UP, &(priv)->state)
+#define xve_ud_rx_sg(priv) (xve_is_edr(priv) ? XVE_UD_RX_EDR_SG : \
+                               XVE_UD_RX_OVN_SG)
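
For a sense of the arithmetic, here is a minimal userspace sketch (not part of the commit) of what the revised sizing macros yield for a 4K IB MTU, using the standard Linux values IB_GRH_BYTES = 40 and VLAN_ETH_HLEN = 18:

    #include <stdio.h>

    #define IB_GRH_BYTES   40  /* standard InfiniBand GRH size */
    #define VLAN_ETH_HLEN  18  /* Ethernet header + VLAN tag */
    #define XVE_EOIB_LEN    4  /* EoIB header added by this commit */

    #define XVE_UD_MTU(ib_mtu)      ((ib_mtu) - (VLAN_ETH_HLEN + XVE_EOIB_LEN))
    #define XVE_UD_BUF_SIZE(ib_mtu) ((ib_mtu) + IB_GRH_BYTES + \
                                     (VLAN_ETH_HLEN + XVE_EOIB_LEN))

    int main(void)
    {
            /* 4096 - 22 = 4074 payload bytes; 4096 + 40 + 22 = 4158
             * bytes of receive buffer, spread over 2 (OVN) or 3 (EDR)
             * scatter/gather entries per xve_ud_rx_sg(). */
            printf("UD MTU      = %d\n", XVE_UD_MTU(4096));
            printf("UD buf size = %d\n", XVE_UD_BUF_SIZE(4096));
            return 0;
    }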
 
 /*Extern declarations */
 extern int xve_debug_level;
@@ -447,6 +481,7 @@ extern int xve_cm_single_qp;
 extern u32 xve_hash_salt;
 extern int xve_sendq_size;
 extern int xve_recvq_size;
+extern int xve_max_send_cqe;
 extern struct ib_sa_client xve_sa_client;
 extern u32 xve_counters[];
 extern struct workqueue_struct *xve_taskqueue;
@@ -481,11 +516,12 @@ struct xve_mcast {
 
 struct xve_rx_buf {
        struct sk_buff *skb;
-       u64 mapping[XVE_UD_RX_SG];
+       u64 mapping[XVE_UD_RX_EDR_SG];
 };
 
 struct xve_tx_buf {
        struct sk_buff *skb;
+       struct xve_ah *ah;
        u64 mapping[MAX_SKB_FRAGS + 1];
 };
 
@@ -591,6 +627,46 @@ struct xve_fwt_s {
        unsigned num;
 };
 
+#define XVE_VNIC_HBEAT 1
+#define        XVE_VNIC_LINK_STATE 2
+
+#define        XVE_HBEAT_LOSS_THRES    3
+struct xve_keep_alive {
+       uint32_t pvi_id;
+       uint32_t type;
+       uint64_t tca_hbeat_cnt;
+       uint32_t uplink_status;
+} __packed;
+
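On the wire this arrives as an Ethernet frame with ethertype ETH_P_XVE_CTRL (0x8919) carrying the packed structure above as payload; xve_handle_ctrl_msg() below byte-swaps type and uplink_status with ntohl(), which implies the sender emits them in network order. A hypothetical userspace sketch of composing a heartbeat under that assumption (the TCA sender side is not part of this commit):

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    struct xve_keep_alive {             /* mirrors the driver layout */
            uint32_t pvi_id;
            uint32_t type;              /* 1 = HBEAT, 2 = LINK_STATE */
            uint64_t tca_hbeat_cnt;
            uint32_t uplink_status;     /* nonzero: gateway uplink up */
    } __attribute__((packed));

    /* Hypothetical helper, matching the ntohl() conversions done by
     * the receive handler in this commit. */
    static void fill_hbeat(struct xve_keep_alive *ka,
                           uint32_t pvi_id, uint64_t cnt, int uplink_up)
    {
            memset(ka, 0, sizeof(*ka));
            ka->pvi_id        = htonl(pvi_id);
            ka->type          = htonl(1);   /* XVE_VNIC_HBEAT */
            ka->tca_hbeat_cnt = cnt;        /* byte order not checked by RX */
            ka->uplink_status = htonl(uplink_up ? 1 : 0);
    }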
+struct xve_gw_info {
+       union ib_gid    t_gid;
+       u32 t_ctrl_qp;
+       u32 t_data_qp;
+       u32 t_qkey;
+       u16 t_pkey;
+};
+
+struct xve_eoib_hdr {
+       union {
+               struct { /* CX */
+                       __u8 encap_data;
+                       __u8 seg_off;
+                       __be16 seg_id;
+               };
+               struct { /* PSIF */
+                       __be16 magic;
+                       __be16 tss_mask_sz;
+               };
+       };
+} __packed;
+
+
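Both arms of the union above are exactly four bytes, matching XVE_EOIB_LEN; a compile-time sketch (userspace C11, not part of the commit) pinning the layout down:

    #include <stdint.h>

    struct eoib_hdr {                       /* mirrors xve_eoib_hdr */
            union {
                    struct {                /* CX */
                            uint8_t  encap_data;
                            uint8_t  seg_off;
                            uint16_t seg_id;        /* __be16 on the wire */
                    };
                    struct {                /* PSIF */
                            uint16_t magic;         /* XVE_EOIB_MAGIC 0x8919 */
                            uint16_t tss_mask_sz;
                    };
            };
    } __attribute__((packed));

    /* On EDR vNICs this header sits between the GRH and the Ethernet
     * header; the RX path strips it with
     * skb_pull(skb, sizeof(struct xve_eoib_hdr)). */
    _Static_assert(sizeof(struct eoib_hdr) == 4, "EoIB header must be 4 bytes");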
+struct xve_rx_cm_info {
+       struct ib_sge           rx_sge[XVE_CM_RX_SG];
+       struct ib_recv_wr       rx_wr;
+};
+
+
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -608,9 +684,13 @@ struct xve_dev_priv {
        struct ib_qp *qp;
        union ib_gid local_gid;
        union ib_gid bcast_mgid;
+       __be16       bcast_mlid;
        u16 local_lid;
        u32 qkey;
 
+       /* Device attributes */
+       struct ib_device_attr dev_attr;
+
        /* Netdev related attributes */
        struct net_device *netdev;
        struct net_device_stats stats;
@@ -636,6 +716,9 @@ struct xve_dev_priv {
        unsigned long jiffies;
        struct xve_fwt_s xve_fwt;
        int aging_delay;
+       void *pci;
+       uint32_t hb_interval;
+       uint64_t last_hbeat;
 
        struct xve_cm_dev_priv cm;
        unsigned int cm_supported;
@@ -650,8 +733,10 @@ struct xve_dev_priv {
        unsigned int mcast_mtu;
        unsigned int max_ib_mtu;
        char mode[64];
-
        /* TX and RX Ring attributes */
+       int xve_recvq_size;
+       int xve_sendq_size;
+       int xve_max_send_cqe;
        struct xve_rx_buf *rx_ring;
        struct xve_tx_buf *tx_ring;
        unsigned tx_head;
@@ -661,7 +746,8 @@ struct xve_dev_priv {
        struct ib_send_wr tx_wr;
        struct ib_wc send_wc[MAX_SEND_CQE];
        struct ib_recv_wr rx_wr;
-       struct ib_sge rx_sge[XVE_UD_RX_SG];
+       /* Allocate EDR SG for now */
+       struct ib_sge rx_sge[XVE_UD_RX_EDR_SG];
        struct ib_wc ibwc[XVE_NUM_WC];
        struct ib_cq *recv_cq;
        struct ib_cq *send_cq;
@@ -674,9 +760,12 @@ struct xve_dev_priv {
        u64 resource_id;
        u64 mac;
        u32 net_id;
+       u32 install_flag;
        u16 mp_flag;
-       char vnet_mode;
+       u8 vnet_mode;
+       u8 vnic_type;
        char xve_name[XVE_MAX_NAME_SIZE];
+       struct xve_gw_info gw;
 
        /* Proc related attributes */
        struct proc_dir_entry *nic_dir;
@@ -696,7 +785,7 @@ struct xve_ah {
        struct ib_ah *ah;
        struct list_head list;
        struct kref ref;
-       unsigned last_send;
+       atomic_t refcnt;
 };
 
 struct ib_packed_grh {
@@ -724,7 +813,10 @@ struct xve_path {
        struct rb_node rb_node;
        struct list_head list;
        int valid;
+       int index;
        struct sk_buff_head queue;
+       struct sk_buff_head uplink_queue;
+       atomic_t users;
 };
 
 struct xve_work {
@@ -790,14 +882,6 @@ struct icmp6_ndp {
                dev->stats.rx_bytes += len;                     \
        } while (0)
 
-#define SET_FLUSH_BIT(priv, bit)                               \
-       do {                                                    \
-               unsigned long flags;                            \
-               spin_lock_irqsave(&priv->lock, flags);          \
-               set_bit(bit, &priv->state);                     \
-               spin_unlock_irqrestore(&priv->lock, flags);     \
-       } while (0)
-
 #define PRINT(level, x, fmt, arg...)                           \
        printk(level "%s: " fmt, MODULE_NAME, ##arg)
 #define XSMP_ERROR(fmt, arg...)                                        \
@@ -807,18 +891,18 @@ struct icmp6_ndp {
                ((struct xve_dev_priv *) priv)->netdev->name,   \
                ## arg)
 #define xve_warn(priv, format, arg...)                         \
-       xve_printk(KERN_WARNING, priv, format , ## arg)
+       xve_printk(KERN_WARNING, priv, format, ## arg)
 
 #define XSMP_INFO(fmt, arg...)                                 \
        do {                                                    \
                if (xve_debug_level & DEBUG_XSMP_INFO)          \
-                       PRINT(KERN_DEBUG, "XSMP", fmt , ## arg);\
+                       PRINT(KERN_DEBUG, "XSMP", fmt, ## arg);\
        } while (0)
 
 #define xve_test(fmt, arg...)                                  \
        do {                                                    \
                if (xve_debug_level & DEBUG_TEST_INFO)          \
-                       PRINT(KERN_DEBUG, "DEBUG", fmt , ## arg); \
+                       PRINT(KERN_DEBUG, "DEBUG", fmt, ## arg); \
        } while (0)
 
 #define xve_dbg_data(priv, format, arg...)                     \
@@ -827,10 +911,16 @@ struct icmp6_ndp {
                        xve_printk(KERN_DEBUG, priv, format,    \
                        ## arg);                                \
        } while (0)
+#define xve_dbg_ctrl(priv, format, arg...)                     \
+       do {                                                    \
+               if (xve_debug_level & DEBUG_CTRL_INFO)          \
+                       xve_printk(KERN_DEBUG, priv, format,    \
+                       ## arg);                                \
+       } while (0)
 #define xve_dbg_mcast(priv, format, arg...)                    \
        do {                                                    \
                if (xve_debug_level & DEBUG_MCAST_INFO)         \
-                       xve_printk(KERN_ERR, priv, format , ## arg); \
+                       xve_printk(KERN_ERR, priv, format, ## arg); \
        } while (0)
 #define xve_debug(level, priv, format, arg...)                         \
        do {                                                            \
@@ -899,6 +989,8 @@ static inline void xve_send_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
 
        if (netdev->features & NETIF_F_LRO)
                lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+       else if (netdev->features & NETIF_F_GRO)
+               napi_gro_receive(&priv->napi, skb);
        else
                netif_receive_skb(skb);
 
@@ -1018,8 +1110,11 @@ static inline void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
                if (length == 0) {
                        /* don't need this page */
-                       skb_fill_page_desc(toskb, i, skb_frag_page(frag),
-                                          0, PAGE_SIZE);
+                       if (toskb)
+                               skb_fill_page_desc(toskb, i, skb_frag_page(frag)
+                                               , 0, PAGE_SIZE);
+                       else
+                               __free_page(skb_shinfo(skb)->frags[i].page.p);
                        --skb_shinfo(skb)->nr_frags;
                } else {
                        size = min_t(unsigned, length, (unsigned)PAGE_SIZE);
@@ -1046,11 +1141,20 @@ static inline void xve_put_ah(struct xve_ah *ah)
        kref_put(&ah->ref, xve_free_ah);
 }
 
+static inline void xve_put_ah_refcnt(struct xve_ah *address)
+{
+       atomic_dec(&address->refcnt);
+}
+static inline void xve_get_ah_refcnt(struct xve_ah *address)
+{
+       atomic_inc(&address->refcnt);
+}
+
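Together these replace the old last_send bookkeeping: the TX path takes a reference on the address handle for every work request it posts and drops it on completion or on any error path, so the AH reaper can destroy a dead AH as soon as its count reaches zero. Condensed from the hunks later in this commit:

    /* post side (path_rec_completion): pin the AH across the send */
    xve_get_ah_refcnt(path->ah);
    ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1);

    /* completion side (xve_ib_handle_tx_wc): unpin */
    xve_put_ah_refcnt(tx_req->ah);

    /* reaper (__xve_reap_ah): free once fully unreferenced */
    if (atomic_read(&ah->refcnt) == 0) {
            list_del(&ah->list);
            ib_destroy_ah(ah->ah);
            kfree(ah);
    }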
 int xve_open(struct net_device *dev);
 int xve_add_pkey_attr(struct net_device *dev);
 
-void xve_send(struct net_device *dev, struct sk_buff *skb,
-             struct xve_ah *address, u32 qpn);
+int xve_send(struct net_device *dev, struct sk_buff *skb,
+             struct xve_ah *address, u32 qpn, int type);
 int poll_tx(struct xve_dev_priv *priv);
 int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state);
 void handle_carrier_state(struct xve_dev_priv *priv, char state);
@@ -1096,7 +1200,7 @@ void xve_remove_fwt_entry(struct xve_dev_priv *priv,
 void xve_fwt_entry_free(struct xve_dev_priv *priv,
                        struct xve_fwt_entry *fwt_entry);
 
-void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 void xve_advert_mcast_join(struct xve_dev_priv *priv);
 int xve_mcast_start_thread(struct net_device *dev);
 int xve_mcast_stop_thread(struct net_device *dev, int flush);
@@ -1129,7 +1233,7 @@ int xve_send_hbeat(struct xve_dev_priv *xvep);
 void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id);
 
 /*CM */
-void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+int xve_cm_send(struct net_device *dev, struct sk_buff *skb,
                 struct xve_cm_ctx *tx);
 int xve_cm_dev_open(struct net_device *dev);
 void xve_cm_dev_stop(struct net_device *dev);
@@ -1163,9 +1267,11 @@ void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb);
 void xve_tables_exit(void);
 void xve_remove_one(struct xve_dev_priv *priv);
 struct xve_path *__path_find(struct net_device *netdev, void *gid);
-extern int xve_add_proc_entry(struct xve_dev_priv *vp);
+int xve_add_proc_entry(struct xve_dev_priv *vp);
 void xve_remove_proc_entry(struct xve_dev_priv *vp);
-extern int xve_change_rxbatch(struct xve_dev_priv *xvep, int flag);
+int xve_gw_send(struct net_device *priv, struct sk_buff *skb);
+struct xve_path *xve_get_gw_path(struct net_device *dev);
+void xve_set_oper_up_state(struct xve_dev_priv *priv);
 
 static inline int xve_continue_unload(void)
 {
@@ -1179,7 +1285,7 @@ static inline int xve_get_misc_info(void)
 
 static inline int xg_vlan_tx_tag_present(struct sk_buff *skb)
 {
-       struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+       struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
 
        return veth->h_vlan_proto == htons(ETH_P_8021Q);
 }
index 7c68f8f760b48f8921bd6ca4aceb92dc93baccd7..caf4e8aa53d6d75c316eadc269f3ccbaf31e819a 100644 (file)
@@ -81,14 +81,15 @@ static int xve_cm_post_receive_srq(struct net_device *netdev, int id)
 {
        struct xve_dev_priv *priv = netdev_priv(netdev);
        struct ib_recv_wr *bad_wr;
+       struct ib_recv_wr *wr = &priv->cm.rx_wr;
        int i, ret;
 
-       priv->cm.rx_wr.wr_id = id | XVE_OP_CM | XVE_OP_RECV;
+       wr->wr_id = id | XVE_OP_CM | XVE_OP_RECV;
 
        for (i = 0; i < priv->cm.num_frags; ++i)
                priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
-       ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+       ret = ib_post_srq_recv(priv->cm.srq, wr, &bad_wr);
        if (unlikely(ret)) {
                xve_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
                xve_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
@@ -171,7 +172,7 @@ static void xve_cm_free_rx_ring(struct net_device *dev,
        struct xve_dev_priv *priv = netdev_priv(dev);
        int i;
 
-       for (i = 0; i < xve_recvq_size; ++i) {
+       for (i = 0; i < priv->xve_recvq_size; ++i) {
                if (rx_ring[i].skb) {
                        xve_cm_dma_unmap_rx(priv, XVE_CM_RX_SG - 1,
                                            rx_ring[i].mapping);
@@ -463,7 +464,7 @@ void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        xve_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
                     wr_id, wc->status);
 
-       if (unlikely(wr_id >= xve_recvq_size)) {
+       if (unlikely(wr_id >= priv->xve_recvq_size)) {
                if (wr_id ==
                    (XVE_CM_RX_DRAIN_WRID & ~(XVE_OP_CM | XVE_OP_RECV))) {
                        spin_lock_irqsave(&priv->lock, flags);
@@ -475,7 +476,7 @@ void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                } else
                        xve_warn(priv,
                                 "cm recv completion event with wrid %d (> %d)\n",
-                                wr_id, xve_recvq_size);
+                                wr_id, priv->xve_recvq_size);
                return;
        }
 
@@ -619,12 +620,13 @@ static void xve_cm_tx_buf_free(struct xve_dev_priv *priv,
        memset(tx_req, 0, sizeof(struct xve_cm_buf));
 }
 
-void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+int xve_cm_send(struct net_device *dev, struct sk_buff *skb,
                 struct xve_cm_ctx *tx)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_cm_buf *tx_req;
        u64 addr;
+       int ret = NETDEV_TX_OK;
 
        if (unlikely(skb->len > tx->mtu + VLAN_ETH_HLEN)) {
                xve_warn(priv,
@@ -633,7 +635,7 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
                INC_TX_DROP_STATS(priv, dev);
                INC_TX_ERROR_STATS(priv, dev);
                dev_kfree_skb_any(skb);
-               return;
+               return ret;
        }
 
        xve_dbg_data(priv,
@@ -647,25 +649,27 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
         * means we have to make sure everything is properly recorded and
         * our state is consistent before we call post_send().
         */
-       tx_req = &tx->tx_ring[tx->tx_head & (xve_sendq_size - 1)];
+       tx_req = &tx->tx_ring[tx->tx_head & (priv->xve_sendq_size - 1)];
        tx_req->skb = skb;
        addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
        if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
                INC_TX_ERROR_STATS(priv, dev);
                dev_kfree_skb_any(skb);
                memset(tx_req, 0, sizeof(struct xve_cm_buf));
-               return;
+               return ret;
        }
        tx_req->mapping[0] = addr;
 
-       if (unlikely(post_send(priv, tx, tx->tx_head & (xve_sendq_size - 1),
+       if (unlikely(post_send(priv, tx, tx->tx_head &
+                              (priv->xve_sendq_size - 1),
                               addr, skb->len))) {
                xve_warn(priv, "post_send failed\n");
                INC_TX_ERROR_STATS(priv, dev);
                xve_cm_tx_buf_free(priv, tx_req);
        } else {
+               dev->trans_start = jiffies;
                ++tx->tx_head;
-               if (++priv->tx_outstanding == xve_sendq_size) {
+               if (++priv->tx_outstanding == priv->xve_sendq_size) {
                        xve_dbg_data(priv,
                                     "TX ring 0x%x full, stopping kernel net queue\n",
                                     tx->qp->qp_num);
@@ -678,10 +682,11 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
                }
        }
        priv->send_hbeat_flag = 0;
-
+       return ret;
 }
 
-void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+void xve_cm_handle_tx_wc(struct net_device *dev,
+               struct ib_wc *wc)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_cm_ctx *tx = wc->qp->qp_context;
@@ -691,18 +696,18 @@ void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
        xve_dbg_data(priv, "cm send completion: id %d, status: %d\n",
                     wr_id, wc->status);
 
-       if (unlikely(wr_id >= xve_sendq_size)) {
+       if (unlikely(wr_id >= priv->xve_sendq_size)) {
                xve_warn(priv, "cm send completion event with wrid %d (> %d)\n",
-                        wr_id, xve_sendq_size);
+                        wr_id, priv->xve_sendq_size);
                return;
        }
 
        tx_req = &tx->tx_ring[wr_id];
        xve_cm_tx_buf_free(priv, tx_req);
-       ++tx->tx_tail;
 
        netif_tx_lock(dev);
-       if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+       ++tx->tx_tail;
+       if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) &&
            netif_queue_stopped(dev) &&
            test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
                priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
@@ -893,7 +898,7 @@ static struct ib_qp *xve_cm_create_tx_qp(struct net_device *dev,
                .send_cq = priv->recv_cq,
                .recv_cq = priv->recv_cq,
                .srq = priv->cm.srq,
-               .cap.max_send_wr = xve_sendq_size,
+               .cap.max_send_wr = priv->xve_sendq_size,
                .cap.max_send_sge = 1,
                .sq_sig_type = IB_SIGNAL_ALL_WR,
                .qp_type = IB_QPT_RC,
@@ -976,13 +981,13 @@ static int xve_cm_tx_init(struct xve_cm_ctx *p, struct ib_sa_path_rec *pathrec)
        struct xve_dev_priv *priv = netdev_priv(p->netdev);
        int ret;
 
-       p->tx_ring = vmalloc(xve_sendq_size * sizeof(*p->tx_ring));
+       p->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*p->tx_ring));
        if (!p->tx_ring) {
                xve_warn(priv, "failed to allocate tx ring\n");
                ret = -ENOMEM;
                goto err_tx;
        }
-       memset(p->tx_ring, 0, xve_sendq_size * sizeof(*p->tx_ring));
+       memset(p->tx_ring, 0, priv->xve_sendq_size * sizeof(*p->tx_ring));
 
        p->qp = xve_cm_create_tx_qp(p->netdev, p);
        if (IS_ERR(p->qp)) {
@@ -1048,7 +1053,8 @@ static void xve_cm_tx_destroy(struct xve_cm_ctx *p)
                /* Wait for all sends to complete */
                if (!netif_carrier_ok(priv->netdev)
                    && unlikely(priv->tx_outstanding > MAX_SEND_CQE))
-                       while (poll_tx(priv)); /* nothing */
+                       while (poll_tx(priv))
+                               ; /* nothing */
 
                begin = jiffies;
                while ((int)p->tx_tail - (int)p->tx_head < 0) {
@@ -1067,14 +1073,17 @@ timeout:
 
        spin_lock_irqsave(&priv->lock, flags);
        while ((int)p->tx_tail - (int)p->tx_head < 0) {
-               tx_req = &p->tx_ring[p->tx_tail & (xve_sendq_size - 1)];
+               tx_req = &p->tx_ring[p->tx_tail & (priv->xve_sendq_size - 1)];
+
+
                ++p->tx_tail;
                spin_unlock_irqrestore(&priv->lock, flags);
 
                xve_cm_tx_buf_free(priv, tx_req);
                netif_tx_lock_bh(p->netdev);
-               if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
-                   netif_queue_stopped(p->netdev) &&
+               if (unlikely(--priv->tx_outstanding ==
+                                       (priv->xve_sendq_size >> 1))
+                   && netif_queue_stopped(p->netdev) &&
                    test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
                        priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
                        netif_wake_queue(p->netdev);
@@ -1211,7 +1220,6 @@ void xve_cm_tx_start(struct work_struct *work)
        spin_unlock_irqrestore(&priv->lock, flags);
        netif_tx_unlock_bh(dev);
        xve_put_ctx(priv);
-
 }
 
 static void __xve_cm_tx_reap(struct xve_dev_priv *priv)
@@ -1296,7 +1304,7 @@ static void xve_cm_create_srq(struct net_device *dev, int max_sge)
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct ib_srq_init_attr srq_init_attr = {
                .attr = {
-                        .max_wr = xve_recvq_size,
+                        .max_wr = priv->xve_recvq_size,
                         .max_sge = max_sge}
        };
 
@@ -1310,17 +1318,17 @@ static void xve_cm_create_srq(struct net_device *dev, int max_sge)
        }
 
        priv->cm.srq_ring =
-           vmalloc(xve_recvq_size * sizeof(*priv->cm.srq_ring));
+           vmalloc(priv->xve_recvq_size * sizeof(*priv->cm.srq_ring));
        if (!priv->cm.srq_ring) {
                pr_warn("%s: failed to allocate CM SRQ ring (%d entries)\n",
-                       priv->ca->name, xve_recvq_size);
+                       priv->ca->name, priv->xve_recvq_size);
                ib_destroy_srq(priv->cm.srq);
                priv->cm.srq = NULL;
                return;
        }
 
        memset(priv->cm.srq_ring, 0,
-              xve_recvq_size * sizeof(*priv->cm.srq_ring));
+              priv->xve_recvq_size * sizeof(*priv->cm.srq_ring));
 }
 
 int xve_cm_dev_init(struct net_device *dev)
@@ -1343,6 +1351,8 @@ int xve_cm_dev_init(struct net_device *dev)
                return ret;
        }
 
+       priv->dev_attr = attr;
+
        /* Based on the admin mtu from the chassis */
        attr.max_srq_sge =
            min_t(int,
@@ -1366,7 +1376,7 @@ int xve_cm_dev_init(struct net_device *dev)
        xve_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
 
        if (xve_cm_has_srq(dev)) {
-               for (i = 0; i < xve_recvq_size; ++i) {
+               for (i = 0; i < priv->xve_recvq_size; ++i) {
                        if (!xve_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
                                                 priv->cm.num_frags - 1,
                                                 priv->cm.
index cd24547330d4d1b83c6af82ced72e6c13ab91bb1..7f41e15fabd38c752c6a843e09c6d45007190107 100644 (file)
 #define        XVE_OP_CM     (1ul << 30)
 
 #include <net/icmp.h>
+
+/* for kernel >= 3.8 */
+#define random32 prandom_u32
+
 static inline void skb_pkt_type(struct sk_buff *skb, unsigned char type)
 {
        skb->pkt_type = type;
@@ -54,7 +58,7 @@ static inline void xg_skb_push(struct sk_buff *skb, unsigned int len)
 
 static inline unsigned xve_random32(struct xve_dev_priv *priv)
 {
-       return (prandom_u32() & 0xffffff);
+       return random32() & 0xffffff;
 }
 
 static inline struct proc_dir_entry *xg_create_proc_entry(const char *name,
index 85ebdb06da1abf64868690777a6737f226131f9d..548f0c31d1db6eaf7a07ec54cd72bdaf4a55763e 100644 (file)
@@ -45,16 +45,17 @@ struct xve_ah *xve_create_ah(struct net_device *dev,
                return NULL;
 
        ah->dev = dev;
-       ah->last_send = 0;
        kref_init(&ah->ref);
 
        ah->ah = ib_create_ah(pd, attr);
        if (IS_ERR(ah->ah)) {
                kfree(ah);
                ah = NULL;
-       } else
+       } else {
+               atomic_set(&ah->refcnt, 0);
                xve_debug(DEBUG_MCAST_INFO, netdev_priv(dev),
                          "%s Created ah %p\n", __func__, ah->ah);
+       }
 
        return ah;
 }
@@ -71,7 +72,7 @@ void xve_free_ah(struct kref *kref)
 }
 
 static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
-                               u64 mapping[XVE_UD_RX_SG])
+                               u64 mapping[XVE_UD_RX_EDR_SG])
 {
        if (xve_ud_need_sg(priv->max_ib_mtu)) {
                ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
@@ -79,34 +80,11 @@ static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
                ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
                                  DMA_FROM_DEVICE);
                xve_counters[XVE_NUM_PAGES_ALLOCED]--;
-       } else
+       } else {
                ib_dma_unmap_single(priv->ca, mapping[0],
                                    XVE_UD_BUF_SIZE(priv->max_ib_mtu),
                                    DMA_FROM_DEVICE);
-}
-
-static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
-                                struct sk_buff *skb, unsigned int length)
-{
-       if (xve_ud_need_sg(priv->max_ib_mtu)) {
-               skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
-               unsigned int size;
-               /*
-                * There is only two buffers needed for max_payload = 4K,
-                * first buf size is XVE_UD_HEAD_SIZE
-                */
-               skb->tail += XVE_UD_HEAD_SIZE;
-               skb->len += length;
-
-               size = length - XVE_UD_HEAD_SIZE;
-
-               frag->size = size;
-               skb->data_len += size;
-               skb->truesize += size;
-       } else {
-               skb_put(skb, length);
        }
-
 }
 
 static int xve_ib_post_receive(struct net_device *dev, int id)
@@ -134,7 +112,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
-       int buf_size;
+       int buf_size, align;
        u64 *mapping;
 
        if (xve_ud_need_sg(priv->max_ib_mtu))
@@ -142,16 +120,18 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
        else
                buf_size = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
 
-       skb = xve_dev_alloc_skb(priv, buf_size + 10);
-       if (unlikely(!skb))
-               return NULL;
-
        /*
         * Eth header is 14 bytes, IB will leave a 40 byte gap for a GRH
         * so we need 10 more bytes to get to 64 and align the
-        * IP header to a multiple of 16.
+        * IP header to a multiple of 16. EDR vNICs will have an additional
+        * 4-byte EoIB header.
         */
-       skb_reserve(skb, 10);
+       align = xve_is_ovn(priv) ? 10 : 6;
+       skb = xve_dev_alloc_skb(priv, buf_size + align);
+       if (unlikely(!skb))
+               return NULL;
+
+       skb_reserve(skb, align);
 
        mapping = priv->rx_ring[id].mapping;
        mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
@@ -187,7 +167,7 @@ static int xve_ib_post_receives(struct net_device *dev)
        struct xve_dev_priv *priv = netdev_priv(dev);
        int i;
 
-       for (i = 0; i < xve_recvq_size; ++i) {
+       for (i = 0; i < priv->xve_recvq_size; ++i) {
                if (!xve_alloc_rx_skb(dev, i)) {
                        xve_warn(priv,
                                 "%s failed to allocate ib receive buffer %d\n",
@@ -205,22 +185,102 @@ static int xve_ib_post_receives(struct net_device *dev)
        return 0;
 }
 
-static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+static void xve_link_up(struct xve_dev_priv *priv)
+{
+       if (test_bit(XVE_FLAG_ADMIN_UP, &priv->flags) &&
+                       test_bit(XVE_CHASSIS_ADMIN_UP, &priv->flags)) {
+               if (test_and_clear_bit(XVE_HBEAT_LOST, &priv->state)) {
+                       xve_set_oper_up_state(priv);
+                       xve_xsmp_send_oper_state(priv, priv->resource_id,
+                               XSMP_XVE_OPER_UP);
+               }
+               handle_carrier_state(priv, 1);
+       }
+}
+
+void xve_process_link_state(struct xve_dev_priv *priv,
+               struct xve_keep_alive *ka)
+{
+       uint32_t state = ntohl(ka->uplink_status);
+
+       if (state) {
+               set_bit(XVE_GW_STATE_UP, &priv->state);
+               priv->hb_interval = 30*HZ;
+
+               if (!netif_carrier_ok(priv->netdev))
+                       xve_link_up(priv);
+       } else {
+               clear_bit(XVE_GW_STATE_UP, &priv->state);
+               priv->hb_interval = 15*HZ;
+               if (netif_carrier_ok(priv->netdev))
+                       handle_carrier_state(priv, 0);
+       }
+}
+
+void xve_update_hbeat(struct xve_dev_priv *priv)
+{
+       priv->last_hbeat = jiffies;
+}
+
+void xve_process_hbeat(struct xve_dev_priv *priv, struct xve_keep_alive *ka)
+{
+       xve_process_link_state(priv, ka);
+       xve_update_hbeat(priv);
+}
+
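The loss detection side is not in this hunk; under the definitions above (XVE_HBEAT_LOSS_THRES == 3, hb_interval of 30*HZ while the gateway is up), a watchdog along these lines would flag heartbeat loss after roughly 90 seconds of silence. A hypothetical sketch, not the commit's code:

    /* Hypothetical check, assuming the threshold multiplies the
     * advertised interval; xve_ignore_hbeat_loss would override it. */
    static int xve_hbeat_lost(struct xve_dev_priv *priv)
    {
            if (!priv->hb_interval)
                    return 0;
            return time_after(jiffies, (unsigned long)(priv->last_hbeat +
                              XVE_HBEAT_LOSS_THRES * priv->hb_interval));
    }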
+void xve_handle_ctrl_msg(struct xve_dev_priv *priv,
+               struct sk_buff *skb, struct ethhdr *eh)
+{
+       struct xve_keep_alive *ka;
+
+       skb_pull(skb, ETH_HLEN);
+
+       if (!pskb_may_pull(skb, sizeof(*ka)))
+               goto skb_free;
+
+       ka = (struct xve_keep_alive *) skb->data;
+       xve_dbg_ctrl(priv, "RX CTRL_MSG: ethtype: 0x%x, type:%d, state: 0x%x\n",
+               ntohs(eh->h_proto), ntohl(ka->type),
+               ntohl(ka->uplink_status));
+
+       switch (ntohl(ka->type)) {
+       case XVE_VNIC_HBEAT:
+               xve_process_hbeat(priv, ka);
+               priv->counters[XVE_HBEAT_COUNTER]++;
+               break;
+
+       case XVE_VNIC_LINK_STATE:
+               xve_process_link_state(priv, ka);
+               priv->counters[XVE_LINK_STATUS_COUNTER]++;
+               break;
+
+       default:
+               xve_dbg_ctrl(priv, "Unknown control message type: %u\n",
+                       ntohl(ka->type));
+       }
+
+skb_free:
+       dev_kfree_skb_any(skb);
+}
+
+static void
+xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        unsigned int wr_id = wc->wr_id & ~XVE_OP_RECV;
+       struct ethhdr   *eh;
        struct sk_buff *skb;
-       u64 mapping[XVE_UD_RX_SG];
+       u64 mapping[XVE_UD_RX_EDR_SG];
        struct ib_packed_grh *grhhdr;
-       char *smac;
        u16 vlan;
 
-       xve_dbg_data(priv, "recv completion: id %d, status: %d\n",
-                    wr_id, wc->status);
+       xve_dbg_data(priv, "recv completion: id %d, QP%x, status: %d\n",
+                    wr_id, wc->src_qp, wc->status);
 
-       if (unlikely(wr_id >= xve_recvq_size)) {
+
+       if (unlikely(wr_id >= priv->xve_recvq_size)) {
                xve_warn(priv, "recv completion event with wrid %d (> %d)\n",
-                        wr_id, xve_recvq_size);
+                        wr_id, priv->xve_recvq_size);
                return;
        }
 
@@ -242,11 +302,12 @@ static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
         * Drop packets that this interface sent, ie multicast packets
         * that the HCA has replicated.
         */
-       if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+       if (wc->slid == priv->local_lid &&
+                       (wc->src_qp & ~(0x3UL)) == priv->qp->qp_num)
                goto repost;
 
        memcpy(mapping, priv->rx_ring[wr_id].mapping,
-              XVE_UD_RX_SG * sizeof(*mapping));
+              XVE_UD_RX_EDR_SG * sizeof(*mapping));
 
        /*
         * If we can't allocate a new RX buffer, dump
@@ -257,18 +318,51 @@ static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                goto repost;
        }
 
+
        xve_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
                     wc->byte_len, wc->slid);
 
        xve_ud_dma_unmap_rx(priv, mapping);
-       xve_ud_skb_put_frags(priv, skb, wc->byte_len);
-
+       skb_put_frags(skb, XVE_UD_HEAD_SIZE, wc->byte_len, NULL);
        grhhdr = (struct ib_packed_grh *)(skb->data);
-       smac = skb->data + IB_GRH_BYTES + ETH_ALEN;
+       /* Dumps the packet when the driver is in debug mode */
+       dumppkt(skb->data, skb->len, "UD Packet Dump");
        skb_pull(skb, IB_GRH_BYTES);
+
+       if (xve_is_edr(priv)) {
+               struct xve_eoib_hdr *eoibp;
+
+               eoibp = (struct xve_eoib_hdr *)skb_pull(skb, sizeof(*eoibp));
+       }
+
+       if (!pskb_may_pull(skb, ETH_HLEN)) {
+               dev_kfree_skb_any(skb);
+               INC_RX_DROP_STATS(priv, dev);
+               goto repost;
+       }
+
+       skb_reset_mac_header(skb);
+       eh = eth_hdr(skb);
+       if (ntohs(eh->h_proto) == ETH_P_XVE_CTRL) { /* heart beat/link status */
+               xve_handle_ctrl_msg(priv, skb, eh);
+               goto repost;
+       }
+
        vlan = xg_vlan_get_rxtag(skb);
-       xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, smac, vlan);
+       if (wc->wc_flags & IB_WC_GRH) {
+               xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp,
+                               eh->h_source, vlan);
+       } else {
+               xve_dbg_data(priv,
+                       "No GRH, not used for fwt learning smac %pM, vlan:%u\n",
+                        &eh->h_source, vlan);
+               priv->counters[XVE_RX_NOGRH]++;
+       }
        xve_prepare_skb(priv, skb);
+       if (((skb->dev->features & NETIF_F_RXCSUM) &&
+                       likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) ||
+                       test_bit(XVE_FLAG_CSUM, &priv->flags))
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
 
        xve_test("%s RX UD pkt %02x %02x %02x %02x %02x %02x %02x %02x %02x",
                 __func__, skb->data[0], skb->data[1], skb->data[2],
@@ -373,18 +467,26 @@ static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
        xve_dbg_data(priv, "send completion: id %d, status: %d\n",
                     wr_id, wc->status);
 
-       if (unlikely(wr_id >= xve_sendq_size)) {
+       if (unlikely(wr_id >= priv->xve_sendq_size)) {
                xve_warn(priv, "send completion event with wrid %d (> %d)\n",
-                        wr_id, xve_sendq_size);
+                        wr_id, priv->xve_sendq_size);
                return;
        }
 
        tx_req = &priv->tx_ring[wr_id];
+       if ((tx_req == NULL) || (tx_req->ah == NULL)) {
+               xve_debug(DEBUG_DATA_INFO, priv,
+                               "%s [ca %p] wr_id%d content NULL\n",
+                               __func__, priv->ca, wr_id);
+               return;
+       }
+
+       xve_put_ah_refcnt(tx_req->ah);
        xve_free_txbuf_memory(priv, tx_req);
 
        ++priv->tx_tail;
 
-       if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+       if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) &&
            netif_queue_stopped(dev) &&
            test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
                priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
@@ -465,7 +567,8 @@ int xve_poll(struct napi_struct *napi, int budget)
        /*
         * If not connected complete it
         */
-       if (!test_bit(XVE_OPER_UP, &priv->state)) {
+       if (!(test_bit(XVE_OPER_UP, &priv->state) ||
+               test_bit(XVE_HBEAT_LOST, &priv->state))) {
                napi_complete(&priv->napi);
                clear_bit(XVE_INTR_ENABLED, &priv->state);
                return 0;
@@ -496,7 +599,8 @@ poll_more:
        spin_lock_irqsave(&priv->lock, flags);
        if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
            test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
-           test_bit(XVE_OPER_UP, &priv->state) &&
+           (test_bit(XVE_OPER_UP, &priv->state) ||
+               test_bit(XVE_HBEAT_LOST, &priv->state)) &&
            !test_bit(XVE_DELETING, &priv->state)) {
                set_bit(XVE_INTR_ENABLED, &priv->state);
                if (unlikely
@@ -530,14 +634,16 @@ void xve_data_recv_handler(struct xve_dev_priv *priv)
 {
 
        if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
-           test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
-           test_bit(XVE_OPER_UP, &priv->state) &&
-           !test_bit(XVE_DELETING, &priv->state)) {
+                       test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+                       (test_bit(XVE_OPER_UP, &priv->state) ||
+                                test_bit(XVE_HBEAT_LOST, &priv->state)) &&
+                       !test_bit(XVE_DELETING, &priv->state)) {
                priv->counters[XVE_NAPI_SCHED_COUNTER]++;
                clear_bit(XVE_INTR_ENABLED, &priv->state);
                napi_schedule(&priv->napi);
-       } else
+       } else {
                priv->counters[XVE_NAPI_NOTSCHED_COUNTER]++;
+       }
 }
 
 void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
@@ -559,6 +665,7 @@ static inline int post_send(struct xve_dev_priv *priv,
                            struct xve_tx_buf *tx_req, void *head, int hlen)
 {
        struct ib_send_wr *bad_wr;
+       struct ib_send_wr *wr = &priv->tx_wr;
        int i, off;
        struct sk_buff *skb = tx_req->skb;
        skb_frag_t *frags = skb_shinfo(skb)->frags;
@@ -576,29 +683,34 @@ static inline int post_send(struct xve_dev_priv *priv,
                priv->tx_sge[i + off].addr = mapping[i + off];
                priv->tx_sge[i + off].length = frags[i].size;
        }
-       priv->tx_wr.num_sge = nr_frags + off;
-       priv->tx_wr.wr_id = wr_id;
-       priv->tx_wr.wr.ud.remote_qpn = qpn;
-       priv->tx_wr.wr.ud.ah = address;
-
+       wr->num_sge = nr_frags + off;
+       wr->wr_id = wr_id;
+       wr->wr.ud.remote_qpn = qpn;
+       wr->wr.ud.ah = address;
        if (head) {
-               priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
-               priv->tx_wr.wr.ud.header = head;
-               priv->tx_wr.wr.ud.hlen = hlen;
-               priv->tx_wr.opcode = IB_WR_LSO;
-       } else
-               priv->tx_wr.opcode = IB_WR_SEND;
+               wr->wr.ud.mss = skb_shinfo(skb)->gso_size;
+               wr->wr.ud.header = head;
+               wr->wr.ud.hlen = hlen;
+               wr->opcode = IB_WR_LSO;
+       } else {
+               wr->opcode = IB_WR_SEND;
+       }
 
        return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
-
-void xve_send(struct net_device *dev, struct sk_buff *skb,
-             struct xve_ah *address, u32 qpn)
+/* type argument is used to differentiate between the GATEWAY
+ * and UVNIC packet.
+ * 1 -> GATEWAY PACKET
+ * 0 -> normal UVNIC PACKET
+ */
+int xve_send(struct net_device *dev, struct sk_buff *skb,
+             struct xve_ah *address, u32 qpn, int type)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_tx_buf *tx_req;
        int hlen;
        void *phead;
+       int ret = NETDEV_TX_OK;
 
        if (skb_is_gso(skb)) {
                hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -606,22 +718,25 @@ void xve_send(struct net_device *dev, struct sk_buff *skb,
                if (unlikely(!skb_pull(skb, hlen))) {
                        xve_warn(priv,
                                 "%s linear data too small dropping %ld packets %s\n",
-                                __func__, dev->stats.tx_dropped, dev->name);
+                                __func__, dev->stats.tx_dropped,
+                                dev->name);
                        INC_TX_DROP_STATS(priv, dev);
                        INC_TX_ERROR_STATS(priv, dev);
+                       xve_put_ah_refcnt(address);
                        dev_kfree_skb_any(skb);
-                       return;
+                       return ret;
                }
        } else {
                if (unlikely(skb->len > priv->mcast_mtu + VLAN_ETH_HLEN)) {
-                       xve_warn(priv, "%s packet len %d", __func__, skb->len);
+                       xve_warn(priv, "%s packet len %d", __func__, skb->len);
                        xve_warn(priv, "(> %d) too long to", priv->mcast_mtu);
                        xve_warn(priv, "send,dropping %ld packets %s\n",
-                                dev->stats.tx_dropped, dev->name);
+                                       dev->stats.tx_dropped, dev->name);
                        INC_TX_DROP_STATS(priv, dev);
                        INC_TX_ERROR_STATS(priv, dev);
+                       xve_put_ah_refcnt(address);
                        dev_kfree_skb_any(skb);
-                       return;
+                       return ret;
                }
                phead = NULL;
                hlen = 0;
@@ -631,6 +746,32 @@ void xve_send(struct net_device *dev, struct sk_buff *skb,
                     "%s sending packet, length=%d address=%p qpn=0x%06x\n",
                     __func__, skb->len, address, qpn);
 
+       if (++priv->tx_outstanding  == priv->xve_sendq_size) {
+               if (type != 1) {
+                       /* UVNIC PACKET */
+                       xve_dbg_data(priv,
+                                    "%s TX ring full, stopping kernel net queue\n",
+                                    __func__);
+                       if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+                               xve_warn(priv, "%s Req notify on send CQ failed\n",
+                                               __func__);
+                       priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+                       priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+                       netif_stop_queue(dev);
+               } else {
+                       /* GATEWAY PACKET */
+                       xve_dbg_data(priv,
+                               "%s TX ring full, Dropping the Gateway Packet\n",
+                                       __func__);
+                       xve_put_ah_refcnt(address);
+                       dev_kfree_skb(skb);
+                       poll_tx(priv);
+                       INC_TX_DROP_STATS(priv, dev);
+                       priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
+                       priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+                       return ret;
+               }
+       }
        /*
         * We put the skb into the tx_ring _before_ we call post_send()
         * because it's entirely possible that the completion handler will
@@ -638,46 +779,36 @@ void xve_send(struct net_device *dev, struct sk_buff *skb,
         * means we have to make sure everything is properly recorded and
         * our state is consistent before we call post_send().
         */
-       tx_req = &priv->tx_ring[priv->tx_head & (xve_sendq_size - 1)];
+       tx_req = &priv->tx_ring[priv->tx_head & (priv->xve_sendq_size - 1)];
        tx_req->skb = skb;
+       tx_req->ah = address;
        if (unlikely(xve_dma_map_tx(priv->ca, tx_req))) {
                INC_TX_ERROR_STATS(priv, dev);
+               xve_put_ah_refcnt(address);
                dev_kfree_skb_any(tx_req->skb);
                memset(tx_req, 0, sizeof(struct xve_tx_buf));
-               return;
-       }
-
-       if (++priv->tx_outstanding == xve_sendq_size) {
-               xve_dbg_data(priv,
-                            "%s TX ring full, stopping kernel net queue\n",
-                            __func__);
-               if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
-                       xve_warn(priv, "%s request notify on send CQ failed\n",
-                                __func__);
-               priv->counters[XVE_TX_RING_FULL_COUNTER]++;
-               priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
-               netif_stop_queue(dev);
+               return ret;
        }
-
-       if (unlikely(post_send(priv, priv->tx_head & (xve_sendq_size - 1),
+       if (unlikely(post_send(priv, priv->tx_head & (priv->xve_sendq_size - 1),
                               address->ah, qpn, tx_req, phead, hlen))) {
                xve_warn(priv, "%s post_send failed\n", __func__);
                INC_TX_ERROR_STATS(priv, dev);
                --priv->tx_outstanding;
                priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+               xve_put_ah_refcnt(address);
                xve_free_txbuf_memory(priv, tx_req);
                if (netif_queue_stopped(dev)) {
                        priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
                        netif_wake_queue(dev);
                }
        } else {
-               address->last_send = priv->tx_head;
                ++priv->tx_head;
                skb_orphan(skb);
        }
        priv->send_hbeat_flag = 0;
-       if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+       if (unlikely(priv->tx_outstanding > priv->xve_max_send_cqe))
                poll_tx(priv);
+       return ret;
 }
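
Since the new type argument is a bare 0/1 flag, call sites would read better with a named constant; a possible cleanup sketch (not part of the commit):

    enum xve_send_type {
            XVE_SEND_UVNIC   = 0,  /* stop the net queue when the TX ring fills */
            XVE_SEND_GATEWAY = 1,  /* drop instead of stopping, then poll_tx() */
    };

    /* e.g. the uplink path in path_rec_completion() would become: */
    ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, XVE_SEND_GATEWAY);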
 
 static void __xve_reap_ah(struct net_device *dev)
@@ -690,12 +821,13 @@ static void __xve_reap_ah(struct net_device *dev)
        netif_tx_lock_bh(dev);
        spin_lock_irqsave(&priv->lock, flags);
 
-       list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
-               if ((int)priv->tx_tail - (int)ah->last_send >= 0) {
+       list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) {
+               if (atomic_read(&ah->refcnt) == 0) {
                        list_del(&ah->list);
                        ib_destroy_ah(ah->ah);
                        kfree(ah);
                }
+       }
 
        spin_unlock_irqrestore(&priv->lock, flags);
        netif_tx_unlock_bh(dev);
@@ -760,6 +892,8 @@ int xve_ib_dev_up(struct net_device *dev)
        }
 
        set_bit(XVE_FLAG_OPER_UP, &priv->flags);
+       priv->hb_interval = 30*HZ;
+       xve_update_hbeat(priv);
 
        return xve_mcast_start_thread(dev);
 }
@@ -798,7 +932,7 @@ static int recvs_pending(struct net_device *dev)
        int pending = 0;
        int i;
 
-       for (i = 0; i < xve_recvq_size; ++i)
+       for (i = 0; i < priv->xve_recvq_size; ++i)
                if (priv->rx_ring[i].skb)
                        ++pending;
 
@@ -919,13 +1053,13 @@ int xve_ib_dev_stop(struct net_device *dev, int flush)
                         */
                        while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
                                tx_req = &priv->tx_ring[priv->tx_tail &
-                                                       (xve_sendq_size - 1)];
+                                       (priv->xve_sendq_size - 1)];
                                xve_free_txbuf_memory(priv, tx_req);
                                ++priv->tx_tail;
                                --priv->tx_outstanding;
                        }
 
-                       for (i = 0; i < xve_recvq_size; ++i) {
+                       for (i = 0; i < priv->xve_recvq_size; ++i) {
                                struct xve_rx_buf *rx_req;
 
                                rx_req = &priv->rx_ring[i];
index dfcc700697c12992f633fe3eb739ac2a94f347b6..91c29cee2b67600b13ba0986a4b68674ab477f80 100644 (file)
@@ -56,6 +56,10 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, xve_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
 
+int xve_max_send_cqe __read_mostly = MAX_SEND_CQE;
+module_param_named(max_send_cqe, xve_max_send_cqe, int, 0444);
+MODULE_PARM_DESC(max_send_cqe, "Threshold for polling send completion queue");
+
 static int napi_weight = 128;
 module_param(napi_weight, int, 0644);
 
@@ -98,8 +102,41 @@ int xve_do_arp = 1;
 module_param_named(do_arp, xve_do_arp, int, 0644);
 MODULE_PARM_DESC(do_arp, "Enable/Disable ARP for NIC MTU less than IB-MTU");
 
+int xve_ignore_hbeat_loss;
+module_param_named(ignore_hb_loss, xve_ignore_hbeat_loss, int, 0644);
+MODULE_PARM_DESC(ignore_hb_loss, "Ignore heart beat loss on edr based vNICs with uplink");
+
+int xve_enable_offload;
+module_param_named(enable_offload, xve_enable_offload, int, 0444);
+MODULE_PARM_DESC(enable_offload, "Enable stateless offload");
+
+unsigned long xve_tca_subnet;
+module_param(xve_tca_subnet, ulong, 0444);
+MODULE_PARM_DESC(xve_tca_subnet, "tca subnet prefix");
+
+unsigned long xve_tca_guid;
+module_param(xve_tca_guid, ulong, 0444);
+MODULE_PARM_DESC(xve_tca_guid, "TCA GUID");
+
+unsigned int xve_tca_data_qp;
+module_param(xve_tca_data_qp, uint, 0444);
+MODULE_PARM_DESC(xve_tca_data_qp, "tca data qp number");
+
+unsigned int xve_tca_pkey;
+module_param(xve_tca_pkey, uint, 0444);
+MODULE_PARM_DESC(xve_tca_pkey, "tca pkey");
+
+unsigned int xve_tca_qkey;
+module_param(xve_tca_qkey, uint, 0444);
+MODULE_PARM_DESC(xve_tca_qkey, "tca qkey");
+
+unsigned int xve_ud_mode;
+module_param(xve_ud_mode, uint, 0444);
+MODULE_PARM_DESC(xve_ud_mode, "Always use UD mode irrespective of xsmp.vnet_mode value");
+
 static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
                                   int len);
+static void path_free(struct net_device *netdev, struct xve_path *path);
 
 struct xve_path_iter {
        struct net_device *dev;
@@ -148,9 +185,15 @@ int xve_open(struct net_device *netdev)
        priv->counters[XVE_OPEN_COUNTER]++;
 
        spin_lock_irqsave(&priv->lock, flags);
+       if (test_bit(XVE_VNIC_READY_PENDING, &priv->state)) {
+               spin_unlock_irqrestore(&priv->lock, flags);
+               return -EAGAIN;
+       }
        set_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
        set_bit(XVE_OPER_UP, &priv->state);
        set_bit(XVE_OS_ADMIN_UP, &priv->state);
+       if (xve_is_uplink(priv))
+               set_bit(XVE_GW_STATE_UP, &priv->state);
        priv->port_speed = xve_calc_speed(priv);
        spin_unlock_irqrestore(&priv->lock, flags);
 
@@ -192,6 +235,8 @@ static int xve_stop(struct net_device *netdev)
 
        xve_ib_dev_down(netdev, 0);
        xve_ib_dev_stop(netdev, 0);
+       xve_xsmp_send_oper_state(priv, priv->resource_id,
+                        XSMP_XVE_OPER_DOWN);
 
        pr_info("XVE: %s Finished Stopping interface %s\n", __func__,
                priv->xve_name);
@@ -289,6 +334,17 @@ static int xve_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        return ret;
 }
 
+inline void xve_get_path(struct xve_path *path)
+{
+       atomic_inc(&path->users);
+}
+
+inline void xve_put_path(struct xve_path *path)
+{
+       if (atomic_dec_and_test(&path->users))
+               path_free(path->dev, path);
+}
+
 struct xve_path *__path_find(struct net_device *netdev, void *gid)
 {
        struct xve_dev_priv *priv = netdev_priv(netdev);
@@ -338,6 +394,7 @@ static int __path_add(struct net_device *netdev, struct xve_path *path)
        rb_insert_color(&path->rb_node, &priv->path_tree);
 
        list_add_tail(&path->list, &priv->path_list);
+       xve_get_path(path);
 
        return 0;
 }
@@ -368,6 +425,9 @@ static void path_free(struct net_device *netdev, struct xve_path *path)
        while ((skb = __skb_dequeue(&path->queue)))
                dev_kfree_skb_irq(skb);
 
+       while ((skb = __skb_dequeue(&path->uplink_queue)))
+               dev_kfree_skb_irq(skb);
+
        spin_lock_irqsave(&priv->lock, flags);
        if (xve_cmtx_get(path)) {
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -390,12 +450,19 @@ static void xve_flood_all_paths(struct net_device *dev, struct sk_buff *skb)
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_path *path;
        struct sk_buff *nskb;
+       int ret = 0;
 
        list_for_each_entry(path, &priv->path_list, list) {
                if (xve_cmtx_get(path) && xve_cm_up(path)) {
                        nskb = skb_clone(skb, GFP_ATOMIC);
-                       if (nskb)
-                               xve_cm_send(dev, nskb, xve_cmtx_get(path));
+                       if (nskb) {
+                               ret = xve_cm_send(dev, nskb,
+                                               xve_cmtx_get(path));
+                               if (ret == NETDEV_TX_BUSY)
+                                       xve_warn(priv,
+                                               "send queue full so dropping packet %s\n",
+                                                       priv->xve_name);
+                       }
                }
        }
 }
@@ -464,7 +531,7 @@ void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid)
 
        wait_for_completion(&path->done);
        list_del(&path->list);
-       path_free(dev, path);
+       xve_put_path(path);
 }
 
 void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
@@ -480,9 +547,10 @@ static void path_rec_completion(int status,
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_ah *ah = NULL;
        struct xve_ah *old_ah = NULL;
-       struct sk_buff_head skqueue;
+       struct sk_buff_head skqueue, uplink_skqueue;
        struct sk_buff *skb;
        unsigned long flags;
+       int ret;
 
        if (!status) {
                priv->counters[XVE_PATHREC_RESP_COUNTER]++;
@@ -496,12 +564,14 @@ static void path_rec_completion(int status,
        }
 
        skb_queue_head_init(&skqueue);
+       skb_queue_head_init(&uplink_skqueue);
 
        if (!status) {
                struct ib_ah_attr av;
 
                if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) {
                        av.ah_flags = IB_AH_GRH;
+                       av.grh.dgid = path->pathrec.dgid;
                        ah = xve_create_ah(dev, priv->pd, &av);
                }
        }
@@ -523,6 +593,8 @@ static void path_rec_completion(int status,
 
                while ((skb = __skb_dequeue(&path->queue)))
                        __skb_queue_tail(&skqueue, skb);
+               while ((skb = __skb_dequeue(&path->uplink_queue)))
+                       __skb_queue_tail(&uplink_skqueue, skb);
                path->valid = 1;
        }
 
@@ -535,16 +607,29 @@ static void path_rec_completion(int status,
                xve_put_ah(old_ah);
 
        while ((skb = __skb_dequeue(&skqueue))) {
-               skb->dev = dev;
+               if (xve_is_edr(priv)) {
+                       skb_pull(skb, sizeof(struct xve_eoib_hdr));
+                       skb_reset_mac_header(skb);
+               }
                if (dev_queue_xmit(skb)) {
                        xve_warn(priv,
-                                "dev_queue_xmit failed to requeue pkt for %s\n",
-                                priv->xve_name);
+                               "dev_queue_xmit failed to requeue pkt for %s\n",
+                               priv->xve_name);
                } else {
                        xve_test("%s Succefully completed path for %s\n",
                                 __func__, priv->xve_name);
                }
        }
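+       /*
+        * Drain the gateway packets that were parked on uplink_queue
+        * while the path was unresolved; send them on the uplink data
+        * QP now that the address handle is valid.
+        */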
+       while ((skb = __skb_dequeue(&uplink_skqueue))) {
+               skb->dev = dev;
+               xve_get_ah_refcnt(path->ah);
+               /* Sending the queued GATEWAY Packet */
+               ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1);
+               if (ret == NETDEV_TX_BUSY)
+                       xve_warn(priv, "send queue full, dropping packet for %s\n",
+                                priv->xve_name);
+       }
 }
 
 static struct xve_path *path_rec_create(struct net_device *dev, void *gid)
@@ -562,6 +647,7 @@ static struct xve_path *path_rec_create(struct net_device *dev, void *gid)
        path->dev = dev;
 
        skb_queue_head_init(&path->queue);
+       skb_queue_head_init(&path->uplink_queue);
 
        INIT_LIST_HEAD(&path->fwt_list);
 
@@ -621,45 +707,180 @@ static int path_rec_start(struct net_device *dev, struct xve_path *path)
                xve_warn(priv, "ib_sa_path_rec_get failed: %d for %s\n",
                         path->query_id, priv->xve_name);
                path->query = NULL;
-               complete(&path->done);
+               complete_all(&path->done);
                return path->query_id;
        }
        priv->counters[XVE_PATHREC_QUERY_COUNTER]++;
        return 0;
 }
 
-static void xve_path_lookup(struct sk_buff *skb, struct net_device *dev,
-                           struct xve_fwt_entry *fwt_entry, int *ok)
+inline struct xve_path*
+xve_fwt_get_path(struct xve_fwt_entry *fwt)
+{
+       if (!fwt->path)
+               return NULL;
+
+       xve_get_path(fwt->path);
+       return fwt->path;
+}
+
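+/*
+ * Find the path for a GID, creating and inserting one when missing.
+ * The returned path carries an extra reference from xve_get_path();
+ * callers must drop it with xve_put_path().
+ */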
+struct xve_path*
+xve_find_path_by_gid(struct xve_dev_priv *priv,
+               union ib_gid *gid)
 {
-       struct xve_dev_priv *priv = netdev_priv(dev);
-       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
        struct xve_path *path;
-       unsigned long flags = 0;
 
-       path = __path_find(dev, fwt_entry->dgid.raw);
+       path = __path_find(priv->netdev, gid->raw);
        if (!path) {
                xve_debug(DEBUG_TABLE_INFO, priv, "%s Unable to find path\n",
                          __func__);
-               path = path_rec_create(dev, fwt_entry->dgid.raw);
+               path = path_rec_create(priv->netdev, gid->raw);
                if (!path)
-                       goto err_drop;
-               __path_add(dev, path);
+                       return NULL;
+               __path_add(priv->netdev, path);
        }
+       xve_get_path(path);
+
+       return path;
+}
+
+static struct xve_path*
+xve_path_lookup(struct net_device *dev,
+                       struct xve_fwt_entry *fwt_entry)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       struct xve_path *path;
+       unsigned long flags = 0;
 
        xve_debug(DEBUG_TABLE_INFO, priv, "%s Adding  FWT to list %p\n",
                  __func__, fwt_entry);
+       path = xve_find_path_by_gid(priv, &fwt_entry->dgid);
+       if (!path)
+               return NULL;
+
        spin_lock_irqsave(&xve_fwt->lock, flags);
        fwt_entry->path = path;
        list_add_tail(&fwt_entry->list, &path->fwt_list);
        spin_unlock_irqrestore(&xve_fwt->lock, flags);
        if (!path->ah) {
-               if (!path->query && path_rec_start(dev, path))
-                       goto err_drop;
+               if (!path->query && path_rec_start(dev, path)) {
+                       xve_put_path(path);
+                       return NULL;
+               }
+       }
+
+       return path;
+}
+
+struct xve_path *
+xve_get_gw_path(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_path *path;
+
+       if (!priv->gw.t_data_qp)
+               return NULL;
+
+       path = xve_find_path_by_gid(priv, &priv->gw.t_gid);
+       if (!path)
+               return NULL;
+
+       if (!path->ah && !path->query)
+               path_rec_start(priv->netdev, path);
+
+       return path;
+}
+
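+/*
+ * Send one skb to the EoIB gateway over the uplink data QP; while the
+ * gateway path record is still unresolved the skb is parked on the
+ * path's uplink_queue and replayed from path_rec_completion().
+ */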
+int xve_gw_send(struct net_device *dev, struct sk_buff *skb)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_gw_info *gwp = &priv->gw;
+       struct xve_path *path;
+       int ret = NETDEV_TX_OK;
+
+       path = xve_get_gw_path(dev);
+       if (!path)
+               return NETDEV_TX_BUSY;
+
+       if (path->ah) {
+               xve_dbg_data(priv, "Sending unicast copy to gw ah:%p dqpn:%u\n",
+                               path->ah, gwp->t_data_qp);
+               xve_get_ah_refcnt(path->ah);
+               /* Sending Packet to GATEWAY */
+               ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1);
+       } else if (skb_queue_len(&path->uplink_queue) <
+                       XVE_MAX_PATH_REC_QUEUE) {
+               xve_dbg_data(priv, "gw ah not found - queue len: %u\n",
+                               skb_queue_len(&path->uplink_queue));
+               priv->counters[XVE_TX_QUEUE_PKT]++;
+               __skb_queue_tail(&path->uplink_queue, skb);
+       } else {
+               xve_dbg_data(priv,
+                       "No path found to gw - dropping the unicast packet\n");
+               dev_kfree_skb_any(skb);
+               INC_TX_DROP_STATS(priv, dev);
+               goto out;
+       }
+       priv->counters[XVE_GW_MCAST_TX]++;
+
+out:
+       xve_put_path(path);
+       return ret;
+}
+
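+/*
+ * Prepend the 4-byte EoIB header. Without offload the header carries
+ * just the EoIB magic; with offload the encap_data byte also
+ * advertises the IP/TCP/UDP checksum status to the gateway.
+ */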
+int xve_add_eoib_header(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+       struct xve_eoib_hdr *eoibp;
+       int len = sizeof(*eoibp);
+
+       if (skb_headroom(skb) < len) {
+               /*
+                * Expand the headroom in place: skb_realloc_headroom()
+                * returns a copy, which would leave the caller holding
+                * a pointer to the freed original skb.
+                */
+               if (pskb_expand_head(skb, len - skb_headroom(skb), 0,
+                                    GFP_ATOMIC))
+                       return -1;
+       }
+       eoibp = (struct xve_eoib_hdr *) skb_push(skb, len);
+
+       skb_set_mac_header(skb, len);
+       if (!xve_enable_offload) {
+               eoibp->magic = cpu_to_be16(XVE_EOIB_MAGIC);
+               eoibp->tss_mask_sz = 0;
+               return 0;
+       }
+       /*
+        * encap_data = (VNIC_EOIB_HDR_VER << 4) | (VNIC_EOIB_HDR_SIG << 6),
+        * as in net/ethernet/mellanox/mlx4_vnic/vnic_data_tx.c.
+        */
+       eoibp->encap_data = 0x3 << 6;
+       eoibp->seg_off = eoibp->seg_id = 0;
+#define VNIC_EOIB_HDR_UDP_CHK_OK        0x2
+#define VNIC_EOIB_HDR_TCP_CHK_OK        0x1
+#define VNIC_EOIB_HDR_IP_CHK_OK         0x1
+
+#define VNIC_EOIB_HDR_SET_IP_CHK_OK(eoib_hdr)   (eoib_hdr->encap_data = \
+               (eoib_hdr->encap_data & 0xFC) | VNIC_EOIB_HDR_IP_CHK_OK)
+#define VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoib_hdr)  (eoib_hdr->encap_data = \
+               (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_TCP_CHK_OK << 2))
+#define VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoib_hdr)  (eoib_hdr->encap_data = \
+               (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_UDP_CHK_OK << 2))
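+       /*
+        * encap_data layout: bits 7:6 signature, bits 5:4 version,
+        * bits 3:2 TCP/UDP checksum status, bits 1:0 IP checksum status.
+        */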
+
+       switch (ntohs(skb->protocol)) {
+       case ETH_P_IP: {
+               struct iphdr *ip_h = ip_hdr(skb);
+
+               VNIC_EOIB_HDR_SET_IP_CHK_OK(eoibp);
+               if (ip_h->protocol == IPPROTO_TCP)
+                       VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoibp);
+               else if (ip_h->protocol == IPPROTO_UDP)
+                       VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoibp);
+               break;
        }
-       *ok = 1;
-       return;
-err_drop:
-       *ok = 0;
+
+       case ETH_P_IPV6:
+               break;
+       }
+       return 0;
 }
 
 static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -667,17 +888,15 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct sk_buff *bcast_skb = NULL;
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_fwt_entry *fwt_entry = NULL;
-       struct xve_path *path;
+       struct xve_path *path = NULL;
        unsigned long flags;
        int ret = NETDEV_TX_OK, len = 0;
-       char *smac;
        u8 skb_need_tofree = 0, inc_drop_cnt = 0, queued_pkt = 0;
        u16 vlan_tag = 0;
 
        spin_lock_irqsave(&priv->lock, flags);
        if (!test_bit(XVE_OPER_UP, &priv->state)) {
                ret = NETDEV_TX_BUSY;
-               inc_drop_cnt = 1;
                priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
                goto unlock;
        }
@@ -687,27 +906,34 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
                if (skb_padto(skb, XVE_MIN_PACKET_LEN)) {
                        inc_drop_cnt = 1;
                        priv->counters[XVE_TX_SKB_ALLOC_ERROR_COUNTER]++;
-                       ret = NETDEV_TX_BUSY;
+                       ret = NETDEV_TX_OK;
                        goto unlock;
                }
                skb->len = XVE_MIN_PACKET_LEN;
        }
 
-       len = skb->len;
-       smac = skb->data + ETH_ALEN;
-
+       skb_reset_mac_header(skb);
        if (xg_vlan_tx_tag_present(skb))
                vlan_get_tag(skb, &vlan_tag);
 
-       fwt_entry = xve_fwt_lookup(&priv->xve_fwt, skb->data, vlan_tag, 0);
+       if (xve_is_edr(priv) &&
+                       xve_add_eoib_header(priv, skb)) {
+               skb_need_tofree = inc_drop_cnt = 1;
+               priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
+               goto unlock;
+       }
+       len = skb->len;
+
+       fwt_entry = xve_fwt_lookup(&priv->xve_fwt, eth_hdr(skb)->h_dest,
+                       vlan_tag, 0);
        if (!fwt_entry) {
-               if (is_multicast_ether_addr(skb->data)) {
-                       xve_mcast_send(dev, (void *)priv->bcast_mgid.raw, skb);
+               if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
+                       ret = xve_mcast_send(dev,
+                                       (void *)priv->bcast_mgid.raw, skb);
                        priv->counters[XVE_TX_MCAST_PKT]++;
                        goto stats;
                } else {
                        /*
-                        * XXX Viswa Need to change this
                         * Since this is a unicast packet and we do not have
                         * an L2 table entry
                         * We need to do the following
@@ -721,23 +947,23 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
                         * Do not ARP if if user does not want to for less
                         * than IB-MTU
                         */
-                       if (xve_do_arp
+                       if (!xve_is_edr(priv) && (xve_do_arp
                            || (priv->netdev->mtu >
-                               XVE_UD_MTU(priv->max_ib_mtu)))
+                               XVE_UD_MTU(priv->max_ib_mtu))))
 
                                bcast_skb = xve_generate_query(priv, skb);
-                       if (bcast_skb != NULL)
-                               xve_mcast_send(dev,
-                                              (void *)priv->bcast_mgid.raw,
-                                              bcast_skb);
+                       if (bcast_skb != NULL)
+                               ret = xve_mcast_send(dev,
+                                       (void *)priv->bcast_mgid.raw,
+                                       bcast_skb);
                        /*
                         * Now send the original packet also to over broadcast
                         * Later add counters for flood mode
                         */
-                       if (len < XVE_UD_MTU(priv->max_ib_mtu)) {
-                               xve_mcast_send(dev,
-                                              (void *)priv->bcast_mgid.raw,
-                                              skb);
+                       if (xve_is_edr(priv) ||
+                                       len < XVE_UD_MTU(priv->max_ib_mtu)) {
+                               ret = xve_mcast_send(dev,
+                                      (void *)priv->bcast_mgid.raw, skb);
                                priv->counters[XVE_TX_MCAST_FLOOD_UD]++;
                        } else {
                                if (xve_flood_rc) {
@@ -756,20 +982,18 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       if (!fwt_entry->path) {
-               int ok;
-
+       path = xve_fwt_get_path(fwt_entry);
+       if (!path) {
                priv->counters[XVE_PATH_NOT_FOUND]++;
                xve_debug(DEBUG_SEND_INFO, priv,
                          "%s Unable to find neigbour doing a path lookup\n",
                          __func__);
-               xve_path_lookup(skb, dev, fwt_entry, &ok);
-               if (!ok) {
+               path = xve_path_lookup(dev, fwt_entry);
+               if (!path) {
                        skb_need_tofree = inc_drop_cnt = 1;
                        goto free_fwt_ctx;
                }
        } else {
-               path = fwt_entry->path;
                if (!path->ah) {
                        priv->counters[XVE_AH_NOT_FOUND]++;
                        xve_debug(DEBUG_SEND_INFO, priv,
@@ -782,11 +1006,9 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       path = fwt_entry->path;
-
        if (xve_cmtx_get(path)) {
                if (xve_cm_up(path)) {
-                       xve_cm_send(dev, skb, xve_cmtx_get(path));
+                       ret = xve_cm_send(dev, skb, xve_cmtx_get(path));
                        update_cm_tx_rate(xve_cmtx_get(path), len);
                        priv->counters[XVE_TX_RC_COUNTER]++;
                        goto stats;
@@ -794,7 +1016,8 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
        } else if (path->ah) {
                xve_debug(DEBUG_SEND_INFO, priv, "%s path ah is %p\n",
                          __func__, path->ah);
-               xve_send(dev, skb, path->ah, fwt_entry->dqpn);
+               xve_get_ah_refcnt(path->ah);
+               ret = xve_send(dev, skb, path->ah, fwt_entry->dqpn, 0);
                priv->counters[XVE_TX_UD_COUNTER]++;
                goto stats;
        }
@@ -815,6 +1038,8 @@ stats:
        INC_TX_BYTE_STATS(priv, dev, len);
        priv->counters[XVE_TX_COUNTER]++;
 free_fwt_ctx:
+       if (path)
+               xve_put_path(path);
        xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
 unlock:
        if (inc_drop_cnt)
@@ -849,21 +1074,21 @@ int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        struct xve_dev_priv *priv = netdev_priv(dev);
 
        /* Allocate RX/TX "rings" to hold queued skbs */
-       priv->rx_ring = kcalloc(xve_recvq_size, sizeof(*priv->rx_ring),
+       priv->rx_ring = kcalloc(priv->xve_recvq_size, sizeof(*priv->rx_ring),
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                pr_warn("%s: failed to allocate RX ring (%d entries)\n",
-                       ca->name, xve_recvq_size);
+                       ca->name, priv->xve_recvq_size);
                goto out;
        }
 
-       priv->tx_ring = vmalloc(xve_sendq_size * sizeof(*priv->tx_ring));
+       priv->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*priv->tx_ring));
        if (!priv->tx_ring) {
                pr_warn("%s: failed to allocate TX ring (%d entries)\n",
-                       ca->name, xve_sendq_size);
+                       ca->name, priv->xve_sendq_size);
                goto out_rx_ring_cleanup;
        }
-       memset(priv->tx_ring, 0, xve_sendq_size * sizeof(*priv->tx_ring));
+       memset(priv->tx_ring, 0, priv->xve_sendq_size * sizeof(*priv->tx_ring));
 
        /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
@@ -1001,7 +1226,7 @@ void handle_carrier_state(struct xve_dev_priv *priv, char state)
 struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
                                   struct sk_buff *skb)
 {
-       struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+       struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
 
        if ((xg_vlan_tx_tag_present(skb)
             && veth->h_vlan_encapsulated_proto == htons(ETH_P_IP))
@@ -1046,7 +1271,7 @@ struct sk_buff *xve_create_arp(struct xve_dev_priv *priv,
                struct vlan_ethhdr *veth;
 
                vlan_get_tag(skb_pkt, &vlan_tci);
-               veth = (struct vlan_ethhdr *)(skb->data);
+               veth = vlan_eth_hdr(skb);
                veth->h_vlan_proto = htons(ETH_P_8021Q);
                /* now, the TCI */
                veth->h_vlan_TCI = htons(vlan_tci);
@@ -1176,7 +1401,7 @@ struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv,
                struct vlan_ethhdr *veth;
 
                vlan_get_tag(skb_pkt, &vlan_tci);
-               veth = (struct vlan_ethhdr *)(skb->data);
+               veth = vlan_eth_hdr(skb);
                veth->h_vlan_proto = htons(ETH_P_8021Q);
                /* now, the TCI */
                veth->h_vlan_TCI = htons(vlan_tci);
@@ -1284,9 +1509,6 @@ int xve_send_hbeat(struct xve_dev_priv *priv)
        skb->protocol = htons(ETH_P_RARP);
 
        ret = xve_start_xmit(skb, priv->netdev);
-       if (ret)
-               dev_kfree_skb_any(skb);
-
        return 0;
 }
 
@@ -1370,7 +1592,23 @@ static int xve_state_machine(struct xve_dev_priv *priv)
        if (test_bit(XVE_OPER_UP, &priv->state) &&
            test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
            !test_bit(XVE_DELETING, &priv->state)) {
-
+               /* Heart beat loss */
+               if (xve_is_uplink(priv) &&
+                       !xve_ignore_hbeat_loss &&
+                       time_after(jiffies, (unsigned long)priv->last_hbeat +
+                               XVE_HBEAT_LOSS_THRES * priv->hb_interval)) {
+                       unsigned long flags = 0;
+
+                       xve_warn(priv, "Heart Beat Loss: %lu:%lu\n", jiffies,
+                               (unsigned long)priv->last_hbeat +
+                               XVE_HBEAT_LOSS_THRES * priv->hb_interval);
+
+                       xve_flush_paths(priv->netdev);
+                       spin_lock_irqsave(&priv->lock, flags);
+                       xve_set_oper_down(priv);
+                       set_bit(XVE_HBEAT_LOST, &priv->state);
+                       spin_unlock_irqrestore(&priv->lock, flags);
+               }
                priv->counters[XVE_STATE_MACHINE_UP]++;
                if (!test_bit(XVE_OPER_REP_SENT, &priv->state))
                        (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
@@ -1392,7 +1630,8 @@ static int xve_state_machine(struct xve_dev_priv *priv)
 
                if (priv->send_hbeat_flag) {
                        poll_tx(priv);
-                       xve_send_hbeat(priv);
+                       if (xve_is_ovn(priv))
+                               xve_send_hbeat(priv);
                }
                priv->send_hbeat_flag = 1;
        }
@@ -1478,7 +1717,46 @@ static void xve_set_netdev(struct net_device *dev)
        INIT_DELAYED_WORK(&priv->mcast_leave_task, xve_mcast_leave_task);
        INIT_DELAYED_WORK(&priv->mcast_join_task, xve_mcast_join_task);
        INIT_DELAYED_WORK(&priv->stale_task, xve_cm_stale_task);
+}
+
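+/* Netdev feature setup for legacy OVN vnics (GRO, optional TX csum, LRO) */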
+void
+xve_set_ovn_features(struct xve_dev_priv *priv)
+{
+       priv->netdev->features |=
+           NETIF_F_HIGHDMA | NETIF_F_GRO;
+
+       if (!xve_no_tx_checksum_offload) {
+               priv->netdev->features |= NETIF_F_IP_CSUM;
+               set_bit(XVE_FLAG_CSUM, &priv->flags);
+       }
+
+       if (priv->lro_mode && lro) {
+               priv->netdev->features |= NETIF_F_LRO;
+               xve_lro_setup(priv);
+       } else {
+               priv->lro_mode = 0;
+       }
+}
+
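+/*
+ * Netdev feature setup for EDR (uVnic) devices: checksum and TSO
+ * offloads are enabled only when the HCA advertises support, and
+ * extra headroom is reserved for the EoIB header.
+ */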
+void
+xve_set_edr_features(struct xve_dev_priv *priv)
+{
+       priv->netdev->hw_features =
+               NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_GRO;
+
+       if (xve_enable_offload) {
+               if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM)
+                       priv->netdev->hw_features |=
+                               NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+
+               if (priv->hca_caps & IB_DEVICE_UD_TSO)
+                       priv->netdev->hw_features |= NETIF_F_TSO;
+
+       }
+       priv->netdev->features |= priv->netdev->hw_features;
 
+       /* Reserve extra space for EoIB header */
+       priv->netdev->hard_header_len += sizeof(struct xve_eoib_hdr);
 }
 
 int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
@@ -1487,43 +1765,27 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
        int result = -ENOMEM;
 
        priv->netdev->watchdog_timeo = 1000 * HZ;
-       priv->netdev->tx_queue_len = xve_sendq_size * 2;
-       priv->netdev->features |=
-           NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM;
-       set_bit(XVE_FLAG_CSUM, &priv->flags);
+       priv->netdev->tx_queue_len = priv->xve_sendq_size * 2;
 
-       if (lro)
-               priv->lro_mode = 1;
-       /* 1 -RC , 2 -UD */
-       if (priv->vnet_mode == 1) {
+       priv->lro_mode = 1;
+       if (priv->vnet_mode == XVE_VNET_MODE_RC) {
                pr_info("XVE: %s Setting RC mode for %s\n", __func__,
                        priv->xve_name);
                strcpy(priv->mode, "connected(RC)");
-               /* Turn off checksum offload If the module parameter is set */
-               /* TBD if the chassis sends a CHECK SUM BIT */
-               if (xve_no_tx_checksum_offload) {
-                       priv->netdev->features &= ~NETIF_F_IP_CSUM;
-                       clear_bit(XVE_FLAG_CSUM, &priv->flags);
-               }
-
                set_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
-               priv->netdev->features &= ~(NETIF_F_TSO | NETIF_F_SG);
-               priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
                priv->cm_supported = 1;
-       } else {                /* UD */
-               /* MTU will be reset when mcast join happens */
+       } else { /* UD */
+               pr_info("XVE: %s Setting UD mode for %s\n", __func__,
+                       priv->xve_name);
                strcpy(priv->mode, "datagram(UD)");
+
+               /* MTU will be reset when mcast join happens */
                if (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu))
                        priv->netdev->mtu = XVE_UD_MTU(priv->max_ib_mtu);
-               priv->lro_mode = 1;
-               priv->cm_supported = 0;
-
+               priv->lro_mode = 0;
        }
-       priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
-
-       if (priv->lro_mode)
-               priv->netdev->features |= NETIF_F_LRO;
 
+       priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
        xg_setup_pseudo_device(priv->netdev, hca);
 
        SET_NETDEV_OPS(priv->netdev, &xve_netdev_ops);
@@ -1531,7 +1793,6 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
        netif_napi_add(priv->netdev, &priv->napi, xve_poll, napi_weight);
        if (xve_esx_preregister_setup(priv->netdev))
                return -EINVAL;
-       xve_lro_setup(priv);
 
        xve_set_netdev(priv->netdev);
 
@@ -1539,21 +1800,26 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
 
        if (!device_attr) {
                pr_warn("%s: allocation of %zu bytes failed\n",
-                       hca->name, sizeof(*device_attr));
+                               hca->name, sizeof(*device_attr));
                return result;
        }
 
        result = ib_query_device(hca, device_attr);
        if (result) {
                pr_warn("%s: ib_query_device failed (ret = %d)\n",
-                       hca->name, result);
+                               hca->name, result);
                kfree(device_attr);
                return result;
        }
        priv->hca_caps = device_attr->device_cap_flags;
-
        kfree(device_attr);
 
+       xve_lro_setup(priv);
+       if (xve_is_ovn(priv))
+               xve_set_ovn_features(priv);
+       else
+               xve_set_edr_features(priv);
+
        return 0;
 }
 
@@ -1720,7 +1986,7 @@ int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state)
        return ret;
 }
 
-static void xve_set_oper_up_state(struct xve_dev_priv *priv)
+void xve_set_oper_up_state(struct xve_dev_priv *priv)
 {
        unsigned long flags = 0;
 
@@ -1750,6 +2016,7 @@ static int handle_admin_state_change(struct xve_dev_priv *priv,
                          __func__, priv->xve_name);
                if (test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
                        priv->counters[XVE_ADMIN_DOWN_COUNTER]++;
+                       netif_carrier_off(priv->netdev);
                        clear_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
                        set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
                }
@@ -1795,7 +2062,23 @@ static int xve_xsmp_send_ack(struct xve_dev_priv *priv,
        xmsgp->code = 0;
        xmsgp->vn_mtu = cpu_to_be16(priv->admin_mtu);
        xmsgp->net_id = cpu_to_be32(priv->net_id);
-       pr_info("XVE: %s ACK back with admin mtu ", __func__);
+       if (priv->vnic_type != XSMP_XCM_OVN) {
+               xmsgp->hca_subnet_prefix =
+                       cpu_to_be64(priv->local_gid.global.subnet_prefix);
+               xmsgp->hca_ctrl_qp = 0;
+               xmsgp->hca_data_qp = cpu_to_be32(priv->qp->qp_num);
+               xmsgp->hca_qkey = cpu_to_be32(priv->qkey);
+               xmsgp->hca_pkey = cpu_to_be16(priv->pkey);
+               xmsgp->tca_subnet_prefix =
+                       cpu_to_be64(priv->gw.t_gid.global.subnet_prefix);
+               xmsgp->tca_guid =
+                       cpu_to_be64(priv->gw.t_gid.global.interface_id);
+               xmsgp->tca_ctrl_qp = cpu_to_be32(priv->gw.t_ctrl_qp);
+               xmsgp->tca_data_qp = cpu_to_be32(priv->gw.t_data_qp);
+               xmsgp->tca_pkey = cpu_to_be16(priv->gw.t_pkey);
+               xmsgp->tca_qkey = cpu_to_be16(priv->gw.t_qkey);
+       }
+       pr_info("XVE: %s ACK back with admin mtu ", __func__);
        pr_info("%d for %s", xmsgp->vn_mtu, priv->xve_name);
        pr_info("[netid %d ]\n", xmsgp->net_id);
 
@@ -1804,6 +2087,32 @@ static int xve_xsmp_send_ack(struct xve_dev_priv *priv,
        return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
 }
 
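+/*
+ * Cache the gateway (TCA) addressing from an XSMP update; the
+ * xve_tca_* module parameters, when set, override the chassis values.
+ */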
+static void
+xve_update_gw_info(struct xve_dev_priv *priv, struct xve_xsmp_msg *xmsgp)
+{
+       struct xve_gw_info *gwp = &priv->gw;
+
+       gwp->t_gid.global.subnet_prefix =
+               xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) :
+               xmsgp->tca_subnet_prefix;
+
+       gwp->t_gid.global.interface_id =
+               xve_tca_guid ? cpu_to_be64(xve_tca_guid) :
+               xmsgp->tca_guid;
+       gwp->t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp);
+       gwp->t_data_qp = xve_tca_data_qp ? (xve_tca_data_qp)
+               : be32_to_cpu(xmsgp->tca_data_qp);
+       gwp->t_pkey = xve_tca_pkey ? (xve_tca_pkey)
+               : be16_to_cpu(xmsgp->tca_pkey);
+       gwp->t_qkey = xve_tca_qkey ? (xve_tca_qkey)
+               : be16_to_cpu(xmsgp->tca_qkey);
+       xve_dbg_ctrl(priv, "GW INFO gid:%pI6, lid: %hu\n",
+                       &gwp->t_gid.raw, be16_to_cpu(xmsgp->tca_lid));
+       xve_dbg_ctrl(priv, "qpn: %u, pkey: 0x%x, qkey: 0x%x\n",
+                       gwp->t_data_qp, gwp->t_pkey,
+                       gwp->t_qkey);
+}
+
 /*
  * Handle install message
  */
@@ -1819,13 +2128,14 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
        int result = -ENOMEM;
        struct ib_device *hca;
        u8 port;
+       __be16 pkey_be;
        __be32 net_id_be;
        u8 ecode = 0;
 
        if (xve_check_for_hca(xsmp_hndl) != 0) {
                pr_info("Warning !!!!! Unsupported HCA card for xve ");
                pr_info("interface - %s XSF feature is only ", xmsgp->xve_name);
-               pr_info("supported on Connect-X HCA cards !!!!!!!");
+               pr_info("supported on Connect-X and PSIF HCA cards !!!!!!!");
                ret = -EEXIST;
                goto dup_error;
        }
@@ -1869,11 +2179,12 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
        }
 
        netdev =
-           alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, &xve_setup);
+               alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN,
+                               &xve_setup);
        if (netdev == NULL) {
                XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
-                          __func__, xmsgp->xve_name,
-                          be64_to_cpu(xmsgp->resource_id));
+                               __func__, xmsgp->xve_name,
+                               be64_to_cpu(xmsgp->resource_id));
                ret = -ENOMEM;
                ecode = XVE_NACK_ALLOCATION_ERROR;
                goto dup_error;
@@ -1882,23 +2193,70 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
 
        pr_info("XVE: %s Installing xve %s - ", __func__, xmsgp->xve_name);
        pr_info("resource id %llx", be64_to_cpu(xmsgp->resource_id));
-       pr_info("priv DS %p\n", priv);
+       pr_info("priv DS %p\n", priv);
 
        xcpm_get_xsmp_session_info(xsmp_hndl, &priv->xsmp_info);
        hca = priv->xsmp_info.ib_device;
        port = xscore_port_num(priv->xsmp_info.port);
        /* Parse PVI parameters */
-       priv->vnet_mode = (xmsgp->vnet_mode);
+       priv->vnet_mode = xve_ud_mode ? XVE_VNET_MODE_UD :
+               (xmsgp->vnet_mode);
        priv->net_id = be32_to_cpu(xmsgp->net_id);
        priv->netdev->mtu = be16_to_cpu(xmsgp->vn_mtu);
        priv->resource_id = be64_to_cpu(xmsgp->resource_id);
        priv->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+       priv->install_flag = be32_to_cpu(xmsgp->install_flag);
        priv->xsmp_hndl = xsmp_hndl;
        priv->sm_delay = 1000;
        priv->aging_delay = xve_aging_timeout * HZ;
        strcpy(priv->xve_name, xmsgp->xve_name);
        strcpy(priv->proc_name, priv->xve_name);
        net_id_be = cpu_to_be32(priv->net_id);
+       /* Parse uVnic properties */
+       /*
+        * Legacy PVIs have no vnic_type field in the XSMP message, so
+        * the value is zero for them.
+        */
+       priv->vnic_type = xmsgp->vnic_type;
+       /* Make send and receive queue parameters per-vnic */
+       priv->xve_sendq_size = xve_sendq_size;
+       priv->xve_recvq_size = xve_recvq_size;
+       priv->xve_max_send_cqe = xve_max_send_cqe;
+
+       if (priv->vnic_type == XSMP_XCM_UPLINK) {
+               /* For G/W mode set higher values */
+               priv->xve_sendq_size = 8192;
+               priv->xve_recvq_size = 8192;
+               priv->xve_max_send_cqe = 512;
+               priv->gw.t_gid.global.subnet_prefix =
+                       xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) :
+                       be64_to_cpu(xmsgp->tca_subnet_prefix);
+
+               priv->gw.t_gid.global.interface_id =
+                       xve_tca_guid ? cpu_to_be64(xve_tca_guid) :
+                       be64_to_cpu(xmsgp->tca_guid);
+               priv->gw.t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp);
+               priv->gw.t_data_qp = xve_tca_data_qp ? xve_tca_data_qp :
+                       be32_to_cpu(xmsgp->tca_data_qp);
+               priv->gw.t_pkey = xve_tca_pkey ? xve_tca_pkey :
+                       be16_to_cpu(xmsgp->tca_pkey);
+               /* FIXME: xmsgp->tca_qkey is u16; needs fixing in osdn */
+               priv->gw.t_qkey = xve_tca_qkey ? xve_tca_qkey :
+                       be16_to_cpu(xmsgp->tca_qkey);
+               xve_dbg_ctrl(priv,
+                       "GW prefix:%llx guid:%llx, lid: %hu sl: %hu TDQP%x TCQP:%x\n",
+                               priv->gw.t_gid.global.subnet_prefix,
+                               priv->gw.t_gid.global.interface_id,
+                               be16_to_cpu(xmsgp->tca_lid),
+                               be16_to_cpu(xmsgp->service_level),
+                               priv->gw.t_data_qp, priv->gw.t_ctrl_qp);
+       }
+       /* Pkey */
+       priv->pkey = xve_tca_pkey ? xve_tca_pkey :
+               be16_to_cpu(xmsgp->tca_pkey);
+       if (priv->pkey == 0)
+               priv->pkey |= 0x8000;
+       /* Qkey for EDR vnics */
+       priv->gw.t_qkey = xve_tca_qkey ? xve_tca_qkey :
+               be16_to_cpu(xmsgp->tca_qkey);
 
        /* Always set chassis ADMIN up by default */
        set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
@@ -1906,30 +2264,52 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
        if (!ib_query_port(hca, port, &priv->port_attr))
                priv->max_ib_mtu = ib_mtu_enum_to_int(priv->port_attr.max_mtu);
        else {
-               pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
+               pr_warn("%s: ib_query_port %d failed\n",
+                      hca->name, port);
                goto device_init_failed;
        }
 
-       memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
-       pr_info("XVE: %s adding vnic %s ", __func__, priv->xve_name);
-       pr_info("net_id %d vnet_mode %d", priv->net_id, priv->vnet_mode);
+       pr_info("XVE: %s adding vnic %s ",
+                       __func__, priv->xve_name);
+       pr_info("net_id %d vnet_mode %d type %d",
+                       priv->net_id, priv->vnet_mode, priv->vnic_type);
        pr_info("port %d net_id_be %d\n", port, net_id_be);
-       memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
 
-       result = ib_query_pkey(hca, port, 0, &priv->pkey);
-       if (result) {
-               pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
-                       hca->name, port, result);
-               goto device_init_failed;
+       memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
+       if (xve_is_edr(priv)) {
+               result = ib_find_pkey(hca, port, priv->pkey, &priv->pkey_index);
+               if (result != 0)
+                       pr_warn("%s : ib_find_pkey %d failed %d in %s\n",
+                                       hca->name, port, result, __func__);
+               /*
+                * EDR MGID format: FF15:101C:P:0:0:0:0:N,
+                * where P is the P_Key and N is the NetID.
+                */
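+               /* e.g. P_Key 0x8001, NetID 5 -> ff15:101c:8001:0:0:0:0:5 */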
+               pkey_be = cpu_to_be16(priv->pkey);
+               priv->bcast_mgid.raw[0] = 0xFF;
+               priv->bcast_mgid.raw[1] = 0x15;
+               priv->bcast_mgid.raw[2] = 0x10;
+               priv->bcast_mgid.raw[3] = 0x1C;
+               memcpy(&priv->bcast_mgid.raw[4], &pkey_be, 2);
+               memcpy(&priv->bcast_mgid.raw[12], &net_id_be,
+                               sizeof(net_id_be));
+       } else {
+               memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
+               result = ib_query_pkey(hca, port, 0, &priv->pkey);
+               if (result) {
+                       pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+                                       hca->name, port, result);
+                       goto device_init_failed;
+               }
+               /*
+                * Set the full membership bit, so that we join the right
+                * broadcast group, etc.
+                */
+               priv->pkey |= 0x8000;
        }
 
+       pr_info("MGID: %pI6 pkey 0x%x\n", &priv->bcast_mgid.raw, priv->pkey);
+
        if (xve_set_dev_features(priv, hca))
                goto device_init_failed;
-       /*
-        * Set the full membership bit, so that we join the right
-        * broadcast group, etc.
-        */
-       priv->pkey |= 0x8000;
 
        result = ib_query_gid(hca, port, 0, &priv->local_gid);
 
@@ -1990,7 +2370,10 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
        list_add_tail(&priv->list, &xve_dev_list);
        mutex_unlock(&xve_mutex);
 
-       xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+       if (xve_is_ovn(priv))
+               xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+       else
+               set_bit(XVE_VNIC_READY_PENDING, &priv->state);
 
        queue_sm_work(priv, 0);
 
@@ -2004,7 +2387,7 @@ send_ack:
                           __func__, xmsgp->xve_name,
                           be64_to_cpu(xmsgp->resource_id));
        }
-       if (update_state) {
+       if (update_state && priv->vnic_type == XSMP_XCM_OVN) {
                printk
                    ("XVE: %s Sending Oper state to  chassis for %s id %llx\n",
                     __func__, priv->xve_name, priv->resource_id);
@@ -2109,42 +2492,94 @@ static void xve_xsmp_send_stats(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
 static int xve_xsmp_update(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp)
 {
        u32 bitmask = be32_to_cpu(xmsgp->bitmask);
-       struct xve_dev_priv *xvep;
+       struct xve_dev_priv *priv;
        int ret = 0;
-       int send_ack = 1;
+       int send_ack = 0;
 
-       xvep = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
-       if (!xvep) {
+       priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!priv) {
                XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
                           __func__, be64_to_cpu(xmsgp->resource_id));
                return -EINVAL;
        }
 
-       XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, xvep->xve_name,
+       XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, priv->xve_name,
                  bitmask);
 
-       mutex_lock(&xvep->mutex);
+       mutex_lock(&priv->mutex);
 
-       if (bitmask & XVE_UPDATE_ADMIN_STATE) {
-               ret = handle_admin_state_change(xvep, xmsgp);
+       if (bitmask & XVE_UPDATE_ADMIN_STATE)
                /*
                 * Ack will be sent once the QPs are brought down
                 */
-               send_ack = 0;
+               ret = handle_admin_state_change(priv, xmsgp);
+       if (bitmask & XVE_UPDATE_MTU)
+               xve_modify_mtu(priv->netdev, be16_to_cpu(xmsgp->vn_mtu));
+
+       if (bitmask & XVE_UPDATE_XT_STATE_DOWN &&
+                       xve_is_uplink(priv)) {
+               clear_bit(XVE_GW_STATE_UP, &priv->state);
+               if (netif_carrier_ok(priv->netdev))
+                       handle_carrier_state(priv, 0);
+       }
+       if (bitmask & XVE_UPDATE_XT_CHANGE && xve_is_uplink(priv)) {
+               xve_update_gw_info(priv, xmsgp);
+               if (!netif_carrier_ok(priv->netdev))
+                       handle_carrier_state(priv, 1);
+               send_ack = 1;
        }
 
        if (send_ack) {
-               ret = xve_xsmp_send_ack(xvep, xmsgp);
-               if (ret)
+               ret = xve_xsmp_send_ack(priv, xmsgp);
+               if (ret) {
                        XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s\n"
-                                  "VID=0x%llx\n", __func__, xmsgp->xve_name,
-                                  be64_to_cpu(xmsgp->resource_id));
+                               "VID=0x%llx\n", __func__, xmsgp->xve_name,
+                               be64_to_cpu(xmsgp->resource_id));
+               }
        }
-       mutex_unlock(&xvep->mutex);
+       mutex_unlock(&priv->mutex);
 
        return ret;
 }
 
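+/*
+ * XSMP_VNIC_READY handler: forward the payload to xsigod, clear
+ * XVE_VNIC_READY_PENDING, ack the chassis and kick an oper-state
+ * request so the vnic can come up.
+ */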
+static int
+xve_xsmp_vnic_ready(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
+       void *data, int len)
+{
+       struct xve_dev_priv *priv;
+       unsigned long flags;
+       int ret;
+
+       priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!priv) {
+               XSMP_INFO("XVE: %s priv not found for %s\n",
+                         __func__, xmsgp->xve_name);
+               return -1;
+       }
+       pr_info("XVE VNIC_READY: vnic_type: %u, subnet_prefix: %llx\n",
+                       priv->vnic_type, priv->gw.t_gid.global.subnet_prefix);
+       pr_info("ctrl_qp: %u, data_qp: %u, pkey: %x, qkey: %x\n",
+                       priv->gw.t_ctrl_qp, priv->gw.t_data_qp,
+                       priv->gw.t_pkey, priv->gw.t_qkey);
+
+       xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+       spin_lock_irqsave(&priv->lock, flags);
+       clear_bit(XVE_VNIC_READY_PENDING, &priv->state);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       ret = xve_xsmp_send_ack(priv, xmsgp);
+       if (ret) {
+               XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->xve_name,
+                          be64_to_cpu(xmsgp->resource_id));
+       }
+
+       (void) xve_xsmp_handle_oper_req(priv->xsmp_hndl, priv->resource_id);
+
+       return 0;
+}
+
 /*
  * We set the DELETING bit and let sm_work thread handle delete
  */
@@ -2193,6 +2628,9 @@ static void handle_xve_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data,
                xve_counters[XVE_VNIC_INSTALL_COUNTER]++;
                xve_xsmp_install(xsmp_hndl, xmsgp, data, length);
                break;
+       case XSMP_VNIC_READY:
+               xve_xsmp_vnic_ready(xsmp_hndl, xmsgp, data, length);
+               break;
        case XSMP_XVE_DELETE:
                xve_counters[XVE_VNIC_DEL_COUNTER]++;
                xve_handle_del_message(xsmp_hndl, xmsgp);
@@ -2379,7 +2817,7 @@ static int __init xve_init_module(void)
 
        xve_sendq_size = roundup_pow_of_two(xve_sendq_size);
        xve_sendq_size = min(xve_sendq_size, XVE_MAX_QUEUE_SIZE);
-       xve_sendq_size = max(xve_sendq_size, max(2 * MAX_SEND_CQE,
+       xve_sendq_size = max(xve_sendq_size, max(2 * xve_max_send_cqe,
                                                 XVE_MIN_QUEUE_SIZE));
        /*
         * When copying small received packets, we only copy from the
index 19ee47a81e997e1e313bf07b8290dfa75564ea41..314f7ff043edb504f75762ad7a6bf80008a99b68 100644 (file)
@@ -175,6 +175,7 @@ static int xve_mcast_join_finish(struct xve_mcast *mcast,
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+
                set_qkey = 1;
        }
 
@@ -254,6 +255,9 @@ static int xve_mcast_sendonly_join_complete(int status,
        struct xve_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->netdev;
 
+       xve_dbg_mcast(netdev_priv(dev),
+                       "Join completion[SD] for %pI6 LID 0x%04x (status %d)\n",
+                       multicast->rec.mgid.raw,
+                       be16_to_cpu(multicast->rec.mlid), status);
        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
                return 0;
@@ -334,8 +338,8 @@ static int xve_mcast_sendonly_join(struct xve_mcast *mcast)
                rec.flow_label = priv->broadcast->mcmember.flow_label;
                rec.hop_limit = priv->broadcast->mcmember.hop_limit;
        }
-       xve_dbg_mcast(priv, "%s Joining send only join mtu %d\n", __func__,
-                     rec.mtu);
+       xve_dbg_mcast(priv, "%s Joining send-only group, mtu %d rate %d\n",
+                       __func__, rec.mtu, rec.rate);
 
        mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca,
                                         priv->port, &rec,
@@ -363,8 +367,9 @@ static int xve_mcast_join_complete(int status,
        struct net_device *dev = mcast->netdev;
        struct xve_dev_priv *priv = netdev_priv(dev);
 
-       xve_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
-                     mcast->mcmember.mgid.raw, status);
+       priv->bcast_mlid = be16_to_cpu(multicast->rec.mlid);
+       xve_dbg_mcast(priv, "join completion for %pI6 LID 0x%04x (status %d)\n",
+                     mcast->mcmember.mgid.raw, priv->bcast_mlid, status);
 
        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
@@ -450,7 +455,7 @@ static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast,
                    IB_SA_MCMEMBER_REC_RATE_SELECTOR |
                    IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
-               rec.qkey = 0x0;
+               rec.qkey = cpu_to_be32(priv->gw.t_qkey);
                rec.traffic_class = 0x0;
                rec.sl = 0x0;
                rec.flow_label = 0x0;
@@ -462,8 +467,8 @@ static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast,
                rec.rate = mcast_rate;
        }
 
-       xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d\n",
-                     mcast->mcmember.mgid.raw, rec.pkey, rec.qkey);
+       xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d rate %d\n",
+                     mcast->mcmember.mgid.raw, rec.pkey, rec.qkey, rec.rate);
        set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
        mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
@@ -650,17 +655,25 @@ static int xve_mcast_leave(struct net_device *dev, struct xve_mcast *mcast)
        return 0;
 }
 
-void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct xve_mcast *mcast;
+       int ret = NETDEV_TX_OK;
 
        if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags) ||
            !priv->broadcast ||
            !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
                INC_TX_DROP_STATS(priv, dev);
                dev_kfree_skb_any(skb);
-               return;
+               return ret;
+       }
+
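+       /*
+        * On the uplink vnic, send a copy of every multicast to the
+        * gateway as well.
+        */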
+       if (xve_is_uplink(priv) && xve_gw_linkup(priv)) {
+               struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+               if (nskb)
+                       ret = xve_gw_send(dev, nskb);
        }
 
        mcast = __xve_mcast_find(dev, mgid);
@@ -691,6 +704,7 @@ void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
                else {
                        INC_TX_DROP_STATS(priv, dev);
                        dev_kfree_skb_any(skb);
+                       return ret;
                }
 
                if (test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
@@ -708,14 +722,14 @@ void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 
 out:
        if (mcast && mcast->ah) {
-               xve_test("%s about to send mcast %02x%02x%02x%02x%02x%02x",
-                        __func__, skb->data[0], skb->data[1], skb->data[2],
-                        skb->data[3], skb->data[4], skb->data[5]);
-               xve_test("ah=%p proto=%02x%02x for %s\n", mcast->ah->ah,
-                        skb->data[12], skb->data[13], dev->name);
-               xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+               xve_test("%s about to send mcast %pM",
+                        __func__, eth_hdr(skb)->h_dest);
+               xve_test("ah=%p proto=%04x for %s\n", mcast->ah->ah,
+                        ntohs(eth_hdr(skb)->h_proto), dev->name);
+               xve_get_ah_refcnt(mcast->ah);
+               ret = xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN, 0);
        }
-
+       return ret;
 }
 
 void xve_mcast_carrier_on_task(struct work_struct *work)
index 632aef3aa67676d403db05489ffa13ecc2c496b7..9a4413d61e416337440768eeca3d1bf5b28f1bca 100755 (executable)
@@ -142,6 +142,10 @@ static char *counter_name[XVE_MAX_COUNTERS] = {
        "ib lid_active count:\t\t",
        "ib pkey_change count:\t\t",
        "ib invalid count:\t\t",
+       "uplink unicast:\t\t\t",
+       "Heartbeat Count:\t\t",
+       "Link State message count:\t",
+       "RX frames without GRH:\t\t",
 };
 
 static char *misc_counter_name[XVE_MISC_MAX_COUNTERS] = {
@@ -438,6 +442,7 @@ static int xve_proc_read_device(struct seq_file *m, void *data)
        tmp_buf[0] = 0;
        print_mgid_buf(tmp_buf, bcast_mgid_token);
        seq_printf(m, "Bcast Mgid:\t\t\t%s\n", tmp_buf);
+       seq_printf(m, "Bcast Mlid:\t\t\t0x%04x\n", vp->bcast_mlid);
 
        tmp_buf[0] = 0;
        print_mgid_buf(tmp_buf, local_gid_token);
@@ -574,6 +579,14 @@ static int xve_proc_read_device(struct seq_file *m, void *data)
                seq_printf(m, "WQ Failed:\t\t\t%ld\n", vp->work_queue_failed);
 
        seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+       if (xve_is_uplink(vp)) {
+               seq_printf(m, "Time since last heart beat: %llu sec\n",
+                               (unsigned long long)
+                               ((jiffies - vp->last_hbeat) / HZ));
+               seq_printf(m, "TCA info:\t\t\tGID: %pI6\tQPN: %u\n",
+                               &vp->gw.t_gid.raw, vp->gw.t_data_qp);
+       }
+
        vp->next_page = 1;
 out:
        return 0;
@@ -588,9 +601,9 @@ static ssize_t xve_proc_write_device(struct file *file,
        int ret;
 
        ret = sscanf(buffer, "%s", action);
-       if (ret != 1) {
+       if (ret != 1)
                return -EINVAL;
-       }
+
        if ((strlen(action) == 1) && (atoi(action) == 0)) {
                /* Clear counters */
                memset(vp->counters, 0, sizeof(vp->counters));
@@ -634,30 +647,29 @@ static ssize_t xve_proc_write_device_counters(struct file *file,
        struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
        int newval, ret;
        char    *buf = (char *) __get_free_page(GFP_USER);
-       if (!buf) {
+
+       if (!buf)
                return -ENOMEM;
-       }
 
-        if (copy_from_user(buf, buffer, count - 1)) {
+       if (copy_from_user(buf, buffer, count - 1))
                goto out;
-       }
+
        buf[count] = '\0';
 
        ret = kstrtoint(buf, 0, &newval);
-        if (ret != 0) {
-                return -EINVAL;
-        }
+       if (ret != 0)
+               goto out;
 
-        if (newval == 0) {
-                /* Clear counters */
-                memset(vp->counters, 0, sizeof(vp->counters));
-                vp->counters_cleared++;
-        }
-        return count;
+       if (newval == 0) {
+               /* Clear counters */
+               memset(vp->counters, 0, sizeof(vp->counters));
+               vp->counters_cleared++;
+       }
+       free_page((unsigned long)buf);
+       return count;
 
 out:
-        free_page((unsigned long)buf);
-        return -EINVAL;
+       free_page((unsigned long)buf);
+       return -EINVAL;
 }
 
 static int xve_proc_open_device_counters(struct inode *inode, struct file *file)
@@ -809,19 +821,19 @@ static ssize_t xve_proc_write_debug(struct file *file,
 {
        int newval, ret;
        char    *buf = (char *) __get_free_page(GFP_USER);
-       if (!buf) {
+
+       if (!buf)
                return -ENOMEM;
-       }
 
-       if (copy_from_user(buf, buffer, count - 1)) {
+       if (copy_from_user(buf, buffer, count - 1))
                goto out;
-       }
+
        buf[count] = '\0';
 
        ret = kstrtoint(buf, 0, &newval);
-       if (ret != 0) {
-               return -EINVAL;
-       }
+       if (ret != 0)
+               goto out;
+
        xve_debug_level = newval;
+       free_page((unsigned long)buf);
        return count;
 
index eea4854a922e8f51ffe44ce990a9a15571eaa424..71f7843607eb4bd2cf6ee3f443a5f2bb07e05fdf 100644 (file)
@@ -169,13 +169,7 @@ int xve_aging_task_machine(struct xve_dev_priv *priv)
                                    && ((jiffies - fwt_entry->last_refresh) >=
                                        priv->aging_delay)) {
                                        pr_info("XVE: %s MAC ", priv->xve_name);
-                                       pr_info("%02x:%02x:%02x:%02x:%02x:%02x",
-                                               ALIGN_TO_FF(smac[0]),
-                                               ALIGN_TO_FF(smac[1]),
-                                               ALIGN_TO_FF(smac[2]),
-                                               ALIGN_TO_FF(smac[3]),
-                                               ALIGN_TO_FF(smac[4]),
-                                               ALIGN_TO_FF(smac[5]));
+                                       pr_info("%pM", smac);
                                        pr_info(" vlan %d Aged out\n",
                                                fwt_entry->vlan);
                                        /*
@@ -255,6 +249,10 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
        struct xve_path *path;
        char from[64], to[64];
 
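+       /*
+        * Entries learned from the gateway GID always use the gateway
+        * data QPN instead of the QPN seen in the incoming packet.
+        */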
+       if (xve_is_uplink(priv) &&
+                       !memcmp(&gid->raw, &priv->gw.t_gid.raw, sizeof(*gid)))
+               qpn = priv->gw.t_data_qp;
+
        fwt_entry = xve_fwt_lookup(xve_fwt, smac, vlan, 1);
        if (fwt_entry) {
                if (unlikely
@@ -262,11 +260,8 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
                     (fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)))) {
                        print_mgid_buf(from, (char *)fwt_entry->dgid.raw);
                        print_mgid_buf(to, (char *)gid->raw);
-                       pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x ",
-                               priv->xve_name, ALIGN_TO_FF(smac[0]),
-                               ALIGN_TO_FF(smac[1]), ALIGN_TO_FF(smac[2]),
-                               ALIGN_TO_FF(smac[3]), ALIGN_TO_FF(smac[4]),
-                               ALIGN_TO_FF(smac[5]));
+                       pr_info("XVE: %s MAC %pM ",
+                               priv->xve_name, smac);
                        pr_info(" vlan %d moved from GID %s to GID %s\n",
                                fwt_entry->vlan, from, to);
 
@@ -306,13 +301,9 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
                }
                memset(fwt_entry, 0, sizeof(struct xve_fwt_entry));
                print_mgid_buf(from, (char *)gid->raw);
-               pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x",
-                       priv->xve_name, ALIGN_TO_FF(smac[0]),
-                       ALIGN_TO_FF(smac[1]),
-                       ALIGN_TO_FF(smac[2]), ALIGN_TO_FF(smac[3]),
-                       ALIGN_TO_FF(smac[4]), ALIGN_TO_FF(smac[5]));
-               pr_info("vlan %d learned from GID %s, mode: %s Fwt %p\n",
-                       vlan, from, qpn ? "UD" : "RC", fwt_entry);
+               pr_info("XVE: %s MAC %pM", priv->xve_name, smac);
+               pr_info("vlan %d learned from GID %s, mode: %s QPN %x Fwt %p\n",
+                       vlan, from, qpn ? "UD" : "RC", qpn, fwt_entry);
                priv->counters[XVE_MAC_LEARN_COUNTER]++;
                memcpy(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid));
                fwt_entry->dqpn = qpn;
@@ -408,7 +399,7 @@ void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
        skb->protocol = eth_type_trans(skb, priv->netdev);
        skb->dev = priv->netdev;
        skb_pkt_type(skb, PACKET_HOST);
-       if (test_bit(XVE_FLAG_CSUM, &priv->flags))
+       if (xve_is_ovn(priv) && test_bit(XVE_FLAG_CSUM, &priv->flags))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        skb->truesize = skb->len + sizeof(struct sk_buff);
 }
index ad9d6be2bb5b15705df9a8ab101213fcbec59c14..168019b6bb7a481c078a6ef305e70f34ab4c5030 100644 (file)
@@ -67,8 +67,8 @@ int xve_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid,
        ret = ib_attach_mcast(priv->qp, mgid, mlid);
        if (ret)
                xve_warn(priv,
-                        "failed to attach to multicast group, ret = %d\n",
-                        ret);
+                       "failed to attach to multicast group, ret = %d\n",
+                       ret);
 
 out:
        kfree(qp_attr);
@@ -82,8 +82,10 @@ int xve_init_qp(struct net_device *dev)
        struct ib_qp_attr qp_attr;
        int attr_mask;
 
-       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+               xve_warn(priv, "PKEY not assigned\n");
                return -1;
+       }
 
        qp_attr.qp_state = IB_QPS_INIT;
        qp_attr.qkey = 0;
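
xve_init_qp() now warns instead of failing silently when no P_Key has been resolved; the INIT transition it goes on to perform needs that pkey_index along with the port number and Q_Key. A sketch of the standard UD INIT transition under those assumptions (helper name illustrative):

#include <rdma/ib_verbs.h>

static int ud_qp_to_init(struct ib_qp *qp, u16 pkey_index, u8 port)
{
        struct ib_qp_attr attr = {
                .qp_state   = IB_QPS_INIT,
                .qkey       = 0,
                .pkey_index = pkey_index,
                .port_num   = port,
        };

        /* UD QPs take STATE, PKEY_INDEX, PORT and QKEY at INIT */
        return ib_modify_qp(qp, &attr,
                            IB_QP_STATE | IB_QP_PKEY_INDEX |
                            IB_QP_PORT | IB_QP_QKEY);
}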
@@ -130,22 +132,21 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        struct xve_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr init_attr = {
                .cap = {
-                       .max_send_wr = xve_sendq_size,
-                       .max_recv_wr = xve_recvq_size,
+                       .max_send_wr = priv->xve_sendq_size,
+                       .max_recv_wr = priv->xve_recvq_size,
                        .max_send_sge = 1,
-                       .max_recv_sge = XVE_UD_RX_SG},
+                       .max_recv_sge = xve_ud_rx_sg(priv)},
                .sq_sig_type = IB_SIGNAL_ALL_WR,
                .qp_type = IB_QPT_UD
        };
-
-       int ret, size;
-       int i;
        struct ethtool_coalesce *coal;
+       int ret, size, max_sge;
+       int i;
 
        priv->pd = ib_alloc_pd(priv->ca);
        if (IS_ERR(priv->pd)) {
                pr_warn("%s: failed to allocate PD for %s\n",
-                       ca->name, priv->xve_name);
+                               ca->name, priv->xve_name);
                return -ENODEV;
        }
 
@@ -155,16 +156,18 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_pd;
        }
 
-       size = xve_recvq_size + 1;
+       size = priv->xve_recvq_size + 1;
        ret = xve_cm_dev_init(dev);
        if (ret != 0) {
                pr_err("%s Failed for %s [ret %d ]\n", __func__,
-                      priv->xve_name, ret);
+                               priv->xve_name, ret);
                goto out_free_mr;
        }
-       size += xve_sendq_size;
-       size += xve_recvq_size + 1;     /* 1 extra for rx_drain_qp */
 
+       size += priv->xve_sendq_size;
+       size += priv->xve_recvq_size + 1;       /* 1 extra for rx_drain_qp */
+
+       /* Create the receive completion queue */
        priv->recv_cq =
            ib_create_cq(priv->ca, xve_ib_completion, NULL, dev, size, 0);
        if (IS_ERR(priv->recv_cq)) {
@@ -173,8 +176,9 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_mr;
        }
 
+       /* Create the send completion queue */
        priv->send_cq = ib_create_cq(priv->ca, xve_send_comp_handler, NULL,
-                                    dev, xve_sendq_size, 0);
+                                    dev, priv->xve_sendq_size, 0);
        if (IS_ERR(priv->send_cq)) {
                pr_warn("%s: failed to create send CQ for %s\n",
                        ca->name, priv->xve_name);
@@ -197,11 +201,19 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        init_attr.send_cq = priv->send_cq;
        init_attr.recv_cq = priv->recv_cq;
 
+       if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
+               init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
+
        if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
                init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
-       if (dev->features & NETIF_F_SG)
-               init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+       if (dev->features & NETIF_F_SG) {
+               /* Titan cards support fewer SGEs; clamp to the device max */
+               max_sge = priv->dev_attr.max_sge;
+               if (max_sge >= (MAX_SKB_FRAGS + 1))
+                       max_sge = MAX_SKB_FRAGS + 1;
+               init_attr.cap.max_send_sge = max_sge;
+       }
 
        priv->qp = ib_create_qp(priv->pd, &init_attr);
        if (IS_ERR(priv->qp)) {
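
The NETIF_F_SG branch above no longer assumes every HCA accepts MAX_SKB_FRAGS + 1 scatter/gather entries; it clamps the request to the device's advertised max_sge (the comment cites the Titan card). An equivalent one-liner using min_t, assuming dev_attr holds the result of the device query as in kernels of this vintage:

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <rdma/ib_verbs.h>

static u32 send_sge_limit(const struct ib_device_attr *dev_attr)
{
        /* Never request more SGEs than the HCA advertises */
        return min_t(u32, dev_attr->max_sge, MAX_SKB_FRAGS + 1);
}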
@@ -221,7 +233,7 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
                priv->rx_sge[1].length = PAGE_SIZE;
                priv->rx_sge[1].lkey = priv->mr->lkey;
-               priv->rx_wr.num_sge = XVE_UD_RX_SG;
+               priv->rx_wr.num_sge = xve_ud_rx_sg(priv);
        } else {
                priv->rx_sge[0].length = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
                priv->rx_wr.num_sge = 1;
@@ -249,34 +261,36 @@ out_free_pd:
 void xve_transport_dev_cleanup(struct net_device *dev)
 {
        struct xve_dev_priv *priv = netdev_priv(dev);
-       int ret = 0;
+       int ret;
 
+       /* Destroy QP */
        if (priv->qp) {
-               if (ib_destroy_qp(priv->qp))
-                       xve_warn(priv, "ib_qp_destroy failed\n");
+               ret = ib_destroy_qp(priv->qp);
+               if (ret)
+                       xve_warn(priv,
+                               "ib_qp_destroy failed (ret = %d)\n", ret);
+
                priv->qp = NULL;
                clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
        }
+
        ret = ib_destroy_cq(priv->send_cq);
        if (ret)
                xve_warn(priv, "%s ib_destroy_cq (sendq) failed ret=%d\n",
-                        __func__, ret);
+                               __func__, ret);
 
        ret = ib_destroy_cq(priv->recv_cq);
        if (ret)
                xve_warn(priv, "%s ib_destroy_cq failed ret=%d\n",
-                        __func__, ret);
+                               __func__, ret);
 
        xve_cm_dev_cleanup(dev);
 
-       ret = ib_dereg_mr(priv->mr);
-       if (ret)
-               xve_warn(priv, "%s ib_dereg_mr failed ret=%d\n", __func__, ret);
+       if (ib_dereg_mr(priv->mr))
+               xve_warn(priv, "ib_dereg_mr failed\n");
 
-       ret = ib_dealloc_pd(priv->pd);
-       if (ret)
-               xve_warn(priv, "%s ib_dealloc_pd failed ret=%d\n",
-                        __func__, ret);
+       if (ib_dealloc_pd(priv->pd))
+               xve_warn(priv, "ib_dealloc_pd failed\n");
 }
 
 void xve_event(struct ib_event_handler *handler, struct ib_event *record)
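
xve_transport_dev_cleanup() now logs the return code of ib_destroy_qp() but keeps the same strict teardown order: QP first (it references both CQs), then the CQs, then CM state, and the MR and PD last. A condensed sketch of that ordering with the driver-specific pieces stripped out:

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static void transport_teardown(struct ib_qp *qp, struct ib_cq *send_cq,
                               struct ib_cq *recv_cq, struct ib_mr *mr,
                               struct ib_pd *pd)
{
        if (qp && ib_destroy_qp(qp))            /* QP holds the CQs */
                pr_warn("ib_destroy_qp failed\n");
        if (ib_destroy_cq(send_cq))
                pr_warn("ib_destroy_cq (send) failed\n");
        if (ib_destroy_cq(recv_cq))
                pr_warn("ib_destroy_cq (recv) failed\n");
        if (ib_dereg_mr(mr))                    /* MR pins the PD */
                pr_warn("ib_dereg_mr failed\n");
        if (ib_dealloc_pd(pd))                  /* PD must go last */
                pr_warn("ib_dealloc_pd failed\n");
}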
@@ -293,32 +307,32 @@ void xve_event(struct ib_event_handler *handler, struct ib_event *record)
 
        switch (record->event) {
        case IB_EVENT_SM_CHANGE:
-               priv->counters[XVE_SM_CHANGE_COUNTER]++;
-               xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
-               break;
+                       priv->counters[XVE_SM_CHANGE_COUNTER]++;
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+                       break;
        case IB_EVENT_CLIENT_REREGISTER:
-               priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
-               set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
-               xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
-               break;
+                       priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
+                       set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+                       break;
        case IB_EVENT_PORT_ERR:
-               priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
-               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
-               break;
+                       priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+                       break;
        case IB_EVENT_PORT_ACTIVE:
-               priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
-               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
-               break;
+                       priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+                       break;
        case IB_EVENT_LID_CHANGE:
-               priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
-               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
-               break;
+                       priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+                       break;
        case IB_EVENT_PKEY_CHANGE:
-               priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
-               xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
-               break;
+                       priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
+                       xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
+                       break;
        default:
-               priv->counters[XVE_INVALID_EVENT_COUNTER]++;
-               break;
+                       priv->counters[XVE_INVALID_EVENT_COUNTER]++;
+                       break;
        }
 }
index 43a516edf795585d42f10f03f6fc3cf33b6b8762..65a1128ff92bf641e13381ae563b320a1d559cdb 100644 (file)
@@ -61,6 +61,10 @@ enum xve_xsmp_cmd_type {
        XSMP_XVE_HA_INFO,
        XSMP_XVE_ISCSI_INFO,
 
+       XSMP_XSF_FWD_TABLE,
+       XSMP_XSF_L2_TABLE,
+       XSMP_VNIC_READY,
+
        XSMP_XVE_TYPE_MAX,
 };
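
The three new XSMP opcodes are appended immediately before XSMP_XVE_TYPE_MAX, so every existing opcode keeps its on-the-wire value and TYPE_MAX remains the open upper bound. A sketch of the usual range check given the enum above (the helper itself is illustrative):

#include <linux/types.h>

static bool xsmp_xve_type_valid(u32 type)
{
        /* TYPE_MAX is exclusive; anything at or above it is unknown */
        return type < XSMP_XVE_TYPE_MAX;
}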
 
@@ -86,13 +90,13 @@ struct xve_xsmp_msg {
                        u8 xve_name[XVE_MAX_NAME_SIZE];
                        u16 service_level;      /* SL value for this vnic */
                        u16 fc_active;  /* 1: enable, 0:
-                                        * disable host rate control */
+                                       * disable host rate control */
                        u16 cir;        /* committed rate in mbps */
                        u16 pir;        /* peak rate in mbps */
                        u32 cbs;        /* committed burst size in bytes */
                        u32 pbs;        /* peak burst size in bytes */
                        u8 vm_index;    /* the index used by vmware
-                                        * for persistence */
+                                       * for persistence */
                        u8 _reserved;
                        u16 mp_flag;
                        u8 mp_group[XVE_MP_GROUP_NAME_MAX];
@@ -101,6 +105,21 @@ struct xve_xsmp_msg {
                        /* for virtual network */
                        u32 net_id;
                        u8 vnet_mode;
+
+                       u8 vnic_type;
+
+                       u64 tca_subnet_prefix;
+                       u32 tca_ctrl_qp;
+                       u32 tca_data_qp;
+                       u16 tca_pkey;
+                       u16 tca_qkey;
+
+                       /* host must fill these in INSTALL ACK */
+                       u64 hca_subnet_prefix;
+                       u32 hca_ctrl_qp;
+                       u32 hca_data_qp;
+                       u16 hca_pkey;
+                       u16 hca_qkey;
                } __packed;
                u8 bytes[512];
        };
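
The new fields split the uVnic handshake in two: the chassis supplies the TCA-side subnet prefix, control/data QPNs, P_Key and Q_Key in the install message, and, per the comment, the host reports its HCA-side counterparts back in the INSTALL ACK. A hypothetical fill-in helper, with field names taken from the struct and everything else illustrative:

static void fill_install_ack(struct xve_xsmp_msg *msg, u64 subnet_prefix,
                             u32 ctrl_qp, u32 data_qp, u16 pkey, u16 qkey)
{
        /* Host-side (HCA) addressing, echoed back to the chassis */
        msg->hca_subnet_prefix = subnet_prefix;
        msg->hca_ctrl_qp = ctrl_qp;
        msg->hca_data_qp = data_qp;
        msg->hca_pkey = pkey;
        msg->hca_qkey = qkey;
}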
@@ -257,7 +276,7 @@ struct xve_iscsi_msg {
 #define XVE_UPDATE_QOS         (1 << 7)
 #define XVE_UPDATE_ACL         (1 << 8)
 #define XVE_UPDATE_MP_FLAG             (1 << 10)
-#define XVE_XT_STATE_DOWN              (1 << 30)
+#define XVE_UPDATE_XT_STATE_DOWN       (1 << 30)
 #define XVE_UPDATE_XT_CHANGE           (1 << 31)
 
 /* mp_flag */