From 38093fbfbd8d3256a02739c83b2cf36d070fc1e3 Mon Sep 17 00:00:00 2001 From: Pradeep Gopanapalli Date: Wed, 4 Nov 2015 18:58:15 -0800 Subject: [PATCH] 1) Support vnic for EDR-based platforms (uVnic) 2) Supported types are now: Type 0 - XSMP_XCM_OVN - Xsigo VP780/OSDN standalone chassis (add pvi); Type 1 - XSMP_XCM_NOUPLINK - EDR without uplink (add public-network); Type 2 - XSMP_XCM_UPLINK - EDR with uplink (add public-network) 3) Intelligence in the driver to support all the modes 4) Added code for printing the multicast LID [Revision 8008] 5) Removed style errors Reviewed-by: Sajid Zia Signed-off-by: Pradeep Gopanapalli Signed-off-by: Qing Huang --- drivers/infiniband/ulp/xsigo/xve/Makefile | 2 +- drivers/infiniband/ulp/xsigo/xve/xve.h | 170 ++++- drivers/infiniband/ulp/xsigo/xve/xve_cm.c | 70 +- drivers/infiniband/ulp/xsigo/xve/xve_compat.h | 6 +- drivers/infiniband/ulp/xsigo/xve/xve_ib.c | 332 +++++--- drivers/infiniband/ulp/xsigo/xve/xve_main.c | 712 ++++++++++++++---- .../infiniband/ulp/xsigo/xve/xve_multicast.c | 46 +- drivers/infiniband/ulp/xsigo/xve/xve_stats.c | 58 +- drivers/infiniband/ulp/xsigo/xve/xve_tables.c | 31 +- drivers/infiniband/ulp/xsigo/xve/xve_verbs.c | 116 +-- .../infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h | 25 +- 11 files changed, 1155 insertions(+), 413 deletions(-) diff --git a/drivers/infiniband/ulp/xsigo/xve/Makefile b/drivers/infiniband/ulp/xsigo/xve/Makefile index 13f4dec4af4b..89d4f857dec7 100644 --- a/drivers/infiniband/ulp/xsigo/xve/Makefile +++ b/drivers/infiniband/ulp/xsigo/xve/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_INFINIBAND_XVE) := xve.o xve-y := xve_main.o xve_verbs.o xve_multicast.o xve_ib.o xve_tables.o \ xve_ethtool.o xve_cm.o xve_stats.o -ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\" +ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8008\" ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT diff --git a/drivers/infiniband/ulp/xsigo/xve/xve.h b/drivers/infiniband/ulp/xsigo/xve/xve.h index 5bd33b283194..fd6ea4e3f595 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve.h +++ b/drivers/infiniband/ulp/xsigo/xve/xve.h @@ -133,6 +133,15 @@ #define PREFIX_MULTI_ADDR 0x33 /* ethernet header length */ #define ETH_HDR_LEN 14 +#define XVE_EOIB_MAGIC 0x8919 +#define ETH_P_XVE_CTRL 0x8919 +#define XVE_EOIB_LEN 4 + +#define XVE_VNET_MODE_RC 1 +#define XVE_VNET_MODE_UD 2 + +#define XVE_MAX_RX_QUEUES 16 +#define XVE_MAX_TX_QUEUES 16 /* constants */ enum xve_flush_level { @@ -142,8 +151,9 @@ enum xve_flush_level { }; enum { - XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN, - XVE_UD_RX_SG = 2, /* max buffer needed for 4K mtu */ + XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN + XVE_EOIB_LEN + 2048, + XVE_UD_RX_OVN_SG = 2, /* max buffer needed for 4K mtu */ + XVE_UD_RX_EDR_SG = 3, /* max buffer needed for 10K mtu */ XVE_CM_MTU = 0x10000 - 0x20, /* padding to align header to 16 */ XVE_CM_BUF_SIZE = XVE_CM_MTU + VLAN_ETH_HLEN, XVE_CM_HEAD_SIZE = XVE_CM_BUF_SIZE % PAGE_SIZE, @@ -300,6 +310,11 @@ enum { XVE_EVENT_PKEY_CHANGE_COUNTER, XVE_INVALID_EVENT_COUNTER, + XVE_GW_MCAST_TX, + XVE_HBEAT_COUNTER, + XVE_LINK_STATUS_COUNTER, + XVE_RX_NOGRH, + XVE_MAX_COUNTERS }; @@ -400,7 +415,8 @@ enum { DEBUG_CONTINUE_UNLOAD = 0x00002000, DEBUG_MISC_INFO = 0x00004000, DEBUG_IBDEV_INFO = 0x00008000, - DEBUG_CM_INFO = 0x00010000 + DEBUG_CM_INFO = 0x00010000, + DEBUG_CTRL_INFO = 0x00020000 }; #define XVE_OP_RECV (1ul << 31) @@ -433,13 +449,31 @@ enum { #define
XVE_OVER_QUOTA 23 #define XVE_TSO_CHANGE 24 #define XVE_RXBATCH_CHANGE 25 +#define XVE_VNIC_READY_PENDING 26 +#define XVE_HBEAT_LOST 27 +#define XVE_GW_STATE_UP 28 + #define MODULE_NAME "XVE" #define ALIGN_TO_FF(a) (a & 0xff) #define XVE_FWT_ENTRY_VALID 1 #define XVE_FWT_ENTRY_REFRESH 2 -#define XVE_UD_MTU(ib_mtu) (ib_mtu - VLAN_ETH_HLEN) -#define XVE_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + VLAN_ETH_HLEN) -#define XVE_MIN_PACKET_LEN 60 +#define XVE_UD_MTU(ib_mtu) (ib_mtu - (VLAN_ETH_HLEN + XVE_EOIB_LEN)) +#define XVE_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + \ + (VLAN_ETH_HLEN + XVE_EOIB_LEN)) +#define XVE_MIN_PACKET_LEN 64 + +enum xcm_type { + XSMP_XCM_OVN, + XSMP_XCM_NOUPLINK, + XSMP_XCM_UPLINK +}; + +#define xve_is_uplink(priv) ((priv)->vnic_type == XSMP_XCM_UPLINK) +#define xve_is_ovn(priv) ((priv)->vnic_type == XSMP_XCM_OVN) +#define xve_is_edr(priv) (!xve_is_ovn(priv)) +#define xve_gw_linkup(priv) test_bit(XVE_GW_STATE_UP, &(priv)->state) +#define xve_ud_rx_sg(priv) (xve_is_edr(priv) ? XVE_UD_RX_EDR_SG : \ + XVE_UD_RX_OVN_SG) /*Extern declarations */ extern int xve_debug_level; @@ -447,6 +481,7 @@ extern int xve_cm_single_qp; extern u32 xve_hash_salt; extern int xve_sendq_size; extern int xve_recvq_size; +extern int xve_max_send_cqe; extern struct ib_sa_client xve_sa_client; extern u32 xve_counters[]; extern struct workqueue_struct *xve_taskqueue; @@ -481,11 +516,12 @@ struct xve_mcast { struct xve_rx_buf { struct sk_buff *skb; - u64 mapping[XVE_UD_RX_SG]; + u64 mapping[XVE_UD_RX_EDR_SG]; }; struct xve_tx_buf { struct sk_buff *skb; + struct xve_ah *ah; u64 mapping[MAX_SKB_FRAGS + 1]; }; @@ -591,6 +627,46 @@ struct xve_fwt_s { unsigned num; }; +#define XVE_VNIC_HBEAT 1 +#define XVE_VNIC_LINK_STATE 2 + +#define XVE_HBEAT_LOSS_THRES 3 +struct xve_keep_alive { + uint32_t pvi_id; + uint32_t type; + uint64_t tca_hbeat_cnt; + uint32_t uplink_status; +} __packed; + +struct xve_gw_info { + union ib_gid t_gid; + u32 t_ctrl_qp; + u32 t_data_qp; + u32 t_qkey; + u16 t_pkey; +}; + +struct xve_eoib_hdr { + union { + struct { /* CX */ + __u8 encap_data; + __u8 seg_off; + __be16 seg_id; + }; + struct { /* PSIF */ + __be16 magic; + __be16 tss_mask_sz; + }; + }; +} __packed; + + +struct xve_rx_cm_info { + struct ib_sge rx_sge[XVE_CM_RX_SG]; + struct ib_recv_wr rx_wr; +}; + + /* * Device private locking: network stack tx_lock protects members used * in TX fast path, lock protects everything else. 
lock nests inside @@ -608,9 +684,13 @@ struct xve_dev_priv { struct ib_qp *qp; union ib_gid local_gid; union ib_gid bcast_mgid; + __be16 bcast_mlid; u16 local_lid; u32 qkey; + /* Device attributes */ + struct ib_device_attr dev_attr; + /* Netdev related attributes */ struct net_device *netdev; struct net_device_stats stats; @@ -636,6 +716,9 @@ struct xve_dev_priv { unsigned long jiffies; struct xve_fwt_s xve_fwt; int aging_delay; + void *pci; + uint32_t hb_interval; + uint64_t last_hbeat; struct xve_cm_dev_priv cm; unsigned int cm_supported; @@ -650,8 +733,10 @@ struct xve_dev_priv { unsigned int mcast_mtu; unsigned int max_ib_mtu; char mode[64]; - /* TX and RX Ring attributes */ + int xve_recvq_size; + int xve_sendq_size; + int xve_max_send_cqe; struct xve_rx_buf *rx_ring; struct xve_tx_buf *tx_ring; unsigned tx_head; @@ -661,7 +746,8 @@ struct xve_dev_priv { struct ib_send_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; struct ib_recv_wr rx_wr; - struct ib_sge rx_sge[XVE_UD_RX_SG]; + /* Allocate EDR SG for now */ + struct ib_sge rx_sge[XVE_UD_RX_EDR_SG]; struct ib_wc ibwc[XVE_NUM_WC]; struct ib_cq *recv_cq; struct ib_cq *send_cq; @@ -674,9 +760,12 @@ struct xve_dev_priv { u64 resource_id; u64 mac; u32 net_id; + u32 install_flag; u16 mp_flag; - char vnet_mode; + u8 vnet_mode; + u8 vnic_type; char xve_name[XVE_MAX_NAME_SIZE]; + struct xve_gw_info gw; /* Proc related attributes */ struct proc_dir_entry *nic_dir; @@ -696,7 +785,7 @@ struct xve_ah { struct ib_ah *ah; struct list_head list; struct kref ref; - unsigned last_send; + atomic_t refcnt; }; struct ib_packed_grh { @@ -724,7 +813,10 @@ struct xve_path { struct rb_node rb_node; struct list_head list; int valid; + int index; struct sk_buff_head queue; + struct sk_buff_head uplink_queue; + atomic_t users; }; struct xve_work { @@ -790,14 +882,6 @@ struct icmp6_ndp { dev->stats.rx_bytes += len; \ } while (0) -#define SET_FLUSH_BIT(priv, bit) \ - do { \ - unsigned long flags; \ - spin_lock_irqsave(&priv->lock, flags); \ - set_bit(bit, &priv->state); \ - spin_unlock_irqrestore(&priv->lock, flags); \ - } while (0) - #define PRINT(level, x, fmt, arg...) \ printk(level "%s: " fmt, MODULE_NAME, ##arg) #define XSMP_ERROR(fmt, arg...) \ @@ -807,18 +891,18 @@ struct icmp6_ndp { ((struct xve_dev_priv *) priv)->netdev->name, \ ## arg) #define xve_warn(priv, format, arg...) \ - xve_printk(KERN_WARNING, priv, format , ## arg) + xve_printk(KERN_WARNING, priv, format, ## arg) #define XSMP_INFO(fmt, arg...) \ do { \ if (xve_debug_level & DEBUG_XSMP_INFO) \ - PRINT(KERN_DEBUG, "XSMP", fmt , ## arg);\ + PRINT(KERN_DEBUG, "XSMP", fmt, ## arg);\ } while (0) #define xve_test(fmt, arg...) \ do { \ if (xve_debug_level & DEBUG_TEST_INFO) \ - PRINT(KERN_DEBUG, "DEBUG", fmt , ## arg); \ + PRINT(KERN_DEBUG, "DEBUG", fmt, ## arg); \ } while (0) #define xve_dbg_data(priv, format, arg...) \ @@ -827,10 +911,16 @@ struct icmp6_ndp { xve_printk(KERN_DEBUG, priv, format, \ ## arg); \ } while (0) +#define xve_dbg_ctrl(priv, format, arg...) \ + do { \ + if (xve_debug_level & DEBUG_CTRL_INFO) \ + xve_printk(KERN_DEBUG, priv, format, \ + ## arg); \ + } while (0) #define xve_dbg_mcast(priv, format, arg...) \ do { \ if (xve_debug_level & DEBUG_MCAST_INFO) \ - xve_printk(KERN_ERR, priv, format , ## arg); \ + xve_printk(KERN_ERR, priv, format, ## arg); \ } while (0) #define xve_debug(level, priv, format, arg...) 
\ do { \ @@ -899,6 +989,8 @@ static inline void xve_send_skb(struct xve_dev_priv *priv, struct sk_buff *skb) if (netdev->features & NETIF_F_LRO) lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else if (netdev->features & NETIF_F_GRO) + napi_gro_receive(&priv->napi, skb); else netif_receive_skb(skb); @@ -1018,8 +1110,11 @@ static inline void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, if (length == 0) { /* don't need this page */ - skb_fill_page_desc(toskb, i, skb_frag_page(frag), - 0, PAGE_SIZE); + if (toskb) + skb_fill_page_desc(toskb, i, skb_frag_page(frag) + , 0, PAGE_SIZE); + else + __free_page(skb_shinfo(skb)->frags[i].page.p); --skb_shinfo(skb)->nr_frags; } else { size = min_t(unsigned, length, (unsigned)PAGE_SIZE); @@ -1046,11 +1141,20 @@ static inline void xve_put_ah(struct xve_ah *ah) kref_put(&ah->ref, xve_free_ah); } +static inline void xve_put_ah_refcnt(struct xve_ah *address) +{ + atomic_dec(&address->refcnt); +} +static inline void xve_get_ah_refcnt(struct xve_ah *address) +{ + atomic_inc(&address->refcnt); +} + int xve_open(struct net_device *dev); int xve_add_pkey_attr(struct net_device *dev); -void xve_send(struct net_device *dev, struct sk_buff *skb, - struct xve_ah *address, u32 qpn); +int xve_send(struct net_device *dev, struct sk_buff *skb, + struct xve_ah *address, u32 qpn, int type); int poll_tx(struct xve_dev_priv *priv); int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state); void handle_carrier_state(struct xve_dev_priv *priv, char state); @@ -1096,7 +1200,7 @@ void xve_remove_fwt_entry(struct xve_dev_priv *priv, void xve_fwt_entry_free(struct xve_dev_priv *priv, struct xve_fwt_entry *fwt_entry); -void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); +int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); void xve_advert_mcast_join(struct xve_dev_priv *priv); int xve_mcast_start_thread(struct net_device *dev); int xve_mcast_stop_thread(struct net_device *dev, int flush); @@ -1129,7 +1233,7 @@ int xve_send_hbeat(struct xve_dev_priv *xvep); void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id); /*CM */ -void xve_cm_send(struct net_device *dev, struct sk_buff *skb, +int xve_cm_send(struct net_device *dev, struct sk_buff *skb, struct xve_cm_ctx *tx); int xve_cm_dev_open(struct net_device *dev); void xve_cm_dev_stop(struct net_device *dev); @@ -1163,9 +1267,11 @@ void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb); void xve_tables_exit(void); void xve_remove_one(struct xve_dev_priv *priv); struct xve_path *__path_find(struct net_device *netdev, void *gid); -extern int xve_add_proc_entry(struct xve_dev_priv *vp); +int xve_add_proc_entry(struct xve_dev_priv *vp); void xve_remove_proc_entry(struct xve_dev_priv *vp); -extern int xve_change_rxbatch(struct xve_dev_priv *xvep, int flag); +int xve_gw_send(struct net_device *priv, struct sk_buff *skb); +struct xve_path *xve_get_gw_path(struct net_device *dev); +void xve_set_oper_up_state(struct xve_dev_priv *priv); static inline int xve_continue_unload(void) { @@ -1179,7 +1285,7 @@ static inline int xve_get_misc_info(void) static inline int xg_vlan_tx_tag_present(struct sk_buff *skb) { - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + struct vlan_ethhdr *veth = vlan_eth_hdr(skb); return veth->h_vlan_proto == htons(ETH_P_8021Q); } diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_cm.c b/drivers/infiniband/ulp/xsigo/xve/xve_cm.c index 7c68f8f760b4..caf4e8aa53d6 100644 --- 
a/drivers/infiniband/ulp/xsigo/xve/xve_cm.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_cm.c @@ -81,14 +81,15 @@ static int xve_cm_post_receive_srq(struct net_device *netdev, int id) { struct xve_dev_priv *priv = netdev_priv(netdev); struct ib_recv_wr *bad_wr; + struct ib_recv_wr *wr = &priv->cm.rx_wr; int i, ret; - priv->cm.rx_wr.wr_id = id | XVE_OP_CM | XVE_OP_RECV; + wr->wr_id = id | XVE_OP_CM | XVE_OP_RECV; for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_srq_recv(priv->cm.srq, wr, &bad_wr); if (unlikely(ret)) { xve_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); xve_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, @@ -171,7 +172,7 @@ static void xve_cm_free_rx_ring(struct net_device *dev, struct xve_dev_priv *priv = netdev_priv(dev); int i; - for (i = 0; i < xve_recvq_size; ++i) { + for (i = 0; i < priv->xve_recvq_size; ++i) { if (rx_ring[i].skb) { xve_cm_dma_unmap_rx(priv, XVE_CM_RX_SG - 1, rx_ring[i].mapping); @@ -463,7 +464,7 @@ void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) xve_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); - if (unlikely(wr_id >= xve_recvq_size)) { + if (unlikely(wr_id >= priv->xve_recvq_size)) { if (wr_id == (XVE_CM_RX_DRAIN_WRID & ~(XVE_OP_CM | XVE_OP_RECV))) { spin_lock_irqsave(&priv->lock, flags); @@ -475,7 +476,7 @@ void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } else xve_warn(priv, "cm recv completion event with wrid %d (> %d)\n", - wr_id, xve_recvq_size); + wr_id, priv->xve_recvq_size); return; } @@ -619,12 +620,13 @@ static void xve_cm_tx_buf_free(struct xve_dev_priv *priv, memset(tx_req, 0, sizeof(struct xve_cm_buf)); } -void xve_cm_send(struct net_device *dev, struct sk_buff *skb, +int xve_cm_send(struct net_device *dev, struct sk_buff *skb, struct xve_cm_ctx *tx) { struct xve_dev_priv *priv = netdev_priv(dev); struct xve_cm_buf *tx_req; u64 addr; + int ret = NETDEV_TX_OK; if (unlikely(skb->len > tx->mtu + VLAN_ETH_HLEN)) { xve_warn(priv, @@ -633,7 +635,7 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb, INC_TX_DROP_STATS(priv, dev); INC_TX_ERROR_STATS(priv, dev); dev_kfree_skb_any(skb); - return; + return ret; } xve_dbg_data(priv, @@ -647,25 +649,27 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb, * means we have to make sure everything is properly recorded and * our state is consistent before we call post_send(). 
*/ - tx_req = &tx->tx_ring[tx->tx_head & (xve_sendq_size - 1)]; + tx_req = &tx->tx_ring[tx->tx_head & (priv->xve_sendq_size - 1)]; tx_req->skb = skb; addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { INC_TX_ERROR_STATS(priv, dev); dev_kfree_skb_any(skb); memset(tx_req, 0, sizeof(struct xve_cm_buf)); - return; + return ret; } tx_req->mapping[0] = addr; - if (unlikely(post_send(priv, tx, tx->tx_head & (xve_sendq_size - 1), + if (unlikely(post_send(priv, tx, tx->tx_head & + (priv->xve_sendq_size - 1), addr, skb->len))) { xve_warn(priv, "post_send failed\n"); INC_TX_ERROR_STATS(priv, dev); xve_cm_tx_buf_free(priv, tx_req); } else { + dev->trans_start = jiffies; ++tx->tx_head; - if (++priv->tx_outstanding == xve_sendq_size) { + if (++priv->tx_outstanding == priv->xve_sendq_size) { xve_dbg_data(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); @@ -678,10 +682,11 @@ void xve_cm_send(struct net_device *dev, struct sk_buff *skb, } } priv->send_hbeat_flag = 0; - + return ret; } -void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +void xve_cm_handle_tx_wc(struct net_device *dev, + struct ib_wc *wc) { struct xve_dev_priv *priv = netdev_priv(dev); struct xve_cm_ctx *tx = wc->qp->qp_context; @@ -691,18 +696,18 @@ void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) xve_dbg_data(priv, "cm send completion: id %d, status: %d\n", wr_id, wc->status); - if (unlikely(wr_id >= xve_sendq_size)) { + if (unlikely(wr_id >= priv->xve_sendq_size)) { xve_warn(priv, "cm send completion event with wrid %d (> %d)\n", - wr_id, xve_sendq_size); + wr_id, priv->xve_sendq_size); return; } tx_req = &tx->tx_ring[wr_id]; xve_cm_tx_buf_free(priv, tx_req); - ++tx->tx_tail; netif_tx_lock(dev); - if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) && + ++tx->tx_tail; + if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) { priv->counters[XVE_TX_WAKE_UP_COUNTER]++; @@ -893,7 +898,7 @@ static struct ib_qp *xve_cm_create_tx_qp(struct net_device *dev, .send_cq = priv->recv_cq, .recv_cq = priv->recv_cq, .srq = priv->cm.srq, - .cap.max_send_wr = xve_sendq_size, + .cap.max_send_wr = priv->xve_sendq_size, .cap.max_send_sge = 1, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, @@ -976,13 +981,13 @@ static int xve_cm_tx_init(struct xve_cm_ctx *p, struct ib_sa_path_rec *pathrec) struct xve_dev_priv *priv = netdev_priv(p->netdev); int ret; - p->tx_ring = vmalloc(xve_sendq_size * sizeof(*p->tx_ring)); + p->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*p->tx_ring)); if (!p->tx_ring) { xve_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, xve_sendq_size * sizeof(*p->tx_ring)); + memset(p->tx_ring, 0, priv->xve_sendq_size * sizeof(*p->tx_ring)); p->qp = xve_cm_create_tx_qp(p->netdev, p); if (IS_ERR(p->qp)) { @@ -1048,7 +1053,8 @@ static void xve_cm_tx_destroy(struct xve_cm_ctx *p) /* Wait for all sends to complete */ if (!netif_carrier_ok(priv->netdev) && unlikely(priv->tx_outstanding > MAX_SEND_CQE)) - while (poll_tx(priv)); /* nothing */ + while (poll_tx(priv)) + ; /* nothing */ begin = jiffies; while ((int)p->tx_tail - (int)p->tx_head < 0) { @@ -1067,14 +1073,17 @@ timeout: spin_lock_irqsave(&priv->lock, flags); while ((int)p->tx_tail - (int)p->tx_head < 0) { - tx_req = &p->tx_ring[p->tx_tail & (xve_sendq_size - 1)]; + tx_req = &p->tx_ring[p->tx_tail & 
(priv->xve_sendq_size - 1)]; + + ++p->tx_tail; spin_unlock_irqrestore(&priv->lock, flags); xve_cm_tx_buf_free(priv, tx_req); netif_tx_lock_bh(p->netdev); - if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) && - netif_queue_stopped(p->netdev) && + if (unlikely(--priv->tx_outstanding == + (priv->xve_sendq_size >> 1)) + && netif_queue_stopped(p->netdev) && test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) { priv->counters[XVE_TX_WAKE_UP_COUNTER]++; netif_wake_queue(p->netdev); @@ -1211,7 +1220,6 @@ void xve_cm_tx_start(struct work_struct *work) spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); xve_put_ctx(priv); - } static void __xve_cm_tx_reap(struct xve_dev_priv *priv) @@ -1296,7 +1304,7 @@ static void xve_cm_create_srq(struct net_device *dev, int max_sge) struct xve_dev_priv *priv = netdev_priv(dev); struct ib_srq_init_attr srq_init_attr = { .attr = { - .max_wr = xve_recvq_size, + .max_wr = priv->xve_recvq_size, .max_sge = max_sge} }; @@ -1310,17 +1318,17 @@ static void xve_cm_create_srq(struct net_device *dev, int max_sge) } priv->cm.srq_ring = - vmalloc(xve_recvq_size * sizeof(*priv->cm.srq_ring)); + vmalloc(priv->xve_recvq_size * sizeof(*priv->cm.srq_ring)); if (!priv->cm.srq_ring) { pr_warn("%s: failed to allocate CM SRQ ring (%d entries)\n", - priv->ca->name, xve_recvq_size); + priv->ca->name, priv->xve_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; return; } memset(priv->cm.srq_ring, 0, - xve_recvq_size * sizeof(*priv->cm.srq_ring)); + priv->xve_recvq_size * sizeof(*priv->cm.srq_ring)); } int xve_cm_dev_init(struct net_device *dev) @@ -1343,6 +1351,8 @@ int xve_cm_dev_init(struct net_device *dev) return ret; } + priv->dev_attr = attr; + /* Based on the admin mtu from the chassis */ attr.max_srq_sge = min_t(int, @@ -1366,7 +1376,7 @@ int xve_cm_dev_init(struct net_device *dev) xve_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge); if (xve_cm_has_srq(dev)) { - for (i = 0; i < xve_recvq_size; ++i) { + for (i = 0; i < priv->xve_recvq_size; ++i) { if (!xve_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, priv->cm.num_frags - 1, priv->cm. 
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_compat.h b/drivers/infiniband/ulp/xsigo/xve/xve_compat.h index cd24547330d4..7f41e15fabd3 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_compat.h +++ b/drivers/infiniband/ulp/xsigo/xve/xve_compat.h @@ -35,6 +35,10 @@ #define XVE_OP_CM (1ul << 30) #include + +/* for kernel >= 3.8 */ +#define random32 prandom_u32 + static inline void skb_pkt_type(struct sk_buff *skb, unsigned char type) { skb->pkt_type = type; @@ -54,7 +58,7 @@ static inline void xg_skb_push(struct sk_buff *skb, unsigned int len) static inline unsigned xve_random32(struct xve_dev_priv *priv) { - return (prandom_u32() & 0xffffff); + return random32() & 0xffffff; } static inline struct proc_dir_entry *xg_create_proc_entry(const char *name, diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_ib.c b/drivers/infiniband/ulp/xsigo/xve/xve_ib.c index 85ebdb06da1a..548f0c31d1db 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_ib.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_ib.c @@ -45,16 +45,17 @@ struct xve_ah *xve_create_ah(struct net_device *dev, return NULL; ah->dev = dev; - ah->last_send = 0; kref_init(&ah->ref); ah->ah = ib_create_ah(pd, attr); if (IS_ERR(ah->ah)) { kfree(ah); ah = NULL; - } else + } else { + atomic_set(&ah->refcnt, 0); xve_debug(DEBUG_MCAST_INFO, netdev_priv(dev), "%s Created ah %p\n", __func__, ah->ah); + } return ah; } @@ -71,7 +72,7 @@ void xve_free_ah(struct kref *kref) } static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv, - u64 mapping[XVE_UD_RX_SG]) + u64 mapping[XVE_UD_RX_EDR_SG]) { if (xve_ud_need_sg(priv->max_ib_mtu)) { ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE, @@ -79,34 +80,11 @@ static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv, ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); xve_counters[XVE_NUM_PAGES_ALLOCED]--; - } else + } else { ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); -} - -static void xve_ud_skb_put_frags(struct xve_dev_priv *priv, - struct sk_buff *skb, unsigned int length) -{ - if (xve_ud_need_sg(priv->max_ib_mtu)) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; - unsigned int size; - /* - * There is only two buffers needed for max_payload = 4K, - * first buf size is XVE_UD_HEAD_SIZE - */ - skb->tail += XVE_UD_HEAD_SIZE; - skb->len += length; - - size = length - XVE_UD_HEAD_SIZE; - - frag->size = size; - skb->data_len += size; - skb->truesize += size; - } else { - skb_put(skb, length); } - } static int xve_ib_post_receive(struct net_device *dev, int id) @@ -134,7 +112,7 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id) { struct xve_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - int buf_size; + int buf_size, align; u64 *mapping; if (xve_ud_need_sg(priv->max_ib_mtu)) @@ -142,16 +120,18 @@ static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id) else buf_size = XVE_UD_BUF_SIZE(priv->max_ib_mtu); - skb = xve_dev_alloc_skb(priv, buf_size + 10); - if (unlikely(!skb)) - return NULL; - /* * Eth header is 14 bytes, IB will leave a 40 byte gap for a GRH * so we need 10 more bytes to get to 64 and align the - * IP header to a multiple of 16. + * IP header to a multiple of 16. EDR vNICs will have an additional + * 4-byte EoIB header. */ - skb_reserve(skb, 10); + align = xve_is_ovn(priv) ? 
10 : 6; + skb = xve_dev_alloc_skb(priv, buf_size + align); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, align); mapping = priv->rx_ring[id].mapping; mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, @@ -187,7 +167,7 @@ static int xve_ib_post_receives(struct net_device *dev) struct xve_dev_priv *priv = netdev_priv(dev); int i; - for (i = 0; i < xve_recvq_size; ++i) { + for (i = 0; i < priv->xve_recvq_size; ++i) { if (!xve_alloc_rx_skb(dev, i)) { xve_warn(priv, "%s failed to allocate ib receive buffer %d\n", @@ -205,22 +185,102 @@ static int xve_ib_post_receives(struct net_device *dev) return 0; } -static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +static void xve_link_up(struct xve_dev_priv *priv) +{ + if (test_bit(XVE_FLAG_ADMIN_UP, &priv->flags) && + test_bit(XVE_CHASSIS_ADMIN_UP, &priv->flags)) { + if (test_and_clear_bit(XVE_HBEAT_LOST, &priv->state)) { + xve_set_oper_up_state(priv); + xve_xsmp_send_oper_state(priv, priv->resource_id, + XSMP_XVE_OPER_UP); + } + handle_carrier_state(priv, 1); + } +} + +void xve_process_link_state(struct xve_dev_priv *priv, + struct xve_keep_alive *ka) +{ + uint32_t state = ntohl(ka->uplink_status); + + if (state) { + set_bit(XVE_GW_STATE_UP, &priv->state); + priv->hb_interval = 30*HZ; + + if (!netif_carrier_ok(priv->netdev)) + xve_link_up(priv); + } else { + clear_bit(XVE_GW_STATE_UP, &priv->state); + priv->hb_interval = 15*HZ; + if (netif_carrier_ok(priv->netdev)) + handle_carrier_state(priv, 0); + } +} + +void xve_update_hbeat(struct xve_dev_priv *priv) +{ + priv->last_hbeat = jiffies; +} + +void xve_process_hbeat(struct xve_dev_priv *priv, struct xve_keep_alive *ka) +{ + xve_process_link_state(priv, ka); + xve_update_hbeat(priv); +} + +void xve_handle_ctrl_msg(struct xve_dev_priv *priv, + struct sk_buff *skb, struct ethhdr *eh) +{ + struct xve_keep_alive *ka; + + skb_pull(skb, ETH_HLEN); + + if (!pskb_may_pull(skb, sizeof(*ka))) + goto skb_free; + + ka = (struct xve_keep_alive *) skb->data; + xve_dbg_ctrl(priv, "RX CTRL_MSG: ethtype: 0x%x, type:%d, state: 0x%x\n", + ntohs(eh->h_proto), ntohl(ka->type), + ntohl(ka->uplink_status)); + + switch (ntohl(ka->type)) { + case XVE_VNIC_HBEAT: + xve_process_hbeat(priv, ka); + priv->counters[XVE_HBEAT_COUNTER]++; + break; + + case XVE_VNIC_LINK_STATE: + xve_process_link_state(priv, ka); + priv->counters[XVE_LINK_STATUS_COUNTER]++; + break; + + default: + xve_dbg_ctrl(priv, "Unknown control message type: %hu\n", + ka->type); + } + +skb_free: + dev_kfree_skb_any(skb); +} + +static void +xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct xve_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~XVE_OP_RECV; + struct ethhdr *eh; struct sk_buff *skb; - u64 mapping[XVE_UD_RX_SG]; + u64 mapping[XVE_UD_RX_EDR_SG]; struct ib_packed_grh *grhhdr; - char *smac; u16 vlan; - xve_dbg_data(priv, "recv completion: id %d, status: %d\n", - wr_id, wc->status); + xve_dbg_data(priv, "recv completion: id %d, QP%x, status: %d\n", + wr_id, wc->src_qp, wc->status); - if (unlikely(wr_id >= xve_recvq_size)) { + + if (unlikely(wr_id >= priv->xve_recvq_size)) { xve_warn(priv, "recv completion event with wrid %d (> %d)\n", - wr_id, xve_recvq_size); + wr_id, priv->xve_recvq_size); return; } @@ -242,11 +302,12 @@ static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * Drop packets that this interface sent, ie multicast packets * that the HCA has replicated. 
*/ - if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) + if (wc->slid == priv->local_lid && + (wc->src_qp & ~(0x3UL)) == priv->qp->qp_num) goto repost; memcpy(mapping, priv->rx_ring[wr_id].mapping, - XVE_UD_RX_SG * sizeof(*mapping)); + XVE_UD_RX_EDR_SG * sizeof(*mapping)); /* * If we can't allocate a new RX buffer, dump @@ -257,18 +318,51 @@ static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) goto repost; } + xve_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); xve_ud_dma_unmap_rx(priv, mapping); - xve_ud_skb_put_frags(priv, skb, wc->byte_len); - + skb_put_frags(skb, XVE_UD_HEAD_SIZE, wc->byte_len, NULL); grhhdr = (struct ib_packed_grh *)(skb->data); - smac = skb->data + IB_GRH_BYTES + ETH_ALEN; + /* This will print packet when driver is in Debug Mode */ + dumppkt(skb->data, skb->len, "UD Packet Dump"); skb_pull(skb, IB_GRH_BYTES); + + if (xve_is_edr(priv)) { + struct xve_eoib_hdr *eoibp; + + eoibp = (struct xve_eoib_hdr *)skb_pull(skb, sizeof(*eoibp)); + } + + if (!pskb_may_pull(skb, ETH_HLEN)) { + dev_kfree_skb_any(skb); + INC_RX_DROP_STATS(priv, dev); + goto repost; + } + + skb_reset_mac_header(skb); + eh = eth_hdr(skb); + if (ntohs(eh->h_proto) == ETH_P_XVE_CTRL) { /* heart beat/link status */ + xve_handle_ctrl_msg(priv, skb, eh); + goto repost; + } + vlan = xg_vlan_get_rxtag(skb); - xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, smac, vlan); + if (wc->wc_flags & IB_WC_GRH) { + xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, + eh->h_source, vlan); + } else { + xve_dbg_data(priv, + "No GRH, not used for fwt learning smac %pM, vlan:%u\n", + &eh->h_source, vlan); + priv->counters[XVE_RX_NOGRH]++; + } xve_prepare_skb(priv, skb); + if (((skb->dev->features & NETIF_F_RXCSUM) && + likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) || + test_bit(XVE_FLAG_CSUM, &priv->flags)) + skb->ip_summed = CHECKSUM_UNNECESSARY; xve_test("%s RX UD pkt %02x %02x %02x %02x %02x %02x %02x %02x %02x", __func__, skb->data[0], skb->data[1], skb->data[2], @@ -373,18 +467,26 @@ static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) xve_dbg_data(priv, "send completion: id %d, status: %d\n", wr_id, wc->status); - if (unlikely(wr_id >= xve_sendq_size)) { + if (unlikely(wr_id >= priv->xve_sendq_size)) { xve_warn(priv, "send completion event with wrid %d (> %d)\n", - wr_id, xve_sendq_size); + wr_id, priv->xve_sendq_size); return; } tx_req = &priv->tx_ring[wr_id]; + if ((tx_req == NULL) || (tx_req->ah == NULL)) { + xve_debug(DEBUG_DATA_INFO, priv, + "%s [ca %p] wr_id%d content NULL\n", + __func__, priv->ca, wr_id); + return; + } + + xve_put_ah_refcnt(tx_req->ah); xve_free_txbuf_memory(priv, tx_req); ++priv->tx_tail; - if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) && + if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) { priv->counters[XVE_TX_WAKE_UP_COUNTER]++; @@ -465,7 +567,8 @@ int xve_poll(struct napi_struct *napi, int budget) /* * If not connected complete it */ - if (!test_bit(XVE_OPER_UP, &priv->state)) { + if (!(test_bit(XVE_OPER_UP, &priv->state) || + test_bit(XVE_HBEAT_LOST, &priv->state))) { napi_complete(&priv->napi); clear_bit(XVE_INTR_ENABLED, &priv->state); return 0; @@ -496,7 +599,8 @@ poll_more: spin_lock_irqsave(&priv->lock, flags); if (test_bit(XVE_OS_ADMIN_UP, &priv->state) && test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) && - test_bit(XVE_OPER_UP, &priv->state) && + (test_bit(XVE_OPER_UP, &priv->state) 
|| + test_bit(XVE_HBEAT_LOST, &priv->state)) && !test_bit(XVE_DELETING, &priv->state)) { set_bit(XVE_INTR_ENABLED, &priv->state); if (unlikely @@ -530,14 +634,16 @@ void xve_data_recv_handler(struct xve_dev_priv *priv) { if (test_bit(XVE_OS_ADMIN_UP, &priv->state) && - test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) && - test_bit(XVE_OPER_UP, &priv->state) && - !test_bit(XVE_DELETING, &priv->state)) { + test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) && + (test_bit(XVE_OPER_UP, &priv->state) || + test_bit(XVE_HBEAT_LOST, &priv->state)) && + !test_bit(XVE_DELETING, &priv->state)) { priv->counters[XVE_NAPI_SCHED_COUNTER]++; clear_bit(XVE_INTR_ENABLED, &priv->state); napi_schedule(&priv->napi); - } else + } else { priv->counters[XVE_NAPI_NOTSCHED_COUNTER]++; + } } void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr) @@ -559,6 +665,7 @@ static inline int post_send(struct xve_dev_priv *priv, struct xve_tx_buf *tx_req, void *head, int hlen) { struct ib_send_wr *bad_wr; + struct ib_send_wr *wr = &priv->tx_wr; int i, off; struct sk_buff *skb = tx_req->skb; skb_frag_t *frags = skb_shinfo(skb)->frags; @@ -576,29 +683,34 @@ static inline int post_send(struct xve_dev_priv *priv, priv->tx_sge[i + off].addr = mapping[i + off]; priv->tx_sge[i + off].length = frags[i].size; } - priv->tx_wr.num_sge = nr_frags + off; - priv->tx_wr.wr_id = wr_id; - priv->tx_wr.wr.ud.remote_qpn = qpn; - priv->tx_wr.wr.ud.ah = address; - + wr->num_sge = nr_frags + off; + wr->wr_id = wr_id; + wr->wr.ud.remote_qpn = qpn; + wr->wr.ud.ah = address; if (head) { - priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size; - priv->tx_wr.wr.ud.header = head; - priv->tx_wr.wr.ud.hlen = hlen; - priv->tx_wr.opcode = IB_WR_LSO; - } else - priv->tx_wr.opcode = IB_WR_SEND; + wr->wr.ud.mss = skb_shinfo(skb)->gso_size; + wr->wr.ud.header = head; + wr->wr.ud.hlen = hlen; + wr->opcode = IB_WR_LSO; + } else { + wr->opcode = IB_WR_SEND; + } return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); } - -void xve_send(struct net_device *dev, struct sk_buff *skb, - struct xve_ah *address, u32 qpn) +/* type argument is used to differentiate between the GATEWAY + * and UVNIC packet. 
+ * 1 -> GATEWAY PACKET + * 0 -> normal UVNIC PACKET + */ +int xve_send(struct net_device *dev, struct sk_buff *skb, + struct xve_ah *address, u32 qpn, int type) { struct xve_dev_priv *priv = netdev_priv(dev); struct xve_tx_buf *tx_req; int hlen; void *phead; + int ret = NETDEV_TX_OK; if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -606,22 +718,25 @@ void xve_send(struct net_device *dev, struct sk_buff *skb, if (unlikely(!skb_pull(skb, hlen))) { xve_warn(priv, "%s linear data too small dropping %ld packets %s\n", - __func__, dev->stats.tx_dropped, dev->name); + __func__, dev->stats.tx_dropped, + dev->name); INC_TX_DROP_STATS(priv, dev); INC_TX_ERROR_STATS(priv, dev); + xve_put_ah_refcnt(address); dev_kfree_skb_any(skb); - return; + return ret; } } else { if (unlikely(skb->len > priv->mcast_mtu + VLAN_ETH_HLEN)) { - xve_warn(priv, "%s packet len %d", __func__, skb->len); + xve_warn(priv, "%s packet len %d", __func__, skb->len); xve_warn(priv, "(> %d) too long to", priv->mcast_mtu); xve_warn(priv, "send,dropping %ld packets %s\n", - dev->stats.tx_dropped, dev->name); + dev->stats.tx_dropped, dev->name); INC_TX_DROP_STATS(priv, dev); INC_TX_ERROR_STATS(priv, dev); + xve_put_ah_refcnt(address); dev_kfree_skb_any(skb); - return; + return ret; } phead = NULL; hlen = 0; @@ -631,6 +746,32 @@ void xve_send(struct net_device *dev, struct sk_buff *skb, "%s sending packet, length=%d address=%p qpn=0x%06x\n", __func__, skb->len, address, qpn); + if (++priv->tx_outstanding == priv->xve_sendq_size) { + if (type != 1) { + /* UVNIC PACKET */ + xve_dbg_data(priv, + "%s TX ring full, stopping kernel net queue\n", + __func__); + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) + xve_warn(priv, "%s Req notify on send CQ failed\n", + __func__); + priv->counters[XVE_TX_RING_FULL_COUNTER]++; + priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++; + netif_stop_queue(dev); + } else { + /* GATEWAY PACKET */ + xve_dbg_data(priv, + "%s TX ring full, Dropping the Gateway Packet\n", + __func__); + xve_put_ah_refcnt(address); + dev_kfree_skb(skb); + poll_tx(priv); + INC_TX_DROP_STATS(priv, dev); + priv->counters[XVE_TX_SKB_FREE_COUNTER]++; + priv->counters[XVE_TX_RING_FULL_COUNTER]++; + return ret; + } + } /* * We put the skb into the tx_ring _before_ we call post_send() * because it's entirely possible that the completion handler will @@ -638,46 +779,36 @@ void xve_send(struct net_device *dev, struct sk_buff *skb, * means we have to make sure everything is properly recorded and * our state is consistent before we call post_send(). 
*/ - tx_req = &priv->tx_ring[priv->tx_head & (xve_sendq_size - 1)]; + tx_req = &priv->tx_ring[priv->tx_head & (priv->xve_sendq_size - 1)]; tx_req->skb = skb; + tx_req->ah = address; if (unlikely(xve_dma_map_tx(priv->ca, tx_req))) { INC_TX_ERROR_STATS(priv, dev); + xve_put_ah_refcnt(address); dev_kfree_skb_any(tx_req->skb); memset(tx_req, 0, sizeof(struct xve_tx_buf)); - return; - } - - if (++priv->tx_outstanding == xve_sendq_size) { - xve_dbg_data(priv, - "%s TX ring full, stopping kernel net queue\n", - __func__); - if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) - xve_warn(priv, "%s request notify on send CQ failed\n", - __func__); - priv->counters[XVE_TX_RING_FULL_COUNTER]++; - priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++; - netif_stop_queue(dev); + return ret; } - - if (unlikely(post_send(priv, priv->tx_head & (xve_sendq_size - 1), + if (unlikely(post_send(priv, priv->tx_head & (priv->xve_sendq_size - 1), address->ah, qpn, tx_req, phead, hlen))) { xve_warn(priv, "%s post_send failed\n", __func__); INC_TX_ERROR_STATS(priv, dev); --priv->tx_outstanding; priv->counters[XVE_TX_RING_FULL_COUNTER]++; + xve_put_ah_refcnt(address); xve_free_txbuf_memory(priv, tx_req); if (netif_queue_stopped(dev)) { priv->counters[XVE_TX_WAKE_UP_COUNTER]++; netif_wake_queue(dev); } } else { - address->last_send = priv->tx_head; ++priv->tx_head; skb_orphan(skb); } priv->send_hbeat_flag = 0; - if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + if (unlikely(priv->tx_outstanding > priv->xve_max_send_cqe)) poll_tx(priv); + return ret; } static void __xve_reap_ah(struct net_device *dev) @@ -690,12 +821,13 @@ static void __xve_reap_ah(struct net_device *dev) netif_tx_lock_bh(dev); spin_lock_irqsave(&priv->lock, flags); - list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) - if ((int)priv->tx_tail - (int)ah->last_send >= 0) { + list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) { + if (atomic_read(&ah->refcnt) == 0) { list_del(&ah->list); ib_destroy_ah(ah->ah); kfree(ah); } + } spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); @@ -760,6 +892,8 @@ int xve_ib_dev_up(struct net_device *dev) } set_bit(XVE_FLAG_OPER_UP, &priv->flags); + priv->hb_interval = 30*HZ; + xve_update_hbeat(priv); return xve_mcast_start_thread(dev); } @@ -798,7 +932,7 @@ static int recvs_pending(struct net_device *dev) int pending = 0; int i; - for (i = 0; i < xve_recvq_size; ++i) + for (i = 0; i < priv->xve_recvq_size; ++i) if (priv->rx_ring[i].skb) ++pending; @@ -919,13 +1053,13 @@ int xve_ib_dev_stop(struct net_device *dev, int flush) */ while ((int)priv->tx_tail - (int)priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & - (xve_sendq_size - 1)]; + (priv->xve_sendq_size - 1)]; xve_free_txbuf_memory(priv, tx_req); ++priv->tx_tail; --priv->tx_outstanding; } - for (i = 0; i < xve_recvq_size; ++i) { + for (i = 0; i < priv->xve_recvq_size; ++i) { struct xve_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_main.c b/drivers/infiniband/ulp/xsigo/xve/xve_main.c index dfcc700697c1..91c29cee2b67 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_main.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_main.c @@ -56,6 +56,10 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, xve_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +int xve_max_send_cqe __read_mostly = MAX_SEND_CQE; +module_param_named(max_send_cqe, xve_max_send_cqe, int, 0444); 
+MODULE_PARM_DESC(max_send_cqe, "Threshold for polling send completion queue"); + static int napi_weight = 128; module_param(napi_weight, int, 0644); @@ -98,8 +102,41 @@ int xve_do_arp = 1; module_param_named(do_arp, xve_do_arp, int, 0644); MODULE_PARM_DESC(do_arp, "Enable/Disable ARP for NIC MTU less than IB-MTU"); +int xve_ignore_hbeat_loss; +module_param_named(ignore_hb_loss, xve_ignore_hbeat_loss, int, 0644); +MODULE_PARM_DESC(ignore_hb_loss, "Ignore heart beat loss on edr based vNICs with uplink"); + +int xve_enable_offload; +module_param_named(enable_offload, xve_enable_offload, int, 0444); +MODULE_PARM_DESC(enable_offload, "Enable stateless offload"); + +unsigned long xve_tca_subnet; +module_param(xve_tca_subnet, ulong, 0444); +MODULE_PARM_DESC(xve_tca_subnet, "tca subnet prefix"); + +unsigned long xve_tca_guid; +module_param(xve_tca_guid, ulong, 0444); +MODULE_PARM_DESC(xve_tca_guid, "TCA GUID"); + +unsigned int xve_tca_data_qp; +module_param(xve_tca_data_qp, uint, 0444); +MODULE_PARM_DESC(xve_tca_data_qp, "tca data qp number"); + +unsigned int xve_tca_pkey; +module_param(xve_tca_pkey, uint, 0444); +MODULE_PARM_DESC(xve_tca_pkey, "tca pkey"); + +unsigned int xve_tca_qkey; +module_param(xve_tca_qkey, uint, 0444); +MODULE_PARM_DESC(xve_tca_qkey, "tca qkey"); + +unsigned int xve_ud_mode; +module_param(xve_ud_mode, uint, 0444); +MODULE_PARM_DESC(xve_ud_mode, "Always use UD mode irrespective of xsmp.vnet_mode value"); + static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data, int len); +static void path_free(struct net_device *netdev, struct xve_path *path); struct xve_path_iter { struct net_device *dev; @@ -148,9 +185,15 @@ int xve_open(struct net_device *netdev) priv->counters[XVE_OPEN_COUNTER]++; spin_lock_irqsave(&priv->lock, flags); + if (test_bit(XVE_VNIC_READY_PENDING, &priv->state)) { + spin_unlock_irqrestore(&priv->lock, flags); + return -EAGAIN; + } set_bit(XVE_FLAG_ADMIN_UP, &priv->flags); set_bit(XVE_OPER_UP, &priv->state); set_bit(XVE_OS_ADMIN_UP, &priv->state); + if (xve_is_uplink(priv)) + set_bit(XVE_GW_STATE_UP, &priv->state); priv->port_speed = xve_calc_speed(priv); spin_unlock_irqrestore(&priv->lock, flags); @@ -192,6 +235,8 @@ static int xve_stop(struct net_device *netdev) xve_ib_dev_down(netdev, 0); xve_ib_dev_stop(netdev, 0); + xve_xsmp_send_oper_state(priv, priv->resource_id, + XSMP_XVE_OPER_DOWN); pr_info("XVE: %s Finished Stopping interface %s\n", __func__, priv->xve_name); @@ -289,6 +334,17 @@ static int xve_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) return ret; } +inline void xve_get_path(struct xve_path *path) +{ + atomic_inc(&path->users); +} + +inline void xve_put_path(struct xve_path *path) +{ + if (atomic_dec_and_test(&path->users)) + path_free(path->dev, path); +} + struct xve_path *__path_find(struct net_device *netdev, void *gid) { struct xve_dev_priv *priv = netdev_priv(netdev); @@ -338,6 +394,7 @@ static int __path_add(struct net_device *netdev, struct xve_path *path) rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); + xve_get_path(path); return 0; } @@ -368,6 +425,9 @@ static void path_free(struct net_device *netdev, struct xve_path *path) while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); + while ((skb = __skb_dequeue(&path->uplink_queue))) + dev_kfree_skb_irq(skb); + spin_lock_irqsave(&priv->lock, flags); if (xve_cmtx_get(path)) { spin_unlock_irqrestore(&priv->lock, flags); @@ -390,12 +450,19 @@ static void xve_flood_all_paths(struct net_device *dev, 
struct sk_buff *skb) struct xve_dev_priv *priv = netdev_priv(dev); struct xve_path *path; struct sk_buff *nskb; + int ret = 0; list_for_each_entry(path, &priv->path_list, list) { if (xve_cmtx_get(path) && xve_cm_up(path)) { nskb = skb_clone(skb, GFP_ATOMIC); - if (nskb) - xve_cm_send(dev, nskb, xve_cmtx_get(path)); + if (nskb) { + ret = xve_cm_send(dev, nskb, + xve_cmtx_get(path)); + if (ret == NETDEV_TX_BUSY) + xve_warn(priv, + "send queue full so dropping packet %s\n", + priv->xve_name); + } } } } @@ -464,7 +531,7 @@ void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid) wait_for_completion(&path->done); list_del(&path->list); - path_free(dev, path); + xve_put_path(path); } void xve_flush_single_path(struct net_device *dev, struct xve_path *path) @@ -480,9 +547,10 @@ static void path_rec_completion(int status, struct xve_dev_priv *priv = netdev_priv(dev); struct xve_ah *ah = NULL; struct xve_ah *old_ah = NULL; - struct sk_buff_head skqueue; + struct sk_buff_head skqueue, uplink_skqueue; struct sk_buff *skb; unsigned long flags; + int ret; if (!status) { priv->counters[XVE_PATHREC_RESP_COUNTER]++; @@ -496,12 +564,14 @@ static void path_rec_completion(int status, } skb_queue_head_init(&skqueue); + skb_queue_head_init(&uplink_skqueue); if (!status) { struct ib_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) { av.ah_flags = IB_AH_GRH; + av.grh.dgid = path->pathrec.dgid; ah = xve_create_ah(dev, priv->pd, &av); } } @@ -523,6 +593,8 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&path->queue))) __skb_queue_tail(&skqueue, skb); + while ((skb = __skb_dequeue(&path->uplink_queue))) + __skb_queue_tail(&uplink_skqueue, skb); path->valid = 1; } @@ -535,16 +607,29 @@ static void path_rec_completion(int status, xve_put_ah(old_ah); while ((skb = __skb_dequeue(&skqueue))) { - skb->dev = dev; + if (xve_is_edr(priv)) { + skb_pull(skb, sizeof(struct xve_eoib_hdr)); + skb_reset_mac_header(skb); + } if (dev_queue_xmit(skb)) { xve_warn(priv, - "dev_queue_xmit failed to requeue pkt for %s\n", - priv->xve_name); + "dev_queue_xmit failed to requeue pkt for %s\n", + priv->xve_name); } else { xve_test("%s Succefully completed path for %s\n", __func__, priv->xve_name); } } + while ((skb = __skb_dequeue(&uplink_skqueue))) { + skb->dev = dev; + xve_get_ah_refcnt(path->ah); + /* Sending the queued GATEWAY Packet */ + ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1); + if (ret == NETDEV_TX_BUSY) { + xve_warn(priv, "send queue full full, dropping packet for %s\n", + priv->xve_name); + } + } } static struct xve_path *path_rec_create(struct net_device *dev, void *gid) @@ -562,6 +647,7 @@ static struct xve_path *path_rec_create(struct net_device *dev, void *gid) path->dev = dev; skb_queue_head_init(&path->queue); + skb_queue_head_init(&path->uplink_queue); INIT_LIST_HEAD(&path->fwt_list); @@ -621,45 +707,180 @@ static int path_rec_start(struct net_device *dev, struct xve_path *path) xve_warn(priv, "ib_sa_path_rec_get failed: %d for %s\n", path->query_id, priv->xve_name); path->query = NULL; - complete(&path->done); + complete_all(&path->done); return path->query_id; } priv->counters[XVE_PATHREC_QUERY_COUNTER]++; return 0; } -static void xve_path_lookup(struct sk_buff *skb, struct net_device *dev, - struct xve_fwt_entry *fwt_entry, int *ok) +inline struct xve_path* +xve_fwt_get_path(struct xve_fwt_entry *fwt) +{ + if (!fwt->path) + return NULL; + + xve_get_path(fwt->path); + return fwt->path; +} + +struct xve_path* 
+xve_find_path_by_gid(struct xve_dev_priv *priv, + union ib_gid *gid) { - struct xve_dev_priv *priv = netdev_priv(dev); - struct xve_fwt_s *xve_fwt = &priv->xve_fwt; struct xve_path *path; - unsigned long flags = 0; - path = __path_find(dev, fwt_entry->dgid.raw); + path = __path_find(priv->netdev, gid->raw); if (!path) { xve_debug(DEBUG_TABLE_INFO, priv, "%s Unable to find path\n", __func__); - path = path_rec_create(dev, fwt_entry->dgid.raw); + path = path_rec_create(priv->netdev, gid->raw); if (!path) - goto err_drop; - __path_add(dev, path); + return NULL; + __path_add(priv->netdev, path); } + xve_get_path(path); + + return path; +} + +static struct xve_path* +xve_path_lookup(struct net_device *dev, + struct xve_fwt_entry *fwt_entry) +{ + struct xve_dev_priv *priv = netdev_priv(dev); + struct xve_fwt_s *xve_fwt = &priv->xve_fwt; + struct xve_path *path; + unsigned long flags = 0; xve_debug(DEBUG_TABLE_INFO, priv, "%s Adding FWT to list %p\n", __func__, fwt_entry); + path = xve_find_path_by_gid(priv, &fwt_entry->dgid); + if (!path) + return NULL; + spin_lock_irqsave(&xve_fwt->lock, flags); fwt_entry->path = path; list_add_tail(&fwt_entry->list, &path->fwt_list); spin_unlock_irqrestore(&xve_fwt->lock, flags); if (!path->ah) { - if (!path->query && path_rec_start(dev, path)) - goto err_drop; + if (!path->query && path_rec_start(dev, path)) { + xve_put_path(path); + return NULL; + } + } + + return path; +} + +struct xve_path * +xve_get_gw_path(struct net_device *dev) +{ + struct xve_dev_priv *priv = netdev_priv(dev); + struct xve_path *path; + + if (!priv->gw.t_data_qp) + return NULL; + + path = xve_find_path_by_gid(priv, &priv->gw.t_gid); + + if (!path->ah && !path->query) + path_rec_start(priv->netdev, path); + + return path; +} + +int xve_gw_send(struct net_device *dev, struct sk_buff *skb) +{ + struct xve_dev_priv *priv = netdev_priv(dev); + struct xve_gw_info *gwp = &priv->gw; + struct xve_path *path; + int ret = NETDEV_TX_OK; + + path = xve_get_gw_path(dev); + if (!path) + return NETDEV_TX_BUSY; + + if (path->ah) { + xve_dbg_data(priv, "Sending unicast copy to gw ah:%p dqpn:%u\n", + path->ah, gwp->t_data_qp); + xve_get_ah_refcnt(path->ah); + /* Sending Packet to GATEWAY */ + ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1); + } else if (skb_queue_len(&path->uplink_queue) < + XVE_MAX_PATH_REC_QUEUE) { + xve_dbg_data(priv, "gw ah not found - queue len: %u\n", + skb_queue_len(&path->uplink_queue)); + priv->counters[XVE_TX_QUEUE_PKT]++; + __skb_queue_tail(&path->uplink_queue, skb); + } else { + xve_dbg_data(priv, + "No path found to gw - droping the unicast packet\n"); + dev_kfree_skb_any(skb); + INC_TX_DROP_STATS(priv, dev); + goto out; + } + priv->counters[XVE_GW_MCAST_TX]++; + +out: + xve_put_path(path); + return ret; +} + +int xve_add_eoib_header(struct xve_dev_priv *priv, struct sk_buff *skb) +{ + struct xve_eoib_hdr *eoibp; + int len = sizeof(*eoibp); + + if (skb_headroom(skb) < len) { + struct sk_buff *skb_new; + + skb_new = skb_realloc_headroom(skb, len); + if (!skb_new) + return -1; + + kfree_skb(skb); + skb = skb_new; + } + eoibp = (struct xve_eoib_hdr *) skb_push(skb, len); + + skb_set_mac_header(skb, len); + if (!xve_enable_offload) { + eoibp->magic = cpu_to_be16(XVE_EOIB_MAGIC); + eoibp->tss_mask_sz = 0; + return 0; + } + /* encap_data = (VNIC_EOIB_HDR_VER << 4) | (VNIC_EOIB_HDR_SIG << 6) + From net/ethernet/mellanox/mlx4_vnic/vnic_data_tx.c */ + eoibp->encap_data = 0x3 << 6; + eoibp->seg_off = eoibp->seg_id = 0; +#define VNIC_EOIB_HDR_UDP_CHK_OK 0x2 +#define 
VNIC_EOIB_HDR_TCP_CHK_OK 0x1 +#define VNIC_EOIB_HDR_IP_CHK_OK 0x1 + +#define VNIC_EOIB_HDR_SET_IP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \ + (eoib_hdr->encap_data & 0xFC) | VNIC_EOIB_HDR_IP_CHK_OK) +#define VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \ + (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_TCP_CHK_OK << 2)) +#define VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \ + (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_UDP_CHK_OK << 2)) + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: { + struct iphdr *ip_h = ip_hdr(skb); + + VNIC_EOIB_HDR_SET_IP_CHK_OK(eoibp); + if (ip_h->protocol == IPPROTO_TCP) + VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoibp); + else if (ip_h->protocol == IPPROTO_UDP) + VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoibp); + break; } - *ok = 1; - return; -err_drop: - *ok = 0; + + case ETH_P_IPV6: + break; + } + return 0; } static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -667,17 +888,15 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) struct sk_buff *bcast_skb = NULL; struct xve_dev_priv *priv = netdev_priv(dev); struct xve_fwt_entry *fwt_entry = NULL; - struct xve_path *path; + struct xve_path *path = NULL; unsigned long flags; int ret = NETDEV_TX_OK, len = 0; - char *smac; u8 skb_need_tofree = 0, inc_drop_cnt = 0, queued_pkt = 0; u16 vlan_tag = 0; spin_lock_irqsave(&priv->lock, flags); if (!test_bit(XVE_OPER_UP, &priv->state)) { ret = NETDEV_TX_BUSY; - inc_drop_cnt = 1; priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++; goto unlock; } @@ -687,27 +906,34 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_padto(skb, XVE_MIN_PACKET_LEN)) { inc_drop_cnt = 1; priv->counters[XVE_TX_SKB_ALLOC_ERROR_COUNTER]++; - ret = NETDEV_TX_BUSY; + ret = NETDEV_TX_OK; goto unlock; } skb->len = XVE_MIN_PACKET_LEN; } - len = skb->len; - smac = skb->data + ETH_ALEN; - + skb_reset_mac_header(skb); if (xg_vlan_tx_tag_present(skb)) vlan_get_tag(skb, &vlan_tag); - fwt_entry = xve_fwt_lookup(&priv->xve_fwt, skb->data, vlan_tag, 0); + if (xve_is_edr(priv) && + xve_add_eoib_header(priv, skb)) { + skb_need_tofree = inc_drop_cnt = 1; + priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++; + goto unlock; + } + len = skb->len; + + fwt_entry = xve_fwt_lookup(&priv->xve_fwt, eth_hdr(skb)->h_dest, + vlan_tag, 0); if (!fwt_entry) { - if (is_multicast_ether_addr(skb->data)) { - xve_mcast_send(dev, (void *)priv->bcast_mgid.raw, skb); + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { + ret = xve_mcast_send(dev, + (void *)priv->bcast_mgid.raw, skb); priv->counters[XVE_TX_MCAST_PKT]++; goto stats; } else { /* - * XXX Viswa Need to change this * Since this is a unicast packet and we do not have * an L2 table entry * We need to do the following @@ -721,23 +947,23 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) * Do not ARP if if user does not want to for less * than IB-MTU */ - if (xve_do_arp + if (!xve_is_edr(priv) && (xve_do_arp || (priv->netdev->mtu > - XVE_UD_MTU(priv->max_ib_mtu))) + XVE_UD_MTU(priv->max_ib_mtu)))) bcast_skb = xve_generate_query(priv, skb); - if (bcast_skb != NULL) - xve_mcast_send(dev, - (void *)priv->bcast_mgid.raw, - bcast_skb); + if (bcast_skb != NULL) + ret = xve_mcast_send(dev, + (void *)priv->bcast_mgid. 
+ raw, bcast_skb); /* * Now send the original packet also to over broadcast * Later add counters for flood mode */ - if (len < XVE_UD_MTU(priv->max_ib_mtu)) { - xve_mcast_send(dev, - (void *)priv->bcast_mgid.raw, - skb); + if (xve_is_edr(priv) || + len < XVE_UD_MTU(priv->max_ib_mtu)) { + ret = xve_mcast_send(dev, + (void *)priv->bcast_mgid.raw, skb); priv->counters[XVE_TX_MCAST_FLOOD_UD]++; } else { if (xve_flood_rc) { @@ -756,20 +982,18 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (!fwt_entry->path) { - int ok; - + path = xve_fwt_get_path(fwt_entry); + if (!path) { priv->counters[XVE_PATH_NOT_FOUND]++; xve_debug(DEBUG_SEND_INFO, priv, "%s Unable to find neigbour doing a path lookup\n", __func__); - xve_path_lookup(skb, dev, fwt_entry, &ok); - if (!ok) { + path = xve_path_lookup(dev, fwt_entry); + if (!path) { skb_need_tofree = inc_drop_cnt = 1; goto free_fwt_ctx; } } else { - path = fwt_entry->path; if (!path->ah) { priv->counters[XVE_AH_NOT_FOUND]++; xve_debug(DEBUG_SEND_INFO, priv, @@ -782,11 +1006,9 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - path = fwt_entry->path; - if (xve_cmtx_get(path)) { if (xve_cm_up(path)) { - xve_cm_send(dev, skb, xve_cmtx_get(path)); + ret = xve_cm_send(dev, skb, xve_cmtx_get(path)); update_cm_tx_rate(xve_cmtx_get(path), len); priv->counters[XVE_TX_RC_COUNTER]++; goto stats; @@ -794,7 +1016,8 @@ static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev) } else if (path->ah) { xve_debug(DEBUG_SEND_INFO, priv, "%s path ah is %p\n", __func__, path->ah); - xve_send(dev, skb, path->ah, fwt_entry->dqpn); + xve_get_ah_refcnt(path->ah); + ret = xve_send(dev, skb, path->ah, fwt_entry->dqpn, 0); priv->counters[XVE_TX_UD_COUNTER]++; goto stats; } @@ -815,6 +1038,8 @@ stats: INC_TX_BYTE_STATS(priv, dev, len); priv->counters[XVE_TX_COUNTER]++; free_fwt_ctx: + if (path) + xve_put_path(path); xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry); unlock: if (inc_drop_cnt) @@ -849,21 +1074,21 @@ int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port) struct xve_dev_priv *priv = netdev_priv(dev); /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kcalloc(xve_recvq_size, sizeof(*priv->rx_ring), + priv->rx_ring = kcalloc(priv->xve_recvq_size, sizeof(*priv->rx_ring), GFP_KERNEL); if (!priv->rx_ring) { pr_warn("%s: failed to allocate RX ring (%d entries)\n", - ca->name, xve_recvq_size); + ca->name, priv->xve_recvq_size); goto out; } - priv->tx_ring = vmalloc(xve_sendq_size * sizeof(*priv->tx_ring)); + priv->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*priv->tx_ring)); if (!priv->tx_ring) { pr_warn("%s: failed to allocate TX ring (%d entries)\n", - ca->name, xve_sendq_size); + ca->name, priv->xve_sendq_size); goto out_rx_ring_cleanup; } - memset(priv->tx_ring, 0, xve_sendq_size * sizeof(*priv->tx_ring)); + memset(priv->tx_ring, 0, priv->xve_sendq_size * sizeof(*priv->tx_ring)); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ @@ -1001,7 +1226,7 @@ void handle_carrier_state(struct xve_dev_priv *priv, char state) struct sk_buff *xve_generate_query(struct xve_dev_priv *priv, struct sk_buff *skb) { - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + struct vlan_ethhdr *veth = vlan_eth_hdr(skb); if ((xg_vlan_tx_tag_present(skb) && veth->h_vlan_encapsulated_proto == htons(ETH_P_IP)) @@ -1046,7 +1271,7 @@ struct sk_buff *xve_create_arp(struct xve_dev_priv *priv, struct vlan_ethhdr *veth; vlan_get_tag(skb_pkt, &vlan_tci); - veth = (struct vlan_ethhdr 
*)(skb->data); + veth = vlan_eth_hdr(skb); veth->h_vlan_proto = htons(ETH_P_8021Q); /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); @@ -1176,7 +1401,7 @@ struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv, struct vlan_ethhdr *veth; vlan_get_tag(skb_pkt, &vlan_tci); - veth = (struct vlan_ethhdr *)(skb->data); + veth = vlan_eth_hdr(skb); veth->h_vlan_proto = htons(ETH_P_8021Q); /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); @@ -1284,9 +1509,6 @@ int xve_send_hbeat(struct xve_dev_priv *priv) skb->protocol = htons(ETH_P_RARP); ret = xve_start_xmit(skb, priv->netdev); - if (ret) - dev_kfree_skb_any(skb); - return 0; } @@ -1370,7 +1592,23 @@ static int xve_state_machine(struct xve_dev_priv *priv) if (test_bit(XVE_OPER_UP, &priv->state) && test_bit(XVE_OS_ADMIN_UP, &priv->state) && !test_bit(XVE_DELETING, &priv->state)) { - + /* Heart beat loss */ + if (xve_is_uplink(priv) && + !xve_ignore_hbeat_loss && + time_after(jiffies, (unsigned long)priv->last_hbeat + + XVE_HBEAT_LOSS_THRES*priv->hb_interval)) { + unsigned long flags = 0; + + xve_warn(priv, "Heart Beat Loss: %lu:%lu\n", jiffies, + (unsigned long)priv->last_hbeat + + 3*priv->hb_interval*HZ); + + xve_flush_paths(priv->netdev); + spin_lock_irqsave(&priv->lock, flags); + xve_set_oper_down(priv); + set_bit(XVE_HBEAT_LOST, &priv->state); + spin_unlock_irqrestore(&priv->lock, flags); + } priv->counters[XVE_STATE_MACHINE_UP]++; if (!test_bit(XVE_OPER_REP_SENT, &priv->state)) (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl, @@ -1392,7 +1630,8 @@ static int xve_state_machine(struct xve_dev_priv *priv) if (priv->send_hbeat_flag) { poll_tx(priv); - xve_send_hbeat(priv); + if (xve_is_ovn(priv)) + xve_send_hbeat(priv); } priv->send_hbeat_flag = 1; } @@ -1478,7 +1717,46 @@ static void xve_set_netdev(struct net_device *dev) INIT_DELAYED_WORK(&priv->mcast_leave_task, xve_mcast_leave_task); INIT_DELAYED_WORK(&priv->mcast_join_task, xve_mcast_join_task); INIT_DELAYED_WORK(&priv->stale_task, xve_cm_stale_task); +} + +void +xve_set_ovn_features(struct xve_dev_priv *priv) +{ + priv->netdev->features |= + NETIF_F_HIGHDMA | NETIF_F_GRO; + + if (!xve_no_tx_checksum_offload) { + priv->netdev->features |= NETIF_F_IP_CSUM; + set_bit(XVE_FLAG_CSUM, &priv->flags); + } + + if (priv->lro_mode && lro) { + priv->netdev->features |= NETIF_F_LRO; + xve_lro_setup(priv); + } else { + priv->lro_mode = 0; + } +} + +void +xve_set_edr_features(struct xve_dev_priv *priv) +{ + priv->netdev->hw_features = + NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_GRO; + + if (xve_enable_offload) { + if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) + priv->netdev->hw_features |= + NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + + if (priv->hca_caps & IB_DEVICE_UD_TSO) + priv->netdev->hw_features |= NETIF_F_TSO; + + } + priv->netdev->features |= priv->netdev->hw_features; + /* Reserve extra space for EoIB header */ + priv->netdev->hard_header_len += sizeof(struct xve_eoib_hdr); } int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca) @@ -1487,43 +1765,27 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca) int result = -ENOMEM; priv->netdev->watchdog_timeo = 1000 * HZ; - priv->netdev->tx_queue_len = xve_sendq_size * 2; - priv->netdev->features |= - NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM; - set_bit(XVE_FLAG_CSUM, &priv->flags); + priv->netdev->tx_queue_len = priv->xve_sendq_size * 2; - if (lro) - priv->lro_mode = 1; - /* 1 -RC , 2 -UD */ - if (priv->vnet_mode == 1) { + priv->lro_mode = 1; + if (priv->vnet_mode == XVE_VNET_MODE_RC) { pr_info("XVE: %s 
Setting RC mode for %s\n", __func__, priv->xve_name); strcpy(priv->mode, "connected(RC)"); - /* Turn off checksum offload If the module parameter is set */ - /* TBD if the chassis sends a CHECK SUM BIT */ - if (xve_no_tx_checksum_offload) { - priv->netdev->features &= ~NETIF_F_IP_CSUM; - clear_bit(XVE_FLAG_CSUM, &priv->flags); - } - set_bit(XVE_FLAG_ADMIN_CM, &priv->flags); - priv->netdev->features &= ~(NETIF_F_TSO | NETIF_F_SG); - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; priv->cm_supported = 1; - } else { /* UD */ - /* MTU will be reset when mcast join happens */ + } else {/* UD */ + pr_info("XVE: %s Setting UD mode for %s\n", __func__, + priv->xve_name); strcpy(priv->mode, "datagram(UD)"); + + /* MTU will be reset when mcast join happens */ if (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu)) priv->netdev->mtu = XVE_UD_MTU(priv->max_ib_mtu); - priv->lro_mode = 1; - priv->cm_supported = 0; - + priv->lro_mode = 0; } - priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu; - - if (priv->lro_mode) - priv->netdev->features |= NETIF_F_LRO; + priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu; xg_setup_pseudo_device(priv->netdev, hca); SET_NETDEV_OPS(priv->netdev, &xve_netdev_ops); @@ -1531,7 +1793,6 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca) netif_napi_add(priv->netdev, &priv->napi, xve_poll, napi_weight); if (xve_esx_preregister_setup(priv->netdev)) return -EINVAL; - xve_lro_setup(priv); xve_set_netdev(priv->netdev); @@ -1539,21 +1800,26 @@ int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca) if (!device_attr) { pr_warn("%s: allocation of %zu bytes failed\n", - hca->name, sizeof(*device_attr)); + hca->name, sizeof(*device_attr)); return result; } result = ib_query_device(hca, device_attr); if (result) { pr_warn("%s: ib_query_device failed (ret = %d)\n", - hca->name, result); + hca->name, result); kfree(device_attr); return result; } priv->hca_caps = device_attr->device_cap_flags; - kfree(device_attr); + xve_lro_setup(priv); + if (xve_is_ovn(priv)) + xve_set_ovn_features(priv); + else + xve_set_edr_features(priv); + return 0; } @@ -1720,7 +1986,7 @@ int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state) return ret; } -static void xve_set_oper_up_state(struct xve_dev_priv *priv) +void xve_set_oper_up_state(struct xve_dev_priv *priv) { unsigned long flags = 0; @@ -1750,6 +2016,7 @@ static int handle_admin_state_change(struct xve_dev_priv *priv, __func__, priv->xve_name); if (test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) { priv->counters[XVE_ADMIN_DOWN_COUNTER]++; + netif_carrier_off(priv->netdev); clear_bit(XVE_CHASSIS_ADMIN_UP, &priv->state); set_bit(XVE_SEND_ADMIN_STATE, &priv->state); } @@ -1795,7 +2062,23 @@ static int xve_xsmp_send_ack(struct xve_dev_priv *priv, xmsgp->code = 0; xmsgp->vn_mtu = cpu_to_be16(priv->admin_mtu); xmsgp->net_id = cpu_to_be32(priv->net_id); - pr_info("XVE: %s ACK back with admin mtu ", __func__); + if (priv->vnic_type != XSMP_XCM_OVN) { + xmsgp->hca_subnet_prefix = + cpu_to_be64(priv->local_gid.global.subnet_prefix); + xmsgp->hca_ctrl_qp = 0; + xmsgp->hca_data_qp = cpu_to_be32(priv->qp->qp_num); + xmsgp->hca_qkey = cpu_to_be32(priv->qkey); + xmsgp->hca_pkey = cpu_to_be16(priv->pkey); + xmsgp->tca_subnet_prefix = + cpu_to_be64(priv->gw.t_gid.global.subnet_prefix); + xmsgp->tca_guid = + cpu_to_be64(priv->gw.t_gid.global.interface_id); + xmsgp->tca_ctrl_qp = cpu_to_be32(priv->gw.t_ctrl_qp); + xmsgp->tca_data_qp = cpu_to_be32(priv->gw.t_data_qp); + xmsgp->tca_pkey = 
cpu_to_be16(priv->gw.t_pkey); + xmsgp->tca_qkey = cpu_to_be16(priv->gw.t_qkey); + } + pr_info("XVE: %s ACK back with admin mtu ", __func__); pr_info("%d for %s", xmsgp->vn_mtu, priv->xve_name); pr_info("[netid %d ]\n", xmsgp->net_id); @@ -1804,6 +2087,32 @@ static int xve_xsmp_send_ack(struct xve_dev_priv *priv, return xve_xsmp_send_msg(xsmp_hndl, msg, total_len); } +static void +xve_update_gw_info(struct xve_dev_priv *priv, struct xve_xsmp_msg *xmsgp) +{ + struct xve_gw_info *gwp = &priv->gw; + + gwp->t_gid.global.subnet_prefix = + xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) : + xmsgp->tca_subnet_prefix; + + gwp->t_gid.global.interface_id = + xve_tca_guid ? cpu_to_be64(xve_tca_guid) : + xmsgp->tca_guid; + gwp->t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp); + gwp->t_data_qp = xve_tca_data_qp ? (xve_tca_data_qp) + : be32_to_cpu(xmsgp->tca_data_qp); + gwp->t_pkey = xve_tca_pkey ? (xve_tca_pkey) + : be16_to_cpu(xmsgp->tca_pkey); + gwp->t_qkey = xve_tca_qkey ? (xve_tca_qkey) + : be16_to_cpu(xmsgp->tca_qkey); + xve_dbg_ctrl(priv, "GW INFO gid:%pI6, lid: %hu\n", + &gwp->t_gid.raw, be32_to_cpu(xmsgp->tca_lid)); + xve_dbg_ctrl(priv, "qpn: %u, pkey: 0x%x, qkey: 0x%x\n", + gwp->t_data_qp, gwp->t_pkey, + gwp->t_qkey); +} + /* * Handle install message */ @@ -1819,13 +2128,14 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, int result = -ENOMEM; struct ib_device *hca; u8 port; + __be16 pkey_be; __be32 net_id_be; u8 ecode = 0; if (xve_check_for_hca(xsmp_hndl) != 0) { pr_info("Warning !!!!! Unsupported HCA card for xve "); pr_info("interface - %s XSF feature is only ", xmsgp->xve_name); - pr_info("supported on Connect-X HCA cards !!!!!!!"); + pr_info("supported on Connect-X and PSIF HCA cards !!!!!!!"); ret = -EEXIST; goto dup_error; } @@ -1869,11 +2179,12 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, } netdev = - alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, &xve_setup); + alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, + &xve_setup); if (netdev == NULL) { XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n", - __func__, xmsgp->xve_name, - be64_to_cpu(xmsgp->resource_id)); + __func__, xmsgp->xve_name, + be64_to_cpu(xmsgp->resource_id)); ret = -ENOMEM; ecode = XVE_NACK_ALLOCATION_ERROR; goto dup_error; @@ -1882,23 +2193,70 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, pr_info("XVE: %s Installing xve %s - ", __func__, xmsgp->xve_name); pr_info("resource id %llx", be64_to_cpu(xmsgp->resource_id)); - pr_info("priv DS %p\n", priv); + pr_info("priv DS %p\n", priv); xcpm_get_xsmp_session_info(xsmp_hndl, &priv->xsmp_info); hca = priv->xsmp_info.ib_device; port = xscore_port_num(priv->xsmp_info.port); /* Parse PVI parameters */ - priv->vnet_mode = (xmsgp->vnet_mode); + priv->vnet_mode = xve_ud_mode ? 
XVE_VNET_MODE_UD : + (xmsgp->vnet_mode); priv->net_id = be32_to_cpu(xmsgp->net_id); priv->netdev->mtu = be16_to_cpu(xmsgp->vn_mtu); priv->resource_id = be64_to_cpu(xmsgp->resource_id); priv->mp_flag = be16_to_cpu(xmsgp->mp_flag); + priv->install_flag = be32_to_cpu(xmsgp->install_flag); priv->xsmp_hndl = xsmp_hndl; priv->sm_delay = 1000; priv->aging_delay = xve_aging_timeout * HZ; strcpy(priv->xve_name, xmsgp->xve_name); strcpy(priv->proc_name, priv->xve_name); net_id_be = cpu_to_be32(priv->net_id); + /* Parse uVnic properties */ + /* For legacy PVIs, XSMP will not have the vnic_type field, so + the value is zero */ + priv->vnic_type = xmsgp->vnic_type; + /* Make send and recv queue parameters per vnic */ + priv->xve_sendq_size = xve_sendq_size; + priv->xve_recvq_size = xve_recvq_size; + priv->xve_max_send_cqe = xve_max_send_cqe; + + if (priv->vnic_type == XSMP_XCM_UPLINK) { + /* For G/W mode set higher values */ + priv->xve_sendq_size = 8192; + priv->xve_recvq_size = 8192; + priv->xve_max_send_cqe = 512; + priv->gw.t_gid.global.subnet_prefix = + xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) : + be64_to_cpu(xmsgp->tca_subnet_prefix); + + priv->gw.t_gid.global.interface_id = + xve_tca_guid ? cpu_to_be64(xve_tca_guid) : + be64_to_cpu(xmsgp->tca_guid); + priv->gw.t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp); + priv->gw.t_data_qp = xve_tca_data_qp ? xve_tca_data_qp : + be32_to_cpu(xmsgp->tca_data_qp); + priv->gw.t_pkey = xve_tca_pkey ? xve_tca_pkey : + be16_to_cpu(xmsgp->tca_pkey); + /* FIXME: xmsgp->tca_qkey is u16. Need to fix in osdn */ + priv->gw.t_qkey = xve_tca_qkey ? xve_tca_qkey : + be16_to_cpu(xmsgp->tca_qkey); + xve_dbg_ctrl(priv, + "GW prefix:%llx guid:%llx, lid: %hu sl: %hu TDQP%x TCQP:%x\n", + priv->gw.t_gid.global.subnet_prefix, + priv->gw.t_gid.global.interface_id, + be16_to_cpu(xmsgp->tca_lid), + be16_to_cpu(xmsgp->service_level), + priv->gw.t_data_qp, priv->gw.t_ctrl_qp); + } + /* Pkey */ + priv->pkey = xve_tca_pkey ? xve_tca_pkey : + be16_to_cpu(xmsgp->tca_pkey); + if (priv->pkey == 0) + priv->pkey |= 0x8000; + /* Qkey for EDR vnics */ + priv->gw.t_qkey = xve_tca_qkey ?
xve_tca_qkey : + be16_to_cpu(xmsgp->tca_qkey); /* Always set chassis ADMIN up by default */ set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state); @@ -1906,30 +2264,52 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, if (!ib_query_port(hca, port, &priv->port_attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(priv->port_attr.max_mtu); else { - pr_warn("%s: ib_query_port %d failed\n", hca->name, port); + pr_warn("%s: ib_query_port %d failed\n", + hca->name, port); goto device_init_failed; } - memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid)); - pr_info("XVE: %s adding vnic %s ", __func__, priv->xve_name); - pr_info("net_id %d vnet_mode %d", priv->net_id, priv->vnet_mode); + pr_info("XVE: %s adding vnic %s ", + __func__, priv->xve_name); + pr_info("net_id %d vnet_mode %d type%d", + priv->net_id, priv->vnet_mode, priv->vnic_type); pr_info("port %d net_id_be %d\n", port, net_id_be); - memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be)); - result = ib_query_pkey(hca, port, 0, &priv->pkey); - if (result) { - pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; + memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid)); + if (xve_is_edr(priv)) { + result = ib_find_pkey(hca, port, priv->pkey, &priv->pkey_index); + if (result != 0) + pr_warn("%s : ib_find_pkey %d failed %d in %s\n", + hca->name, port, result, __func__); + /* EDR MGID format: FF15:101C:P:0:0:0:0:N + * Where, P is the P_Key, N is the NetID. */ + pkey_be = cpu_to_be16(priv->pkey); + priv->bcast_mgid.raw[0] = 0xFF; + priv->bcast_mgid.raw[1] = 0x15; + priv->bcast_mgid.raw[2] = 0x10; + priv->bcast_mgid.raw[3] = 0x1C; + memcpy(&priv->bcast_mgid.raw[4], &pkey_be, 2); + memcpy(&priv->bcast_mgid.raw[12], &net_id_be, + sizeof(net_id_be)); + } else { + memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be)); + result = ib_query_pkey(hca, port, 0, &priv->pkey); + if (result) { + pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", + hca->name, port, result); + goto device_init_failed; + } + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; } + pr_info("MGID: %pI6 pkey%d\n", &priv->bcast_mgid.raw, priv->pkey); + if (xve_set_dev_features(priv, hca)) goto device_init_failed; - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. 
- */ - priv->pkey |= 0x8000; result = ib_query_gid(hca, port, 0, &priv->local_gid); @@ -1990,7 +2370,10 @@ static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, list_add_tail(&priv->list, &xve_dev_list); mutex_unlock(&xve_mutex); - xve_send_msg_to_xsigod(xsmp_hndl, data, len); + if (xve_is_ovn(priv)) + xve_send_msg_to_xsigod(xsmp_hndl, data, len); + else + set_bit(XVE_VNIC_READY_PENDING, &priv->state); queue_sm_work(priv, 0); @@ -2004,7 +2387,7 @@ send_ack: __func__, xmsgp->xve_name, be64_to_cpu(xmsgp->resource_id)); } - if (update_state) { + if (update_state && priv->vnic_type == XSMP_XCM_OVN) { printk ("XVE: %s Sending Oper state to chassis for %s id %llx\n", __func__, priv->xve_name, priv->resource_id); @@ -2109,42 +2492,94 @@ static void xve_xsmp_send_stats(xsmp_cookie_t xsmp_hndl, u8 *data, int length) static int xve_xsmp_update(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp) { u32 bitmask = be32_to_cpu(xmsgp->bitmask); - struct xve_dev_priv *xvep; + struct xve_dev_priv *priv; int ret = 0; - int send_ack = 1; + int send_ack = 0; - xvep = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id)); - if (!xvep) { + priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id)); + if (!priv) { XSMP_ERROR("%s: request for invalid vid: 0x%llx\n", __func__, be64_to_cpu(xmsgp->resource_id)); return -EINVAL; } - XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, xvep->xve_name, + XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, priv->xve_name, bitmask); - mutex_lock(&xvep->mutex); + mutex_lock(&priv->mutex); - if (bitmask & XVE_UPDATE_ADMIN_STATE) { - ret = handle_admin_state_change(xvep, xmsgp); + if (bitmask & XVE_UPDATE_ADMIN_STATE) /* * Ack will be sent once QP's are brought down */ - send_ack = 0; + ret = handle_admin_state_change(priv, xmsgp); + if (bitmask & XVE_UPDATE_MTU) + xve_modify_mtu(priv->netdev, be16_to_cpu(xmsgp->vn_mtu)); + + if (bitmask & XVE_UPDATE_XT_STATE_DOWN && + xve_is_uplink(priv)) { + clear_bit(XVE_GW_STATE_UP, &priv->state); + if (netif_carrier_ok(priv->netdev)) + handle_carrier_state(priv, 0); + } + if (bitmask & XVE_UPDATE_XT_CHANGE && xve_is_uplink(priv)) { + xve_update_gw_info(priv, xmsgp); + if (!netif_carrier_ok(priv->netdev)) + handle_carrier_state(priv, 1); + send_ack = 1; } if (send_ack) { - ret = xve_xsmp_send_ack(xvep, xmsgp); - if (ret) + ret = xve_xsmp_send_ack(priv, xmsgp); + if (ret) { XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s\n" - "VID=0x%llx\n", __func__, xmsgp->xve_name, - be64_to_cpu(xmsgp->resource_id)); + "VID=0x%llx\n", __func__, xmsgp->xve_name, + be64_to_cpu(xmsgp->resource_id)); + } } - mutex_unlock(&xvep->mutex); + mutex_unlock(&priv->mutex); return ret; } +static int +xve_xsmp_vnic_ready(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp, + void *data, int len) +{ + struct xve_dev_priv *priv; + unsigned long flags; +int ret; + + priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id)); + if (!priv) { + XSMP_INFO("XVE: %s priv not found for %s\n", + __func__, xmsgp->xve_name); + return -1; + } + pr_info("XVE VNIC_READY: vnic_type: %u, subnet_prefix: %llx\n", + priv->vnic_type, priv->gw.t_gid.global.subnet_prefix); + pr_info("ctrl_qp: %u, data_qp: %u, pkey: %x, qkey: %x\n", + priv->gw.t_ctrl_qp, priv->gw.t_data_qp, + priv->gw.t_pkey, priv->gw.t_qkey); + + xve_send_msg_to_xsigod(xsmp_hndl, data, len); + spin_lock_irqsave(&priv->lock, flags); + clear_bit(XVE_VNIC_READY_PENDING, &priv->state); + spin_unlock_irqrestore(&priv->lock, flags); + + ret = xve_xsmp_send_ack(priv, xmsgp); + if (ret) { + 
XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s, VID=0x%llx\n", + __func__, xmsgp->xve_name, + be64_to_cpu(xmsgp->resource_id)); + } + + (void) xve_xsmp_handle_oper_req(priv->xsmp_hndl, + priv->resource_id); + + return 0; +} + /* * We set the DELETING bit and let sm_work thread handle delete */ @@ -2193,6 +2628,9 @@ static void handle_xve_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data, xve_counters[XVE_VNIC_INSTALL_COUNTER]++; xve_xsmp_install(xsmp_hndl, xmsgp, data, length); break; + case XSMP_VNIC_READY: + xve_xsmp_vnic_ready(xsmp_hndl, xmsgp, data, length); + break; case XSMP_XVE_DELETE: xve_counters[XVE_VNIC_DEL_COUNTER]++; xve_handle_del_message(xsmp_hndl, xmsgp); @@ -2379,7 +2817,7 @@ static int __init xve_init_module(void) xve_sendq_size = roundup_pow_of_two(xve_sendq_size); xve_sendq_size = min(xve_sendq_size, XVE_MAX_QUEUE_SIZE); - xve_sendq_size = max(xve_sendq_size, max(2 * MAX_SEND_CQE, + xve_sendq_size = max(xve_sendq_size, max(2 * xve_max_send_cqe, XVE_MIN_QUEUE_SIZE)); /* * When copying small received packets, we only copy from the diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c b/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c index 19ee47a81e99..314f7ff043ed 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c @@ -175,6 +175,7 @@ static int xve_mcast_join_finish(struct xve_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; } @@ -254,6 +255,9 @@ static int xve_mcast_sendonly_join_complete(int status, struct xve_mcast *mcast = multicast->context; struct net_device *dev = mcast->netdev; + xve_dbg_mcast(netdev_priv(dev), + "Join completion[SD] for %pI6 LID0x%04x (status %d)\n", + multicast->rec.mgid.raw, multicast->rec.mlid, status); /* We trap for port events ourselves. */ if (status == -ENETRESET) return 0; @@ -334,8 +338,8 @@ static int xve_mcast_sendonly_join(struct xve_mcast *mcast) rec.flow_label = priv->broadcast->mcmember.flow_label; rec.hop_limit = priv->broadcast->mcmember.hop_limit; } - xve_dbg_mcast(priv, "%s Joining send only join mtu %d\n", __func__, - rec.mtu); + xve_dbg_mcast(priv, "%s Joining send only join mtu %d rate %d\n", + __func__, rec.mtu, rec.rate); mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port, &rec, @@ -363,8 +367,9 @@ static int xve_mcast_join_complete(int status, struct net_device *dev = mcast->netdev; struct xve_dev_priv *priv = netdev_priv(dev); - xve_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", - mcast->mcmember.mgid.raw, status); + priv->bcast_mlid = be16_to_cpu(multicast->rec.mlid); + xve_dbg_mcast(priv, "join completion for %pI6 LID0x%04x (status %d)\n", + mcast->mcmember.mgid.raw, priv->bcast_mlid, status); /* We trap for port events ourselves. 
*/ if (status == -ENETRESET) @@ -450,7 +455,7 @@ static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast, IB_SA_MCMEMBER_REC_RATE_SELECTOR | IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_HOP_LIMIT; - rec.qkey = 0x0; + rec.qkey = cpu_to_be32(priv->gw.t_qkey); rec.traffic_class = 0x0; rec.sl = 0x0; rec.flow_label = 0x0; @@ -462,8 +467,8 @@ static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast, rec.rate = mcast_rate; } - xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d\n", - mcast->mcmember.mgid.raw, rec.pkey, rec.qkey); + xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d rate%d\n", + mcast->mcmember.mgid.raw, rec.pkey, rec.qkey, rec.rate); set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags); mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port, &rec, comp_mask, GFP_KERNEL, @@ -650,17 +655,25 @@ static int xve_mcast_leave(struct net_device *dev, struct xve_mcast *mcast) return 0; } -void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) +int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) { struct xve_dev_priv *priv = netdev_priv(dev); struct xve_mcast *mcast; + int ret = NETDEV_TX_OK; if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { INC_TX_DROP_STATS(priv, dev); dev_kfree_skb_any(skb); - return; + return ret; + } + + if (xve_is_uplink(priv) && xve_gw_linkup(priv)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) + ret = xve_gw_send(dev, nskb); } mcast = __xve_mcast_find(dev, mgid); @@ -691,6 +704,7 @@ void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) else { INC_TX_DROP_STATS(priv, dev); dev_kfree_skb_any(skb); + return ret; } if (test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) { @@ -708,14 +722,14 @@ void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - xve_test("%s about to send mcast %02x%02x%02x%02x%02x%02x", - __func__, skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4], skb->data[5]); - xve_test("ah=%p proto=%02x%02x for %s\n", mcast->ah->ah, - skb->data[12], skb->data[13], dev->name); - xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + xve_test("%s about to send mcast %pM" + , __func__, eth_hdr(skb)->h_dest); + xve_test("ah=%p proto=%04x for %s\n", + mcast->ah->ah, eth_hdr(skb)->h_proto, dev->name); + xve_get_ah_refcnt(mcast->ah); + ret = xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN, 0); } - + return ret; } void xve_mcast_carrier_on_task(struct work_struct *work) diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_stats.c b/drivers/infiniband/ulp/xsigo/xve/xve_stats.c index 632aef3aa676..9a4413d61e41 100755 --- a/drivers/infiniband/ulp/xsigo/xve/xve_stats.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_stats.c @@ -142,6 +142,10 @@ static char *counter_name[XVE_MAX_COUNTERS] = { "ib lid_active count:\t\t", "ib pkey_change count:\t\t", "ib invalid count:\t\t", + "uplink unicast:\t\t\t", + "Heartbeat Count:\t\t", + "Link State message count:\t", + "RX frames without GRH\t\t", }; static char *misc_counter_name[XVE_MISC_MAX_COUNTERS] = { @@ -438,6 +442,7 @@ static int xve_proc_read_device(struct seq_file *m, void *data) tmp_buf[0] = 0; print_mgid_buf(tmp_buf, bcast_mgid_token); seq_printf(m, "Bcast Mgid:\t\t\t%s\n", tmp_buf); + seq_printf(m, "Bcast Mlid:\t\t\t0x%04x\n", vp->bcast_mlid); tmp_buf[0] = 0; print_mgid_buf(tmp_buf, local_gid_token); @@ -574,6 +579,14 @@ static int 
xve_proc_read_device(struct seq_file *m, void *data) seq_printf(m, "WQ Failed:\t\t\t%ld\n", vp->work_queue_failed); seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared); + + if (xve_is_uplink(vp)) { + seq_printf(m, "Time since last heart beat: %llu sec\n", + (jiffies-vp->last_hbeat)/HZ); + seq_printf(m, "TCA info:\t\t\tGID: %pI6\tQPN: %u\n", + &vp->gw.t_gid.raw, vp->gw.t_data_qp); + } + vp->next_page = 1; out: return 0; @@ -588,9 +601,9 @@ static ssize_t xve_proc_write_device(struct file *file, int ret; ret = sscanf(buffer, "%s", action); - if (ret != 1) { + if (ret != 1) return -EINVAL; - } + if ((strlen(action) == 1) && (atoi(action) == 0)) { /* Clear counters */ memset(vp->counters, 0, sizeof(vp->counters)); @@ -634,30 +647,29 @@ static ssize_t xve_proc_write_device_counters(struct file *file, struct xve_dev_priv *vp = PDE_DATA(file_inode(file)); int newval, ret; char *buf = (char *) __get_free_page(GFP_USER); - if (!buf) { + + if (!buf) return -ENOMEM; - } - if (copy_from_user(buf, buffer, count - 1)) { + if (copy_from_user(buf, buffer, count - 1)) goto out; - } + buf[count] = '\0'; ret = kstrtoint(buf, 0, &newval); - if (ret != 0) { - return -EINVAL; - } + if (ret != 0) + return -EINVAL; - if (newval == 0) { - /* Clear counters */ - memset(vp->counters, 0, sizeof(vp->counters)); - vp->counters_cleared++; - } - return count; + if (newval == 0) { + /* Clear counters */ + memset(vp->counters, 0, sizeof(vp->counters)); + vp->counters_cleared++; + } + return count; out: - free_page((unsigned long)buf); - return -EINVAL; + free_page((unsigned long)buf); + return -EINVAL; } static int xve_proc_open_device_counters(struct inode *inode, struct file *file) @@ -809,19 +821,19 @@ static ssize_t xve_proc_write_debug(struct file *file, { int newval, ret; char *buf = (char *) __get_free_page(GFP_USER); - if (!buf) { + + if (!buf) return -ENOMEM; - } - if (copy_from_user(buf, buffer, count - 1)) { + if (copy_from_user(buf, buffer, count - 1)) goto out; - } + buf[count] = '\0'; ret = kstrtoint(buf, 0, &newval); - if (ret != 0) { + if (ret != 0) return -EINVAL; - } + xve_debug_level = newval; return count; diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_tables.c b/drivers/infiniband/ulp/xsigo/xve/xve_tables.c index eea4854a922e..71f7843607eb 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_tables.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_tables.c @@ -169,13 +169,7 @@ int xve_aging_task_machine(struct xve_dev_priv *priv) && ((jiffies - fwt_entry->last_refresh) >= priv->aging_delay)) { pr_info("XVE: %s MAC ", priv->xve_name); - pr_info("%02x:%02x:%02x:%02x:%02x:%02x", - ALIGN_TO_FF(smac[0]), - ALIGN_TO_FF(smac[1]), - ALIGN_TO_FF(smac[2]), - ALIGN_TO_FF(smac[3]), - ALIGN_TO_FF(smac[4]), - ALIGN_TO_FF(smac[5])); + pr_info("%pM", smac); pr_info(" vlan %d Aged out\n", fwt_entry->vlan); /* @@ -255,6 +249,10 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx, struct xve_path *path; char from[64], to[64]; + if (xve_is_uplink(priv) && + !memcmp(&gid->raw, &priv->gw.t_gid.raw, sizeof(*gid))) + qpn = priv->gw.t_data_qp; + fwt_entry = xve_fwt_lookup(xve_fwt, smac, vlan, 1); if (fwt_entry) { if (unlikely @@ -262,11 +260,8 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx, (fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)))) { print_mgid_buf(from, (char *)fwt_entry->dgid.raw); print_mgid_buf(to, (char *)gid->raw); - pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x ", - priv->xve_name, ALIGN_TO_FF(smac[0]), - ALIGN_TO_FF(smac[1]), 
ALIGN_TO_FF(smac[2]), - ALIGN_TO_FF(smac[3]), ALIGN_TO_FF(smac[4]), - ALIGN_TO_FF(smac[5])); + pr_info("XVE: %s MAC %pM ", + priv->xve_name, smac); pr_info(" vlan %d moved from GID %s to GID %s\n", fwt_entry->vlan, from, to); @@ -306,13 +301,9 @@ void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx, } memset(fwt_entry, 0, sizeof(struct xve_fwt_entry)); print_mgid_buf(from, (char *)gid->raw); - pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x", - priv->xve_name, ALIGN_TO_FF(smac[0]), - ALIGN_TO_FF(smac[1]), - ALIGN_TO_FF(smac[2]), ALIGN_TO_FF(smac[3]), - ALIGN_TO_FF(smac[4]), ALIGN_TO_FF(smac[5])); - pr_info("vlan %d learned from GID %s, mode: %s Fwt %p\n", - vlan, from, qpn ? "UD" : "RC", fwt_entry); + pr_info("XVE: %s MAC %pM", priv->xve_name, smac); + pr_info("vlan %d learned from GID %s, mode: %s QPN %x Fwt %p\n", + vlan, from, qpn ? "UD" : "RC", qpn, fwt_entry); priv->counters[XVE_MAC_LEARN_COUNTER]++; memcpy(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)); fwt_entry->dqpn = qpn; @@ -408,7 +399,7 @@ void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb) skb->protocol = eth_type_trans(skb, priv->netdev); skb->dev = priv->netdev; skb_pkt_type(skb, PACKET_HOST); - if (test_bit(XVE_FLAG_CSUM, &priv->flags)) + if (xve_is_ovn(priv) && test_bit(XVE_FLAG_CSUM, &priv->flags)) skb->ip_summed = CHECKSUM_UNNECESSARY; skb->truesize = skb->len + sizeof(struct sk_buff); } diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c b/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c index ad9d6be2bb5b..168019b6bb7a 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c +++ b/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c @@ -67,8 +67,8 @@ int xve_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, ret = ib_attach_mcast(priv->qp, mgid, mlid); if (ret) xve_warn(priv, - "failed to attach to multicast group, ret = %d\n", - ret); + "failed to attach to multicast group, ret = %d\n", + ret); out: kfree(qp_attr); @@ -82,8 +82,10 @@ int xve_init_qp(struct net_device *dev) struct ib_qp_attr qp_attr; int attr_mask; - if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) + if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) { + xve_warn(priv, "PKEY not assigned\n"); return -1; + } qp_attr.qp_state = IB_QPS_INIT; qp_attr.qkey = 0; @@ -130,22 +132,21 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca) struct xve_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr init_attr = { .cap = { - .max_send_wr = xve_sendq_size, - .max_recv_wr = xve_recvq_size, + .max_send_wr = priv->xve_sendq_size, + .max_recv_wr = priv->xve_recvq_size, .max_send_sge = 1, - .max_recv_sge = XVE_UD_RX_SG}, + .max_recv_sge = xve_ud_rx_sg(priv)}, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; - - int ret, size; - int i; struct ethtool_coalesce *coal; + int ret, size, max_sge; + int i; priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { pr_warn("%s: failed to allocate PD for %s\n", - ca->name, priv->xve_name); + ca->name, priv->xve_name); return -ENODEV; } @@ -155,16 +156,18 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_pd; } - size = xve_recvq_size + 1; + size = priv->xve_recvq_size + 1; ret = xve_cm_dev_init(dev); if (ret != 0) { pr_err("%s Failed for %s [ret %d ]\n", __func__, - priv->xve_name, ret); + priv->xve_name, ret); goto out_free_mr; } - size += xve_sendq_size; - size += xve_recvq_size + 1; /* 1 extra for rx_drain_qp */ + size += priv->xve_sendq_size; + size = priv->xve_recvq_size + 1; /* 1 extra for 
rx_drain_qp */ + + /* Create Receive CompletionQueue */ priv->recv_cq = ib_create_cq(priv->ca, xve_ib_completion, NULL, dev, size, 0); if (IS_ERR(priv->recv_cq)) { @@ -173,8 +176,9 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_mr; } + /* Create Send CompletionQueue */ priv->send_cq = ib_create_cq(priv->ca, xve_send_comp_handler, NULL, - dev, xve_sendq_size, 0); + dev, priv->xve_sendq_size, 0); if (IS_ERR(priv->send_cq)) { pr_warn("%s: failed to create send CQ for %s\n", ca->name, priv->xve_name); @@ -197,11 +201,19 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.send_cq = priv->send_cq; init_attr.recv_cq = priv->recv_cq; + if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) + init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; + if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; - if (dev->features & NETIF_F_SG) - init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + if (dev->features & NETIF_F_SG) { + /* As Titan Card supports less than MAX SKB we need to check */ + max_sge = priv->dev_attr.max_sge; + if (max_sge >= (MAX_SKB_FRAGS + 1)) + max_sge = MAX_SKB_FRAGS + 1; + init_attr.cap.max_send_sge = max_sge; + } priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { @@ -221,7 +233,7 @@ int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_sge[0].length = XVE_UD_HEAD_SIZE; priv->rx_sge[1].length = PAGE_SIZE; priv->rx_sge[1].lkey = priv->mr->lkey; - priv->rx_wr.num_sge = XVE_UD_RX_SG; + priv->rx_wr.num_sge = xve_ud_rx_sg(priv); } else { priv->rx_sge[0].length = XVE_UD_BUF_SIZE(priv->max_ib_mtu); priv->rx_wr.num_sge = 1; @@ -249,34 +261,36 @@ out_free_pd: void xve_transport_dev_cleanup(struct net_device *dev) { struct xve_dev_priv *priv = netdev_priv(dev); - int ret = 0; + int ret; + /* Destroy QP */ if (priv->qp) { - if (ib_destroy_qp(priv->qp)) - xve_warn(priv, "ib_qp_destroy failed\n"); + ret = ib_destroy_qp(priv->qp); + if (ret) + xve_warn(priv, + "ib_qp_destroy failed (ret = %d)\n", ret); + priv->qp = NULL; clear_bit(XVE_PKEY_ASSIGNED, &priv->flags); } + ret = ib_destroy_cq(priv->send_cq); if (ret) xve_warn(priv, "%s ib_destroy_cq (sendq) failed ret=%d\n", - __func__, ret); + __func__, ret); ret = ib_destroy_cq(priv->recv_cq); if (ret) xve_warn(priv, "%s ib_destroy_cq failed ret=%d\n", - __func__, ret); + __func__, ret); xve_cm_dev_cleanup(dev); - ret = ib_dereg_mr(priv->mr); - if (ret) - xve_warn(priv, "%s ib_dereg_mr failed ret=%d\n", __func__, ret); + if (ib_dereg_mr(priv->mr)) + xve_warn(priv, "ib_dereg_mr failed\n"); - ret = ib_dealloc_pd(priv->pd); - if (ret) - xve_warn(priv, "%s ib_dealloc_pd failed ret=%d\n", - __func__, ret); + if (ib_dealloc_pd(priv->pd)) + xve_warn(priv, "ib_dealloc_pd failed\n"); } void xve_event(struct ib_event_handler *handler, struct ib_event *record) @@ -293,32 +307,32 @@ void xve_event(struct ib_event_handler *handler, struct ib_event *record) switch (record->event) { case IB_EVENT_SM_CHANGE: - priv->counters[XVE_SM_CHANGE_COUNTER]++; - xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT); - break; + priv->counters[XVE_SM_CHANGE_COUNTER]++; + xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT); + break; case IB_EVENT_CLIENT_REREGISTER: - priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++; - set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags); - xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT); - break; + priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++; + 
set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags); + xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT); + break; case IB_EVENT_PORT_ERR: - priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++; - xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); - break; + priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++; + xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); + break; case IB_EVENT_PORT_ACTIVE: - priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++; - xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); - break; + priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++; + xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); + break; case IB_EVENT_LID_CHANGE: - priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++; - xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); - break; + priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++; + xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL); + break; case IB_EVENT_PKEY_CHANGE: - priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++; - xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY); - break; + priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++; + xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY); + break; default: - priv->counters[XVE_INVALID_EVENT_COUNTER]++; - break; + priv->counters[XVE_INVALID_EVENT_COUNTER]++; + break; } } diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h b/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h index 43a516edf795..65a1128ff92b 100644 --- a/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h +++ b/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h @@ -61,6 +61,10 @@ enum xve_xsmp_cmd_type { XSMP_XVE_HA_INFO, XSMP_XVE_ISCSI_INFO, + XSMP_XSF_FWD_TABLE, + XSMP_XSF_L2_TABLE, + XSMP_VNIC_READY, + XSMP_XVE_TYPE_MAX, }; @@ -86,13 +90,13 @@ struct xve_xsmp_msg { u8 xve_name[XVE_MAX_NAME_SIZE]; u16 service_level; /* SL value for this vnic */ u16 fc_active; /* 1: enable, 0: - * disable host rate control */ + * disable host rate control */ u16 cir; /* committed rate in mbps */ u16 pir; /* peak rate in mbps */ u32 cbs; /* committed burst size in bytes */ u32 pbs; /* peak burst size in bytes */ u8 vm_index; /* the index used by vmware - * for persistence */ + * for persistence */ u8 _reserved; u16 mp_flag; u8 mp_group[XVE_MP_GROUP_NAME_MAX]; @@ -101,6 +105,21 @@ struct xve_xsmp_msg { /* for virtual network */ u32 net_id; u8 vnet_mode; + + u8 vnic_type; + + u64 tca_subnet_prefix; + u32 tca_ctrl_qp; + u32 tca_data_qp; + u16 tca_pkey; + u16 tca_qkey; + + /* host must fill these in INSTALL ACK */ + u64 hca_subnet_prefix; + u32 hca_ctrl_qp; + u32 hca_data_qp; + u16 hca_pkey; + u16 hca_qkey; } __packed; u8 bytes[512]; }; @@ -257,7 +276,7 @@ struct xve_iscsi_msg { #define XVE_UPDATE_QOS (1 << 7) #define XVE_UPDATE_ACL (1 << 8) #define XVE_UPDATE_MP_FLAG (1 << 10) -#define XVE_XT_STATE_DOWN (1 << 30) +#define XVE_UPDATE_XT_STATE_DOWN (1 << 30) #define XVE_UPDATE_XT_CHANGE (1 << 31) /* mp_flag */ -- 2.50.1
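
Note: the EDR broadcast MGID built in xve_xsmp_install() above follows the layout FF15:101C:P:0:0:0:0:N, where P is the P_Key (full-membership bit set) and N is the NetID. Below is a minimal user-space sketch of that layout, for reference only; it is not part of the patch. The helper name build_edr_bcast_mgid() is hypothetical, and htons()/htonl() stand in for the kernel's cpu_to_be16()/cpu_to_be32().

/*
 * Illustrative sketch only: compose the 16-byte EDR broadcast MGID
 * in the FF15:101C:P:0:0:0:0:N layout described in the patch.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

static void build_edr_bcast_mgid(uint8_t mgid[16], uint16_t pkey, uint32_t net_id)
{
	uint16_t pkey_be = htons(pkey);		/* P: P_Key, bytes 4-5   */
	uint32_t net_id_be = htonl(net_id);	/* N: NetID, bytes 12-15 */

	memset(mgid, 0, 16);
	mgid[0] = 0xFF;
	mgid[1] = 0x15;
	mgid[2] = 0x10;
	mgid[3] = 0x1C;
	memcpy(&mgid[4], &pkey_be, sizeof(pkey_be));
	memcpy(&mgid[12], &net_id_be, sizeof(net_id_be));
}

int main(void)
{
	uint8_t mgid[16];
	int i;

	/* assume the default P_Key 0x0001 with the membership bit, NetID 7 */
	build_edr_bcast_mgid(mgid, 0x8001, 7);

	for (i = 0; i < 16; i += 2)
		printf("%02x%02x%c", mgid[i], mgid[i + 1], i == 14 ? '\n' : ':');
	return 0;
}

For P_Key 0x8001 and NetID 7 this prints ff15:101c:8001:0000:0000:0000:0000:0007, the same grouping the "MGID: %pI6" log line reports for an EDR vnic.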