xve-y := xve_main.o xve_verbs.o xve_multicast.o xve_ib.o xve_tables.o \
xve_ethtool.o xve_cm.o xve_stats.o
-ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8008\"
ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
#define PREFIX_MULTI_ADDR 0x33
/* ethernet header length */
#define ETH_HDR_LEN 14
+#define XVE_EOIB_MAGIC 0x8919
+#define ETH_P_XVE_CTRL 0x8919
+#define XVE_EOIB_LEN 4
+
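+/* vnet_mode values carried in the XSMP install message:
+ * 1 = RC (connected mode), 2 = UD (datagram mode).
+ */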
+#define XVE_VNET_MODE_RC 1
+#define XVE_VNET_MODE_UD 2
+
+#define XVE_MAX_RX_QUEUES 16
+#define XVE_MAX_TX_QUEUES 16
/* constants */
enum xve_flush_level {
};
enum {
- XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN,
- XVE_UD_RX_SG = 2, /* max buffer needed for 4K mtu */
+ XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN + XVE_EOIB_LEN + 2048,
+ XVE_UD_RX_OVN_SG = 2, /* max buffer needed for 4K mtu */
+ XVE_UD_RX_EDR_SG = 3, /* max buffer needed for 10K mtu */
XVE_CM_MTU = 0x10000 - 0x20, /* padding to align header to 16 */
XVE_CM_BUF_SIZE = XVE_CM_MTU + VLAN_ETH_HLEN,
XVE_CM_HEAD_SIZE = XVE_CM_BUF_SIZE % PAGE_SIZE,
XVE_EVENT_PKEY_CHANGE_COUNTER,
XVE_INVALID_EVENT_COUNTER,
+ XVE_GW_MCAST_TX,
+ XVE_HBEAT_COUNTER,
+ XVE_LINK_STATUS_COUNTER,
+ XVE_RX_NOGRH,
+
XVE_MAX_COUNTERS
};
DEBUG_CONTINUE_UNLOAD = 0x00002000,
DEBUG_MISC_INFO = 0x00004000,
DEBUG_IBDEV_INFO = 0x00008000,
- DEBUG_CM_INFO = 0x00010000
+ DEBUG_CM_INFO = 0x00010000,
+ DEBUG_CTRL_INFO = 0x00020000
};
#define XVE_OP_RECV (1ul << 31)
#define XVE_OVER_QUOTA 23
#define XVE_TSO_CHANGE 24
#define XVE_RXBATCH_CHANGE 25
+#define XVE_VNIC_READY_PENDING 26
+#define XVE_HBEAT_LOST 27
+#define XVE_GW_STATE_UP 28
+
#define MODULE_NAME "XVE"
#define ALIGN_TO_FF(a) (a & 0xff)
#define XVE_FWT_ENTRY_VALID 1
#define XVE_FWT_ENTRY_REFRESH 2
-#define XVE_UD_MTU(ib_mtu) (ib_mtu - VLAN_ETH_HLEN)
-#define XVE_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + VLAN_ETH_HLEN)
-#define XVE_MIN_PACKET_LEN 60
+#define XVE_UD_MTU(ib_mtu) (ib_mtu - (VLAN_ETH_HLEN + XVE_EOIB_LEN))
+#define XVE_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + \
+ (VLAN_ETH_HLEN + XVE_EOIB_LEN))
+#define XVE_MIN_PACKET_LEN 64
+
+enum xcm_type {
+ XSMP_XCM_OVN,
+ XSMP_XCM_NOUPLINK,
+ XSMP_XCM_UPLINK
+};
+
+#define xve_is_uplink(priv) ((priv)->vnic_type == XSMP_XCM_UPLINK)
+#define xve_is_ovn(priv) ((priv)->vnic_type == XSMP_XCM_OVN)
+#define xve_is_edr(priv) (!xve_is_ovn(priv))
+#define xve_gw_linkup(priv) test_bit(XVE_GW_STATE_UP, &(priv)->state)
+#define xve_ud_rx_sg(priv) (xve_is_edr(priv) ? XVE_UD_RX_EDR_SG : \
+ XVE_UD_RX_OVN_SG)
/*Extern declarations */
extern int xve_debug_level;
extern u32 xve_hash_salt;
extern int xve_sendq_size;
extern int xve_recvq_size;
+extern int xve_max_send_cqe;
extern struct ib_sa_client xve_sa_client;
extern u32 xve_counters[];
extern struct workqueue_struct *xve_taskqueue;
struct xve_rx_buf {
struct sk_buff *skb;
- u64 mapping[XVE_UD_RX_SG];
+ u64 mapping[XVE_UD_RX_EDR_SG];
};
struct xve_tx_buf {
struct sk_buff *skb;
+ struct xve_ah *ah;
u64 mapping[MAX_SKB_FRAGS + 1];
};
unsigned num;
};
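+/* Control message types carried in struct xve_keep_alive.type */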
+#define XVE_VNIC_HBEAT 1
+#define XVE_VNIC_LINK_STATE 2
+
+#define XVE_HBEAT_LOSS_THRES 3
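+/* Keep-alive / link-state payload carried in ETH_P_XVE_CTRL control
+ * frames; all multi-byte fields are in network byte order.
+ */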
+struct xve_keep_alive {
+ uint32_t pvi_id;
+ uint32_t type;
+ uint64_t tca_hbeat_cnt;
+ uint32_t uplink_status;
+} __packed;
+
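+/* Gateway (TCA) addressing learned over XSMP for uplink vNICs: target
+ * GID, control and data QP numbers, Q_Key and P_Key.
+ */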
+struct xve_gw_info {
+ union ib_gid t_gid;
+ u32 t_ctrl_qp;
+ u32 t_data_qp;
+ u32 t_qkey;
+ u16 t_pkey;
+};
+
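+/* 4-byte EoIB encapsulation header used by EDR vNICs; the layout differs
+ * between CX and PSIF adapters.
+ */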
+struct xve_eoib_hdr {
+ union {
+ struct { /* CX */
+ __u8 encap_data;
+ __u8 seg_off;
+ __be16 seg_id;
+ };
+ struct { /* PSIF */
+ __be16 magic;
+ __be16 tss_mask_sz;
+ };
+ };
+} __packed;
+
+
+struct xve_rx_cm_info {
+ struct ib_sge rx_sge[XVE_CM_RX_SG];
+ struct ib_recv_wr rx_wr;
+};
+
+
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
struct ib_qp *qp;
union ib_gid local_gid;
union ib_gid bcast_mgid;
+ __be16 bcast_mlid;
u16 local_lid;
u32 qkey;
+ /* Device attributes */
+ struct ib_device_attr dev_attr;
+
/* Netdev related attributes */
struct net_device *netdev;
struct net_device_stats stats;
unsigned long jiffies;
struct xve_fwt_s xve_fwt;
int aging_delay;
+ void *pci;
+ uint32_t hb_interval;
+ uint64_t last_hbeat;
struct xve_cm_dev_priv cm;
unsigned int cm_supported;
unsigned int mcast_mtu;
unsigned int max_ib_mtu;
char mode[64];
-
/* TX and RX Ring attributes */
+ int xve_recvq_size;
+ int xve_sendq_size;
+ int xve_max_send_cqe;
struct xve_rx_buf *rx_ring;
struct xve_tx_buf *tx_ring;
unsigned tx_head;
struct ib_send_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE];
struct ib_recv_wr rx_wr;
- struct ib_sge rx_sge[XVE_UD_RX_SG];
+ /* Allocate EDR SG for now */
+ struct ib_sge rx_sge[XVE_UD_RX_EDR_SG];
struct ib_wc ibwc[XVE_NUM_WC];
struct ib_cq *recv_cq;
struct ib_cq *send_cq;
u64 resource_id;
u64 mac;
u32 net_id;
+ u32 install_flag;
u16 mp_flag;
- char vnet_mode;
+ u8 vnet_mode;
+ u8 vnic_type;
char xve_name[XVE_MAX_NAME_SIZE];
+ struct xve_gw_info gw;
/* Proc related attributes */
struct proc_dir_entry *nic_dir;
struct ib_ah *ah;
struct list_head list;
struct kref ref;
- unsigned last_send;
+ atomic_t refcnt;
};
struct ib_packed_grh {
struct rb_node rb_node;
struct list_head list;
int valid;
+ int index;
struct sk_buff_head queue;
+ struct sk_buff_head uplink_queue;
+ atomic_t users;
};
struct xve_work {
dev->stats.rx_bytes += len; \
} while (0)
-#define SET_FLUSH_BIT(priv, bit) \
- do { \
- unsigned long flags; \
- spin_lock_irqsave(&priv->lock, flags); \
- set_bit(bit, &priv->state); \
- spin_unlock_irqrestore(&priv->lock, flags); \
- } while (0)
-
#define PRINT(level, x, fmt, arg...) \
printk(level "%s: " fmt, MODULE_NAME, ##arg)
#define XSMP_ERROR(fmt, arg...) \
((struct xve_dev_priv *) priv)->netdev->name, \
## arg)
#define xve_warn(priv, format, arg...) \
- xve_printk(KERN_WARNING, priv, format , ## arg)
+ xve_printk(KERN_WARNING, priv, format, ## arg)
#define XSMP_INFO(fmt, arg...) \
do { \
if (xve_debug_level & DEBUG_XSMP_INFO) \
- PRINT(KERN_DEBUG, "XSMP", fmt , ## arg);\
+ PRINT(KERN_DEBUG, "XSMP", fmt, ## arg);\
} while (0)
#define xve_test(fmt, arg...) \
do { \
if (xve_debug_level & DEBUG_TEST_INFO) \
- PRINT(KERN_DEBUG, "DEBUG", fmt , ## arg); \
+ PRINT(KERN_DEBUG, "DEBUG", fmt, ## arg); \
} while (0)
#define xve_dbg_data(priv, format, arg...) \
xve_printk(KERN_DEBUG, priv, format, \
## arg); \
} while (0)
+#define xve_dbg_ctrl(priv, format, arg...) \
+ do { \
+ if (xve_debug_level & DEBUG_CTRL_INFO) \
+ xve_printk(KERN_DEBUG, priv, format, \
+ ## arg); \
+ } while (0)
#define xve_dbg_mcast(priv, format, arg...) \
do { \
if (xve_debug_level & DEBUG_MCAST_INFO) \
- xve_printk(KERN_ERR, priv, format , ## arg); \
+ xve_printk(KERN_ERR, priv, format, ## arg); \
} while (0)
#define xve_debug(level, priv, format, arg...) \
do { \
if (netdev->features & NETIF_F_LRO)
lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+ else if (netdev->features & NETIF_F_GRO)
+ napi_gro_receive(&priv->napi, skb);
else
netif_receive_skb(skb);
if (length == 0) {
/* don't need this page */
- skb_fill_page_desc(toskb, i, skb_frag_page(frag),
- 0, PAGE_SIZE);
+ if (toskb)
+ skb_fill_page_desc(toskb, i,
+ skb_frag_page(frag), 0, PAGE_SIZE);
+ else
+ __free_page(skb_shinfo(skb)->frags[i].page.p);
--skb_shinfo(skb)->nr_frags;
} else {
size = min_t(unsigned, length, (unsigned)PAGE_SIZE);
kref_put(&ah->ref, xve_free_ah);
}
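+/* Per-AH count of in-flight sends: taken before a send is posted with the
+ * AH and dropped on TX completion, so __xve_reap_ah() destroys an AH only
+ * once its refcnt reaches zero.
+ */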
+static inline void xve_put_ah_refcnt(struct xve_ah *address)
+{
+ atomic_dec(&address->refcnt);
+}
+static inline void xve_get_ah_refcnt(struct xve_ah *address)
+{
+ atomic_inc(&address->refcnt);
+}
+
int xve_open(struct net_device *dev);
int xve_add_pkey_attr(struct net_device *dev);
-void xve_send(struct net_device *dev, struct sk_buff *skb,
- struct xve_ah *address, u32 qpn);
+int xve_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_ah *address, u32 qpn, int type);
int poll_tx(struct xve_dev_priv *priv);
int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state);
void handle_carrier_state(struct xve_dev_priv *priv, char state);
void xve_fwt_entry_free(struct xve_dev_priv *priv,
struct xve_fwt_entry *fwt_entry);
-void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
void xve_advert_mcast_join(struct xve_dev_priv *priv);
int xve_mcast_start_thread(struct net_device *dev);
int xve_mcast_stop_thread(struct net_device *dev, int flush);
void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id);
/*CM */
-void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+int xve_cm_send(struct net_device *dev, struct sk_buff *skb,
struct xve_cm_ctx *tx);
int xve_cm_dev_open(struct net_device *dev);
void xve_cm_dev_stop(struct net_device *dev);
void xve_tables_exit(void);
void xve_remove_one(struct xve_dev_priv *priv);
struct xve_path *__path_find(struct net_device *netdev, void *gid);
-extern int xve_add_proc_entry(struct xve_dev_priv *vp);
+int xve_add_proc_entry(struct xve_dev_priv *vp);
void xve_remove_proc_entry(struct xve_dev_priv *vp);
-extern int xve_change_rxbatch(struct xve_dev_priv *xvep, int flag);
+int xve_gw_send(struct net_device *priv, struct sk_buff *skb);
+struct xve_path *xve_get_gw_path(struct net_device *dev);
+void xve_set_oper_up_state(struct xve_dev_priv *priv);
static inline int xve_continue_unload(void)
{
static inline int xg_vlan_tx_tag_present(struct sk_buff *skb)
{
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+ struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
return veth->h_vlan_proto == htons(ETH_P_8021Q);
}
{
struct xve_dev_priv *priv = netdev_priv(netdev);
struct ib_recv_wr *bad_wr;
+ struct ib_recv_wr *wr = &priv->cm.rx_wr;
int i, ret;
- priv->cm.rx_wr.wr_id = id | XVE_OP_CM | XVE_OP_RECV;
+ wr->wr_id = id | XVE_OP_CM | XVE_OP_RECV;
for (i = 0; i < priv->cm.num_frags; ++i)
priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
- ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+ ret = ib_post_srq_recv(priv->cm.srq, wr, &bad_wr);
if (unlikely(ret)) {
xve_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
xve_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
struct xve_dev_priv *priv = netdev_priv(dev);
int i;
- for (i = 0; i < xve_recvq_size; ++i) {
+ for (i = 0; i < priv->xve_recvq_size; ++i) {
if (rx_ring[i].skb) {
xve_cm_dma_unmap_rx(priv, XVE_CM_RX_SG - 1,
rx_ring[i].mapping);
xve_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
- if (unlikely(wr_id >= xve_recvq_size)) {
+ if (unlikely(wr_id >= priv->xve_recvq_size)) {
if (wr_id ==
(XVE_CM_RX_DRAIN_WRID & ~(XVE_OP_CM | XVE_OP_RECV))) {
spin_lock_irqsave(&priv->lock, flags);
} else
xve_warn(priv,
"cm recv completion event with wrid %d (> %d)\n",
- wr_id, xve_recvq_size);
+ wr_id, priv->xve_recvq_size);
return;
}
memset(tx_req, 0, sizeof(struct xve_cm_buf));
}
-void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+int xve_cm_send(struct net_device *dev, struct sk_buff *skb,
struct xve_cm_ctx *tx)
{
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_cm_buf *tx_req;
u64 addr;
+ int ret = NETDEV_TX_OK;
if (unlikely(skb->len > tx->mtu + VLAN_ETH_HLEN)) {
xve_warn(priv,
INC_TX_DROP_STATS(priv, dev);
INC_TX_ERROR_STATS(priv, dev);
dev_kfree_skb_any(skb);
- return;
+ return ret;
}
xve_dbg_data(priv,
* means we have to make sure everything is properly recorded and
* our state is consistent before we call post_send().
*/
- tx_req = &tx->tx_ring[tx->tx_head & (xve_sendq_size - 1)];
+ tx_req = &tx->tx_ring[tx->tx_head & (priv->xve_sendq_size - 1)];
tx_req->skb = skb;
addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
INC_TX_ERROR_STATS(priv, dev);
dev_kfree_skb_any(skb);
memset(tx_req, 0, sizeof(struct xve_cm_buf));
- return;
+ return ret;
}
tx_req->mapping[0] = addr;
- if (unlikely(post_send(priv, tx, tx->tx_head & (xve_sendq_size - 1),
+ if (unlikely(post_send(priv, tx, tx->tx_head &
+ (priv->xve_sendq_size - 1),
addr, skb->len))) {
xve_warn(priv, "post_send failed\n");
INC_TX_ERROR_STATS(priv, dev);
xve_cm_tx_buf_free(priv, tx_req);
} else {
+ dev->trans_start = jiffies;
++tx->tx_head;
- if (++priv->tx_outstanding == xve_sendq_size) {
+ if (++priv->tx_outstanding == priv->xve_sendq_size) {
xve_dbg_data(priv,
"TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
}
}
priv->send_hbeat_flag = 0;
-
+ return ret;
}
-void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+void xve_cm_handle_tx_wc(struct net_device *dev,
+ struct ib_wc *wc)
{
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_cm_ctx *tx = wc->qp->qp_context;
xve_dbg_data(priv, "cm send completion: id %d, status: %d\n",
wr_id, wc->status);
- if (unlikely(wr_id >= xve_sendq_size)) {
+ if (unlikely(wr_id >= priv->xve_sendq_size)) {
xve_warn(priv, "cm send completion event with wrid %d (> %d)\n",
- wr_id, xve_sendq_size);
+ wr_id, priv->xve_sendq_size);
return;
}
tx_req = &tx->tx_ring[wr_id];
xve_cm_tx_buf_free(priv, tx_req);
- ++tx->tx_tail;
netif_tx_lock(dev);
- if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+ ++tx->tx_tail;
+ if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
.srq = priv->cm.srq,
- .cap.max_send_wr = xve_sendq_size,
+ .cap.max_send_wr = priv->xve_sendq_size,
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
struct xve_dev_priv *priv = netdev_priv(p->netdev);
int ret;
- p->tx_ring = vmalloc(xve_sendq_size * sizeof(*p->tx_ring));
+ p->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*p->tx_ring));
if (!p->tx_ring) {
xve_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
- memset(p->tx_ring, 0, xve_sendq_size * sizeof(*p->tx_ring));
+ memset(p->tx_ring, 0, priv->xve_sendq_size * sizeof(*p->tx_ring));
p->qp = xve_cm_create_tx_qp(p->netdev, p);
if (IS_ERR(p->qp)) {
/* Wait for all sends to complete */
if (!netif_carrier_ok(priv->netdev)
&& unlikely(priv->tx_outstanding > MAX_SEND_CQE))
- while (poll_tx(priv)); /* nothing */
+ while (poll_tx(priv))
+ ; /* nothing */
begin = jiffies;
while ((int)p->tx_tail - (int)p->tx_head < 0) {
spin_lock_irqsave(&priv->lock, flags);
while ((int)p->tx_tail - (int)p->tx_head < 0) {
- tx_req = &p->tx_ring[p->tx_tail & (xve_sendq_size - 1)];
+ tx_req = &p->tx_ring[p->tx_tail & (priv->xve_sendq_size - 1)];
+
+
++p->tx_tail;
spin_unlock_irqrestore(&priv->lock, flags);
xve_cm_tx_buf_free(priv, tx_req);
netif_tx_lock_bh(p->netdev);
- if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
- netif_queue_stopped(p->netdev) &&
+ if (unlikely(--priv->tx_outstanding ==
+ (priv->xve_sendq_size >> 1))
+ && netif_queue_stopped(p->netdev) &&
test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
netif_wake_queue(p->netdev);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
xve_put_ctx(priv);
-
}
static void __xve_cm_tx_reap(struct xve_dev_priv *priv)
struct xve_dev_priv *priv = netdev_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.attr = {
- .max_wr = xve_recvq_size,
+ .max_wr = priv->xve_recvq_size,
.max_sge = max_sge}
};
}
priv->cm.srq_ring =
- vmalloc(xve_recvq_size * sizeof(*priv->cm.srq_ring));
+ vmalloc(priv->xve_recvq_size * sizeof(*priv->cm.srq_ring));
if (!priv->cm.srq_ring) {
pr_warn("%s: failed to allocate CM SRQ ring (%d entries)\n",
- priv->ca->name, xve_recvq_size);
+ priv->ca->name, priv->xve_recvq_size);
ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
return;
}
memset(priv->cm.srq_ring, 0,
- xve_recvq_size * sizeof(*priv->cm.srq_ring));
+ priv->xve_recvq_size * sizeof(*priv->cm.srq_ring));
}
int xve_cm_dev_init(struct net_device *dev)
return ret;
}
+ priv->dev_attr = attr;
+
/* Based on the admin mtu from the chassis */
attr.max_srq_sge =
min_t(int,
xve_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
if (xve_cm_has_srq(dev)) {
- for (i = 0; i < xve_recvq_size; ++i) {
+ for (i = 0; i < priv->xve_recvq_size; ++i) {
if (!xve_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
priv->cm.num_frags - 1,
priv->cm.
#define XVE_OP_CM (1ul << 30)
#include <net/icmp.h>
+
+/* for kernel >= 3.8 */
+#define random32 prandom_u32
+
static inline void skb_pkt_type(struct sk_buff *skb, unsigned char type)
{
skb->pkt_type = type;
static inline unsigned xve_random32(struct xve_dev_priv *priv)
{
- return (prandom_u32() & 0xffffff);
+ return random32() & 0xffffff;
}
static inline struct proc_dir_entry *xg_create_proc_entry(const char *name,
return NULL;
ah->dev = dev;
- ah->last_send = 0;
kref_init(&ah->ref);
ah->ah = ib_create_ah(pd, attr);
if (IS_ERR(ah->ah)) {
kfree(ah);
ah = NULL;
- } else
+ } else {
+ atomic_set(&ah->refcnt, 0);
xve_debug(DEBUG_MCAST_INFO, netdev_priv(dev),
"%s Created ah %p\n", __func__, ah->ah);
+ }
return ah;
}
}
static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
- u64 mapping[XVE_UD_RX_SG])
+ u64 mapping[XVE_UD_RX_EDR_SG])
{
if (xve_ud_need_sg(priv->max_ib_mtu)) {
ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
DMA_FROM_DEVICE);
xve_counters[XVE_NUM_PAGES_ALLOCED]--;
- } else
+ } else {
ib_dma_unmap_single(priv->ca, mapping[0],
XVE_UD_BUF_SIZE(priv->max_ib_mtu),
DMA_FROM_DEVICE);
-}
-
-static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
- struct sk_buff *skb, unsigned int length)
-{
- if (xve_ud_need_sg(priv->max_ib_mtu)) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
- unsigned int size;
- /*
- * There is only two buffers needed for max_payload = 4K,
- * first buf size is XVE_UD_HEAD_SIZE
- */
- skb->tail += XVE_UD_HEAD_SIZE;
- skb->len += length;
-
- size = length - XVE_UD_HEAD_SIZE;
-
- frag->size = size;
- skb->data_len += size;
- skb->truesize += size;
- } else {
- skb_put(skb, length);
}
-
}
static int xve_ib_post_receive(struct net_device *dev, int id)
{
struct xve_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
- int buf_size;
+ int buf_size, align;
u64 *mapping;
if (xve_ud_need_sg(priv->max_ib_mtu))
else
buf_size = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
- skb = xve_dev_alloc_skb(priv, buf_size + 10);
- if (unlikely(!skb))
- return NULL;
-
/*
* Eth header is 14 bytes, IB will leave a 40 byte gap for a GRH
* so we need 10 more bytes to get to 64 and align the
- * IP header to a multiple of 16.
+ * IP header to a multiple of 16. EDR vNICs will have an additional
+ * 4-byte EoIB header.
*/
- skb_reserve(skb, 10);
+ align = xve_is_ovn(priv) ? 10 : 6;
+ skb = xve_dev_alloc_skb(priv, buf_size + align);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, align);
mapping = priv->rx_ring[id].mapping;
mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
struct xve_dev_priv *priv = netdev_priv(dev);
int i;
- for (i = 0; i < xve_recvq_size; ++i) {
+ for (i = 0; i < priv->xve_recvq_size; ++i) {
if (!xve_alloc_rx_skb(dev, i)) {
xve_warn(priv,
"%s failed to allocate ib receive buffer %d\n",
return 0;
}
-static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
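+/* Bring the carrier up once both local and chassis admin states are up;
+ * if the link had dropped on heartbeat loss, re-announce OPER_UP over XSMP.
+ */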
+static void xve_link_up(struct xve_dev_priv *priv)
+{
+ if (test_bit(XVE_FLAG_ADMIN_UP, &priv->flags) &&
+ test_bit(XVE_CHASSIS_ADMIN_UP, &priv->flags)) {
+ if (test_and_clear_bit(XVE_HBEAT_LOST, &priv->state)) {
+ xve_set_oper_up_state(priv);
+ xve_xsmp_send_oper_state(priv, priv->resource_id,
+ XSMP_XVE_OPER_UP);
+ }
+ handle_carrier_state(priv, 1);
+ }
+}
+
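+/* Apply the gateway uplink status from a control message: uplink up widens
+ * the heartbeat interval to 30s and raises the carrier, uplink down narrows
+ * it to 15s and drops the carrier.
+ */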
+void xve_process_link_state(struct xve_dev_priv *priv,
+ struct xve_keep_alive *ka)
+{
+ uint32_t state = ntohl(ka->uplink_status);
+
+ if (state) {
+ set_bit(XVE_GW_STATE_UP, &priv->state);
+ priv->hb_interval = 30*HZ;
+
+ if (!netif_carrier_ok(priv->netdev))
+ xve_link_up(priv);
+ } else {
+ clear_bit(XVE_GW_STATE_UP, &priv->state);
+ priv->hb_interval = 15*HZ;
+ if (netif_carrier_ok(priv->netdev))
+ handle_carrier_state(priv, 0);
+ }
+}
+
+void xve_update_hbeat(struct xve_dev_priv *priv)
+{
+ priv->last_hbeat = jiffies;
+}
+
+void xve_process_hbeat(struct xve_dev_priv *priv, struct xve_keep_alive *ka)
+{
+ xve_process_link_state(priv, ka);
+ xve_update_hbeat(priv);
+}
+
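+/* Handle an ETH_P_XVE_CTRL frame (heartbeat or link-state update) received
+ * on the UD QP; the skb is always consumed here.
+ */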
+void xve_handle_ctrl_msg(struct xve_dev_priv *priv,
+ struct sk_buff *skb, struct ethhdr *eh)
+{
+ struct xve_keep_alive *ka;
+
+ skb_pull(skb, ETH_HLEN);
+
+ if (!pskb_may_pull(skb, sizeof(*ka)))
+ goto skb_free;
+
+ ka = (struct xve_keep_alive *) skb->data;
+ xve_dbg_ctrl(priv, "RX CTRL_MSG: ethtype: 0x%x, type:%d, state: 0x%x\n",
+ ntohs(eh->h_proto), ntohl(ka->type),
+ ntohl(ka->uplink_status));
+
+ switch (ntohl(ka->type)) {
+ case XVE_VNIC_HBEAT:
+ xve_process_hbeat(priv, ka);
+ priv->counters[XVE_HBEAT_COUNTER]++;
+ break;
+
+ case XVE_VNIC_LINK_STATE:
+ xve_process_link_state(priv, ka);
+ priv->counters[XVE_LINK_STATUS_COUNTER]++;
+ break;
+
+ default:
+ xve_dbg_ctrl(priv, "Unknown control message type: %u\n",
+ ntohl(ka->type));
+ }
+
+skb_free:
+ dev_kfree_skb_any(skb);
+}
+
+static void
+xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct xve_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id & ~XVE_OP_RECV;
+ struct ethhdr *eh;
struct sk_buff *skb;
- u64 mapping[XVE_UD_RX_SG];
+ u64 mapping[XVE_UD_RX_EDR_SG];
struct ib_packed_grh *grhhdr;
- char *smac;
u16 vlan;
- xve_dbg_data(priv, "recv completion: id %d, status: %d\n",
- wr_id, wc->status);
+ xve_dbg_data(priv, "recv completion: id %d, QP%x, status: %d\n",
+ wr_id, wc->src_qp, wc->status);
- if (unlikely(wr_id >= xve_recvq_size)) {
+
+ if (unlikely(wr_id >= priv->xve_recvq_size)) {
xve_warn(priv, "recv completion event with wrid %d (> %d)\n",
- wr_id, xve_recvq_size);
+ wr_id, priv->xve_recvq_size);
return;
}
* Drop packets that this interface sent, ie multicast packets
* that the HCA has replicated.
*/
- if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+ if (wc->slid == priv->local_lid &&
+ (wc->src_qp & ~(0x3UL)) == priv->qp->qp_num)
goto repost;
memcpy(mapping, priv->rx_ring[wr_id].mapping,
- XVE_UD_RX_SG * sizeof(*mapping));
+ XVE_UD_RX_EDR_SG * sizeof(*mapping));
/*
* If we can't allocate a new RX buffer, dump
goto repost;
}
+
xve_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
xve_ud_dma_unmap_rx(priv, mapping);
- xve_ud_skb_put_frags(priv, skb, wc->byte_len);
-
+ skb_put_frags(skb, XVE_UD_HEAD_SIZE, wc->byte_len, NULL);
grhhdr = (struct ib_packed_grh *)(skb->data);
- smac = skb->data + IB_GRH_BYTES + ETH_ALEN;
+ /* This will print packet when driver is in Debug Mode */
+ dumppkt(skb->data, skb->len, "UD Packet Dump");
skb_pull(skb, IB_GRH_BYTES);
+
+ /* EDR vNICs: strip the 4-byte EoIB header that follows the GRH */
+ if (xve_is_edr(priv))
+ skb_pull(skb, sizeof(struct xve_eoib_hdr));
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dev_kfree_skb_any(skb);
+ INC_RX_DROP_STATS(priv, dev);
+ goto repost;
+ }
+
+ skb_reset_mac_header(skb);
+ eh = eth_hdr(skb);
+ if (ntohs(eh->h_proto) == ETH_P_XVE_CTRL) { /* heart beat/link status */
+ xve_handle_ctrl_msg(priv, skb, eh);
+ goto repost;
+ }
+
vlan = xg_vlan_get_rxtag(skb);
- xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, smac, vlan);
+ if (wc->wc_flags & IB_WC_GRH) {
+ xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp,
+ eh->h_source, vlan);
+ } else {
+ xve_dbg_data(priv,
+ "No GRH, not used for fwt learning smac %pM, vlan:%u\n",
+ &eh->h_source, vlan);
+ priv->counters[XVE_RX_NOGRH]++;
+ }
xve_prepare_skb(priv, skb);
+ if (((skb->dev->features & NETIF_F_RXCSUM) &&
+ likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) ||
+ test_bit(XVE_FLAG_CSUM, &priv->flags))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
xve_test("%s RX UD pkt %02x %02x %02x %02x %02x %02x %02x %02x %02x",
__func__, skb->data[0], skb->data[1], skb->data[2],
xve_dbg_data(priv, "send completion: id %d, status: %d\n",
wr_id, wc->status);
- if (unlikely(wr_id >= xve_sendq_size)) {
+ if (unlikely(wr_id >= priv->xve_sendq_size)) {
xve_warn(priv, "send completion event with wrid %d (> %d)\n",
- wr_id, xve_sendq_size);
+ wr_id, priv->xve_sendq_size);
return;
}
tx_req = &priv->tx_ring[wr_id];
+ if ((tx_req == NULL) || (tx_req->ah == NULL)) {
+ xve_debug(DEBUG_DATA_INFO, priv,
+ "%s [ca %p] wr_id%d content NULL\n",
+ __func__, priv->ca, wr_id);
+ return;
+ }
+
+ xve_put_ah_refcnt(tx_req->ah);
xve_free_txbuf_memory(priv, tx_req);
++priv->tx_tail;
- if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+ if (unlikely(--priv->tx_outstanding == priv->xve_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
/*
* If not connected complete it
*/
- if (!test_bit(XVE_OPER_UP, &priv->state)) {
+ if (!(test_bit(XVE_OPER_UP, &priv->state) ||
+ test_bit(XVE_HBEAT_LOST, &priv->state))) {
napi_complete(&priv->napi);
clear_bit(XVE_INTR_ENABLED, &priv->state);
return 0;
spin_lock_irqsave(&priv->lock, flags);
if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
- test_bit(XVE_OPER_UP, &priv->state) &&
+ (test_bit(XVE_OPER_UP, &priv->state) ||
+ test_bit(XVE_HBEAT_LOST, &priv->state)) &&
!test_bit(XVE_DELETING, &priv->state)) {
set_bit(XVE_INTR_ENABLED, &priv->state);
if (unlikely
{
if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
- test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
- test_bit(XVE_OPER_UP, &priv->state) &&
- !test_bit(XVE_DELETING, &priv->state)) {
+ test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+ (test_bit(XVE_OPER_UP, &priv->state) ||
+ test_bit(XVE_HBEAT_LOST, &priv->state)) &&
+ !test_bit(XVE_DELETING, &priv->state)) {
priv->counters[XVE_NAPI_SCHED_COUNTER]++;
clear_bit(XVE_INTR_ENABLED, &priv->state);
napi_schedule(&priv->napi);
- } else
+ } else {
priv->counters[XVE_NAPI_NOTSCHED_COUNTER]++;
+ }
}
void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
struct xve_tx_buf *tx_req, void *head, int hlen)
{
struct ib_send_wr *bad_wr;
+ struct ib_send_wr *wr = &priv->tx_wr;
int i, off;
struct sk_buff *skb = tx_req->skb;
skb_frag_t *frags = skb_shinfo(skb)->frags;
priv->tx_sge[i + off].addr = mapping[i + off];
priv->tx_sge[i + off].length = frags[i].size;
}
- priv->tx_wr.num_sge = nr_frags + off;
- priv->tx_wr.wr_id = wr_id;
- priv->tx_wr.wr.ud.remote_qpn = qpn;
- priv->tx_wr.wr.ud.ah = address;
-
+ wr->num_sge = nr_frags + off;
+ wr->wr_id = wr_id;
+ wr->wr.ud.remote_qpn = qpn;
+ wr->wr.ud.ah = address;
if (head) {
- priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
- priv->tx_wr.wr.ud.header = head;
- priv->tx_wr.wr.ud.hlen = hlen;
- priv->tx_wr.opcode = IB_WR_LSO;
- } else
- priv->tx_wr.opcode = IB_WR_SEND;
+ wr->wr.ud.mss = skb_shinfo(skb)->gso_size;
+ wr->wr.ud.header = head;
+ wr->wr.ud.hlen = hlen;
+ wr->opcode = IB_WR_LSO;
+ } else {
+ wr->opcode = IB_WR_SEND;
+ }
return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}
-
-void xve_send(struct net_device *dev, struct sk_buff *skb,
- struct xve_ah *address, u32 qpn)
+/* The type argument is used to differentiate between GATEWAY and
+ * UVNIC packets:
+ * 1 -> GATEWAY packet
+ * 0 -> normal UVNIC packet
+ */
+int xve_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_ah *address, u32 qpn, int type)
{
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_tx_buf *tx_req;
int hlen;
void *phead;
+ int ret = NETDEV_TX_OK;
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
if (unlikely(!skb_pull(skb, hlen))) {
xve_warn(priv,
"%s linear data too small dropping %ld packets %s\n",
- __func__, dev->stats.tx_dropped, dev->name);
+ __func__, dev->stats.tx_dropped,
+ dev->name);
INC_TX_DROP_STATS(priv, dev);
INC_TX_ERROR_STATS(priv, dev);
+ xve_put_ah_refcnt(address);
dev_kfree_skb_any(skb);
- return;
+ return ret;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + VLAN_ETH_HLEN)) {
- xve_warn(priv, "%s packet len %d", __func__, skb->len);
+ xve_warn(priv, "%s packet len %d", __func__, skb->len);
xve_warn(priv, "(> %d) too long to", priv->mcast_mtu);
xve_warn(priv, "send,dropping %ld packets %s\n",
- dev->stats.tx_dropped, dev->name);
+ dev->stats.tx_dropped, dev->name);
INC_TX_DROP_STATS(priv, dev);
INC_TX_ERROR_STATS(priv, dev);
+ xve_put_ah_refcnt(address);
dev_kfree_skb_any(skb);
- return;
+ return ret;
}
phead = NULL;
hlen = 0;
"%s sending packet, length=%d address=%p qpn=0x%06x\n",
__func__, skb->len, address, qpn);
+ if (++priv->tx_outstanding == priv->xve_sendq_size) {
+ if (type != 1) {
+ /* UVNIC PACKET */
+ xve_dbg_data(priv,
+ "%s TX ring full, stopping kernel net queue\n",
+ __func__);
+ if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+ xve_warn(priv, "%s Req notify on send CQ failed\n",
+ __func__);
+ priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+ netif_stop_queue(dev);
+ } else {
+ /* GATEWAY PACKET */
+ xve_dbg_data(priv,
+ "%s TX ring full, Dropping the Gateway Packet\n",
+ __func__);
+ xve_put_ah_refcnt(address);
+ dev_kfree_skb(skb);
+ poll_tx(priv);
+ INC_TX_DROP_STATS(priv, dev);
+ priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
+ priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ --priv->tx_outstanding;
+ return ret;
+ }
+ }
/*
* We put the skb into the tx_ring _before_ we call post_send()
* because it's entirely possible that the completion handler will
* means we have to make sure everything is properly recorded and
* our state is consistent before we call post_send().
*/
- tx_req = &priv->tx_ring[priv->tx_head & (xve_sendq_size - 1)];
+ tx_req = &priv->tx_ring[priv->tx_head & (priv->xve_sendq_size - 1)];
tx_req->skb = skb;
+ tx_req->ah = address;
if (unlikely(xve_dma_map_tx(priv->ca, tx_req))) {
INC_TX_ERROR_STATS(priv, dev);
+ xve_put_ah_refcnt(address);
dev_kfree_skb_any(tx_req->skb);
memset(tx_req, 0, sizeof(struct xve_tx_buf));
- return;
- }
-
- if (++priv->tx_outstanding == xve_sendq_size) {
- xve_dbg_data(priv,
- "%s TX ring full, stopping kernel net queue\n",
- __func__);
- if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
- xve_warn(priv, "%s request notify on send CQ failed\n",
- __func__);
- priv->counters[XVE_TX_RING_FULL_COUNTER]++;
- priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
- netif_stop_queue(dev);
+ --priv->tx_outstanding;
+ return ret;
}
-
- if (unlikely(post_send(priv, priv->tx_head & (xve_sendq_size - 1),
+ if (unlikely(post_send(priv, priv->tx_head & (priv->xve_sendq_size - 1),
address->ah, qpn, tx_req, phead, hlen))) {
xve_warn(priv, "%s post_send failed\n", __func__);
INC_TX_ERROR_STATS(priv, dev);
--priv->tx_outstanding;
priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ xve_put_ah_refcnt(address);
xve_free_txbuf_memory(priv, tx_req);
if (netif_queue_stopped(dev)) {
priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
netif_wake_queue(dev);
}
} else {
- address->last_send = priv->tx_head;
++priv->tx_head;
skb_orphan(skb);
}
priv->send_hbeat_flag = 0;
- if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ if (unlikely(priv->tx_outstanding > priv->xve_max_send_cqe))
poll_tx(priv);
+ return ret;
}
static void __xve_reap_ah(struct net_device *dev)
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
- list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
- if ((int)priv->tx_tail - (int)ah->last_send >= 0) {
+ list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) {
+ if (atomic_read(&ah->refcnt) == 0) {
list_del(&ah->list);
ib_destroy_ah(ah->ah);
kfree(ah);
}
+ }
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
}
set_bit(XVE_FLAG_OPER_UP, &priv->flags);
+ priv->hb_interval = 30*HZ;
+ xve_update_hbeat(priv);
return xve_mcast_start_thread(dev);
}
int pending = 0;
int i;
- for (i = 0; i < xve_recvq_size; ++i)
+ for (i = 0; i < priv->xve_recvq_size; ++i)
if (priv->rx_ring[i].skb)
++pending;
*/
while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
tx_req = &priv->tx_ring[priv->tx_tail &
- (xve_sendq_size - 1)];
+ (priv->xve_sendq_size - 1)];
xve_free_txbuf_memory(priv, tx_req);
++priv->tx_tail;
--priv->tx_outstanding;
}
- for (i = 0; i < xve_recvq_size; ++i) {
+ for (i = 0; i < priv->xve_recvq_size; ++i) {
struct xve_rx_buf *rx_req;
rx_req = &priv->rx_ring[i];
module_param_named(recv_queue_size, xve_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+int xve_max_send_cqe __read_mostly = MAX_SEND_CQE;
+module_param_named(max_send_cqe, xve_max_send_cqe, int, 0444);
+MODULE_PARM_DESC(max_send_cqe, "Threshold for polling send completion queue");
+
static int napi_weight = 128;
module_param(napi_weight, int, 0644);
module_param_named(do_arp, xve_do_arp, int, 0644);
MODULE_PARM_DESC(do_arp, "Enable/Disable ARP for NIC MTU less than IB-MTU");
+int xve_ignore_hbeat_loss;
+module_param_named(ignore_hb_loss, xve_ignore_hbeat_loss, int, 0644);
+MODULE_PARM_DESC(ignore_hb_loss, "Ignore heartbeat loss on EDR-based vNICs with uplink");
+
+int xve_enable_offload;
+module_param_named(enable_offload, xve_enable_offload, int, 0444);
+MODULE_PARM_DESC(enable_offload, "Enable stateless offload");
+
+unsigned long xve_tca_subnet;
+module_param(xve_tca_subnet, ulong, 0444);
+MODULE_PARM_DESC(xve_tca_subnet, "tca subnet prefix");
+
+unsigned long xve_tca_guid;
+module_param(xve_tca_guid, ulong, 0444);
+MODULE_PARM_DESC(xve_tca_guid, "TCA GUID");
+
+unsigned int xve_tca_data_qp;
+module_param(xve_tca_data_qp, uint, 0444);
+MODULE_PARM_DESC(xve_tca_data_qp, "tca data qp number");
+
+unsigned int xve_tca_pkey;
+module_param(xve_tca_pkey, uint, 0444);
+MODULE_PARM_DESC(xve_tca_pkey, "tca pkey");
+
+unsigned int xve_tca_qkey;
+module_param(xve_tca_qkey, uint, 0444);
+MODULE_PARM_DESC(xve_tca_qkey, "tca qkey");
+
+unsigned int xve_ud_mode;
+module_param(xve_ud_mode, uint, 0444);
+MODULE_PARM_DESC(xve_ud_mode, "Always use UD mode irrespective of xsmp.vnet_mode value");
+
static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
int len);
+static void path_free(struct net_device *netdev, struct xve_path *path);
struct xve_path_iter {
struct net_device *dev;
priv->counters[XVE_OPEN_COUNTER]++;
spin_lock_irqsave(&priv->lock, flags);
+ if (test_bit(XVE_VNIC_READY_PENDING, &priv->state)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return -EAGAIN;
+ }
set_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
set_bit(XVE_OPER_UP, &priv->state);
set_bit(XVE_OS_ADMIN_UP, &priv->state);
+ if (xve_is_uplink(priv))
+ set_bit(XVE_GW_STATE_UP, &priv->state);
priv->port_speed = xve_calc_speed(priv);
spin_unlock_irqrestore(&priv->lock, flags);
xve_ib_dev_down(netdev, 0);
xve_ib_dev_stop(netdev, 0);
+ xve_xsmp_send_oper_state(priv, priv->resource_id,
+ XSMP_XVE_OPER_DOWN);
pr_info("XVE: %s Finished Stopping interface %s\n", __func__,
priv->xve_name);
return ret;
}
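+/* Path reference counting: every lookup holds a reference and
+ * xve_put_path() frees the path once the last user releases it.
+ */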
+inline void xve_get_path(struct xve_path *path)
+{
+ atomic_inc(&path->users);
+}
+
+inline void xve_put_path(struct xve_path *path)
+{
+ if (atomic_dec_and_test(&path->users))
+ path_free(path->dev, path);
+}
+
struct xve_path *__path_find(struct net_device *netdev, void *gid)
{
struct xve_dev_priv *priv = netdev_priv(netdev);
rb_insert_color(&path->rb_node, &priv->path_tree);
list_add_tail(&path->list, &priv->path_list);
+ xve_get_path(path);
return 0;
}
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
+ while ((skb = __skb_dequeue(&path->uplink_queue)))
+ dev_kfree_skb_irq(skb);
+
spin_lock_irqsave(&priv->lock, flags);
if (xve_cmtx_get(path)) {
spin_unlock_irqrestore(&priv->lock, flags);
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_path *path;
struct sk_buff *nskb;
+ int ret = 0;
list_for_each_entry(path, &priv->path_list, list) {
if (xve_cmtx_get(path) && xve_cm_up(path)) {
nskb = skb_clone(skb, GFP_ATOMIC);
- if (nskb)
- xve_cm_send(dev, nskb, xve_cmtx_get(path));
+ if (nskb) {
+ ret = xve_cm_send(dev, nskb,
+ xve_cmtx_get(path));
+ if (ret == NETDEV_TX_BUSY)
+ xve_warn(priv,
+ "send queue full so dropping packet %s\n",
+ priv->xve_name);
+ }
}
}
}
wait_for_completion(&path->done);
list_del(&path->list);
- path_free(dev, path);
+ xve_put_path(path);
}
void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_ah *ah = NULL;
struct xve_ah *old_ah = NULL;
- struct sk_buff_head skqueue;
+ struct sk_buff_head skqueue, uplink_skqueue;
struct sk_buff *skb;
unsigned long flags;
+ int ret;
if (!status) {
priv->counters[XVE_PATHREC_RESP_COUNTER]++;
}
skb_queue_head_init(&skqueue);
+ skb_queue_head_init(&uplink_skqueue);
if (!status) {
struct ib_ah_attr av;
if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) {
av.ah_flags = IB_AH_GRH;
+ av.grh.dgid = path->pathrec.dgid;
ah = xve_create_ah(dev, priv->pd, &av);
}
}
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
+ while ((skb = __skb_dequeue(&path->uplink_queue)))
+ __skb_queue_tail(&uplink_skqueue, skb);
path->valid = 1;
}
xve_put_ah(old_ah);
while ((skb = __skb_dequeue(&skqueue))) {
- skb->dev = dev;
+ if (xve_is_edr(priv)) {
+ skb_pull(skb, sizeof(struct xve_eoib_hdr));
+ skb_reset_mac_header(skb);
+ }
if (dev_queue_xmit(skb)) {
xve_warn(priv,
- "dev_queue_xmit failed to requeue pkt for %s\n",
- priv->xve_name);
+ "dev_queue_xmit failed to requeue pkt for %s\n",
+ priv->xve_name);
} else {
xve_test("%s Succefully completed path for %s\n",
__func__, priv->xve_name);
}
}
+ while ((skb = __skb_dequeue(&uplink_skqueue))) {
+ skb->dev = dev;
+ xve_get_ah_refcnt(path->ah);
+ /* Sending the queued GATEWAY Packet */
+ ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1);
+ if (ret == NETDEV_TX_BUSY) {
+ xve_warn(priv, "send queue full full, dropping packet for %s\n",
+ priv->xve_name);
+ }
+ }
}
static struct xve_path *path_rec_create(struct net_device *dev, void *gid)
path->dev = dev;
skb_queue_head_init(&path->queue);
+ skb_queue_head_init(&path->uplink_queue);
INIT_LIST_HEAD(&path->fwt_list);
xve_warn(priv, "ib_sa_path_rec_get failed: %d for %s\n",
path->query_id, priv->xve_name);
path->query = NULL;
- complete(&path->done);
+ complete_all(&path->done);
return path->query_id;
}
priv->counters[XVE_PATHREC_QUERY_COUNTER]++;
return 0;
}
-static void xve_path_lookup(struct sk_buff *skb, struct net_device *dev,
- struct xve_fwt_entry *fwt_entry, int *ok)
+inline struct xve_path*
+xve_fwt_get_path(struct xve_fwt_entry *fwt)
+{
+ if (!fwt->path)
+ return NULL;
+
+ xve_get_path(fwt->path);
+ return fwt->path;
+}
+
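+/* Find (or create and add) the path for a destination GID and return it
+ * with a reference held; the caller must drop it with xve_put_path().
+ */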
+struct xve_path*
+xve_find_path_by_gid(struct xve_dev_priv *priv,
+ union ib_gid *gid)
{
- struct xve_dev_priv *priv = netdev_priv(dev);
- struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
struct xve_path *path;
- unsigned long flags = 0;
- path = __path_find(dev, fwt_entry->dgid.raw);
+ path = __path_find(priv->netdev, gid->raw);
if (!path) {
xve_debug(DEBUG_TABLE_INFO, priv, "%s Unable to find path\n",
__func__);
- path = path_rec_create(dev, fwt_entry->dgid.raw);
+ path = path_rec_create(priv->netdev, gid->raw);
if (!path)
- goto err_drop;
- __path_add(dev, path);
+ return NULL;
+ __path_add(priv->netdev, path);
}
+ xve_get_path(path);
+
+ return path;
+}
+
+static struct xve_path*
+xve_path_lookup(struct net_device *dev,
+ struct xve_fwt_entry *fwt_entry)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ struct xve_path *path;
+ unsigned long flags = 0;
xve_debug(DEBUG_TABLE_INFO, priv, "%s Adding FWT to list %p\n",
__func__, fwt_entry);
+ path = xve_find_path_by_gid(priv, &fwt_entry->dgid);
+ if (!path)
+ return NULL;
+
spin_lock_irqsave(&xve_fwt->lock, flags);
fwt_entry->path = path;
list_add_tail(&fwt_entry->list, &path->fwt_list);
spin_unlock_irqrestore(&xve_fwt->lock, flags);
if (!path->ah) {
- if (!path->query && path_rec_start(dev, path))
- goto err_drop;
+ if (!path->query && path_rec_start(dev, path)) {
+ xve_put_path(path);
+ return NULL;
+ }
+ }
+
+ return path;
+}
+
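+/* Return the path towards the gateway data QP, starting a path record
+ * query if its address handle is not resolved yet; NULL when no gateway
+ * path is available.
+ */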
+struct xve_path *
+xve_get_gw_path(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_path *path;
+
+ if (!priv->gw.t_data_qp)
+ return NULL;
+
+ path = xve_find_path_by_gid(priv, &priv->gw.t_gid);
+ if (!path)
+ return NULL;
+
+ if (!path->ah && !path->query)
+ path_rec_start(priv->netdev, path);
+
+ return path;
+}
+
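+/* Send a unicast copy to the gateway data QP on uplink vNICs; while the
+ * gateway address handle is unresolved the skb is queued on uplink_queue
+ * (bounded by XVE_MAX_PATH_REC_QUEUE) and sent from path_rec_completion().
+ */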
+int xve_gw_send(struct net_device *dev, struct sk_buff *skb)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_gw_info *gwp = &priv->gw;
+ struct xve_path *path;
+ int ret = NETDEV_TX_OK;
+
+ path = xve_get_gw_path(dev);
+ if (!path)
+ return NETDEV_TX_BUSY;
+
+ if (path->ah) {
+ xve_dbg_data(priv, "Sending unicast copy to gw ah:%p dqpn:%u\n",
+ path->ah, gwp->t_data_qp);
+ xve_get_ah_refcnt(path->ah);
+ /* Sending Packet to GATEWAY */
+ ret = xve_send(dev, skb, path->ah, priv->gw.t_data_qp, 1);
+ } else if (skb_queue_len(&path->uplink_queue) <
+ XVE_MAX_PATH_REC_QUEUE) {
+ xve_dbg_data(priv, "gw ah not found - queue len: %u\n",
+ skb_queue_len(&path->uplink_queue));
+ priv->counters[XVE_TX_QUEUE_PKT]++;
+ __skb_queue_tail(&path->uplink_queue, skb);
+ } else {
+ xve_dbg_data(priv,
+ "No path found to gw - droping the unicast packet\n");
+ dev_kfree_skb_any(skb);
+ INC_TX_DROP_STATS(priv, dev);
+ goto out;
+ }
+ priv->counters[XVE_GW_MCAST_TX]++;
+
+out:
+ xve_put_path(path);
+ return ret;
+}
+
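+/* Prepend the 4-byte EoIB header for EDR vNICs, growing headroom if needed;
+ * with enable_offload set, checksum-OK bits are encoded in encap_data in
+ * the same way as the mlx4_vnic driver.
+ */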
+int xve_add_eoib_header(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+ struct xve_eoib_hdr *eoibp;
+ int len = sizeof(*eoibp);
+
+ /* Expand the headroom in place; the caller keeps using the same
+ * skb pointer, so swapping in a reallocated skb here would hand it
+ * back a freed buffer.
+ */
+ if (skb_headroom(skb) < len &&
+ pskb_expand_head(skb, len, 0, GFP_ATOMIC))
+ return -1;
+ eoibp = (struct xve_eoib_hdr *) skb_push(skb, len);
+
+ skb_set_mac_header(skb, len);
+ if (!xve_enable_offload) {
+ eoibp->magic = cpu_to_be16(XVE_EOIB_MAGIC);
+ eoibp->tss_mask_sz = 0;
+ return 0;
+ }
+ /* encap_data = (VNIC_EOIB_HDR_VER << 4) | (VNIC_EOIB_HDR_SIG << 6),
+ * from net/ethernet/mellanox/mlx4_vnic/vnic_data_tx.c
+ */
+ eoibp->encap_data = 0x3 << 6;
+ eoibp->seg_off = eoibp->seg_id = 0;
+#define VNIC_EOIB_HDR_UDP_CHK_OK 0x2
+#define VNIC_EOIB_HDR_TCP_CHK_OK 0x1
+#define VNIC_EOIB_HDR_IP_CHK_OK 0x1
+
+#define VNIC_EOIB_HDR_SET_IP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \
+ (eoib_hdr->encap_data & 0xFC) | VNIC_EOIB_HDR_IP_CHK_OK)
+#define VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \
+ (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_TCP_CHK_OK << 2))
+#define VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoib_hdr) (eoib_hdr->encap_data = \
+ (eoib_hdr->encap_data & 0xF3) | (VNIC_EOIB_HDR_UDP_CHK_OK << 2))
+
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_IP: {
+ struct iphdr *ip_h = ip_hdr(skb);
+
+ VNIC_EOIB_HDR_SET_IP_CHK_OK(eoibp);
+ if (ip_h->protocol == IPPROTO_TCP)
+ VNIC_EOIB_HDR_SET_TCP_CHK_OK(eoibp);
+ else if (ip_h->protocol == IPPROTO_UDP)
+ VNIC_EOIB_HDR_SET_UDP_CHK_OK(eoibp);
+ break;
}
- *ok = 1;
- return;
-err_drop:
- *ok = 0;
+
+ case ETH_P_IPV6:
+ break;
+ }
+ return 0;
}
static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct sk_buff *bcast_skb = NULL;
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_fwt_entry *fwt_entry = NULL;
- struct xve_path *path;
+ struct xve_path *path = NULL;
unsigned long flags;
int ret = NETDEV_TX_OK, len = 0;
- char *smac;
u8 skb_need_tofree = 0, inc_drop_cnt = 0, queued_pkt = 0;
u16 vlan_tag = 0;
spin_lock_irqsave(&priv->lock, flags);
if (!test_bit(XVE_OPER_UP, &priv->state)) {
ret = NETDEV_TX_BUSY;
- inc_drop_cnt = 1;
priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
goto unlock;
}
if (skb_padto(skb, XVE_MIN_PACKET_LEN)) {
inc_drop_cnt = 1;
priv->counters[XVE_TX_SKB_ALLOC_ERROR_COUNTER]++;
- ret = NETDEV_TX_BUSY;
+ ret = NETDEV_TX_OK;
goto unlock;
}
skb->len = XVE_MIN_PACKET_LEN;
}
- len = skb->len;
- smac = skb->data + ETH_ALEN;
-
+ skb_reset_mac_header(skb);
if (xg_vlan_tx_tag_present(skb))
vlan_get_tag(skb, &vlan_tag);
- fwt_entry = xve_fwt_lookup(&priv->xve_fwt, skb->data, vlan_tag, 0);
+ if (xve_is_edr(priv) &&
+ xve_add_eoib_header(priv, skb)) {
+ skb_need_tofree = inc_drop_cnt = 1;
+ priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
+ goto unlock;
+ }
+ len = skb->len;
+
+ fwt_entry = xve_fwt_lookup(&priv->xve_fwt, eth_hdr(skb)->h_dest,
+ vlan_tag, 0);
if (!fwt_entry) {
- if (is_multicast_ether_addr(skb->data)) {
- xve_mcast_send(dev, (void *)priv->bcast_mgid.raw, skb);
+ if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
+ ret = xve_mcast_send(dev,
+ (void *)priv->bcast_mgid.raw, skb);
priv->counters[XVE_TX_MCAST_PKT]++;
goto stats;
} else {
/*
- * XXX Viswa Need to change this
* Since this is a unicast packet and we do not have
* an L2 table entry
* We need to do the following
* Do not ARP if if user does not want to for less
* than IB-MTU
*/
- if (xve_do_arp
+ if (!xve_is_edr(priv) && (xve_do_arp
|| (priv->netdev->mtu >
- XVE_UD_MTU(priv->max_ib_mtu)))
+ XVE_UD_MTU(priv->max_ib_mtu))))
bcast_skb = xve_generate_query(priv, skb);
- if (bcast_skb != NULL)
- xve_mcast_send(dev,
- (void *)priv->bcast_mgid.raw,
- bcast_skb);
+ if (bcast_skb != NULL)
+ ret = xve_mcast_send(dev,
+ (void *)priv->bcast_mgid.raw,
+ bcast_skb);
/*
* Now send the original packet also to over broadcast
* Later add counters for flood mode
*/
- if (len < XVE_UD_MTU(priv->max_ib_mtu)) {
- xve_mcast_send(dev,
- (void *)priv->bcast_mgid.raw,
- skb);
+ if (xve_is_edr(priv) ||
+ len < XVE_UD_MTU(priv->max_ib_mtu)) {
+ ret = xve_mcast_send(dev,
+ (void *)priv->bcast_mgid.raw, skb);
priv->counters[XVE_TX_MCAST_FLOOD_UD]++;
} else {
if (xve_flood_rc) {
}
}
- if (!fwt_entry->path) {
- int ok;
-
+ path = xve_fwt_get_path(fwt_entry);
+ if (!path) {
priv->counters[XVE_PATH_NOT_FOUND]++;
xve_debug(DEBUG_SEND_INFO, priv,
"%s Unable to find neigbour doing a path lookup\n",
__func__);
- xve_path_lookup(skb, dev, fwt_entry, &ok);
- if (!ok) {
+ path = xve_path_lookup(dev, fwt_entry);
+ if (!path) {
skb_need_tofree = inc_drop_cnt = 1;
goto free_fwt_ctx;
}
} else {
- path = fwt_entry->path;
if (!path->ah) {
priv->counters[XVE_AH_NOT_FOUND]++;
xve_debug(DEBUG_SEND_INFO, priv,
}
}
- path = fwt_entry->path;
-
if (xve_cmtx_get(path)) {
if (xve_cm_up(path)) {
- xve_cm_send(dev, skb, xve_cmtx_get(path));
+ ret = xve_cm_send(dev, skb, xve_cmtx_get(path));
update_cm_tx_rate(xve_cmtx_get(path), len);
priv->counters[XVE_TX_RC_COUNTER]++;
goto stats;
} else if (path->ah) {
xve_debug(DEBUG_SEND_INFO, priv, "%s path ah is %p\n",
__func__, path->ah);
- xve_send(dev, skb, path->ah, fwt_entry->dqpn);
+ xve_get_ah_refcnt(path->ah);
+ ret = xve_send(dev, skb, path->ah, fwt_entry->dqpn, 0);
priv->counters[XVE_TX_UD_COUNTER]++;
goto stats;
}
INC_TX_BYTE_STATS(priv, dev, len);
priv->counters[XVE_TX_COUNTER]++;
free_fwt_ctx:
+ if (path)
+ xve_put_path(path);
xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
unlock:
if (inc_drop_cnt)
struct xve_dev_priv *priv = netdev_priv(dev);
/* Allocate RX/TX "rings" to hold queued skbs */
- priv->rx_ring = kcalloc(xve_recvq_size, sizeof(*priv->rx_ring),
+ priv->rx_ring = kcalloc(priv->xve_recvq_size, sizeof(*priv->rx_ring),
GFP_KERNEL);
if (!priv->rx_ring) {
pr_warn("%s: failed to allocate RX ring (%d entries)\n",
- ca->name, xve_recvq_size);
+ ca->name, priv->xve_recvq_size);
goto out;
}
- priv->tx_ring = vmalloc(xve_sendq_size * sizeof(*priv->tx_ring));
+ priv->tx_ring = vmalloc(priv->xve_sendq_size * sizeof(*priv->tx_ring));
if (!priv->tx_ring) {
pr_warn("%s: failed to allocate TX ring (%d entries)\n",
- ca->name, xve_sendq_size);
+ ca->name, priv->xve_sendq_size);
goto out_rx_ring_cleanup;
}
- memset(priv->tx_ring, 0, xve_sendq_size * sizeof(*priv->tx_ring));
+ memset(priv->tx_ring, 0, priv->xve_sendq_size * sizeof(*priv->tx_ring));
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
struct sk_buff *skb)
{
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+ struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
if ((xg_vlan_tx_tag_present(skb)
&& veth->h_vlan_encapsulated_proto == htons(ETH_P_IP))
struct vlan_ethhdr *veth;
vlan_get_tag(skb_pkt, &vlan_tci);
- veth = (struct vlan_ethhdr *)(skb->data);
+ veth = vlan_eth_hdr(skb);
veth->h_vlan_proto = htons(ETH_P_8021Q);
/* now, the TCI */
veth->h_vlan_TCI = htons(vlan_tci);
struct vlan_ethhdr *veth;
vlan_get_tag(skb_pkt, &vlan_tci);
- veth = (struct vlan_ethhdr *)(skb->data);
+ veth = vlan_eth_hdr(skb);
veth->h_vlan_proto = htons(ETH_P_8021Q);
/* now, the TCI */
veth->h_vlan_TCI = htons(vlan_tci);
skb->protocol = htons(ETH_P_RARP);
ret = xve_start_xmit(skb, priv->netdev);
- if (ret)
- dev_kfree_skb_any(skb);
-
return 0;
}
if (test_bit(XVE_OPER_UP, &priv->state) &&
test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
!test_bit(XVE_DELETING, &priv->state)) {
-
+ /* Heart beat loss */
+ if (xve_is_uplink(priv) &&
+ !xve_ignore_hbeat_loss &&
+ time_after(jiffies, (unsigned long)priv->last_hbeat +
+ XVE_HBEAT_LOSS_THRES*priv->hb_interval)) {
+ unsigned long flags = 0;
+
+ xve_warn(priv, "Heart Beat Loss: %lu:%lu\n", jiffies,
+ (unsigned long)priv->last_hbeat +
+ 3*priv->hb_interval*HZ);
+
+ xve_flush_paths(priv->netdev);
+ spin_lock_irqsave(&priv->lock, flags);
+ xve_set_oper_down(priv);
+ set_bit(XVE_HBEAT_LOST, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
priv->counters[XVE_STATE_MACHINE_UP]++;
if (!test_bit(XVE_OPER_REP_SENT, &priv->state))
(void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
if (priv->send_hbeat_flag) {
poll_tx(priv);
- xve_send_hbeat(priv);
+ if (xve_is_ovn(priv))
+ xve_send_hbeat(priv);
}
priv->send_hbeat_flag = 1;
}
INIT_DELAYED_WORK(&priv->mcast_leave_task, xve_mcast_leave_task);
INIT_DELAYED_WORK(&priv->mcast_join_task, xve_mcast_join_task);
INIT_DELAYED_WORK(&priv->stale_task, xve_cm_stale_task);
+}
+
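+/* Feature set for legacy OVN vNICs: GRO, optional TX checksum offload
+ * and LRO.
+ */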
+void
+xve_set_ovn_features(struct xve_dev_priv *priv)
+{
+ priv->netdev->features |=
+ NETIF_F_HIGHDMA | NETIF_F_GRO;
+
+ if (!xve_no_tx_checksum_offload) {
+ priv->netdev->features |= NETIF_F_IP_CSUM;
+ set_bit(XVE_FLAG_CSUM, &priv->flags);
+ }
+
+ if (priv->lro_mode && lro) {
+ priv->netdev->features |= NETIF_F_LRO;
+ xve_lro_setup(priv);
+ } else {
+ priv->lro_mode = 0;
+ }
+}
+
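+/* Feature set for EDR vNICs: SG and GRO, plus checksum/TSO offload when
+ * enable_offload is set and the HCA supports it; extra headroom is
+ * reserved for the EoIB header.
+ */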
+void
+xve_set_edr_features(struct xve_dev_priv *priv)
+{
+ priv->netdev->hw_features =
+ NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_GRO;
+
+ if (xve_enable_offload) {
+ if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM)
+ priv->netdev->hw_features |=
+ NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+
+ if (priv->hca_caps & IB_DEVICE_UD_TSO)
+ priv->netdev->hw_features |= NETIF_F_TSO;
+
+ }
+ priv->netdev->features |= priv->netdev->hw_features;
+ /* Reserve extra space for EoIB header */
+ priv->netdev->hard_header_len += sizeof(struct xve_eoib_hdr);
}
int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
int result = -ENOMEM;
priv->netdev->watchdog_timeo = 1000 * HZ;
- priv->netdev->tx_queue_len = xve_sendq_size * 2;
- priv->netdev->features |=
- NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM;
- set_bit(XVE_FLAG_CSUM, &priv->flags);
+ priv->netdev->tx_queue_len = priv->xve_sendq_size * 2;
- if (lro)
- priv->lro_mode = 1;
- /* 1 -RC , 2 -UD */
- if (priv->vnet_mode == 1) {
+ priv->lro_mode = 1;
+ if (priv->vnet_mode == XVE_VNET_MODE_RC) {
pr_info("XVE: %s Setting RC mode for %s\n", __func__,
priv->xve_name);
strcpy(priv->mode, "connected(RC)");
- /* Turn off checksum offload If the module parameter is set */
- /* TBD if the chassis sends a CHECK SUM BIT */
- if (xve_no_tx_checksum_offload) {
- priv->netdev->features &= ~NETIF_F_IP_CSUM;
- clear_bit(XVE_FLAG_CSUM, &priv->flags);
- }
-
set_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
- priv->netdev->features &= ~(NETIF_F_TSO | NETIF_F_SG);
- priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
priv->cm_supported = 1;
- } else { /* UD */
- /* MTU will be reset when mcast join happens */
+ } else {/* UD */
+ pr_info("XVE: %s Setting UD mode for %s\n", __func__,
+ priv->xve_name);
strcpy(priv->mode, "datagram(UD)");
+
+ /* MTU will be reset when mcast join happens */
if (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu))
priv->netdev->mtu = XVE_UD_MTU(priv->max_ib_mtu);
- priv->lro_mode = 1;
- priv->cm_supported = 0;
-
+ priv->lro_mode = 0;
}
- priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
-
- if (priv->lro_mode)
- priv->netdev->features |= NETIF_F_LRO;
+ priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
xg_setup_pseudo_device(priv->netdev, hca);
SET_NETDEV_OPS(priv->netdev, &xve_netdev_ops);
netif_napi_add(priv->netdev, &priv->napi, xve_poll, napi_weight);
if (xve_esx_preregister_setup(priv->netdev))
return -EINVAL;
- xve_lro_setup(priv);
xve_set_netdev(priv->netdev);
if (!device_attr) {
pr_warn("%s: allocation of %zu bytes failed\n",
- hca->name, sizeof(*device_attr));
+ hca->name, sizeof(*device_attr));
return result;
}
result = ib_query_device(hca, device_attr);
if (result) {
pr_warn("%s: ib_query_device failed (ret = %d)\n",
- hca->name, result);
+ hca->name, result);
kfree(device_attr);
return result;
}
priv->hca_caps = device_attr->device_cap_flags;
-
kfree(device_attr);
+ xve_lro_setup(priv);
+ if (xve_is_ovn(priv))
+ xve_set_ovn_features(priv);
+ else
+ xve_set_edr_features(priv);
+
return 0;
}
return ret;
}
-static void xve_set_oper_up_state(struct xve_dev_priv *priv)
+void xve_set_oper_up_state(struct xve_dev_priv *priv)
{
unsigned long flags = 0;
__func__, priv->xve_name);
if (test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
priv->counters[XVE_ADMIN_DOWN_COUNTER]++;
+ netif_carrier_off(priv->netdev);
clear_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
}
xmsgp->code = 0;
xmsgp->vn_mtu = cpu_to_be16(priv->admin_mtu);
xmsgp->net_id = cpu_to_be32(priv->net_id);
- pr_info("XVE: %s ACK back with admin mtu ", __func__);
+ if (priv->vnic_type != XSMP_XCM_OVN) {
+ xmsgp->hca_subnet_prefix =
+ cpu_to_be64(priv->local_gid.global.subnet_prefix);
+ xmsgp->hca_ctrl_qp = 0;
+ xmsgp->hca_data_qp = cpu_to_be32(priv->qp->qp_num);
+ xmsgp->hca_qkey = cpu_to_be32(priv->qkey);
+ xmsgp->hca_pkey = cpu_to_be16(priv->pkey);
+ xmsgp->tca_subnet_prefix =
+ cpu_to_be64(priv->gw.t_gid.global.subnet_prefix);
+ xmsgp->tca_guid =
+ cpu_to_be64(priv->gw.t_gid.global.interface_id);
+ xmsgp->tca_ctrl_qp = cpu_to_be32(priv->gw.t_ctrl_qp);
+ xmsgp->tca_data_qp = cpu_to_be32(priv->gw.t_data_qp);
+ xmsgp->tca_pkey = cpu_to_be16(priv->gw.t_pkey);
+ xmsgp->tca_qkey = cpu_to_be16(priv->gw.t_qkey);
+ }
+ pr_info("XVE: %s ACK back with admin mtu ", __func__);
pr_info("%d for %s", xmsgp->vn_mtu, priv->xve_name);
pr_info("[netid %d ]\n", xmsgp->net_id);
return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
}
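+/* Refresh the cached gateway (TCA) addressing from an XSMP message; the
+ * xve_tca_* module parameters, when set, override the chassis-provided
+ * values.
+ */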
+static void
+xve_update_gw_info(struct xve_dev_priv *priv, struct xve_xsmp_msg *xmsgp)
+{
+ struct xve_gw_info *gwp = &priv->gw;
+
+ gwp->t_gid.global.subnet_prefix =
+ xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) :
+ xmsgp->tca_subnet_prefix;
+
+ gwp->t_gid.global.interface_id =
+ xve_tca_guid ? cpu_to_be64(xve_tca_guid) :
+ xmsgp->tca_guid;
+ gwp->t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp);
+ gwp->t_data_qp = xve_tca_data_qp ? (xve_tca_data_qp)
+ : be32_to_cpu(xmsgp->tca_data_qp);
+ gwp->t_pkey = xve_tca_pkey ? (xve_tca_pkey)
+ : be16_to_cpu(xmsgp->tca_pkey);
+ gwp->t_qkey = xve_tca_qkey ? (xve_tca_qkey)
+ : be16_to_cpu(xmsgp->tca_qkey);
+ xve_dbg_ctrl(priv, "GW INFO gid:%pI6, lid: %hu\n",
+ &gwp->t_gid.raw, be32_to_cpu(xmsgp->tca_lid));
+ xve_dbg_ctrl(priv, "qpn: %u, pkey: 0x%x, qkey: 0x%x\n",
+ gwp->t_data_qp, gwp->t_pkey,
+ gwp->t_qkey);
+}
+
/*
* Handle install message
*/
int result = -ENOMEM;
struct ib_device *hca;
u8 port;
+ __be16 pkey_be;
__be32 net_id_be;
u8 ecode = 0;
if (xve_check_for_hca(xsmp_hndl) != 0) {
pr_info("Warning !!!!! Unsupported HCA card for xve ");
pr_info("interface - %s XSF feature is only ", xmsgp->xve_name);
- pr_info("supported on Connect-X HCA cards !!!!!!!");
+ pr_info("supported on Connect-X and PSIF HCA cards !!!!!!!");
ret = -EEXIST;
goto dup_error;
}
}
netdev =
- alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, &xve_setup);
+ alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN,
+ &xve_setup);
if (netdev == NULL) {
XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
- __func__, xmsgp->xve_name,
- be64_to_cpu(xmsgp->resource_id));
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
ret = -ENOMEM;
ecode = XVE_NACK_ALLOCATION_ERROR;
goto dup_error;
pr_info("XVE: %s Installing xve %s - ", __func__, xmsgp->xve_name);
pr_info("resource id %llx", be64_to_cpu(xmsgp->resource_id));
- pr_info("priv DS %p\n", priv);
+ pr_info("priv DS %p\n", priv);
xcpm_get_xsmp_session_info(xsmp_hndl, &priv->xsmp_info);
hca = priv->xsmp_info.ib_device;
port = xscore_port_num(priv->xsmp_info.port);
/* Parse PVI parameters */
- priv->vnet_mode = (xmsgp->vnet_mode);
+ priv->vnet_mode = xve_ud_mode ? XVE_VNET_MODE_UD :
+ (xmsgp->vnet_mode);
priv->net_id = be32_to_cpu(xmsgp->net_id);
priv->netdev->mtu = be16_to_cpu(xmsgp->vn_mtu);
priv->resource_id = be64_to_cpu(xmsgp->resource_id);
priv->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+ priv->install_flag = be32_to_cpu(xmsgp->install_flag);
priv->xsmp_hndl = xsmp_hndl;
priv->sm_delay = 1000;
priv->aging_delay = xve_aging_timeout * HZ;
strcpy(priv->xve_name, xmsgp->xve_name);
strcpy(priv->proc_name, priv->xve_name);
net_id_be = cpu_to_be32(priv->net_id);
+ /* Parse Uvnic properties */
+	/*
+	 * Legacy PVIs do not carry the vnic_type field in XSMP,
+	 * so the value defaults to zero (XSMP_XCM_OVN).
+	 */
+ priv->vnic_type = xmsgp->vnic_type;
+	/* Make send and receive queue parameters per-vNIC */
+ priv->xve_sendq_size = xve_sendq_size;
+ priv->xve_recvq_size = xve_recvq_size;
+ priv->xve_max_send_cqe = xve_max_send_cqe;
+
+ if (priv->vnic_type == XSMP_XCM_UPLINK) {
+ /* For G/W mode set higher values */
+ priv->xve_sendq_size = 8192;
+ priv->xve_recvq_size = 8192;
+ priv->xve_max_send_cqe = 512;
+ priv->gw.t_gid.global.subnet_prefix =
+ xve_tca_subnet ? cpu_to_be64(xve_tca_subnet) :
+ be64_to_cpu(xmsgp->tca_subnet_prefix);
+
+ priv->gw.t_gid.global.interface_id =
+ xve_tca_guid ? cpu_to_be64(xve_tca_guid) :
+ be64_to_cpu(xmsgp->tca_guid);
+ priv->gw.t_ctrl_qp = be32_to_cpu(xmsgp->tca_ctrl_qp);
+ priv->gw.t_data_qp = xve_tca_data_qp ? xve_tca_data_qp :
+ be32_to_cpu(xmsgp->tca_data_qp);
+ priv->gw.t_pkey = xve_tca_pkey ? xve_tca_pkey :
+ be16_to_cpu(xmsgp->tca_pkey);
+		/* FIXME: xmsgp->tca_qkey is u16; needs to be fixed in osdn */
+ priv->gw.t_qkey = xve_tca_qkey ? xve_tca_qkey :
+ be16_to_cpu(xmsgp->tca_qkey);
+ xve_dbg_ctrl(priv,
+		"GW prefix: %llx guid: %llx, lid: %hu sl: %hu TDQP: %x TCQP: %x\n",
+ priv->gw.t_gid.global.subnet_prefix,
+ priv->gw.t_gid.global.interface_id,
+ be16_to_cpu(xmsgp->tca_lid),
+ be16_to_cpu(xmsgp->service_level),
+ priv->gw.t_data_qp, priv->gw.t_ctrl_qp);
+ }
+	/* P_Key: the xve_tca_pkey override wins over the chassis value */
+ priv->pkey = xve_tca_pkey ? xve_tca_pkey :
+ be16_to_cpu(xmsgp->tca_pkey);
+ if (priv->pkey == 0)
+ priv->pkey |= 0x8000;
+	/* Q_Key for EDR vNICs */
+ priv->gw.t_qkey = xve_tca_qkey ? xve_tca_qkey :
+ be16_to_cpu(xmsgp->tca_qkey);
/* Always set chassis ADMIN up by default */
set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
if (!ib_query_port(hca, port, &priv->port_attr))
priv->max_ib_mtu = ib_mtu_enum_to_int(priv->port_attr.max_mtu);
else {
- pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
+ pr_warn("%s: ib_query_port %d failed\n",
+ hca->name, port);
goto device_init_failed;
}
- memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
- pr_info("XVE: %s adding vnic %s ", __func__, priv->xve_name);
- pr_info("net_id %d vnet_mode %d", priv->net_id, priv->vnet_mode);
+ pr_info("XVE: %s adding vnic %s ",
+ __func__, priv->xve_name);
+	pr_info("net_id %d vnet_mode %d type %d",
+ priv->net_id, priv->vnet_mode, priv->vnic_type);
pr_info("port %d net_id_be %d\n", port, net_id_be);
- memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
- result = ib_query_pkey(hca, port, 0, &priv->pkey);
- if (result) {
- pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
- hca->name, port, result);
- goto device_init_failed;
+ memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
+ if (xve_is_edr(priv)) {
+ result = ib_find_pkey(hca, port, priv->pkey, &priv->pkey_index);
+ if (result != 0)
+			pr_warn("%s: ib_find_pkey port %d failed (ret = %d) in %s\n",
+				hca->name, port, result, __func__);
+		/* EDR MGID format: FF15:101C:P:0:0:0:0:N,
+		 * where P is the P_Key and N is the NetID. */
+ pkey_be = cpu_to_be16(priv->pkey);
+ priv->bcast_mgid.raw[0] = 0xFF;
+ priv->bcast_mgid.raw[1] = 0x15;
+ priv->bcast_mgid.raw[2] = 0x10;
+ priv->bcast_mgid.raw[3] = 0x1C;
+ memcpy(&priv->bcast_mgid.raw[4], &pkey_be, 2);
+ memcpy(&priv->bcast_mgid.raw[12], &net_id_be,
+ sizeof(net_id_be));
+ } else {
+ memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
+ result = ib_query_pkey(hca, port, 0, &priv->pkey);
+ if (result) {
+ pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+ hca->name, port, result);
+ goto device_init_failed;
+ }
+ /*
+ * Set the full membership bit, so that we join the right
+ * broadcast group, etc.
+ */
+ priv->pkey |= 0x8000;
}
+	pr_info("MGID: %pI6 pkey 0x%04x\n", &priv->bcast_mgid.raw, priv->pkey);
+
if (xve_set_dev_features(priv, hca))
goto device_init_failed;
- /*
- * Set the full membership bit, so that we join the right
- * broadcast group, etc.
- */
- priv->pkey |= 0x8000;
result = ib_query_gid(hca, port, 0, &priv->local_gid);
list_add_tail(&priv->list, &xve_dev_list);
mutex_unlock(&xve_mutex);
- xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+ if (xve_is_ovn(priv))
+ xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+ else
+ set_bit(XVE_VNIC_READY_PENDING, &priv->state);
queue_sm_work(priv, 0);
__func__, xmsgp->xve_name,
be64_to_cpu(xmsgp->resource_id));
}
- if (update_state) {
+ if (update_state && priv->vnic_type == XSMP_XCM_OVN) {
printk
("XVE: %s Sending Oper state to chassis for %s id %llx\n",
__func__, priv->xve_name, priv->resource_id);
static int xve_xsmp_update(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp)
{
u32 bitmask = be32_to_cpu(xmsgp->bitmask);
- struct xve_dev_priv *xvep;
+ struct xve_dev_priv *priv;
int ret = 0;
- int send_ack = 1;
+ int send_ack = 0;
- xvep = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
- if (!xvep) {
+ priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!priv) {
XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
__func__, be64_to_cpu(xmsgp->resource_id));
return -EINVAL;
}
- XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, xvep->xve_name,
+ XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, priv->xve_name,
bitmask);
- mutex_lock(&xvep->mutex);
+ mutex_lock(&priv->mutex);
- if (bitmask & XVE_UPDATE_ADMIN_STATE) {
- ret = handle_admin_state_change(xvep, xmsgp);
+ if (bitmask & XVE_UPDATE_ADMIN_STATE)
/*
* Ack will be sent once QP's are brought down
*/
- send_ack = 0;
+ ret = handle_admin_state_change(priv, xmsgp);
+ if (bitmask & XVE_UPDATE_MTU)
+ xve_modify_mtu(priv->netdev, be16_to_cpu(xmsgp->vn_mtu));
+
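+	/* Gateway (XT) state updates apply only to uplink vNICs */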
+ if (bitmask & XVE_UPDATE_XT_STATE_DOWN &&
+ xve_is_uplink(priv)) {
+ clear_bit(XVE_GW_STATE_UP, &priv->state);
+ if (netif_carrier_ok(priv->netdev))
+ handle_carrier_state(priv, 0);
+ }
+ if (bitmask & XVE_UPDATE_XT_CHANGE && xve_is_uplink(priv)) {
+ xve_update_gw_info(priv, xmsgp);
+ if (!netif_carrier_ok(priv->netdev))
+ handle_carrier_state(priv, 1);
+ send_ack = 1;
}
if (send_ack) {
- ret = xve_xsmp_send_ack(xvep, xmsgp);
- if (ret)
+ ret = xve_xsmp_send_ack(priv, xmsgp);
+ if (ret) {
XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s\n"
- "VID=0x%llx\n", __func__, xmsgp->xve_name,
- be64_to_cpu(xmsgp->resource_id));
+ "VID=0x%llx\n", __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
}
- mutex_unlock(&xvep->mutex);
+ mutex_unlock(&priv->mutex);
return ret;
}
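+
+/*
+ * XSMP_VNIC_READY handler: forward the message to xsigod, clear the
+ * READY_PENDING state set at install time, ACK the chassis and then
+ * trigger an oper-state request for this vNIC.
+ */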
+static int
+xve_xsmp_vnic_ready(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
+ void *data, int len)
+{
+ struct xve_dev_priv *priv;
+ unsigned long flags;
+	int ret;
+
+ priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!priv) {
+ XSMP_INFO("XVE: %s priv not found for %s\n",
+ __func__, xmsgp->xve_name);
+ return -1;
+ }
+ pr_info("XVE VNIC_READY: vnic_type: %u, subnet_prefix: %llx\n",
+ priv->vnic_type, priv->gw.t_gid.global.subnet_prefix);
+ pr_info("ctrl_qp: %u, data_qp: %u, pkey: %x, qkey: %x\n",
+ priv->gw.t_ctrl_qp, priv->gw.t_data_qp,
+ priv->gw.t_pkey, priv->gw.t_qkey);
+
+ xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+ spin_lock_irqsave(&priv->lock, flags);
+ clear_bit(XVE_VNIC_READY_PENDING, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ ret = xve_xsmp_send_ack(priv, xmsgp);
+ if (ret) {
+ XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
+
+ (void) xve_xsmp_handle_oper_req(priv->xsmp_hndl,
+ priv->resource_id);
+
+ return 0;
+}
+
/*
* We set the DELETING bit and let sm_work thread handle delete
*/
xve_counters[XVE_VNIC_INSTALL_COUNTER]++;
xve_xsmp_install(xsmp_hndl, xmsgp, data, length);
break;
+ case XSMP_VNIC_READY:
+ xve_xsmp_vnic_ready(xsmp_hndl, xmsgp, data, length);
+ break;
case XSMP_XVE_DELETE:
xve_counters[XVE_VNIC_DEL_COUNTER]++;
xve_handle_del_message(xsmp_hndl, xmsgp);
xve_sendq_size = roundup_pow_of_two(xve_sendq_size);
xve_sendq_size = min(xve_sendq_size, XVE_MAX_QUEUE_SIZE);
- xve_sendq_size = max(xve_sendq_size, max(2 * MAX_SEND_CQE,
+ xve_sendq_size = max(xve_sendq_size, max(2 * xve_max_send_cqe,
XVE_MIN_QUEUE_SIZE));
/*
* When copying small received packets, we only copy from the
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+
set_qkey = 1;
}
struct xve_mcast *mcast = multicast->context;
struct net_device *dev = mcast->netdev;
+	xve_dbg_mcast(netdev_priv(dev),
+		      "Sendonly join completion for %pI6 LID 0x%04x (status %d)\n",
+		      multicast->rec.mgid.raw,
+		      be16_to_cpu(multicast->rec.mlid), status);
/* We trap for port events ourselves. */
if (status == -ENETRESET)
return 0;
rec.flow_label = priv->broadcast->mcmember.flow_label;
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- xve_dbg_mcast(priv, "%s Joining send only join mtu %d\n", __func__,
- rec.mtu);
+	xve_dbg_mcast(priv, "%s sendonly join: mtu %d rate %d\n",
+		      __func__, rec.mtu, rec.rate);
mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca,
priv->port, &rec,
struct net_device *dev = mcast->netdev;
struct xve_dev_priv *priv = netdev_priv(dev);
- xve_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
- mcast->mcmember.mgid.raw, status);
+ priv->bcast_mlid = be16_to_cpu(multicast->rec.mlid);
+	xve_dbg_mcast(priv, "join completion for %pI6 LID 0x%04x (status %d)\n",
+ mcast->mcmember.mgid.raw, priv->bcast_mlid, status);
/* We trap for port events ourselves. */
if (status == -ENETRESET)
IB_SA_MCMEMBER_REC_RATE_SELECTOR |
IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_HOP_LIMIT;
- rec.qkey = 0x0;
+ rec.qkey = cpu_to_be32(priv->gw.t_qkey);
rec.traffic_class = 0x0;
rec.sl = 0x0;
rec.flow_label = 0x0;
rec.rate = mcast_rate;
}
- xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d\n",
- mcast->mcmember.mgid.raw, rec.pkey, rec.qkey);
+	xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d rate %d\n",
+ mcast->mcmember.mgid.raw, rec.pkey, rec.qkey, rec.rate);
set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
return 0;
}
-void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+int xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
struct xve_dev_priv *priv = netdev_priv(dev);
struct xve_mcast *mcast;
+ int ret = NETDEV_TX_OK;
if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags) ||
!priv->broadcast ||
!test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
INC_TX_DROP_STATS(priv, dev);
dev_kfree_skb_any(skb);
- return;
+ return ret;
+ }
+
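+	/*
+	 * On uplink vNICs with the gateway link up, also hand a copy of
+	 * the multicast frame to the gateway.
+	 */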
+ if (xve_is_uplink(priv) && xve_gw_linkup(priv)) {
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (nskb)
+ ret = xve_gw_send(dev, nskb);
}
mcast = __xve_mcast_find(dev, mgid);
else {
INC_TX_DROP_STATS(priv, dev);
dev_kfree_skb_any(skb);
+ return ret;
}
if (test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
out:
if (mcast && mcast->ah) {
- xve_test("%s about to send mcast %02x%02x%02x%02x%02x%02x",
- __func__, skb->data[0], skb->data[1], skb->data[2],
- skb->data[3], skb->data[4], skb->data[5]);
- xve_test("ah=%p proto=%02x%02x for %s\n", mcast->ah->ah,
- skb->data[12], skb->data[13], dev->name);
- xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		xve_test("%s about to send mcast %pM",
+			 __func__, eth_hdr(skb)->h_dest);
+		xve_test("ah=%p proto=0x%04x for %s\n",
+			 mcast->ah->ah, ntohs(eth_hdr(skb)->h_proto),
+			 dev->name);
+ xve_get_ah_refcnt(mcast->ah);
+ ret = xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN, 0);
}
-
+ return ret;
}
void xve_mcast_carrier_on_task(struct work_struct *work)
"ib lid_active count:\t\t",
"ib pkey_change count:\t\t",
"ib invalid count:\t\t",
+ "uplink unicast:\t\t\t",
+ "Heartbeat Count:\t\t",
+ "Link State message count:\t",
+	"RX frames without GRH:\t\t",
};
static char *misc_counter_name[XVE_MISC_MAX_COUNTERS] = {
tmp_buf[0] = 0;
print_mgid_buf(tmp_buf, bcast_mgid_token);
seq_printf(m, "Bcast Mgid:\t\t\t%s\n", tmp_buf);
+ seq_printf(m, "Bcast Mlid:\t\t\t0x%04x\n", vp->bcast_mlid);
tmp_buf[0] = 0;
print_mgid_buf(tmp_buf, local_gid_token);
seq_printf(m, "WQ Failed:\t\t\t%ld\n", vp->work_queue_failed);
seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+ if (xve_is_uplink(vp)) {
+		seq_printf(m, "Time since last heartbeat: %llu sec\n",
+			   (unsigned long long)((jiffies - vp->last_hbeat) / HZ));
+ seq_printf(m, "TCA info:\t\t\tGID: %pI6\tQPN: %u\n",
+ &vp->gw.t_gid.raw, vp->gw.t_data_qp);
+ }
+
vp->next_page = 1;
out:
return 0;
int ret;
ret = sscanf(buffer, "%s", action);
- if (ret != 1) {
+ if (ret != 1)
return -EINVAL;
- }
+
if ((strlen(action) == 1) && (atoi(action) == 0)) {
/* Clear counters */
memset(vp->counters, 0, sizeof(vp->counters));
struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
int newval, ret;
char *buf = (char *) __get_free_page(GFP_USER);
- if (!buf) {
+
+ if (!buf)
return -ENOMEM;
- }
- if (copy_from_user(buf, buffer, count - 1)) {
+ if (copy_from_user(buf, buffer, count - 1))
goto out;
- }
+
buf[count] = '\0';
ret = kstrtoint(buf, 0, &newval);
- if (ret != 0) {
- return -EINVAL;
- }
+	if (ret != 0)
+		goto out;
- if (newval == 0) {
- /* Clear counters */
- memset(vp->counters, 0, sizeof(vp->counters));
- vp->counters_cleared++;
- }
- return count;
+	if (newval == 0) {
+		/* Clear counters */
+		memset(vp->counters, 0, sizeof(vp->counters));
+		vp->counters_cleared++;
+	}
+	free_page((unsigned long)buf);
+	return count;
out:
- free_page((unsigned long)buf);
- return -EINVAL;
+ free_page((unsigned long)buf);
+ return -EINVAL;
}
static int xve_proc_open_device_counters(struct inode *inode, struct file *file)
{
int newval, ret;
char *buf = (char *) __get_free_page(GFP_USER);
- if (!buf) {
+
+ if (!buf)
return -ENOMEM;
- }
- if (copy_from_user(buf, buffer, count - 1)) {
+ if (copy_from_user(buf, buffer, count - 1))
goto out;
- }
+
buf[count] = '\0';
ret = kstrtoint(buf, 0, &newval);
- if (ret != 0) {
+ if (ret != 0)
return -EINVAL;
- }
+
xve_debug_level = newval;
return count;
&& ((jiffies - fwt_entry->last_refresh) >=
priv->aging_delay)) {
pr_info("XVE: %s MAC ", priv->xve_name);
- pr_info("%02x:%02x:%02x:%02x:%02x:%02x",
- ALIGN_TO_FF(smac[0]),
- ALIGN_TO_FF(smac[1]),
- ALIGN_TO_FF(smac[2]),
- ALIGN_TO_FF(smac[3]),
- ALIGN_TO_FF(smac[4]),
- ALIGN_TO_FF(smac[5]));
+ pr_info("%pM", smac);
pr_info(" vlan %d Aged out\n",
fwt_entry->vlan);
/*
struct xve_path *path;
char from[64], to[64];
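+	/*
+	 * On uplink vNICs, addresses learned from the gateway GID always
+	 * map to the gateway data QPN.
+	 */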
+ if (xve_is_uplink(priv) &&
+ !memcmp(&gid->raw, &priv->gw.t_gid.raw, sizeof(*gid)))
+ qpn = priv->gw.t_data_qp;
+
fwt_entry = xve_fwt_lookup(xve_fwt, smac, vlan, 1);
if (fwt_entry) {
if (unlikely
(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)))) {
print_mgid_buf(from, (char *)fwt_entry->dgid.raw);
print_mgid_buf(to, (char *)gid->raw);
- pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x ",
- priv->xve_name, ALIGN_TO_FF(smac[0]),
- ALIGN_TO_FF(smac[1]), ALIGN_TO_FF(smac[2]),
- ALIGN_TO_FF(smac[3]), ALIGN_TO_FF(smac[4]),
- ALIGN_TO_FF(smac[5]));
+ pr_info("XVE: %s MAC %pM ",
+ priv->xve_name, smac);
pr_info(" vlan %d moved from GID %s to GID %s\n",
fwt_entry->vlan, from, to);
}
memset(fwt_entry, 0, sizeof(struct xve_fwt_entry));
print_mgid_buf(from, (char *)gid->raw);
- pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x",
- priv->xve_name, ALIGN_TO_FF(smac[0]),
- ALIGN_TO_FF(smac[1]),
- ALIGN_TO_FF(smac[2]), ALIGN_TO_FF(smac[3]),
- ALIGN_TO_FF(smac[4]), ALIGN_TO_FF(smac[5]));
- pr_info("vlan %d learned from GID %s, mode: %s Fwt %p\n",
- vlan, from, qpn ? "UD" : "RC", fwt_entry);
+		pr_info("XVE: %s MAC %pM ", priv->xve_name, smac);
+		pr_info("vlan %d learned from GID %s, mode: %s QPN 0x%x Fwt %p\n",
+			vlan, from, qpn ? "UD" : "RC", qpn, fwt_entry);
priv->counters[XVE_MAC_LEARN_COUNTER]++;
memcpy(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid));
fwt_entry->dqpn = qpn;
skb->protocol = eth_type_trans(skb, priv->netdev);
skb->dev = priv->netdev;
skb_pkt_type(skb, PACKET_HOST);
- if (test_bit(XVE_FLAG_CSUM, &priv->flags))
+ if (xve_is_ovn(priv) && test_bit(XVE_FLAG_CSUM, &priv->flags))
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->truesize = skb->len + sizeof(struct sk_buff);
}
ret = ib_attach_mcast(priv->qp, mgid, mlid);
if (ret)
xve_warn(priv,
- "failed to attach to multicast group, ret = %d\n",
- ret);
+ "failed to attach to multicast group, ret = %d\n",
+ ret);
out:
kfree(qp_attr);
struct ib_qp_attr qp_attr;
int attr_mask;
- if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+ if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+ xve_warn(priv, "PKEY not assigned\n");
return -1;
+ }
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
struct xve_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
- .max_send_wr = xve_sendq_size,
- .max_recv_wr = xve_recvq_size,
+ .max_send_wr = priv->xve_sendq_size,
+ .max_recv_wr = priv->xve_recvq_size,
.max_send_sge = 1,
- .max_recv_sge = XVE_UD_RX_SG},
+ .max_recv_sge = xve_ud_rx_sg(priv)},
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UD
};
-
- int ret, size;
- int i;
struct ethtool_coalesce *coal;
+ int ret, size, max_sge;
+ int i;
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
pr_warn("%s: failed to allocate PD for %s\n",
- ca->name, priv->xve_name);
+ ca->name, priv->xve_name);
return -ENODEV;
}
goto out_free_pd;
}
- size = xve_recvq_size + 1;
+ size = priv->xve_recvq_size + 1;
ret = xve_cm_dev_init(dev);
if (ret != 0) {
pr_err("%s Failed for %s [ret %d ]\n", __func__,
- priv->xve_name, ret);
+ priv->xve_name, ret);
goto out_free_mr;
}
- size += xve_sendq_size;
- size += xve_recvq_size + 1; /* 1 extra for rx_drain_qp */
+ size += priv->xve_sendq_size;
+	size += priv->xve_recvq_size + 1;	/* 1 extra for rx_drain_qp */
+
+	/* Create the receive completion queue */
priv->recv_cq =
ib_create_cq(priv->ca, xve_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) {
goto out_free_mr;
}
+	/* Create the send completion queue */
priv->send_cq = ib_create_cq(priv->ca, xve_send_comp_handler, NULL,
- dev, xve_sendq_size, 0);
+ dev, priv->xve_sendq_size, 0);
if (IS_ERR(priv->send_cq)) {
pr_warn("%s: failed to create send CQ for %s\n",
ca->name, priv->xve_name);
init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->recv_cq;
+ if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
+ init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
+
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
- if (dev->features & NETIF_F_SG)
- init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+ if (dev->features & NETIF_F_SG) {
+		/* Some HCAs (e.g. Titan) support fewer SGEs than
+		 * MAX_SKB_FRAGS + 1, so clamp to the device limit. */
+ max_sge = priv->dev_attr.max_sge;
+ if (max_sge >= (MAX_SKB_FRAGS + 1))
+ max_sge = MAX_SKB_FRAGS + 1;
+ init_attr.cap.max_send_sge = max_sge;
+ }
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
priv->rx_sge[1].length = PAGE_SIZE;
priv->rx_sge[1].lkey = priv->mr->lkey;
- priv->rx_wr.num_sge = XVE_UD_RX_SG;
+ priv->rx_wr.num_sge = xve_ud_rx_sg(priv);
} else {
priv->rx_sge[0].length = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
void xve_transport_dev_cleanup(struct net_device *dev)
{
struct xve_dev_priv *priv = netdev_priv(dev);
- int ret = 0;
+ int ret;
+ /* Destroy QP */
if (priv->qp) {
- if (ib_destroy_qp(priv->qp))
- xve_warn(priv, "ib_qp_destroy failed\n");
+ ret = ib_destroy_qp(priv->qp);
+ if (ret)
+ xve_warn(priv,
+ "ib_qp_destroy failed (ret = %d)\n", ret);
+
priv->qp = NULL;
clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
}
+
ret = ib_destroy_cq(priv->send_cq);
if (ret)
xve_warn(priv, "%s ib_destroy_cq (sendq) failed ret=%d\n",
- __func__, ret);
+ __func__, ret);
ret = ib_destroy_cq(priv->recv_cq);
if (ret)
xve_warn(priv, "%s ib_destroy_cq failed ret=%d\n",
- __func__, ret);
+ __func__, ret);
xve_cm_dev_cleanup(dev);
- ret = ib_dereg_mr(priv->mr);
- if (ret)
- xve_warn(priv, "%s ib_dereg_mr failed ret=%d\n", __func__, ret);
+ if (ib_dereg_mr(priv->mr))
+ xve_warn(priv, "ib_dereg_mr failed\n");
- ret = ib_dealloc_pd(priv->pd);
- if (ret)
- xve_warn(priv, "%s ib_dealloc_pd failed ret=%d\n",
- __func__, ret);
+ if (ib_dealloc_pd(priv->pd))
+ xve_warn(priv, "ib_dealloc_pd failed\n");
}
void xve_event(struct ib_event_handler *handler, struct ib_event *record)
switch (record->event) {
case IB_EVENT_SM_CHANGE:
- priv->counters[XVE_SM_CHANGE_COUNTER]++;
- xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
- break;
+ priv->counters[XVE_SM_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+ break;
case IB_EVENT_CLIENT_REREGISTER:
- priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
- set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
- xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
- break;
+ priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
+ set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+ xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+ break;
case IB_EVENT_PORT_ERR:
- priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
- xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
- break;
+ priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
case IB_EVENT_PORT_ACTIVE:
- priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
- xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
- break;
+ priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
case IB_EVENT_LID_CHANGE:
- priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
- xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
- break;
+ priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
case IB_EVENT_PKEY_CHANGE:
- priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
- xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
- break;
+ priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
+ break;
default:
- priv->counters[XVE_INVALID_EVENT_COUNTER]++;
- break;
+ priv->counters[XVE_INVALID_EVENT_COUNTER]++;
+ break;
}
}
XSMP_XVE_HA_INFO,
XSMP_XVE_ISCSI_INFO,
+ XSMP_XSF_FWD_TABLE,
+ XSMP_XSF_L2_TABLE,
+ XSMP_VNIC_READY,
+
XSMP_XVE_TYPE_MAX,
};
u8 xve_name[XVE_MAX_NAME_SIZE];
u16 service_level; /* SL value for this vnic */
u16 fc_active; /* 1: enable, 0:
- * disable host rate control */
+ * disable host rate control */
u16 cir; /* committed rate in mbps */
u16 pir; /* peak rate in mbps */
u32 cbs; /* committed burst size in bytes */
u32 pbs; /* peak burst size in bytes */
u8 vm_index; /* the index used by vmware
- * for persistence */
+ * for persistence */
u8 _reserved;
u16 mp_flag;
u8 mp_group[XVE_MP_GROUP_NAME_MAX];
/* for virtual network */
u32 net_id;
u8 vnet_mode;
+
+ u8 vnic_type;
+
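+	/* gateway (TCA) endpoint parameters advertised by the chassis */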
+ u64 tca_subnet_prefix;
+ u32 tca_ctrl_qp;
+ u32 tca_data_qp;
+ u16 tca_pkey;
+ u16 tca_qkey;
+
+ /* host must fill these in INSTALL ACK */
+ u64 hca_subnet_prefix;
+ u32 hca_ctrl_qp;
+ u32 hca_data_qp;
+ u16 hca_pkey;
+ u16 hca_qkey;
} __packed;
u8 bytes[512];
};
#define XVE_UPDATE_QOS (1 << 7)
#define XVE_UPDATE_ACL (1 << 8)
#define XVE_UPDATE_MP_FLAG (1 << 10)
-#define XVE_XT_STATE_DOWN (1 << 30)
+#define XVE_UPDATE_XT_STATE_DOWN (1 << 30)
#define XVE_UPDATE_XT_CHANGE (1 << 31)
/* mp_flag */